|
|
|
|
|
|
// Definitions |
|
|
|
//-------------------------------------------------------------------------------------------------- |
|
|
|
|
|
|
|
// Kernel variants: each '#pragma kernel' line names a kernel and passes preprocessor defines for it.
// Here the defines rename the SubsurfaceScattering entry point and set SSS_ENABLE_NEAR_FIELD per variant.
#pragma kernel SubsurfaceScatteringQualityNormal SubsurfaceScattering=SubsurfaceScatteringQualityNormal SSS_ENABLE_NEAR_FIELD=0 |
|
|
|
#pragma kernel SubsurfaceScatteringQualityUltra SubsurfaceScattering=SubsurfaceScatteringQualityUltra SSS_ENABLE_NEAR_FIELD=1 |
|
|
|
|
|
|
|
// TODO: use sharp load hoisting on PS4. |
|
|
|
// Feature switches for the subsurface scattering pass (0 = disabled, non-zero = enabled).
#define SSS_TAA_INTEGRATION 1 // Smoother results at the cost of a tiny amount of flickering in under-sampled areas |
|
|
|
// NOTE(review): the '#pragma kernel' lines earlier in this file also set SSS_ENABLE_NEAR_FIELD per kernel variant - confirm this default does not conflict with them.
#define SSS_ENABLE_NEAR_FIELD 0 // Greatly increases the number of samples. Comes at a high cost. |
|
|
|
#define SSS_RANDOM_ROTATION 1 // Hides undersampling artifacts with high-frequency noise. TAA blurs the noise. |
|
|
|
#define SSS_USE_TANGENT_PLANE 0 // Improves the accuracy of the approximation (0 -> 1st order). High cost. Does not work with back-facing normals. |
|
|
|
#define SSS_CLAMP_ARTIFACT 0 // Reduces bleeding. Use with SSS_USE_TANGENT_PLANE. |
|
|
|
// NOTE(review): undocumented debug switch; presumably visualizes the selected kernel LOD - TODO confirm.
#define SSS_DEBUG_LOD 0 |
|
|
|
|
|
|
|
|
|
|
void EvaluateSample(uint i, uint n, uint profileID, uint iR, uint iP, float2 centerCoord, int2 cacheOffset, |
|
|
|
float3 shapeParam, float3 centerPosVS, float mmPerUnit, float2 pixelsPerMm, |
|
|
|
float3 tangentX, float3 tangentY, float4x4 projMatrix, |
|
|
|
float startAngle, float3 tangentX, float3 tangentY, float4x4 projMatrix, |
|
|
|
float r = _FilterKernels[profileID][i][iR]; |
|
|
|
float r = _FilterKernels[profileID][i][iR]; |
|
|
|
#if (SSS_TAA_INTEGRATION != 0) |
|
|
|
// Note that we repeat the pattern twice during the TAA cycle to reduce flickering. |
|
|
|
float sinPsi = _TaaFrameRotation.x; |
|
|
|
float cosPsi = _TaaFrameRotation.y; |
|
|
|
// The angle 'psi' is loop-invariant. All the trigonometry is done at compile time. |
|
|
|
// The angle 'psi' is loop-invariant. |
|
|
|
float sinPsi = sin(startAngle); |
|
|
|
float cosPsi = cos(startAngle); |
|
|
|
|
|
|
|
// cos(a + b) = cos(a) * cos(b) - sin(a) * sin(b) |
|
|
|
// sin(a + b) = sin(a) * cos(b) + cos(a) * sin(b) |
|
|
|
float cosSum = cos(phi) * cosPsi - sin(phi) * sinPsi; |
|
|
|
|
|
|
#else |
|
|
|
float2 vec = r * float2(cos(phi), sin(phi)); |
|
|
|
#endif |
|
|
|
|
|
|
|
// Compute the screen-space position and the squared distance (in mm) in the image plane. |
|
|
|
int2 position; float xy2; |
|
|
|
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
#pragma kernel SubsurfaceScattering |
|
|
|
|
|
|
|
void SubsurfaceScattering(uint3 reorderedGroupId : SV_GroupID, |
|
|
|
uint groupThreadId : SV_GroupThreadID) |
|
|
|
void SubsurfaceScattering(uint2 groupId : SV_GroupID, |
|
|
|
uint groupThreadId : SV_GroupThreadID) |
|
|
|
|
|
|
|
// We dispatch four swizzled 16x16 groups per 32x32 macrotile. |
|
|
|
// Therefore, we need to reorder. TODO: macrotile order. |
|
|
|
uint2 groupQuad = DeinterleaveQuad(reorderedGroupId.x); |
|
|
|
uint2 groupId = uint2(reorderedGroupId.y * 2 + groupQuad.x, reorderedGroupId.z * 2 + groupQuad.y); |
|
|
|
|
|
|
|
// Arrange threads in the Morton order to optimally match the memory layout of GCN tiles. |
|
|
|
uint2 groupCoord = DecodeMorton2D(groupThreadId); |
|
|
|
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
#if SSS_RANDOM_ROTATION |
|
|
|
float startAngle = TWO_PI * GenerateHashedRandomFloat(asuint(centerPosVS)); |
|
|
|
#else |
|
|
|
float startAngle = 0; |
|
|
|
#endif |
|
|
|
|
|
|
|
// Use more samples for SS regions larger than 5x5 pixels (rotated by 45 degrees). |
|
|
|
bool useNearFieldKernel = SSS_ENABLE_NEAR_FIELD && maxDistInPixels > SSS_LOD_THRESHOLD; |
|
|
|
|
|
|
|
|
|
|
// Compute the indices used to access the individual components of the float4 of the kernel. |
|
|
|
uint iR = useNearFieldKernel ? 0 : 2; // radius |
|
|
|
uint iP = useNearFieldKernel ? 1 : 3; // rcp(pdf) |
|
|
|
uint n = useNearFieldKernel ? SSS_N_SAMPLES_NEAR_FIELD : SSS_N_SAMPLES_FAR_FIELD; |
|
|
|
|
|
|
|
float centerRadius = _FilterKernels[profileID][0][iR]; |
|
|
|
float centerRcpPdf = _FilterKernels[profileID][0][iP]; |
|
|
|
|
|
|
float3 totalIrradiance = centerWeight * centerIrradiance; |
|
|
|
float3 totalWeight = centerWeight; |
|
|
|
|
|
|
|
int i, n; // Declare once to avoid the warning from the Unity shader compiler. |
|
|
|
uint i; // Declare once to avoid the warning from the Unity shader compiler. |
|
|
|
for (i = 1, n = SSS_N_SAMPLES_FAR_FIELD; i < n; i++) |
|
|
|
for (i = 1; i < SSS_N_SAMPLES_FAR_FIELD; i++) |
|
|
|
tangentX, tangentY, projMatrix, |
|
|
|
startAngle, tangentX, tangentY, projMatrix, |
|
|
|
totalIrradiance, totalWeight); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
UNITY_UNROLL |
|
|
|
for (i = SSS_N_SAMPLES_FAR_FIELD, n = SSS_N_SAMPLES_NEAR_FIELD; i < n; i++) |
|
|
|
for (i = SSS_N_SAMPLES_FAR_FIELD; i < SSS_N_SAMPLES_NEAR_FIELD; i++) |
|
|
|
tangentX, tangentY, projMatrix, |
|
|
|
startAngle, tangentX, tangentY, projMatrix, |
|
|
|
totalIrradiance, totalWeight); |
|
|
|
} |
|
|
|
|