Evgenii Golubev
8 年前
当前提交
f30f49e7
共有 6 个文件被更改,包括 731 次插入 和 16 次删除
-
46Assets/ScriptableRenderPipeline/HDRenderPipeline/HDRenderPipeline.cs
-
5Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
-
604Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute
-
10Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute.meta
-
72Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl
-
10Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl.meta
|
|||
// ===================== Performs integration of the Disney BSSRDF over a disk ===================== |
|||
|
|||
//-------------------------------------------------------------------------------------------------- |
|||
// Definitions |
|||
//-------------------------------------------------------------------------------------------------- |
|||
|
|||
#pragma enable_d3d11_debug_symbols |
|||
|
|||
// Tweak parameters.
// 0: ComputeBilateralWeight() ignores the depth difference between the sample and the center.
#define SSS_BILATERAL_FILTER  1
// Non-zero: integrate over the tangent plane in view space instead of the screen-aligned plane.
#define SSS_USE_TANGENT_PLANE 0
// Non-zero: saturate() every bilateral weight.
#define SSS_CLAMP_ARTIFACT    0
// Non-zero: output a flat color identifying the LoD path taken instead of the filtered result.
#define SSS_DEBUG_LOD         0
// Non-zero: paint pixels red where the view-space normal has z >= 0.
#define SSS_DEBUG_NORMAL_VS   0

// Do not modify these.
// NOTE(review): SSS_PASS is not referenced in this file; presumably consumed by the included headers.
#define SSS_PASS              1
#define MILLIMETERS_PER_METER 1000
#define CENTIMETERS_PER_METER 100
// Thread group footprint: GROUP_SIZE_1D x GROUP_SIZE_1D pixels, dispatched as a 1D group.
#define GROUP_SIZE_1D         16
#define GROUP_SIZE_2D         (GROUP_SIZE_1D * GROUP_SIZE_1D)
// The LDS cache extends the group footprint by TEXTURE_CACHE_BORDER texels on every side.
#define TEXTURE_CACHE_BORDER  2
#define TEXTURE_CACHE_SIZE_1D (GROUP_SIZE_1D + 2 * TEXTURE_CACHE_BORDER)
|||
|
|||
//-------------------------------------------------------------------------------------------------- |
|||
// Included headers |
|||
//-------------------------------------------------------------------------------------------------- |
|||
|
|||
#include "../../../../ShaderLibrary/Common.hlsl" |
|||
#include "../../../../ShaderLibrary/SpaceFillingCurves.hlsl" |
|||
#include "../../../ShaderConfig.cs.hlsl" |
|||
#include "../../../ShaderVariables.hlsl" |
|||
#define UNITY_MATERIAL_LIT |
|||
#include "../../../Material/Material.hlsl" |
|||
#include "../../../Lighting/LightDefinition.cs.hlsl" |
|||
|
|||
//-------------------------------------------------------------------------------------------------- |
|||
// Inputs & outputs |
|||
//-------------------------------------------------------------------------------------------------- |
|||
|
|||
// Per-profile constants; the first index is the subsurface profile ID.
float _WorldScales[SSS_N_PROFILES];                                         // Size of the world unit in meters
float _FilterKernelsNearField[SSS_N_PROFILES][SSS_N_SAMPLES_NEAR_FIELD][2]; // 0 = radius, 1 = reciprocal of the PDF
float _FilterKernelsFarField[SSS_N_PROFILES][SSS_N_SAMPLES_FAR_FIELD][2];   // 0 = radius, 1 = reciprocal of the PDF

// Read-only inputs.
DECLARE_GBUFFER_TEXTURE(_GBufferTexture); // Contains the albedo and SSS parameters
TEXTURE2D(_DepthTexture);                 // Z-buffer
TEXTURE2D(_StencilTexture);               // Per-pixel stencil; DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_HTile);                        // Per-tile (8x8 pixels) stencil; DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_IrradianceSource);             // Includes transmitted light

// Read-write output.
// Contains the HDR color for non-SSS materials.
// In case of SSS, it only contains the specular lighting, which we additively blend with the SSS lighting.
RW_TEXTURE2D(float4, _CameraColorTexture);
|||
|
|||
//-------------------------------------------------------------------------------------------------- |
|||
// Implementation |
|||
//-------------------------------------------------------------------------------------------------- |
|||
|
|||
// Evaluates the integrand of the disk integral, (2 * PI * r) * KernelVal(), at radius 'r'
// for the per-channel shape parameter 'S'.
// N.b.: the constant factor of 0.25 is deliberately omitted, so the result is 4x too large.
// This is irrelevant because the caller renormalizes by the accumulated weight.
float3 KernelValCircle(float r, float3 S)
{
    float  negThirdR = (-1.0 / 3.0) * r;
    float3 expThird  = exp(negThirdR * S);             // exp(-S * r / 3)
    float3 expFull   = expThird * expThird * expThird; // exp(-S * r)

    return /* 0.25 * */ S * (expThird + expFull);
}
|||
|
|||
// Computes F(r)/P(r), s.t. r = sqrt(a^2 + b^2).
// 'a2' is the squared in-plane distance, 'b' is the depth difference, 'mmPerUnit' converts
// view-space units to millimeters, 'S' is the shape parameter and 'rcpPdf' is 1/P(r).
// Rescaling of the PDF is handled by 'totalWeight' at the call site.
float3 ComputeBilateralWeight(float a2, float b, float mmPerUnit, float3 S, float rcpPdf)
{
#if (SSS_BILATERAL_FILTER == 0)
    // Bilateral filtering disabled: pretend the sample lies at the depth of the center.
    b = 0;
#endif

#if SSS_USE_TANGENT_PLANE
    // Both 'a2' and 'b' are in view-space units: convert the combined distance.
    float r = mmPerUnit * sqrt(a2 + b * b);
#else
    // 'a2' is already in mm^2: only 'b' requires unit conversion.
    float bInMm = b * mmPerUnit;
    float r     = sqrt(a2 + bInMm * bInMm);
#endif

    float3 weight = KernelValCircle(r, S) * rcpPdf;

#if SSS_CLAMP_ARTIFACT
    weight = saturate(weight);
#endif

    return weight;
}
|||
|
|||
// Processes sample 'i' (out of 'n') of the filter 'kernel' for profile 'profileID':
// locates the sample on screen, loads its irradiance and depth, and accumulates the
// bilaterally weighted irradiance into 'totalIrradiance' and the weight into 'totalWeight'.
// Besides its parameters, the macro expects 'projMatrix' to be in scope at the expansion
// site (it is used by the tangent-plane path).
// The sample depth is read from '_DepthTexture', i.e. the same texture the kernel
// uses for the depth of the central sample.
#define SSS_ITER(i, n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
                 useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
                 totalIrradiance, totalWeight) \
{ \
    float r = kernel[profileID][i][0]; \
    /* The relative sample position is known at compile time. */ \
    float phi = SampleDiskFibonacci(i, n).y; \
    float2 vec = r * float2(cos(phi), sin(phi)); \
    \
    /* Compute the screen-space position and the associated irradiance. */ \
    float2 position; float3 irradiance; \
    /* Compute the squared distance (in mm) in the screen-aligned plane. */ \
    float dXY2; \
    \
    if (useTangentPlane) \
    { \
        /* 'vec' is given relative to the tangent frame. */ \
        float3 relPosVS = vec.x * tangentX + vec.y * tangentY; \
        float3 positionVS = centerPosVS + relPosVS; \
        float4 positionCS = mul(projMatrix, float4(positionVS, 1)); \
        float2 positionSS = ComputeScreenSpacePosition(positionCS); \
        \
        position = positionSS * _ScreenSize.xy; \
        irradiance = LOAD_TEXTURE2D(_IrradianceSource, position).rgb; \
        dXY2 = dot(relPosVS.xy, relPosVS.xy); \
    } \
    else \
    { \
        /* 'vec' is given directly in screen-space. */ \
        position = centerPosUnSS + vec * pixelsPerMm; \
        irradiance = LOAD_TEXTURE2D(_IrradianceSource, position).rgb; \
        dXY2 = r * r; \
    } \
    \
    /*************************************************************************/ \
    /* Samples with zero irradiance are skipped: they contribute neither to */ \
    /* 'totalIrradiance' nor to 'totalWeight'. This could happen for 3 */ \
    /* reasons. Most likely, the surface fragment does not have an SSS */ \
    /* material. Alternatively, our sample comes from a region without any */ \
    /* geometry. Finally, the surface fragment could be completely shadowed. */ \
    /* We do not terminate the loop since we want to gather the contribution */ \
    /* of the remaining samples (e.g. in case of hair covering skin). */ \
    /* Note: See comment in the output of deferred.shader */ \
    /*************************************************************************/ \
    /* TODO: see if making this a [branch] improves performance. */ \
    [flatten] \
    if (any(irradiance)) \
    { \
        /* Apply bilateral weighting. */ \
        float z = LOAD_TEXTURE2D(_DepthTexture, position).r; \
        float d = LinearEyeDepth(z, _ZBufferParams); \
        float t = d - centerPosVS.z; \
        float p = kernel[profileID][i][1]; \
        float3 w = ComputeBilateralWeight(dXY2, t, mmPerUnit, shapeParam, p); \
        \
        totalIrradiance += w * irradiance; \
        totalWeight += w; \
    } \
}
|||
|
|||
// Integrates all 'n' samples of the filter 'kernel' for profile 'profileID'.
// Sample 0 is the central sample: it is weighted without bilateral attenuation and
// initializes both accumulators; samples 1..n-1 are handled by SSS_ITER.
// Besides its parameters, the macro expects 'centerIrradiance' to be in scope at the
// expansion site.
#define SSS_LOOP(n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
                 useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
                 totalIrradiance, totalWeight) \
{ \
    /* Central sample: [0] = radius, [1] = reciprocal of the PDF. */ \
    float centerRadius = kernel[profileID][0][0]; \
    float centerRcpPdf = kernel[profileID][0][1]; \
    \
    totalWeight = KernelValCircle(centerRadius, shapeParam) * centerRcpPdf; \
    totalIrradiance = totalWeight * centerIrradiance; \
    \
    /* Integrate over the screen-aligned or tangent plane in the view space. */ \
    [unroll] \
    for (uint i = 1; i < n; i++) \
    { \
        SSS_ITER(i, n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
                 useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
                 totalIrradiance, totalWeight) \
    } \
}
|||
|
|||
// Returns true if the stencil value stored for 'pixelCoord' equals 'stencilRef'.
// The test is hierarchical: a mismatch in '_HTile' (one entry per 8x8 pixels) rejects
// the pixel immediately; a match only means the per-pixel value must still be checked.
bool StencilTest(int2 pixelCoord, float stencilRef)
{
    // Perform the stencil test (reject at the tile rate).
    int2 tileCoord = pixelCoord / 8;

    [branch] if (stencilRef != LOAD_TEXTURE2D(_HTile, tileCoord).r)
    {
        return false;
    }

    // Our copy of HTile does not allow to accept at the tile rate.
    // Therefore, we have to additionally perform the stencil test at the pixel rate.
    return stencilRef == LOAD_TEXTURE2D(_StencilTexture, pixelCoord).r;
}
|||
|
|||
#pragma kernel SubsurfaceScattering

// Per-group copy of the tile and its border: float4(irradiance, linearDepth).
// NOTE(review): the cache is populated by the kernel, but the integration macros
// (SSS_LOOP / SSS_ITER) still fetch their samples from the textures.
groupshared float4 textureCache[TEXTURE_CACHE_SIZE_1D][TEXTURE_CACHE_SIZE_1D];
// Written by thread 0: whether any of the HTile entries covered by the group passes the stencil test.
groupshared bool   processGroup;

// Filters the irradiance of every SSS pixel (as identified by the stencil test) of a
// GROUP_SIZE_1D x GROUP_SIZE_1D tile and additively blends the result, modulated by
// the diffuse color, into '_CameraColorTexture'.
//   groupId       - 2D index of the tile.
//   groupThreadId - 1D index of the thread within the group, interpreted as a Morton code.
[numthreads(GROUP_SIZE_2D, 1, 1)]
void SubsurfaceScattering(uint2 groupId       : SV_GroupID,
                          uint  groupThreadId : SV_GroupThreadID)
{
    // The group is partitioned into blocks of 64 threads ("waves") and, within them, of 4 threads ("quads").
    const uint waveIndex = groupThreadId / 64;
    const uint laneIndex = groupThreadId % 64;
    const uint quadIndex = laneIndex / 4;

    // Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
    const uint2 localCoord = DecodeMorton2D(groupThreadId);
    const uint2 tileAnchor = groupId * GROUP_SIZE_1D;
    const uint2 pixelCoord = tileAnchor + localCoord;
    const uint2 cacheCoord = localCoord + TEXTURE_CACHE_BORDER;
    const float stencilRef = STENCILLIGHTINGUSAGE_SPLIT_LIGHTING;

    [branch] if (groupThreadId == 0)
    {
        // Check whether the thread group needs to perform any work.
        // The group covers 2x2 entries of '_HTile' (one entry per 8x8 pixels, see StencilTest()).
        float s00 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(0, 0)).r;
        float s10 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(1, 0)).r;
        float s01 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(0, 1)).r;
        float s11 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(1, 1)).r;

        // Perform the stencil test (reject at the tile rate).
        processGroup = (stencilRef == s00 || stencilRef == s10 || stencilRef == s01 || stencilRef == s11);
    }

    // Wait for the LDS.
    GroupMemoryBarrierWithGroupSync();

    // 'processGroup' is the same for every thread of the group, so the whole group exits together.
    [branch] if (!processGroup) { return; }

    // Only meaningful if the pixel passes the stencil test; zero-initialized so that
    // they are never read uninitialized.
    float3 centerIrradiance = float3(0, 0, 0);
    float  centerDepth      = 0;
    float4 cachedValue      = float4(0, 0, 0, 0);

    bool passedStencilTest = StencilTest(pixelCoord, stencilRef);

    [branch] if (passedStencilTest)
    {
        centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
        centerDepth      = LOAD_TEXTURE2D(_DepthTexture,     pixelCoord).r;
        cachedValue      = float4(centerIrradiance, LinearEyeDepth(centerDepth, _ZBufferParams));
    }

    // Populate the central region of the LDS cache.
    textureCache[cacheCoord.x][cacheCoord.y] = cachedValue;

    // The border is 1 quad wide: 10x10 - 8x8 = 36 quads, i.e. 9 quads (36 texels) per wave.
    const uint numBorderQuadsPerWave = TEXTURE_CACHE_SIZE_1D / 2 - 1;
    const uint halfCacheWidthInQuads = TEXTURE_CACHE_SIZE_1D / 4;

    [branch] if (quadIndex < numBorderQuadsPerWave)
    {
        // Fetch another texel into the LDS.
        // Each wave handles the L-shaped border of one quadrant of the cache.
        uint2 startQuad = halfCacheWidthInQuads * uint2(waveIndex & 1, waveIndex >> 1);

        // 'waveIndex' is always in [0, 3]; the initializer only guarantees definite assignment.
        uint2 quadCoord = uint2(0, 0);

        // The traversal order is such that the quad's X coordinate is monotonically increasing.
        // Note: the compiler can heavily optimize the code below, as the switch is scalar,
        // and there are very few unique values due to the symmetry.
        switch (waveIndex)
        {
            case 0: // Top-left quadrant: left column (bottom to top), then top row.
                quadCoord.x = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
                quadCoord.y = max(0, (int)((halfCacheWidthInQuads - 1) - quadIndex));
                break;
            case 1: // Top-right quadrant: top row, then right column.
                quadCoord.x = min(quadIndex, halfCacheWidthInQuads - 1);
                quadCoord.y = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
                break;
            case 2: // Bottom-left quadrant: left column, then bottom row.
                quadCoord.x = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
                quadCoord.y = min(quadIndex, halfCacheWidthInQuads - 1);
                break;
            case 3: // Bottom-right quadrant: bottom row, then right column (bottom to top).
                quadCoord.x = min(quadIndex, halfCacheWidthInQuads - 1);
                quadCoord.y = min(halfCacheWidthInQuads - 1, 2 * (halfCacheWidthInQuads - 1) - quadIndex);
                break;
        }

        uint2  cacheCoord2  = (startQuad + quadCoord) * 2 + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
        // May be negative for the tiles adjacent to the top and left screen edges.
        int2   pixelCoord2  = (int2)(tileAnchor + cacheCoord2 - TEXTURE_CACHE_BORDER);
        float4 cachedValue2 = float4(0, 0, 0, 0);

        [branch] if (StencilTest(pixelCoord2, stencilRef))
        {
            float3 irradiance2 = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord2).rgb;
            float  depth2      = LOAD_TEXTURE2D(_DepthTexture,     pixelCoord2).r;
            cachedValue2       = float4(irradiance2, LinearEyeDepth(depth2, _ZBufferParams));
        }

        // Populate the border region of the LDS cache.
        textureCache[cacheCoord2.x][cacheCoord2.y] = cachedValue2;
    }

    // Wait for the LDS.
    GroupMemoryBarrierWithGroupSync();

    // No more barriers below this point, so individual threads are free to exit.
    [branch] if (!passedStencilTest) { return; }

    PositionInputs posInput = GetPositionInput(pixelCoord, _ScreenSize.zw);

    float3 unused;

    // The result of the stencil test allows us to statically determine the material type (SSS).
    BSDFData bsdfData;
    FETCH_GBUFFER(gbuffer, _GBufferTexture, pixelCoord);
    DECODE_FROM_GBUFFER(gbuffer, MATERIALFEATUREFLAGS_LIT_SSS, bsdfData, unused);

    int    profileID   = bsdfData.subsurfaceProfile;
    float  distScale   = bsdfData.subsurfaceRadius;
    float3 shapeParam  = _ShapeParams[profileID].rgb;
    float  maxDistance = _ShapeParams[profileID].a;

    // Reconstruct the view-space position corresponding to the central sample.
    float2 centerPosSS = posInput.positionSS;
    float2 cornerPosSS = centerPosSS + 0.5 * _ScreenSize.zw;
    float3 centerPosVS = ComputeViewSpacePosition(centerPosSS, centerDepth, _InvProjMatrix);
    float3 cornerPosVS = ComputeViewSpacePosition(cornerPosSS, centerDepth, _InvProjMatrix);

    // Rescaling the filter is equivalent to inversely scaling the world.
    float mmPerUnit  = MILLIMETERS_PER_METER * (_WorldScales[profileID] / distScale);
    float unitsPerMm = rcp(mmPerUnit);

    // Compute the view-space dimensions of the pixel as a quad projected onto geometry.
    float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy);
    float2 pixelsPerMm   = rcp(unitsPerPixel) * unitsPerMm;

    // We perform point sampling. Therefore, we can avoid the cost
    // of filtering if we stay within the bounds of the current pixel.
    // We use the value of 1 instead of 0.5 as an optimization.
    // N.b.: our LoD selection algorithm is the same regardless of
    // whether we integrate over the tangent plane or not, since we
    // don't want the orientation of the tangent plane to create
    // divergence of execution across the warp.
    float maxDistInPixels = maxDistance * max(pixelsPerMm.x, pixelsPerMm.y);

    [branch] if (distScale == 0 || maxDistInPixels < 1)
    {
    #if SSS_DEBUG_LOD
        _CameraColorTexture[pixelCoord] = float4(0, 0, 1, 1);
    #else
        _CameraColorTexture[pixelCoord] += float4(bsdfData.diffuseColor * centerIrradiance, 1);
    #endif
        return;
    }

    const bool useTangentPlane = SSS_USE_TANGENT_PLANE != 0;

    // 'projMatrix' is referenced by name inside SSS_ITER.
    float4x4 viewMatrix, projMatrix;
    GetLeftHandedViewSpaceMatrices(viewMatrix, projMatrix);

    // Compute the tangent frame in view space.
    float3 normalVS = mul((float3x3)viewMatrix, bsdfData.normalWS);
    float3 tangentX = GetLocalFrame(normalVS)[0] * unitsPerMm;
    float3 tangentY = GetLocalFrame(normalVS)[1] * unitsPerMm;

#if SSS_DEBUG_NORMAL_VS
    // We expect the view-space normal to be front-facing.
    if (normalVS.z >= 0) { _CameraColorTexture[pixelCoord] = float4(1, 0, 0, 1); return; }
#endif

    // The screen-space path of SSS_ITER adds fractional offsets to this position before
    // loading a texel. Anchor it at the pixel center so that (assuming LOAD_TEXTURE2D
    // truncates fractional coordinates) the nearest texel is selected, rather than
    // biasing the whole footprint by half a pixel.
    const float2 centerPixelPos = pixelCoord + 0.5;

    // Accumulate filtered irradiance and bilateral weights (for renormalization).
    float3 totalIrradiance, totalWeight;

    // Use fewer samples for SS regions smaller than 5x5 pixels (rotated by 45 degrees).
    [branch] if (maxDistInPixels < SSS_LOD_THRESHOLD)
    {
    #if SSS_DEBUG_LOD
        _CameraColorTexture[pixelCoord] = float4(0.5, 0.5, 0, 1); return;
    #else
        SSS_LOOP(SSS_N_SAMPLES_FAR_FIELD, _FilterKernelsFarField,
                 profileID, shapeParam, centerPixelPos, centerPosVS,
                 useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
                 totalIrradiance, totalWeight)
    #endif
    }
    else
    {
    #if SSS_DEBUG_LOD
        _CameraColorTexture[pixelCoord] = float4(1, 0, 0, 1); return;
    #else
        SSS_LOOP(SSS_N_SAMPLES_NEAR_FIELD, _FilterKernelsNearField,
                 profileID, shapeParam, centerPixelPos, centerPosVS,
                 useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
                 totalIrradiance, totalWeight)
    #endif
    }

    // Renormalize and additively blend with the specular lighting already stored in the target.
    _CameraColorTexture[pixelCoord] += float4(bsdfData.diffuseColor * totalIrradiance / totalWeight, 1);
}
|||
|
|||
/* |
|||
float4 Frag(Varyings input) : SV_Target |
|||
{ |
|||
PositionInputs posInput = GetPositionInput(input.positionCS.xy, _ScreenSize.zw); |
|||
|
|||
float3 unused; |
|||
|
|||
// Note: When we are in this SubsurfaceScattering shader we know that we are a SSS material. This shader is strongly coupled with the deferred Lit.shader. |
|||
// We can use the material classification facility to help the compiler to know we use SSS material and optimize the code (and don't require to read gbuffer with materialId). |
|||
uint featureFlags = MATERIALFEATUREFLAGS_LIT_SSS; |
|||
|
|||
BSDFData bsdfData; |
|||
FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS); |
|||
DECODE_FROM_GBUFFER(gbuffer, featureFlags, bsdfData, unused); |
|||
|
|||
int profileID = bsdfData.subsurfaceProfile; |
|||
float distScale = bsdfData.subsurfaceRadius; |
|||
#ifdef SSS_MODEL_DISNEY |
|||
float3 shapeParam = _ShapeParams[profileID].rgb; |
|||
float maxDistance = _ShapeParams[profileID].a; |
|||
#else |
|||
float maxDistance = _FilterKernelsBasic[profileID][SSS_BASIC_N_SAMPLES - 1].a; |
|||
#endif |
|||
|
|||
// Take the first (central) sample. |
|||
// TODO: copy its neighborhood into LDS. |
|||
float2 centerPosition = posInput.unPositionSS; |
|||
float3 centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, centerPosition).rgb; |
|||
|
|||
// Reconstruct the view-space position. |
|||
float2 centerPosSS = posInput.positionSS; |
|||
float2 cornerPosSS = centerPosSS + 0.5 * _ScreenSize.zw; |
|||
float centerDepth = LOAD_TEXTURE2D(_MainDepthTexture, centerPosition).r; |
|||
float3 centerPosVS = ComputeViewSpacePosition(centerPosSS, centerDepth, _InvProjMatrix); |
|||
float3 cornerPosVS = ComputeViewSpacePosition(cornerPosSS, centerDepth, _InvProjMatrix); |
|||
|
|||
#ifdef SSS_MODEL_DISNEY |
|||
// Rescaling the filter is equivalent to inversely scaling the world. |
|||
float mmPerUnit = MILLIMETERS_PER_METER * (_WorldScales[profileID] / distScale); |
|||
float unitsPerMm = rcp(mmPerUnit); |
|||
|
|||
// Compute the view-space dimensions of the pixel as a quad projected onto geometry. |
|||
float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy); |
|||
float2 pixelsPerMm = rcp(unitsPerPixel) * unitsPerMm; |
|||
|
|||
// We perform point sampling. Therefore, we can avoid the cost |
|||
// of filtering if we stay within the bounds of the current pixel. |
|||
// We use the value of 1 instead of 0.5 as an optimization. |
|||
// N.b.: our LoD selection algorithm is the same regardless of |
|||
// whether we integrate over the tangent plane or not, since we |
|||
// don't want the orientation of the tangent plane to create |
|||
// divergence of execution across the warp. |
|||
float maxDistInPixels = maxDistance * max(pixelsPerMm.x, pixelsPerMm.y); |
|||
|
|||
[branch] |
|||
if (distScale == 0 || maxDistInPixels < 1) |
|||
{ |
|||
#if SSS_DEBUG_LOD |
|||
return float4(0, 0, 1, 1); |
|||
#else |
|||
return float4(bsdfData.diffuseColor * centerIrradiance, 1); |
|||
#endif |
|||
} |
|||
|
|||
const bool useTangentPlane = SSS_USE_TANGENT_PLANE != 0; |
|||
|
|||
float4x4 viewMatrix, projMatrix; |
|||
GetLeftHandedViewSpaceMatrices(viewMatrix, projMatrix); |
|||
|
|||
// Compute the tangent frame in view space. |
|||
float3 normalVS = mul((float3x3)viewMatrix, bsdfData.normalWS); |
|||
float3 tangentX = GetLocalFrame(normalVS)[0] * unitsPerMm; |
|||
float3 tangentY = GetLocalFrame(normalVS)[1] * unitsPerMm; |
|||
|
|||
#if SSS_DEBUG_NORMAL_VS |
|||
// We expect the view-space normal to be front-facing. |
|||
if (normalVS.z >= 0) return float4(1, 0, 0, 1); |
|||
#endif |
|||
|
|||
// Accumulate filtered irradiance and bilateral weights (for renormalization). |
|||
float3 totalIrradiance, totalWeight; |
|||
|
|||
// Use fewer samples for SS regions smaller than 5x5 pixels (rotated by 45 degrees). |
|||
[branch] |
|||
if (maxDistInPixels < SSS_LOD_THRESHOLD) |
|||
{ |
|||
#if SSS_DEBUG_LOD |
|||
return float4(0.5, 0.5, 0, 1); |
|||
#else |
|||
SSS_LOOP(SSS_N_SAMPLES_FAR_FIELD, _FilterKernelsFarField, |
|||
profileID, shapeParam, centerPosition, centerPosVS, |
|||
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, |
|||
totalIrradiance, totalWeight) |
|||
#endif |
|||
} |
|||
else |
|||
{ |
|||
#if SSS_DEBUG_LOD |
|||
return float4(1, 0, 0, 1); |
|||
#else |
|||
SSS_LOOP(SSS_N_SAMPLES_NEAR_FIELD, _FilterKernelsNearField, |
|||
profileID, shapeParam, centerPosition, centerPosVS, |
|||
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, |
|||
totalIrradiance, totalWeight) |
|||
#endif |
|||
} |
|||
#else |
|||
// Rescaling the filter is equivalent to inversely scaling the world. |
|||
float metersPerUnit = _WorldScales[profileID] / distScale * SSS_BASIC_DISTANCE_SCALE; |
|||
float centimPerUnit = CENTIMETERS_PER_METER * metersPerUnit; |
|||
// Compute the view-space dimensions of the pixel as a quad projected onto geometry. |
|||
float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy); |
|||
float2 pixelsPerCm = rcp(centimPerUnit * unitsPerPixel); |
|||
|
|||
// Compute the filtering direction. |
|||
#ifdef SSS_FILTER_HORIZONTAL_AND_COMBINE |
|||
float2 unitDirection = float2(1, 0); |
|||
#else |
|||
float2 unitDirection = float2(0, 1); |
|||
#endif |
|||
|
|||
float2 scaledDirection = pixelsPerCm * unitDirection; |
|||
float phi = 0; // Random rotation; unused for now |
|||
float2x2 rotationMatrix = float2x2(cos(phi), -sin(phi), sin(phi), cos(phi)); |
|||
float2 rotatedDirection = mul(rotationMatrix, scaledDirection); |
|||
|
|||
// Load (1 / (2 * WeightedVariance)) for bilateral weighting. |
|||
#if RBG_BILATERAL_WEIGHTS |
|||
float3 halfRcpVariance = _HalfRcpWeightedVariances[profileID].rgb; |
|||
#else |
|||
float halfRcpVariance = _HalfRcpWeightedVariances[profileID].a; |
|||
#endif |
|||
|
|||
#ifndef SSS_FILTER_HORIZONTAL_AND_COMBINE |
|||
bsdfData.diffuseColor = float3(1, 1, 1); |
|||
#endif |
|||
|
|||
// Take the first (central) sample. |
|||
float2 samplePosition = posInput.unPositionSS; |
|||
float3 sampleWeight = _FilterKernelsBasic[profileID][0].rgb; |
|||
float3 sampleIrradiance = LOAD_TEXTURE2D(_IrradianceSource, samplePosition).rgb; |
|||
|
|||
// We perform point sampling. Therefore, we can avoid the cost |
|||
// of filtering if we stay within the bounds of the current pixel. |
|||
// We use the value of 1 instead of 0.5 as an optimization. |
|||
float maxDistInPixels = maxDistance * max(pixelsPerCm.x, pixelsPerCm.y); |
|||
|
|||
[branch] |
|||
if (distScale == 0 || maxDistInPixels < 1) |
|||
{ |
|||
#if SSS_DEBUG_LOD |
|||
return float4(0, 0, 1, 1); |
|||
#else |
|||
return float4(bsdfData.diffuseColor * sampleIrradiance, 1); |
|||
#endif |
|||
} |
|||
|
|||
#if SSS_DEBUG_LOD |
|||
return float4(0.5, 0.5, 0, 1); |
|||
#endif |
|||
|
|||
// Accumulate filtered irradiance and bilateral weights (for renormalization). |
|||
float3 totalIrradiance = sampleWeight * sampleIrradiance; |
|||
float3 totalWeight = sampleWeight; |
|||
|
|||
[unroll] |
|||
for (int i = 1; i < SSS_BASIC_N_SAMPLES; i++) |
|||
{ |
|||
samplePosition = posInput.unPositionSS + rotatedDirection * _FilterKernelsBasic[profileID][i].a; |
|||
sampleWeight = _FilterKernelsBasic[profileID][i].rgb; |
|||
sampleIrradiance = LOAD_TEXTURE2D(_IrradianceSource, samplePosition).rgb; |
|||
|
|||
[flatten] |
|||
if (any(sampleIrradiance)) |
|||
{ |
|||
// Apply bilateral weighting. |
|||
// Ref #1: Skin Rendering by Pseudo–Separable Cross Bilateral Filtering. |
|||
// Ref #2: Separable SSS, Supplementary Materials, Section E. |
|||
float rawDepth = LOAD_TEXTURE2D(_MainDepthTexture, samplePosition).r; |
|||
float sampleDepth = LinearEyeDepth(rawDepth, _ZBufferParams); |
|||
float zDistance = centimPerUnit * sampleDepth - (centimPerUnit * centerPosVS.z); |
|||
sampleWeight *= exp(-zDistance * zDistance * halfRcpVariance); |
|||
|
|||
totalIrradiance += sampleWeight * sampleIrradiance; |
|||
totalWeight += sampleWeight; |
|||
} |
|||
else |
|||
{ |
|||
// The irradiance is 0. This could happen for 3 reasons. |
|||
// Most likely, the surface fragment does not have an SSS material. |
|||
// Alternatively, our sample comes from a region without any geometry. |
|||
// Finally, the surface fragment could be completely shadowed. |
|||
// Our blur is energy-preserving, so 'centerWeight' should be set to 0. |
|||
// We do not terminate the loop since we want to gather the contribution |
|||
// of the remaining samples (e.g. in case of hair covering skin). |
|||
} |
|||
} |
|||
#endif |
|||
|
|||
return float4(bsdfData.diffuseColor * totalIrradiance / totalWeight, 1); |
|||
} |
|||
ENDHLSL |
|||
} |
|||
} |
|||
Fallback Off |
|||
} |
|||
|
|||
*/ |
|
|||
fileFormatVersion: 2 |
|||
guid: b06a7993621def248addd55d0fe931b1 |
|||
timeCreated: 1500310187 |
|||
licenseType: Pro |
|||
ComputeShaderImporter: |
|||
externalObjects: {} |
|||
currentAPIMask: 4 |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
#ifndef UNITY_SPACE_FILLING_SURVES_INCLUDED |
|||
#define UNITY_SPACE_FILLING_SURVES_INCLUDED |
|||
|
|||
// Spreads the 16 low bits of x apart by inserting a 0 bit after each of them
// (bit k of the input ends up at bit 2k of the result). Higher input bits are ignored.
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Part1By1(uint x)
{
    uint bits = x & 0x0000ffff;                 // bits = ---- ---- ---- ---- fedc ba98 7654 3210
    bits = (bits | (bits << 8)) & 0x00ff00ff;   // bits = ---- ---- fedc ba98 ---- ---- 7654 3210
    bits = (bits | (bits << 4)) & 0x0f0f0f0f;   // bits = ---- fedc ---- ba98 ---- 7654 ---- 3210
    bits = (bits | (bits << 2)) & 0x33333333;   // bits = --fe --dc --ba --98 --76 --54 --32 --10
    bits = (bits | (bits << 1)) & 0x55555555;   // bits = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
    return bits;
}
|||
|
|||
// Spreads the 10 low bits of x apart by inserting two 0 bits after each of them
// (bit k of the input ends up at bit 3k of the result). Higher input bits are ignored.
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Part1By2(uint x)
{
    uint bits = x & 0x000003ff;                 // bits = ---- ---- ---- ---- ---- --98 7654 3210
    bits = (bits | (bits << 16)) & 0xff0000ff;  // bits = ---- --98 ---- ---- ---- ---- 7654 3210
    bits = (bits | (bits <<  8)) & 0x0300f00f;  // bits = ---- --98 ---- ---- 7654 ---- ---- 3210
    bits = (bits | (bits <<  4)) & 0x030c30c3;  // bits = ---- --98 ---- 76-- --54 ---- 32-- --10
    bits = (bits | (bits <<  2)) & 0x09249249;  // bits = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
    return bits;
}
|||
|
|||
// Inverse of Part1By1: "deletes" all odd-indexed bits of x and packs the
// even-indexed ones into the 16 low bits of the result.
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Compact1By1(uint x)
{
    uint bits = x & 0x55555555;                 // bits = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
    bits = (bits | (bits >> 1)) & 0x33333333;   // bits = --fe --dc --ba --98 --76 --54 --32 --10
    bits = (bits | (bits >> 2)) & 0x0f0f0f0f;   // bits = ---- fedc ---- ba98 ---- 7654 ---- 3210
    bits = (bits | (bits >> 4)) & 0x00ff00ff;   // bits = ---- ---- fedc ba98 ---- ---- 7654 3210
    bits = (bits | (bits >> 8)) & 0x0000ffff;   // bits = ---- ---- ---- ---- fedc ba98 7654 3210
    return bits;
}
|||
|
|||
// Inverse of Part1By2: "deletes" all bits of x that are not at positions divisible
// by 3 and packs the remaining ones into the 10 low bits of the result.
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Compact1By2(uint x)
{
    uint bits = x & 0x09249249;                 // bits = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
    bits = (bits | (bits >>  2)) & 0x030c30c3;  // bits = ---- --98 ---- 76-- --54 ---- 32-- --10
    bits = (bits | (bits >>  4)) & 0x0300f00f;  // bits = ---- --98 ---- ---- 7654 ---- ---- 3210
    bits = (bits | (bits >>  8)) & 0xff0000ff;  // bits = ---- --98 ---- ---- ---- ---- 7654 3210
    bits = (bits | (bits >> 16)) & 0x000003ff;  // bits = ---- ---- ---- ---- ---- --98 7654 3210
    return bits;
}
|||
|
|||
// Interleaves the 16 low bits of coord.x (even bit positions) and coord.y (odd bit
// positions) into a single 2D Morton code.
uint EncodeMorton2D(uint2 coord)
{
    uint evenBits = Part1By1(coord.x);
    uint oddBits  = Part1By1(coord.y) << 1;

    // The two bit sets are disjoint, so combining them never produces a carry.
    return oddBits | evenBits;
}
|||
|
|||
// Interleaves the 10 low bits of coord.x, coord.y and coord.z into a single 3D Morton
// code: x occupies bit positions 3k, y positions 3k + 1, z positions 3k + 2.
uint EncodeMorton3D(uint3 coord)
{
    uint xBits = Part1By2(coord.x);
    uint yBits = Part1By2(coord.y) << 1;
    uint zBits = Part1By2(coord.z) << 2;

    // The three bit sets are disjoint, so combining them never produces a carry.
    return zBits | yBits | xBits;
}
|||
|
|||
// Splits a 2D Morton code into its coordinates: x is gathered from the even-indexed
// bits of 'code', y from the odd-indexed ones.
uint2 DecodeMorton2D(uint code)
{
    uint x = Compact1By1(code);
    uint y = Compact1By1(code >> 1);

    return uint2(x, y);
}
|||
|
|||
// Splits a 3D Morton code into its coordinates: x is gathered from bit positions 3k
// of 'code', y from positions 3k + 1, z from positions 3k + 2.
uint3 DecodeMorton3D(uint code)
{
    uint x = Compact1By2(code);
    uint y = Compact1By2(code >> 1);
    uint z = Compact1By2(code >> 2);

    return uint3(x, y, z);
}
|||
|
|||
#endif // UNITY_SPACE_FILLING_SURVES_INCLUDED |
|
|||
fileFormatVersion: 2 |
|||
guid: 063144fddd2c1be41b9d09dec6314fc7 |
|||
timeCreated: 1500391830 |
|||
licenseType: Pro |
|||
ShaderImporter: |
|||
externalObjects: {} |
|||
defaultTextures: [] |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
撰写
预览
正在加载...
取消
保存
Reference in new issue