浏览代码

Port SSS to compute WIP

/RenderPassXR_Sandbox
Evgenii Golubev 8 年前
当前提交
f30f49e7
共有 6 个文件被更改,包括 731 次插入16 次删除
  1. 46
      Assets/ScriptableRenderPipeline/HDRenderPipeline/HDRenderPipeline.cs
  2. 5
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
  3. 604
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute
  4. 10
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute.meta
  5. 72
      Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl
  6. 10
      Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl.meta

46
Assets/ScriptableRenderPipeline/HDRenderPipeline/HDRenderPipeline.cs


public void SetupComputeShader(ComputeShader cs, CommandBuffer cmd)
{
cmd.SetComputeMatrixParam(cs, "_ViewMatrix", viewMatrix);
cmd.SetComputeMatrixParam(cs, "_InvViewMatrix", viewMatrix.inverse);
cmd.SetComputeMatrixParam(cs, "_ProjMatrix", projMatrix);
cmd.SetComputeMatrixParam(cs, "_InvProjMatrix", projMatrix.inverse);
cmd.SetComputeMatrixParam(cs, "_ViewProjMatrix", viewProjMatrix);
cmd.SetComputeMatrixParam(cs, "_InvViewProjMatrix", viewProjMatrix.inverse);
cmd.SetComputeVectorParam(cs, "_InvProjParam", invProjParam);
cmd.SetComputeVectorParam(cs, "_ScreenSize", screenSize);
cmd.SetComputeMatrixParam(cs, "_PrevViewProjMatrix", prevViewProjMatrix);
cmd.SetComputeVectorArrayParam(cs, "_FrustumPlanes", frustumPlaneEquations);
cmd.SetComputeMatrixParam(cs, "_ViewMatrix", viewMatrix);
cmd.SetComputeMatrixParam(cs, "_InvViewMatrix", viewMatrix.inverse);
cmd.SetComputeMatrixParam(cs, "_ProjMatrix", projMatrix);
cmd.SetComputeMatrixParam(cs, "_InvProjMatrix", projMatrix.inverse);
cmd.SetComputeMatrixParam(cs, "_ViewProjMatrix", viewProjMatrix);
cmd.SetComputeMatrixParam(cs, "_InvViewProjMatrix", viewProjMatrix.inverse);
cmd.SetComputeVectorParam(cs, "_InvProjParam", invProjParam);
cmd.SetComputeVectorParam(cs, "_ScreenSize", screenSize);
cmd.SetComputeMatrixParam(cs, "_PrevViewProjMatrix", prevViewProjMatrix);
cmd.SetComputeVectorArrayParam(cs, "_FrustumPlanes", frustumPlaneEquations);
// Copy values set by Unity which are not configured in scripts.
cmd.SetComputeVectorParam(cs, "unity_OrthoParams", Shader.GetGlobalVector("unity_OrthoParams"));
cmd.SetComputeVectorParam(cs, "_ProjectionParams", Shader.GetGlobalVector("_ProjectionParams"));
cmd.SetComputeVectorParam(cs, "_ScreenParams", Shader.GetGlobalVector("_ScreenParams"));
cmd.SetComputeVectorParam(cs, "_ZBufferParams", Shader.GetGlobalVector("_ZBufferParams"));
cmd.SetComputeVectorParam(cs, "_WorldSpaceCameraPos", Shader.GetGlobalVector("_WorldSpaceCameraPos"));
}
}

Material m_CopyStencilBuffer;
// Various set of material use in render loop
Material m_SssHorizontalFilterAndCombinePass;
ComputeShader m_SubsurfaceScatteringCS;
int m_SubsurfaceScatteringKernel;
Material m_SssHorizontalFilterAndCombinePass;
// <<< Old SSS Model
Material m_CameraMotionVectorsMaterial;

// Old SSS Model >>>
public void CreateSssMaterials(bool useDisneySSS)
{
m_SubsurfaceScatteringCS = AssetDatabase.LoadAssetAtPath<ComputeShader>("Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute");
m_SubsurfaceScatteringKernel = m_SubsurfaceScatteringCS.FindKernel("SubsurfaceScattering");
Utilities.Destroy(m_SssVerticalFilterPass);
m_SssVerticalFilterPass = Utilities.CreateEngineMaterial("Hidden/HDRenderPipeline/SubsurfaceScattering");
Utilities.SelectKeyword(m_SssVerticalFilterPass, "SSS_MODEL_DISNEY", "SSS_MODEL_BASIC", useDisneySSS);

{
if (sssSettings.useDisneySSS)
{
hdCamera.SetupComputeShader(m_SubsurfaceScatteringCS, cmd);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_GBufferTexture0", m_gbufferManager.GetGBuffers()[0]);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_GBufferTexture1", m_gbufferManager.GetGBuffers()[1]);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_GBufferTexture2", m_gbufferManager.GetGBuffers()[2]);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_GBufferTexture3", m_gbufferManager.GetGBuffers()[3]);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_DepthTexture", GetDepthTexture());
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_StencilTexture", GetStencilTexture());
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_HTile", GetHTile());
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_IrradianceSource", m_CameraDiffuseIrradianceBufferRT);
cmd.SetComputeTextureParam(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, "_CameraColorTexture", m_CameraColorBufferRT);
cmd.DispatchCompute(m_SubsurfaceScatteringCS, m_SubsurfaceScatteringKernel, ((int)hdCamera.screenSize.x + 15) / 16, ((int)hdCamera.screenSize.y + 15) / 16, 1);
return;
cmd.SetGlobalTexture("_IrradianceSource", m_CameraDiffuseIrradianceBufferRT); // Cannot set a RT on a material
m_SssHorizontalFilterAndCombinePass.SetFloatArray("_WorldScales", sssParameters.worldScales);
m_SssHorizontalFilterAndCombinePass.SetFloatArray("_FilterKernelsNearField", sssParameters.filterKernelsNearField);

5
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs


cmd.SetComputeVectorParam(shadeOpaqueShader, "_SinTime", Shader.GetGlobalVector("_SinTime"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "_CosTime", Shader.GetGlobalVector("_CosTime"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "unity_DeltaTime", Shader.GetGlobalVector("unity_DeltaTime"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "_WorldSpaceCameraPos", Shader.GetGlobalVector("_WorldSpaceCameraPos"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "_ProjectionParams", Shader.GetGlobalVector("_ProjectionParams"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "_ScreenParams", Shader.GetGlobalVector("_ScreenParams"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "_ZBufferParams", Shader.GetGlobalVector("_ZBufferParams"));
cmd.SetComputeVectorParam(shadeOpaqueShader, "unity_OrthoParams", Shader.GetGlobalVector("unity_OrthoParams"));
cmd.SetComputeIntParam(shadeOpaqueShader, "_EnvLightSkyEnabled", Shader.GetGlobalInt("_EnvLightSkyEnabled"));
Texture skyTexture = Shader.GetGlobalTexture("_SkyTexture");

604
Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute


// ===================== Performs integration of the Disney BSSRDF over a disk =====================
//--------------------------------------------------------------------------------------------------
// Definitions
//--------------------------------------------------------------------------------------------------
#pragma enable_d3d11_debug_symbols
// Tweak parameters.
#define SSS_BILATERAL_FILTER 1
#define SSS_USE_TANGENT_PLANE 0
#define SSS_CLAMP_ARTIFACT 0
#define SSS_DEBUG_LOD 0
#define SSS_DEBUG_NORMAL_VS 0
// Do not modify these.
#define SSS_PASS 1
#define MILLIMETERS_PER_METER 1000
#define CENTIMETERS_PER_METER 100
#define GROUP_SIZE_1D 16
#define GROUP_SIZE_2D (GROUP_SIZE_1D * GROUP_SIZE_1D)
#define TEXTURE_CACHE_BORDER 2
#define TEXTURE_CACHE_SIZE_1D (GROUP_SIZE_1D + 2 * TEXTURE_CACHE_BORDER)
//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------
#include "../../../../ShaderLibrary/Common.hlsl"
#include "../../../../ShaderLibrary/SpaceFillingCurves.hlsl"
#include "../../../ShaderConfig.cs.hlsl"
#include "../../../ShaderVariables.hlsl"
#define UNITY_MATERIAL_LIT
#include "../../../Material/Material.hlsl"
#include "../../../Lighting/LightDefinition.cs.hlsl"
//--------------------------------------------------------------------------------------------------
// Inputs & outputs
//--------------------------------------------------------------------------------------------------
float _WorldScales[SSS_N_PROFILES]; // Size of the world unit in meters
float _FilterKernelsNearField[SSS_N_PROFILES][SSS_N_SAMPLES_NEAR_FIELD][2]; // 0 = radius, 1 = reciprocal of the PDF
float _FilterKernelsFarField[SSS_N_PROFILES][SSS_N_SAMPLES_FAR_FIELD][2]; // 0 = radius, 1 = reciprocal of the PDF
DECLARE_GBUFFER_TEXTURE(_GBufferTexture); // Contains the albedo and SSS parameters
TEXTURE2D(_DepthTexture); // Z-buffer
TEXTURE2D(_StencilTexture); // DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_HTile); // DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_IrradianceSource); // Includes transmitted light
// Contains the HDR color for non-SSS materials.
// In case of SSS, it only contains the specular lighting, which we additively blend with the SSS lighting.
RW_TEXTURE2D(float4, _CameraColorTexture);
//--------------------------------------------------------------------------------------------------
// Implementation
//--------------------------------------------------------------------------------------------------
// Computes the value of the integrand over a disk: (2 * PI * r) * KernelVal().
// N.b.: the returned value is multiplied by 4. It is irrelevant due to weight renormalization.
float3 KernelValCircle(float r, float3 S)
{
float3 expOneThird = exp(((-1.0 / 3.0) * r) * S);
return /* 0.25 * */ S * (expOneThird + expOneThird * expOneThird * expOneThird);
}
// Computes F(r)/P(r), s.t. r = sqrt(a^2 + b^2).
// Rescaling of the PDF is handled by 'totalWeight'.
float3 ComputeBilateralWeight(float a2, float b, float mmPerUnit, float3 S, float rcpPdf)
{
#if (SSS_BILATERAL_FILTER == 0)
b = 0;
#endif
#if SSS_USE_TANGENT_PLANE
// Both 'a2' and 'b2' require unit conversion.
float r = sqrt(a2 + b * b) * mmPerUnit;
#else
// Only 'b2' requires unit conversion.
float r = sqrt(a2 + (b * mmPerUnit) * (b * mmPerUnit));
#endif
#if SSS_CLAMP_ARTIFACT
return saturate(KernelValCircle(r, S) * rcpPdf);
#else
return KernelValCircle(r, S) * rcpPdf;
#endif
}
#define SSS_ITER(i, n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
totalIrradiance, totalWeight) \
{ \
float r = kernel[profileID][i][0]; \
/* The relative sample position is known at compile time. */ \
float phi = SampleDiskFibonacci(i, n).y; \
float2 vec = r * float2(cos(phi), sin(phi)); \
\
/* Compute the screen-space position and the associated irradiance. */ \
float2 position; float3 irradiance; \
/* Compute the squared distance (in mm) in the screen-aligned plane. */ \
float dXY2; \
\
if (useTangentPlane) \
{ \
/* 'vec' is given relative to the tangent frame. */ \
float3 relPosVS = vec.x * tangentX + vec.y * tangentY; \
float3 positionVS = centerPosVS + relPosVS; \
float4 positionCS = mul(projMatrix, float4(positionVS, 1)); \
float2 positionSS = ComputeScreenSpacePosition(positionCS); \
\
position = positionSS * _ScreenSize.xy; \
irradiance = LOAD_TEXTURE2D(_IrradianceSource, position).rgb; \
dXY2 = dot(relPosVS.xy, relPosVS.xy); \
} \
else \
{ \
/* 'vec' is given directly in screen-space. */ \
position = centerPosUnSS + vec * pixelsPerMm; \
irradiance = LOAD_TEXTURE2D(_IrradianceSource, position).rgb; \
dXY2 = r * r; \
} \
\
/* TODO: see if making this a [branch] improves performance. */ \
[flatten] \
if (any(irradiance)) \
{ \
/* Apply bilateral weighting. */ \
float z = LOAD_TEXTURE2D(_MainDepthTexture, position).r; \
float d = LinearEyeDepth(z, _ZBufferParams); \
float t = d - centerPosVS.z; \
float p = kernel[profileID][i][1]; \
float3 w = ComputeBilateralWeight(dXY2, t, mmPerUnit, shapeParam, p); \
\
totalIrradiance += w * irradiance; \
totalWeight += w; \
} \
else \
{ \
/*************************************************************************/ \
/* The irradiance is 0. This could happen for 3 reasons. */ \
/* Most likely, the surface fragment does not have an SSS material. */ \
/* Alternatively, our sample comes from a region without any geometry. */ \
/* Finally, the surface fragment could be completely shadowed. */ \
/* Our blur is energy-preserving, so 'centerWeight' should be set to 0. */ \
/* We do not terminate the loop since we want to gather the contribution */ \
/* of the remaining samples (e.g. in case of hair covering skin). */ \
/* Note: See comment in the output of deferred.shader */ \
/*************************************************************************/ \
} \
}
#define SSS_LOOP(n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
totalIrradiance, totalWeight) \
{ \
float centerRadius = kernel[profileID][0][0]; \
float centerRcpPdf = kernel[profileID][0][1]; \
float3 centerWeight = KernelValCircle(centerRadius, shapeParam) * centerRcpPdf; \
\
totalIrradiance = centerWeight * centerIrradiance; \
totalWeight = centerWeight; \
\
/* Integrate over the screen-aligned or tangent plane in the view space. */ \
[unroll] \
for (uint i = 1; i < n; i++) \
{ \
SSS_ITER(i, n, kernel, profileID, shapeParam, centerPosUnSS, centerPosVS, \
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm, \
totalIrradiance, totalWeight) \
} \
}
bool StencilTest(int2 pixelCoord, float stencilRef)
{
int2 tileCoord = pixelCoord / 8;
// Perform the stencil test (reject at the tile rate).
bool passedStencilTest = stencilRef == LOAD_TEXTURE2D(_HTile, tileCoord).r;
[branch] if (passedStencilTest)
{
// Our copy of HTile does not allow to accept at the tile rate.
// Therefore, we have to additionally perform the stencil test at the pixel rate.
passedStencilTest = stencilRef == LOAD_TEXTURE2D(_StencilTexture, pixelCoord).r;
}
return passedStencilTest;
}
#pragma kernel SubsurfaceScattering
groupshared float4 textureCache[TEXTURE_CACHE_SIZE_1D][TEXTURE_CACHE_SIZE_1D]; // float4(irradiance, linearDepth)
groupshared bool processGroup;
[numthreads(GROUP_SIZE_2D, 1, 1)]
void SubsurfaceScattering(uint2 groupId : SV_GroupID,
uint groupThreadId : SV_GroupThreadID)
{
const uint waveIndex = groupThreadId / 64;
const uint laneIndex = groupThreadId % 64;
const uint quadIndex = laneIndex / 4;
// Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
const uint mortonCode = groupThreadId;
const uint2 localCoord = DecodeMorton2D(mortonCode);
const uint2 tileAnchor = groupId * GROUP_SIZE_1D;
const uint2 pixelCoord = tileAnchor + localCoord;
const uint2 cacheCoord = localCoord + TEXTURE_CACHE_BORDER;
const uint2 cacheMinPt = tileAnchor - TEXTURE_CACHE_BORDER;
const uint2 cacheMaxPt = cacheMinPt + TEXTURE_CACHE_SIZE_1D;
const float stencilRef = STENCILLIGHTINGUSAGE_SPLIT_LIGHTING;
[branch] if (groupThreadId == 0)
{
// Check whether the thread group needs to perform any work.
float s00 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(0, 0)).r;
float s10 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(1, 0)).r;
float s01 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(0, 1)).r;
float s11 = LOAD_TEXTURE2D(_HTile, 2 * groupId + uint2(1, 1)).r;
// Perform the stencil test (reject at the tile rate).
processGroup = (stencilRef == s00 || stencilRef == s10 || stencilRef == s01 || stencilRef == s11);
}
// Wait for the LDS.
GroupMemoryBarrierWithGroupSync();
[branch] if (!processGroup) { return; }
float3 centerIrradiance;
float centerDepth;
float4 cachedValue = float4(0, 0, 0, 0);
bool passedStencilTest = StencilTest(pixelCoord, stencilRef);
[branch] if (passedStencilTest)
{
centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r;
cachedValue = float4(centerIrradiance, LinearEyeDepth(centerDepth, _ZBufferParams));
}
// Populate the central region of the LDS cache.
textureCache[cacheCoord.x][cacheCoord.y] = cachedValue;
const uint numBorderQuadsPerWave = TEXTURE_CACHE_SIZE_1D / 2 - 1;
const uint halfCacheWidthInQuads = TEXTURE_CACHE_SIZE_1D / 4;
[branch] if (quadIndex < numBorderQuadsPerWave)
{
// Fetch another texel into the LDS.
uint2 startQuad = halfCacheWidthInQuads * uint2(waveIndex & 1, waveIndex >> 1);
uint2 quadCoord;
// The traversal order is such that the quad's X coordinate is monotonically increasing.
// Note: the compiler can heavily optimize the code below, as the switch is scalar,
// and there are very few unique values due to the symmetry.
switch (waveIndex)
{
case 0:
quadCoord.x = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
quadCoord.y = max(0, (int)((halfCacheWidthInQuads - 1) - quadIndex));
break;
case 1:
quadCoord.x = min(quadIndex, halfCacheWidthInQuads - 1);
quadCoord.y = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
break;
case 2:
quadCoord.x = max(0, (int)(quadIndex - (halfCacheWidthInQuads - 1)));
quadCoord.y = min(quadIndex, halfCacheWidthInQuads - 1);
break;
case 3:
quadCoord.x = min(quadIndex, halfCacheWidthInQuads - 1);
quadCoord.y = min(halfCacheWidthInQuads - 1, 2 * (halfCacheWidthInQuads - 1) - quadIndex);
break;
}
uint2 cacheCoord2 = (startQuad + quadCoord) * 2 + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2 - TEXTURE_CACHE_BORDER);
float4 cachedValue2 = float4(0, 0, 0, 0);
[branch] if (StencilTest(pixelCoord2, stencilRef))
{
float3 irradiance2 = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord2).rgb;
float depth2 = LOAD_TEXTURE2D(_DepthTexture, pixelCoord2).r;
cachedValue2 = float4(irradiance2, LinearEyeDepth(depth2, _ZBufferParams));
}
// Populate the border region of the LDS cache.
textureCache[cacheCoord2.x][cacheCoord2.y] = cachedValue2;
}
// Wait for the LDS.
GroupMemoryBarrierWithGroupSync();
[branch] if (!passedStencilTest) { return; }
PositionInputs posInput = GetPositionInput(pixelCoord, _ScreenSize.zw);
float3 unused;
// The result of the stencil test allows us to statically determine the material type (SSS).
BSDFData bsdfData;
FETCH_GBUFFER(gbuffer, _GBufferTexture, pixelCoord);
DECODE_FROM_GBUFFER(gbuffer, MATERIALFEATUREFLAGS_LIT_SSS, bsdfData, unused);
int profileID = bsdfData.subsurfaceProfile;
float distScale = bsdfData.subsurfaceRadius;
float3 shapeParam = _ShapeParams[profileID].rgb;
float maxDistance = _ShapeParams[profileID].a;
// Reconstruct the view-space position corresponding to the central sample.
float2 centerPosSS = posInput.positionSS;
float2 cornerPosSS = centerPosSS + 0.5 * _ScreenSize.zw;
float3 centerPosVS = ComputeViewSpacePosition(centerPosSS, centerDepth, _InvProjMatrix);
float3 cornerPosVS = ComputeViewSpacePosition(cornerPosSS, centerDepth, _InvProjMatrix);
// Rescaling the filter is equivalent to inversely scaling the world.
float mmPerUnit = MILLIMETERS_PER_METER * (_WorldScales[profileID] / distScale);
float unitsPerMm = rcp(mmPerUnit);
// Compute the view-space dimensions of the pixel as a quad projected onto geometry.
float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy);
float2 pixelsPerMm = rcp(unitsPerPixel) * unitsPerMm;
// We perform point sampling. Therefore, we can avoid the cost
// of filtering if we stay within the bounds of the current pixel.
// We use the value of 1 instead of 0.5 as an optimization.
// N.b.: our LoD selection algorithm is the same regardless of
// whether we integrate over the tangent plane or not, since we
// don't want the orientation of the tangent plane to create
// divergence of execution across the warp.
float maxDistInPixels = maxDistance * max(pixelsPerMm.x, pixelsPerMm.y);
[branch] if (distScale == 0 || maxDistInPixels < 1)
{
#if SSS_DEBUG_LOD
_CameraColorTexture[pixelCoord] = float4(0, 0, 1, 1);
#else
_CameraColorTexture[pixelCoord] += float4(bsdfData.diffuseColor * centerIrradiance, 1);
#endif
return;
}
const bool useTangentPlane = SSS_USE_TANGENT_PLANE != 0;
float4x4 viewMatrix, projMatrix;
GetLeftHandedViewSpaceMatrices(viewMatrix, projMatrix);
// Compute the tangent frame in view space.
float3 normalVS = mul((float3x3)viewMatrix, bsdfData.normalWS);
float3 tangentX = GetLocalFrame(normalVS)[0] * unitsPerMm;
float3 tangentY = GetLocalFrame(normalVS)[1] * unitsPerMm;
#if SSS_DEBUG_NORMAL_VS
// We expect the view-space normal to be front-facing.
if (normalVS.z >= 0) { _CameraColorTexture[pixelCoord] = float4(1, 0, 0, 1); return; }
#endif
// Accumulate filtered irradiance and bilateral weights (for renormalization).
float3 totalIrradiance, totalWeight;
// Use fewer samples for SS regions smaller than 5x5 pixels (rotated by 45 degrees).
[branch] if (maxDistInPixels < SSS_LOD_THRESHOLD)
{
#if SSS_DEBUG_LOD
_CameraColorTexture[pixelCoord] = float4(0.5, 0.5, 0, 1); return;
#else
SSS_LOOP(SSS_N_SAMPLES_FAR_FIELD, _FilterKernelsFarField,
profileID, shapeParam, pixelCoord, centerPosVS,
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
totalIrradiance, totalWeight)
#endif
}
else
{
#if SSS_DEBUG_LOD
_CameraColorTexture[pixelCoord] = float4(1, 0, 0, 1); return;
#else
SSS_LOOP(SSS_N_SAMPLES_NEAR_FIELD, _FilterKernelsNearField,
profileID, shapeParam, pixelCoord, centerPosVS,
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
totalIrradiance, totalWeight)
#endif
}
_CameraColorTexture[pixelCoord] += float4(bsdfData.diffuseColor * totalIrradiance / totalWeight, 1);
// _CameraColorTexture[pixelCoord] = float4((mortonCode % 256) / 255.0, (groupId.x % 256) / 15.0, (groupId.y % 256) / 15.0, 1);
// _CameraColorTexture[pixelCoord] = float4((mortonCode % 256) / 255.0, 0, 0, 1);
// _CameraColorTexture[pixelCoord] = float4((pixelCoord.x % 256) / 255.0, (pixelCoord.y % 256) / 255.0, 0, 1);
// _CameraColorTexture[pixelCoord] = float4(numBorderTexelsPerWave == (16 + 18) ? 1 : 0, 0, 0, 1);
}
/*
float4 Frag(Varyings input) : SV_Target
{
PositionInputs posInput = GetPositionInput(input.positionCS.xy, _ScreenSize.zw);
float3 unused;
// Note: When we are in this SubsurfaceScattering shader we know that we are a SSS material. This shader is strongly coupled with the deferred Lit.shader.
// We can use the material classification facility to help the compiler to know we use SSS material and optimize the code (and don't require to read gbuffer with materialId).
uint featureFlags = MATERIALFEATUREFLAGS_LIT_SSS;
BSDFData bsdfData;
FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS);
DECODE_FROM_GBUFFER(gbuffer, featureFlags, bsdfData, unused);
int profileID = bsdfData.subsurfaceProfile;
float distScale = bsdfData.subsurfaceRadius;
#ifdef SSS_MODEL_DISNEY
float3 shapeParam = _ShapeParams[profileID].rgb;
float maxDistance = _ShapeParams[profileID].a;
#else
float maxDistance = _FilterKernelsBasic[profileID][SSS_BASIC_N_SAMPLES - 1].a;
#endif
// Take the first (central) sample.
// TODO: copy its neighborhood into LDS.
float2 centerPosition = posInput.unPositionSS;
float3 centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, centerPosition).rgb;
// Reconstruct the view-space position.
float2 centerPosSS = posInput.positionSS;
float2 cornerPosSS = centerPosSS + 0.5 * _ScreenSize.zw;
float centerDepth = LOAD_TEXTURE2D(_MainDepthTexture, centerPosition).r;
float3 centerPosVS = ComputeViewSpacePosition(centerPosSS, centerDepth, _InvProjMatrix);
float3 cornerPosVS = ComputeViewSpacePosition(cornerPosSS, centerDepth, _InvProjMatrix);
#ifdef SSS_MODEL_DISNEY
// Rescaling the filter is equivalent to inversely scaling the world.
float mmPerUnit = MILLIMETERS_PER_METER * (_WorldScales[profileID] / distScale);
float unitsPerMm = rcp(mmPerUnit);
// Compute the view-space dimensions of the pixel as a quad projected onto geometry.
float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy);
float2 pixelsPerMm = rcp(unitsPerPixel) * unitsPerMm;
// We perform point sampling. Therefore, we can avoid the cost
// of filtering if we stay within the bounds of the current pixel.
// We use the value of 1 instead of 0.5 as an optimization.
// N.b.: our LoD selection algorithm is the same regardless of
// whether we integrate over the tangent plane or not, since we
// don't want the orientation of the tangent plane to create
// divergence of execution across the warp.
float maxDistInPixels = maxDistance * max(pixelsPerMm.x, pixelsPerMm.y);
[branch]
if (distScale == 0 || maxDistInPixels < 1)
{
#if SSS_DEBUG_LOD
return float4(0, 0, 1, 1);
#else
return float4(bsdfData.diffuseColor * centerIrradiance, 1);
#endif
}
const bool useTangentPlane = SSS_USE_TANGENT_PLANE != 0;
float4x4 viewMatrix, projMatrix;
GetLeftHandedViewSpaceMatrices(viewMatrix, projMatrix);
// Compute the tangent frame in view space.
float3 normalVS = mul((float3x3)viewMatrix, bsdfData.normalWS);
float3 tangentX = GetLocalFrame(normalVS)[0] * unitsPerMm;
float3 tangentY = GetLocalFrame(normalVS)[1] * unitsPerMm;
#if SSS_DEBUG_NORMAL_VS
// We expect the view-space normal to be front-facing.
if (normalVS.z >= 0) return float4(1, 0, 0, 1);
#endif
// Accumulate filtered irradiance and bilateral weights (for renormalization).
float3 totalIrradiance, totalWeight;
// Use fewer samples for SS regions smaller than 5x5 pixels (rotated by 45 degrees).
[branch]
if (maxDistInPixels < SSS_LOD_THRESHOLD)
{
#if SSS_DEBUG_LOD
return float4(0.5, 0.5, 0, 1);
#else
SSS_LOOP(SSS_N_SAMPLES_FAR_FIELD, _FilterKernelsFarField,
profileID, shapeParam, centerPosition, centerPosVS,
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
totalIrradiance, totalWeight)
#endif
}
else
{
#if SSS_DEBUG_LOD
return float4(1, 0, 0, 1);
#else
SSS_LOOP(SSS_N_SAMPLES_NEAR_FIELD, _FilterKernelsNearField,
profileID, shapeParam, centerPosition, centerPosVS,
useTangentPlane, tangentX, tangentY, mmPerUnit, pixelsPerMm,
totalIrradiance, totalWeight)
#endif
}
#else
// Rescaling the filter is equivalent to inversely scaling the world.
float metersPerUnit = _WorldScales[profileID] / distScale * SSS_BASIC_DISTANCE_SCALE;
float centimPerUnit = CENTIMETERS_PER_METER * metersPerUnit;
// Compute the view-space dimensions of the pixel as a quad projected onto geometry.
float2 unitsPerPixel = 2 * abs(cornerPosVS.xy - centerPosVS.xy);
float2 pixelsPerCm = rcp(centimPerUnit * unitsPerPixel);
// Compute the filtering direction.
#ifdef SSS_FILTER_HORIZONTAL_AND_COMBINE
float2 unitDirection = float2(1, 0);
#else
float2 unitDirection = float2(0, 1);
#endif
float2 scaledDirection = pixelsPerCm * unitDirection;
float phi = 0; // Random rotation; unused for now
float2x2 rotationMatrix = float2x2(cos(phi), -sin(phi), sin(phi), cos(phi));
float2 rotatedDirection = mul(rotationMatrix, scaledDirection);
// Load (1 / (2 * WeightedVariance)) for bilateral weighting.
#if RBG_BILATERAL_WEIGHTS
float3 halfRcpVariance = _HalfRcpWeightedVariances[profileID].rgb;
#else
float halfRcpVariance = _HalfRcpWeightedVariances[profileID].a;
#endif
#ifndef SSS_FILTER_HORIZONTAL_AND_COMBINE
bsdfData.diffuseColor = float3(1, 1, 1);
#endif
// Take the first (central) sample.
float2 samplePosition = posInput.unPositionSS;
float3 sampleWeight = _FilterKernelsBasic[profileID][0].rgb;
float3 sampleIrradiance = LOAD_TEXTURE2D(_IrradianceSource, samplePosition).rgb;
// We perform point sampling. Therefore, we can avoid the cost
// of filtering if we stay within the bounds of the current pixel.
// We use the value of 1 instead of 0.5 as an optimization.
float maxDistInPixels = maxDistance * max(pixelsPerCm.x, pixelsPerCm.y);
[branch]
if (distScale == 0 || maxDistInPixels < 1)
{
#if SSS_DEBUG_LOD
return float4(0, 0, 1, 1);
#else
return float4(bsdfData.diffuseColor * sampleIrradiance, 1);
#endif
}
#if SSS_DEBUG_LOD
return float4(0.5, 0.5, 0, 1);
#endif
// Accumulate filtered irradiance and bilateral weights (for renormalization).
float3 totalIrradiance = sampleWeight * sampleIrradiance;
float3 totalWeight = sampleWeight;
[unroll]
for (int i = 1; i < SSS_BASIC_N_SAMPLES; i++)
{
samplePosition = posInput.unPositionSS + rotatedDirection * _FilterKernelsBasic[profileID][i].a;
sampleWeight = _FilterKernelsBasic[profileID][i].rgb;
sampleIrradiance = LOAD_TEXTURE2D(_IrradianceSource, samplePosition).rgb;
[flatten]
if (any(sampleIrradiance))
{
// Apply bilateral weighting.
// Ref #1: Skin Rendering by Pseudo–Separable Cross Bilateral Filtering.
// Ref #2: Separable SSS, Supplementary Materials, Section E.
float rawDepth = LOAD_TEXTURE2D(_MainDepthTexture, samplePosition).r;
float sampleDepth = LinearEyeDepth(rawDepth, _ZBufferParams);
float zDistance = centimPerUnit * sampleDepth - (centimPerUnit * centerPosVS.z);
sampleWeight *= exp(-zDistance * zDistance * halfRcpVariance);
totalIrradiance += sampleWeight * sampleIrradiance;
totalWeight += sampleWeight;
}
else
{
// The irradiance is 0. This could happen for 3 reasons.
// Most likely, the surface fragment does not have an SSS material.
// Alternatively, our sample comes from a region without any geometry.
// Finally, the surface fragment could be completely shadowed.
// Our blur is energy-preserving, so 'centerWeight' should be set to 0.
// We do not terminate the loop since we want to gather the contribution
// of the remaining samples (e.g. in case of hair covering skin).
}
}
#endif
return float4(bsdfData.diffuseColor * totalIrradiance / totalWeight, 1);
}
ENDHLSL
}
}
Fallback Off
}
*/

10
Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute.meta


fileFormatVersion: 2
guid: b06a7993621def248addd55d0fe931b1
timeCreated: 1500310187
licenseType: Pro
ComputeShaderImporter:
externalObjects: {}
currentAPIMask: 4
userData:
assetBundleName:
assetBundleVariant:

72
Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl


#ifndef UNITY_SPACE_FILLING_SURVES_INCLUDED
#define UNITY_SPACE_FILLING_SURVES_INCLUDED
// "Insert" a 0 bit after each of the 16 low bits of x.
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Part1By1(uint x)
{
x &= 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
x = (x ^ (x << 8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
x = (x ^ (x << 4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
x = (x ^ (x << 2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
x = (x ^ (x << 1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
return x;
}
// "Insert" two 0 bits after each of the 10 low bits of x/
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Part1By2(uint x)
{
x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
return x;
}
// Inverse of Part1By1 - "delete" all odd-indexed bits/
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Compact1By1(uint x)
{
x &= 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
x = (x ^ (x >> 1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
x = (x ^ (x >> 2)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
x = (x ^ (x >> 4)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
x = (x ^ (x >> 8)) & 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
return x;
}
// Inverse of Part1By2 - "delete" all bits not at positions divisible by 3/
// Ref: https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
uint Compact1By2(uint x)
{
x &= 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
x = (x ^ (x >> 2)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
x = (x ^ (x >> 4)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
x = (x ^ (x >> 8)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
x = (x ^ (x >> 16)) & 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
return x;
}
uint EncodeMorton2D(uint2 coord)
{
return (Part1By1(coord.y) << 1) + Part1By1(coord.x);
}
uint EncodeMorton3D(uint3 coord)
{
return (Part1By2(coord.z) << 2) + (Part1By2(coord.y) << 1) + Part1By2(coord.x);
}
uint2 DecodeMorton2D(uint code)
{
return uint2(Compact1By1(code >> 0), Compact1By1(code >> 1));
}
uint3 DecodeMorton3D(uint code)
{
return uint3(Compact1By2(code >> 0), Compact1By2(code >> 1), Compact1By2(code >> 2));
}
#endif // UNITY_SPACE_FILLING_SURVES_INCLUDED

10
Assets/ScriptableRenderPipeline/ShaderLibrary/SpaceFillingCurves.hlsl.meta


fileFormatVersion: 2
guid: 063144fddd2c1be41b9d09dec6314fc7
timeCreated: 1500391830
licenseType: Pro
ShaderImporter:
externalObjects: {}
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存