浏览代码

Merge pull request #594 from EvgeniiG/master

Reduce the number of dependent texture reads for SSS
/main
GitHub 7 年前
当前提交
d6a58e9c
共有 23 个文件被更改,包括 138 次插入159 次删除
  1. 93
      ScriptableRenderPipeline/Core/ShaderLibrary/Common.hlsl
  2. 8
      ScriptableRenderPipeline/Core/ShaderLibrary/CommonLighting.hlsl
  3. 2
      ScriptableRenderPipeline/Core/ShaderLibrary/CommonMaterial.hlsl
  4. 4
      ScriptableRenderPipeline/Core/ShaderLibrary/ImageBasedLighting.hlsl
  5. 18
      ScriptableRenderPipeline/Core/ShaderLibrary/Macros.hlsl
  6. 2
      ScriptableRenderPipeline/Core/ShaderLibrary/NormalSurfaceGradient.hlsl
  7. 6
      ScriptableRenderPipeline/Core/ShaderLibrary/Packing.hlsl
  8. 2
      ScriptableRenderPipeline/Core/ShaderLibrary/VolumeRendering.hlsl
  9. 2
      ScriptableRenderPipeline/Core/Shadow/Shadow.cs
  10. 4
      ScriptableRenderPipeline/HDRenderPipeline/Debug/DebugViewMaterialGBuffer.shader
  11. 4
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/ClusteredUtils.hlsl
  12. 4
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/SortingComputeUtils.hlsl
  13. 2
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl
  14. 4
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/lightlistbuild-bigtile.compute
  15. 4
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/lightlistbuild-clustered.compute
  16. 2
      ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/scrbound.compute
  17. 19
      ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl
  18. 95
      ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute
  19. 2
      ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassDepthOnly.hlsl
  20. 2
      ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassForward.hlsl
  21. 2
      ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassGBuffer.hlsl
  22. 6
      ScriptableRenderPipeline/HDRenderPipeline/Sky/AtmosphericScattering/AtmosphericScattering.hlsl
  23. 10
      ScriptableRenderPipeline/HDRenderPipeline/Sky/BlacksmithlSky/Resources/SkyBlacksmith.shader

93
ScriptableRenderPipeline/Core/ShaderLibrary/Common.hlsl


// unsigned integer bit field extract implementation
// Returns the 'numBits'-wide field of 'data' that starts at bit 'offset', right-aligned in the result.
// The mask is an all-ones word shifted right by (32 - numBits).
// NOTE(review): numBits == 0 results in a shift by 32 bits; confirm callers always pass numBits in [1, 32].
uint BitFieldExtract(uint data, uint numBits, uint offset)
{
    uint mask = UINT_MAX >> (32u - numBits);
    return (data >> offset) & mask;
}
#endif // INTRINSIC_BITFIELD_EXTRACT

#endif // INTRINSIC_CUBEMAP_FACE_ID
// ----------------------------------------------------------------------------
// Common math definition and fastmath function
// Common math functions
// Multiples and reciprocals of pi.
#define PI 3.14159265359
#define TWO_PI 6.28318530718
#define FOUR_PI 12.56637061436
#define INV_PI 0.31830988618
#define INV_TWO_PI 0.15915494309
#define INV_FOUR_PI 0.07957747155
#define HALF_PI 1.57079632679
#define INV_HALF_PI 0.636619772367
// Bit pattern 0x7F800000 reinterpreted as a float: positive infinity in IEEE-754 single precision.
#define INFINITY asfloat(0x7F800000)
#define LOG2_E 1.44269504089
#define FLT_EPSILON 1.192092896e-07 // Smallest positive number, such that 1.0 + FLT_EPSILON != 1.0
#define FLT_MIN 1.175494351e-38 // Minimum normalized positive floating-point number
#define FLT_MAX 3.402823466e+38 // Maximum representable floating-point number
#define HFLT_MIN 0.00006103515625 // 2^-14, the same value for 10-, 11- and 16-bit floats. Ref: https://www.khronos.org/opengl/wiki/Small_Float_Formats
float DegToRad(float deg)
{
return deg * (PI / 180.0);

// Using pow often result to a warning like this
// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
// PositivePow remove this warning when you know the value is positive and avoid inf/NAN.
// Instantiated once: the base is made positive and clamped to at least FLT_EPS before calling pow().
TEMPLATE_2_FLT(PositivePow, base, power, return pow(max(abs(base), FLT_EPS), power))
// Ref: https://twitter.com/SebAaltonen/status/878250919879639040
// 2 mads (mad_sat and mad), faster than regular sign

// Z buffer to linear depth.
// Correctly handles oblique view frustums. Only valid for projection matrices!
// Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
float LinearEyeDepth(float2 positionSS, float depthRaw, float4 invProjParam)
float LinearEyeDepth(float2 positionSS, float deviceDepth, float4 invProjParam)
float4 positionCS = float4(positionSS * 2.0 - 1.0, depthRaw, 1.0);
float4 positionCS = float4(positionSS * 2.0 - 1.0, deviceDepth, 1.0);
}
// Z buffer to linear depth.
// Correctly handles oblique view frustums.
// Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
// Returns the W component of 'positionWS' after transformation by 'viewProjMatrix'.
// The matrix has to be 4x4: it multiplies a homogeneous float4 and the fourth component of the product is read.
float LinearEyeDepth(float3 positionWS, float4x4 viewProjMatrix)
{
    float4 positionCS = mul(viewProjMatrix, float4(positionWS, 1.0));
    return positionCS.w;
}
// ----------------------------------------------------------------------------

return positionSS;
}
float4 ComputeClipSpacePosition(float2 positionSS, float depthRaw)
float4 ComputeClipSpacePosition(float2 positionSS, float deviceDepth)
return float4(positionSS * 2.0 - 1.0, depthRaw, 1.0);
return float4(positionSS * 2.0 - 1.0, deviceDepth, 1.0);
float3 ComputeViewSpacePosition(float2 positionSS, float depthRaw, float4x4 invProjMatrix)
float3 ComputeViewSpacePosition(float2 positionSS, float deviceDepth, float4x4 invProjMatrix)
float4 positionCS = ComputeClipSpacePosition(positionSS, depthRaw);
float4 positionCS = ComputeClipSpacePosition(positionSS, deviceDepth);
float4 positionVS = mul(invProjMatrix, positionCS);
// The view space uses a right-handed coordinate system.
positionVS.z = -positionVS.z;

float3 ComputeWorldSpacePosition(float2 positionSS, float depthRaw, float4x4 invViewProjMatrix)
float3 ComputeWorldSpacePosition(float2 positionSS, float deviceDepth, float4x4 invViewProjMatrix)
float4 positionCS = ComputeClipSpacePosition(positionSS, depthRaw);
float4 positionCS = ComputeClipSpacePosition(positionSS, deviceDepth);
float4 hpositionWS = mul(invViewProjMatrix, positionCS);
return hpositionWS.xyz / hpositionWS.w;
}

// Bundles the screen-space, depth and world-space position of a pixel.
// Every member is declared exactly once.
struct PositionInputs
{
    // TODO: improve the naming convention.
    // Some options:
    // positionNDC, positionSS, tileCoordSS
    // pixelCoordUV, pixelCoordSS, tileCoordSS
    // pixelCoordSS, pixelIndexSS, tileIndexSS
    float3 positionWS;   // World space position (could be camera-relative)
    float2 positionSS;   // Screen space pixel position : [0, 1) (with the half-pixel offset)
    uint2  unPositionSS; // Screen space pixel index    : [0, NumPixels)
    uint2  unTileCoord;  // Screen space tile index     : [0, NumTiles)
    float  deviceDepth;  // Depth from the depth buffer : [0, 1]
    float  linearDepth;  // View space Z coordinate     : [Near, Far]

    // Older names for the two depth values above, kept so that code still using them compiles.
    // NOTE(review): they are written independently of deviceDepth / linearDepth; migrate users and remove.
    float  depthRaw;     // raw depth from depth buffer
    float  depthVS;
};
// This function is used to provide an easy way to sample into a screen texture, either from a pixel or a compute shader.

}
// From forward
// depthRaw and depthVS come directly form .zw of SV_Position
void UpdatePositionInput(float depthRaw, float depthVS, float3 positionWS, inout PositionInputs posInput)
// deviceDepth and linearDepth come directly from .zw of SV_Position
void UpdatePositionInput(float deviceDepth, float linearDepth, float3 positionWS, inout PositionInputs posInput)
posInput.depthRaw = depthRaw;
posInput.depthVS = depthVS;
posInput.positionWS = positionWS;
posInput.deviceDepth = deviceDepth;
posInput.linearDepth = linearDepth;
posInput.positionWS = positionWS;
void UpdatePositionInput(float depthRaw, float4x4 invViewProjMatrix, float4x4 viewProjMatrix, inout PositionInputs posInput)
void UpdatePositionInput(float deviceDepth, float4x4 invViewProjMatrix, float4x4 viewProjMatrix, inout PositionInputs posInput)
posInput.depthRaw = depthRaw;
posInput.positionWS = ComputeWorldSpacePosition(posInput.positionSS, depthRaw, invViewProjMatrix);
posInput.deviceDepth = deviceDepth;
posInput.positionWS = ComputeWorldSpacePosition(posInput.positionSS, deviceDepth, invViewProjMatrix);
posInput.depthVS = mul(viewProjMatrix, float4(posInput.positionWS, 1.0)).w;
posInput.linearDepth = mul(viewProjMatrix, float4(posInput.positionWS, 1.0)).w;
}
// The view direction 'V' points towards the camera.

posInput.positionWS += depthOffsetVS * (-V);
float4 positionCS = mul(viewProjMatrix, float4(posInput.positionWS, 1.0));
posInput.depthVS = positionCS.w;
posInput.depthRaw = positionCS.z / positionCS.w;
float4 positionCS = mul(viewProjMatrix, float4(posInput.positionWS, 1.0));
posInput.linearDepth = positionCS.w;
posInput.deviceDepth = positionCS.z / positionCS.w;
}
// ----------------------------------------------------------------------------

8
ScriptableRenderPipeline/Core/ShaderLibrary/CommonLighting.hlsl


// These functions clamp to the maximum 16-bit floating-point value; they are used to prevent INF in case of extreme values.
// Limits 'value' from above to HALF_MAX (65504.0).
// Only one return statement is kept; anything after the first return could never execute.
float ClampToFloat16Max(float value)
{
    return min(value, HALF_MAX);
}
// Lighting convention

2
ScriptableRenderPipeline/Core/ShaderLibrary/CommonMaterial.hlsl


// all pixels which belong to an SSS material are not black (those that don't always are).
// Raises the red channel of 'subsurfaceLighting' to at least HALF_MIN, so the returned color is never exactly black.
// A single clamp is sufficient: applying it a second time with the same bound has no effect.
float3 TagLightingForSSS(float3 subsurfaceLighting)
{
    subsurfaceLighting.r = max(subsurfaceLighting.r, HALF_MIN);
    return subsurfaceLighting;
}

4
ScriptableRenderPipeline/Core/ShaderLibrary/ImageBasedLighting.hlsl


float m = PerceptualRoughnessToRoughness(perceptualRoughness);
// Remap to spec power. See eq. 21 in --> https://dl.dropboxusercontent.com/u/55891920/papers/mm_brdf.pdf
float n = (2.0 / max(FLT_EPSILON, m * m)) - 2.0;
float n = (2.0 / max(FLT_EPS, m * m)) - 2.0;
n /= (4.0 * max(NdotR, FLT_EPSILON));
n /= (4.0 * max(NdotR, FLT_EPS));
// remap back to square root of real roughness (0.25 include both the sqrt root of the conversion and sqrt for going from roughness to perceptualRoughness)
perceptualRoughness = pow(2.0 / (n + 2.0), 0.25);

18
ScriptableRenderPipeline/Core/ShaderLibrary/Macros.hlsl


#define SAMPLE_TEXTURECUBE_ARRAY_LOD_ABSTRACT(textureName, samplerName, coord3, index, lod) SAMPLE_TEXTURECUBE_ARRAY_LOD(textureName, samplerName, coord3, index, lod)
#endif
// Multiples and reciprocals of pi.
#define PI 3.14159265358979323846
#define TWO_PI 6.28318530717958647693
#define FOUR_PI 12.5663706143591729538
#define INV_PI 0.31830988618379067154
#define INV_TWO_PI 0.15915494309189533577
#define INV_FOUR_PI 0.07957747154594766788
#define HALF_PI 1.57079632679489661923
#define INV_HALF_PI 0.63661977236758134308
#define LOG2_E 1.44269504088896340736
// Bit pattern 0x7F800000 reinterpreted as a float: positive infinity in IEEE-754 single precision.
#define INFINITY asfloat(0x7F800000)
#define FLT_EPS 1.192092896e-07 // Smallest positive number, such that 1.0 + FLT_EPS != 1.0
#define FLT_MIN 1.175494351e-38 // Minimum normalized positive floating-point number
#define FLT_MAX 3.402823466e+38 // Maximum representable floating-point number
#define HALF_MIN 6.103515625e-5 // 2^-14, the same value for 10, 11 and 16-bit: https://www.khronos.org/opengl/wiki/Small_Float_Formats
#define HALF_MAX 65504.0 // Largest finite 16-bit floating-point value
#define UINT_MAX 0xFFFFFFFFu // All 32 bits set
#define TEMPLATE_1_FLT(FunctionName, Parameter1, FunctionBody) \
float FunctionName(float Parameter1) { FunctionBody; } \
float2 FunctionName(float2 Parameter1) { FunctionBody; } \

2
ScriptableRenderPipeline/Core/ShaderLibrary/NormalSurfaceGradient.hlsl


// Computes (dot(n, v) * n - v) / max(FLT_EPS, |dot(n, v)|), where n is 'nrmVertexNormal'.
float3 SurfaceGradientFromPerturbedNormal(float3 nrmVertexNormal, float3 v)
{
    float3 n = nrmVertexNormal;
    float nDotV = dot(n, v);
    // Keep the denominator at least FLT_EPS so the reciprocal stays finite when n and v are perpendicular.
    float s = 1.0 / max(FLT_EPS, abs(nDotV));
    return s * (nDotV * n - v);
}

6
ScriptableRenderPipeline/Core/ShaderLibrary/Packing.hlsl


// Packs an integer stored using at most 'numBits' into a [0..1] float.
// Maps the integer 'i' to a float by dividing it by the largest 'numBits'-bit value; the result is saturated.
float PackInt(uint i, uint numBits)
{
    // Largest value representable with 'numBits' bits.
    uint maxInt = UINT_MAX >> (32u - numBits);
    return saturate(i * rcp(maxInt));
}

uint maxInt = 0xFFFFFFFFu >> (32u - numBits);
uint maxInt = UINT_MAX >> (32u - numBits);
return (uint)(f * maxInt + 0.5); // Round instead of truncating
}

// Extracts the 'numBits'-wide field at bit 'offset' from 'src' and divides it by the largest 'numBits'-bit value.
float UnpackUIntToFloat(uint src, uint numBits, uint offset)
{
    // Largest value representable with 'numBits' bits.
    uint maxInt = UINT_MAX >> (32u - numBits);
    return float(BitFieldExtract(src, numBits, offset)) * rcp(maxInt);
}

2
ScriptableRenderPipeline/Core/ShaderLibrary/VolumeRendering.hlsl


// Absorption coefficient from Disney: http://blog.selfshadow.com/publications/s2015-shading-course/burley/s2015_pbs_disney_bsdf_notes.pdf
// Computes -log(transmittanceColor) / atDistance per channel.
// FLT_EPS is added to the color before the log, and the distance is clamped to at least FLT_EPS before the division.
float3 TransmittanceColorAtDistanceToAbsorption(float3 transmittanceColor, float atDistance)
{
    return -log(transmittanceColor + FLT_EPS) / max(atDistance, FLT_EPS);
}

2
ScriptableRenderPipeline/Core/Shadow/Shadow.cs


}
m_TmpSortKeys.Sort( new SortReverter() );
m_TmpSortKeys.ExtractTo( shadowRequests, 0, out shadowRequestsCount, delegate(long key) { return (int) (key & 0xffffffff); } );
m_TmpSortKeys.ExtractTo( shadowRequests, 0, out shadowRequestsCount, delegate(long key) { return (int) (key & UINT_MAX); } );
}
protected override void PruneShadowCasters( Camera camera, List<VisibleLight> lights, ref VectorArray<int> shadowRequests, ref ShadowRequestVector requestsGranted, out uint totalRequestCount )

4
ScriptableRenderPipeline/HDRenderPipeline/Debug/DebugViewMaterialGBuffer.shader


BSDFData bsdfData;
BakeLightingData bakeLightingData;
DECODE_FROM_GBUFFER(posInput.unPositionSS, 0xFFFFFFFF, bsdfData, bakeLightingData.bakeDiffuseLighting);
DECODE_FROM_GBUFFER(posInput.unPositionSS, UINT_MAX, bsdfData, bakeLightingData.bakeDiffuseLighting);
#ifdef SHADOWS_SHADOWMASK
DecodeShadowMask(LOAD_TEXTURE2D(_ShadowMaskTexture, posInput.unPositionSS), bakeLightingData.bakeShadowMask);
#endif

if (_DebugViewMaterial == DEBUGVIEWGBUFFER_DEPTH)
{
float linearDepth = frac(posInput.depthVS * 0.1);
float linearDepth = frac(posInput.linearDepth * 0.1);
result = linearDepth.xxx;
}
// Caution: This value is not the same than the builtin data bakeDiffuseLighting. It also include emissive and multiply by the albedo

4
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/ClusteredUtils.hlsl


// Derives a logarithm base from the tile's far plane and returns it, but never less than g_fClustBase.
float SuggestLogBase50(float tileFarPlane)
{
    const float C = (float)(1 << g_iLog2NumClusters);
    // Far plane of the tile as a fraction of the [near, far] range, kept in [FLT_EPS, 1] to avoid dividing by zero below.
    float rangeFittedDistance = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPS, 1.0);
    float suggested_base = pow((1.0 + sqrt(max(0.0, 1.0 - 4.0 * rangeFittedDistance * (1.0 - rangeFittedDistance)))) / (2.0 * rangeFittedDistance), 2.0 / C);
    return max(g_fClustBase, suggested_base);
}

{
const float C = (float)(1 << g_iLog2NumClusters);
float rangeFittedDistance = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);
float rangeFittedDistance = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPS, 1.0);
float suggested_base = pow((1 / 2.3) * max(0.0, (0.8 / rangeFittedDistance) - 1), 4.0 / (C * 2)); // approximate inverse of d*x^4 + (-x) + (1-d) = 0 - d is normalized distance
return max(g_fClustBase, suggested_base);
}

4
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/SortingComputeUtils.hlsl


// have to make this sort routine a macro unfortunately because hlsl doesn't take
// groupshared memory of unspecified length as an input parameter to a function.
// maxcapacity_in must be a power of two.
// all data from length_in and up to closest power of two will be filled with 0xffffffff
// all data from length_in and up to closest power of two will be filled with UINT_MAX
for(int t=length+localThreadID; t<N; t+=nrthreads) { data[t]=0xffffffff; } \
for(int t=length+localThreadID; t<N; t+=nrthreads) { data[t]=UINT_MAX; } \
GroupMemoryBarrierWithGroupSync(); \
\
for(int k=2; k<=N; k=2*k) \

2
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl


logBase = g_logBaseBuffer[tileIndex.y * _NumTileClusteredX + tileIndex.x];
}
int clustIdx = SnapToClusterIdxFlex(posInput.depthVS, logBase, g_isLogBaseBufferEnabled != 0);
int clustIdx = SnapToClusterIdxFlex(posInput.linearDepth, logBase, g_isLogBaseBufferEnabled != 0);
int nrClusters = (1 << g_iLog2NumClusters);
const int idx = ((lightCategory * nrClusters + clustIdx) * _NumTileClusteredY + tileIndex.y) * _NumTileClusteredX + tileIndex.x;

4
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/lightlistbuild-bigtile.compute


SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
lightsListLDS[l]=0xffffffff;
lightsListLDS[l]=UINT_MAX;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) lightsListLDS[l]=0xffffffff;
if(bFoundSepPlane) lightsListLDS[l]=UINT_MAX;
}
}
}

4
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/lightlistbuild-clustered.compute


SFiniteLightBound lgtDat = g_data[coarseList[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
coarseList[l]=0xffffffff;
coarseList[l]=UINT_MAX;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

int offs = 0;
for(int l=0; l<iNrCoarseLights; l++)
{
if(coarseList[l]!=0xffffffff)
if(coarseList[l]!=UINT_MAX)
coarseList[offs++] = coarseList[l];
}
lightOffsSph = offs;

2
ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/scrbound.compute


float fW = vPnts[k].w;
float fS = fW<0 ? -1 : 1;
float fWabs = fW<0 ? (-fW) : fW;
fW = fS * (fWabs<FLT_EPSILON ? FLT_EPSILON : fWabs);
fW = fS * (fWabs<FLT_EPS ? FLT_EPS : fWabs);
float3 vP = float3(vPnts[k].x/fW, vPnts[k].y/fW, vPnts[k].z/fW);
if(k==0) { vMin=vP; vMax=vP; }

19
ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl


float depth = LinearEyeDepth(pyramidDepth, _ZBufferParams);
// Distance from point to the back plane
float depthFromPositionInput = depth - posInputs.depthVS;
float depthFromPositionInput = depth - posInputs.linearDepth;
float offset = dot(-V, positionWS - posInputs.positionWS);
float depthFromPosition = depthFromPositionInput - offset;

// If a user do a lighting architecture without material classification, this can be remove
#include "../../Lighting/TilePass/TilePass.cs.hlsl"
// Bit mask of material features; initialized with every bit set.
// Stored as unsigned so that it matches the UINT_MAX initializer and the bitwise test below.
static uint g_FeatureFlags = UINT_MAX;

// Returns true when at least one bit of 'flag' is set in g_FeatureFlags.
bool HasMaterialFeatureFlag(uint flag)
{
    return ((g_FeatureFlags & flag) != 0);
}

// The material features system for material classification must allow compile time optimization (i.e everything should be static)
// Note that as we store materialId for Aniso based on content of RT2 we need to add few extra condition.
// The code is also call from MaterialFeatureFlagsFromGBuffer, so must work fully dynamic if featureFlags is 0xFFFFFFFF
// The code is also call from MaterialFeatureFlagsFromGBuffer, so must work fully dynamic if featureFlags is UINT_MAX
int supportsStandard = HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_STANDARD);
int supportsSSS = HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_SSS);
int supportsAniso = HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO);

DecodeFromGBuffer(
unPositionSS,
0xFFFFFFFF,
UINT_MAX,
bsdfData,
unused
);

float diffuseFGD;
// Area lights (17 VGPRs)
// TODO: 'orthoBasisViewNormal' is just a rotation around the normal and should thus be just 1x VGPR.
float3x3 orthoBasisViewNormal; // Right-handed view-dependent orthogonal basis around the normal (6x VGPRs)
float3x3 ltcTransformDiffuse; // Inverse transformation for Lambertian or Disney Diffuse (4x VGPRs)
float3x3 ltcTransformSpecular; // Inverse transformation for GGX (4x VGPRs)

float NdotL = saturate(dot(bsdfData.coatNormalWS, L));
float NdotV = preLightData.coatNdotV;
float LdotV = dot(L, V);
float invLenLV = rsqrt(max(2 * LdotV + 2, FLT_EPSILON));
float invLenLV = rsqrt(max(2 * LdotV + 2, FLT_EPS));
float NdotH = saturate((NdotL + NdotV) * invLenLV);
float LdotH = saturate(invLenLV * LdotV + invLenLV);

float NdotL = saturate(dot(bsdfData.normalWS, L)); // Must have the same value without the clamp
float NdotV = preLightData.NdotV; // Get the unaltered (geometric) version
float LdotV = dot(L, V);
float invLenLV = rsqrt(max(2 * LdotV + 2, FLT_EPSILON)); // invLenLV = rcp(length(L + V)) - caution about the case where V and L are opposite, it can happen, use max to avoid this
float invLenLV = rsqrt(max(2 * LdotV + 2, FLT_EPS)); // invLenLV = rcp(length(L + V)) - caution about the case where V and L are opposite, it can happen, use max to avoid this
float NdotH = saturate((NdotL + NdotV) * invLenLV);
float LdotH = saturate(invLenLV * LdotV + invLenLV);

#endif
#ifdef SHADOWS_SHADOWMASK
float fade = saturate(posInput.depthVS * lightData.fadeDistanceScaleAndBias.x + lightData.fadeDistanceScaleAndBias.y);
float fade = saturate(posInput.linearDepth * lightData.fadeDistanceScaleAndBias.x + lightData.fadeDistanceScaleAndBias.y);
// See comment in EvaluateBSDF_Punctual
shadow = lightData.dynamicShadowCasterOnly ? min(shadowMask, shadow) : shadow;

// Exit if texel is out of color buffer
// Or if the texel is from an object in front of the object
if (refractedBackPointDepth < posInput.depthVS
if (refractedBackPointDepth < posInput.linearDepth
|| any(refractedBackPointSS < 0.0)
|| any(refractedBackPointSS > 1.0))
{

95
ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute


#endif
groupshared bool processGroup;
// Decides whether the pixel at 'pixelCoord' should be treated as an SSS pixel.
// With SSS_SAMPLE_TEST_HTILE enabled, the 8x8 tile containing the pixel is first compared against 'stencilRef'
// using _HTile; the per-pixel check only runs if that tile-rate test passes.
// Without it, the per-pixel check always runs and 'stencilRef' is not used.
// The per-pixel check is TestLightingForSSS() applied to the value loaded from _IrradianceSource.
bool StencilTest(int2 pixelCoord, float stencilRef)
{
bool passedStencilTest;
#if SSS_SAMPLE_TEST_HTILE
// Tiles cover 8x8 pixels.
int2 tileCoord = pixelCoord / 8;
// Perform the stencil test (reject at the tile rate).
passedStencilTest = stencilRef == LOAD_TEXTURE2D(_HTile, tileCoord).r;
// The braced block below is the body of this branch when the HTile test is compiled in.
[branch] if (passedStencilTest)
#else
// It is extremely uncommon for individual samples to fail the HTile test.
// Unfortunately, our copy of HTile does not allow to accept at the tile rate.
// Therefore, we choose not to perform the HiS test here.
#endif
{
// Unfortunately, our copy of HTile does not allow to accept at the tile rate.
// Therefore, we have to additionally perform the stencil test at the pixel rate.
// We check the tagged irradiance buffer to avoid an extra stencil texture fetch.
passedStencilTest = TestLightingForSSS(LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb);
}
return passedStencilTest;
}
#if SSS_USE_LDS_CACHE
float4 LoadSampleFromCacheMemory(int2 cacheCoord)
{

// Returns {irradiance, linearDepth}.
float4 LoadSample(int2 pixelCoord, int2 cacheAnchor)
{
#if SSS_USE_LDS_CACHE
#if SSS_USE_LDS_CACHE
[branch] if (isInCache)
{
return LoadSampleFromCacheMemory(cacheCoord);

{
float stencilRef = STENCILLIGHTINGUSAGE_SPLIT_LIGHTING;
[branch] if (StencilTest(pixelCoord, stencilRef))
{
return LoadSampleFromVideoMemory(pixelCoord);
}
else
{
return float4(0, 0, 0, 0);
}
// Always load both irradiance and depth.
// Avoid dependent texture reads at the cost of extra bandwidth.
return LoadSampleFromVideoMemory(pixelCoord);
}
}

if (TestLightingForSSS(irradiance))
{
// Apply bilateral weighting.
float linearDepth = textureSample.a;
float z = linearDepth - centerPosVS.z;
float p = _FilterKernels[profileID][i][iP];
float3 w = ComputeBilateralWeight(xy2, z, mmPerUnit, shapeParam, p);
float viewZ = textureSample.a;
float relZ = viewZ - centerPosVS.z;
float rcpPdf = _FilterKernels[profileID][i][iP];
float3 weight = ComputeBilateralWeight(xy2, relZ, mmPerUnit, shapeParam, rcpPdf);
totalIrradiance += w * irradiance;
totalWeight += w;
totalIrradiance += weight * irradiance;
totalWeight += weight;
}
else
{

[branch] if (!processGroup) { return; }
float3 centerIrradiance = 0;
float centerDepth = 0;
float4 cachedValue = 0;
bool passedStencilTest = StencilTest((int2)pixelCoord, stencilRef);
float3 centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
float centerDepth = 0;
float centerViewZ = 0;
bool passedStencilTest = TestLightingForSSS(centerIrradiance);
// Save some bandwidth by only loading depth values for SSS pixels.
centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r;
cachedValue = float4(centerIrradiance, LinearEyeDepth(centerDepth, _ZBufferParams));
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r;
centerViewZ = LinearEyeDepth(centerDepth, _ZBufferParams);
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = cachedValue;
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = float4(centerIrradiance, centerViewZ);
uint numBorderQuadsPerWave = TEXTURE_CACHE_SIZE_1D / 2 - 1;
uint halfCacheWidthInQuads = TEXTURE_CACHE_SIZE_1D / 4;

uint2 quadCoord;
// The traversal order is such that the quad's X coordinate is monotonically increasing.
// The corner is always the near the block of the corresponding wavefront.
case 0:
case 0: // Bottom left
case 1:
case 1: // Bottom right
case 2:
case 2: // Top left
default: // 3
default: // Top right
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER;
float4 cachedValue2 = 0;
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER;
float3 irradiance2 = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord2).rgb;
float viewZ2 = 0;
[branch] if (StencilTest(pixelCoord2, stencilRef))
// Save some bandwidth by only loading depth values for SSS pixels.
[branch] if (TestLightingForSSS(irradiance2))
cachedValue2 = LoadSampleFromVideoMemory(pixelCoord2);
viewZ2 = LinearEyeDepth(LOAD_TEXTURE2D(_DepthTexture, pixelCoord2).r, _ZBufferParams);
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = cachedValue2;
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = float4(irradiance2, viewZ2);
}
// Wait for the LDS.

bool useNearFieldKernel = SSS_ENABLE_NEAR_FIELD && maxDistInPixels > SSS_LOD_THRESHOLD;
#if SSS_DEBUG_LOD
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0);
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0));
return;
#endif

2
ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassDepthOnly.hlsl


outColor = float4(0.0, 0.0, 0.0, 0.0);
#ifdef _DEPTHOFFSET_ON
outputDepth = posInput.depthRaw;
outputDepth = posInput.deviceDepth;
#endif
}

2
ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassForward.hlsl


}
#ifdef _DEPTHOFFSET_ON
outputDepth = posInput.depthRaw;
outputDepth = posInput.deviceDepth;
#endif
#ifdef DEBUG_DISPLAY

2
ScriptableRenderPipeline/HDRenderPipeline/ShaderPass/ShaderPassGBuffer.hlsl


ENCODE_VELOCITY_INTO_GBUFFER(builtinData.velocity, outVelocityBuffer);
#ifdef _DEPTHOFFSET_ON
outputDepth = posInput.depthRaw;
outputDepth = posInput.deviceDepth;
#endif
}

6
ScriptableRenderPipeline/HDRenderPipeline/Sky/AtmosphericScattering/AtmosphericScattering.hlsl


else if (_FogColorMode == FOGCOLORMODE_SKY_COLOR)
{
// Based on Uncharted 4 "Mip Sky Fog" trick: http://advances.realtimerendering.com/other/2016/naughty_dog/NaughtyDog_TechArt_Final.pdf
float mipLevel = (1.0 - _MipFogMaxMip * saturate((posInput.depthVS - _MipFogNear) / (_MipFogFar - _MipFogNear))) * _SkyTextureMipCount;
float mipLevel = (1.0 - _MipFogMaxMip * saturate((posInput.linearDepth - _MipFogNear) / (_MipFogFar - _MipFogNear))) * _SkyTextureMipCount;
float3 dir = normalize(posInput.positionWS - GetPrimaryCameraPosition());
return SampleSkyTexture(dir, mipLevel).rgb;
}

if (_AtmosphericScatteringType == FOGTYPE_EXPONENTIAL)
{
float3 fogColor = GetFogColor(posInput);
float fogFactor = _ExpFogDensity * (1.0f - Transmittance(OpticalDepthHomogeneous(1.0f / _ExpFogDistance, posInput.depthVS)));
float fogFactor = _ExpFogDensity * (1.0f - Transmittance(OpticalDepthHomogeneous(1.0f / _ExpFogDistance, posInput.linearDepth)));
float fogFactor = _LinearFogDensity * saturate((posInput.depthVS - _LinearFogStart) * _LinearFogOneOverRange);
float fogFactor = _LinearFogDensity * saturate((posInput.linearDepth - _LinearFogStart) * _LinearFogOneOverRange);
return float4(fogColor, fogFactor);
}
else // NONE

10
ScriptableRenderPipeline/HDRenderPipeline/Sky/BlacksmithlSky/Resources/SkyBlacksmith.shader


#ifdef PERFORM_SKY_OCCLUSION_TEST
// Determine whether the sky is occluded by the scene geometry.
// Do not perform blending with the environment map if the sky is occluded.
float depthRaw = max(_SkyDepth, LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).r);
float skyTexWeight = (depthRaw > _SkyDepth) ? 0.0 : 1.0;
float deviceDepth = max(_SkyDepth, LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).r);
float skyTexWeight = (deviceDepth > _SkyDepth) ? 0.0 : 1.0;
float depthRaw = _SkyDepth;
float deviceDepth = _SkyDepth;
depthRaw = _SkyDepth;
deviceDepth = _SkyDepth;
UpdatePositionInput(depthRaw, UNITY_MATRIX_I_VP, k_identity4x4, posInput);
UpdatePositionInput(deviceDepth, UNITY_MATRIX_I_VP, k_identity4x4, posInput);
float4 c1, c2, c3;
VolundTransferScatter(GetAbsolutePositionWS(posInput.positionWS), c1, c2, c3);

正在加载...
取消
保存