浏览代码

Optimize divergent execution of the anisotropic GGX

If the tile has any pixels with anisotropic GGX, we evaluate the entire tile with anisotropic GGX rather than first evaluating pixels with anisotropic GGX and then pixels with isotropic GGX.

Before

; --- Statistics for Deferred.compute on GCN (Pitcairn) ---
; SGPRs: 92 out of 104 used
; VGPRs: 128 out of 256 used
; LDS: 0 out of 32768 bytes used
; 0 bytes scratch space used
; Instructions: 2676 ALU, 183 Control Flow, 50 TFETCH

After

; --- Statistics for Deferred.compute on GCN (Pitcairn) ---
; SGPRs: 94 out of 104 used
; VGPRs: 128 out of 256 used
; LDS: 0 out of 32768 bytes used
; 0 bytes scratch space used
; Instructions: 2583 ALU, 183 Control Flow, 50 TFETCH
/main
Evgenii Golubev 7 年前
当前提交
f3c7dd5c
共有 2 个文件被更改,包括 62 次插入21 次删除
  1. 23
      ScriptableRenderPipeline/Core/ShaderLibrary/Common.hlsl
  2. 60
      ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl

23
ScriptableRenderPipeline/Core/ShaderLibrary/Common.hlsl


// ----------------------------------------------------------------------------
#ifndef INTRINSIC_BITFIELD_EXTRACT
// unsigned integer bit field extract implementation
// Unsigned integer bit field extraction.
// Note that the intrinsic itself generates a vector instruction.
// Wrap this function with WaveReadFirstLane() to get scalar output.
uint BitFieldExtract(uint data, uint numBits, uint offset)
{
uint mask = UINT_MAX >> (32u - numBits);

bool IsBitSet(uint data, uint bitPos)
// Returns true when the single bit of 'data' located at bit index 'offset' is non-zero.
// The test is performed by extracting a 1-bit-wide field via BitFieldExtract().
bool IsBitSet(uint data, uint offset)
{
    uint extractedBit = BitFieldExtract(data, 1u, offset);
    return (extractedBit != 0);
}
// Turns on the bit of 'data' at bit index 'offset'; all other bits are left untouched.
// 'data' is modified in place (inout).
// NOTE(review): presumably 'offset' is expected to be in [0, 31] - confirm with callers.
void SetBit(inout uint data, uint offset)
{
    uint bitMask = 1u << offset;
    data = data | bitMask;
}
// Turns off the bit of 'data' at bit index 'offset'; all other bits are left untouched.
// 'data' is modified in place (inout).
// NOTE(review): presumably 'offset' is expected to be in [0, 31] - confirm with callers.
void ClearBit(inout uint data, uint offset)
{
    uint bitMask = 1u << offset;
    data = data & ~bitMask;
}
void ToggleBit(inout uint data, uint offset)
return BitFieldExtract(data, 1u, bitPos) != 0;
data ^= 1u << offset;
}
#ifndef INTRINSIC_WAVEREADFIRSTLANE

60
ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl


bsdfData.roughness = PerceptualRoughnessToRoughness(bsdfData.perceptualRoughness);
bsdfData.materialId = surfaceData.materialId;
if (surfaceData.materialId != MATERIALID_LIT_ANISO)
{
// Notify the material classification system that we should not use
// the anisotropic GGX for forward rendering.
g_FeatureFlags &= ~MATERIALFEATUREFLAGS_LIT_ANISO;
}
// IMPORTANT: In case of forward or gbuffer pass we must know what we are statically, so the compiler can do compile-time optimization
if (bsdfData.materialId == MATERIALID_LIT_STANDARD)
{

bsdfData.materialId = MATERIALID_LIT_CLEAR_COAT;
}
// We avoid divergent evaluation of the GGX, as that nearly doubles the cost.
// If the tile has anisotropy, all the pixels within the tile are evaluated as anisotropic.
if (HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
{
float anisotropy;
float3 tangentWS;
if (bsdfData.materialId == MATERIALID_LIT_ANISO)
{
float metallic;
int octTangentSign;
UnpackFloatInt8bit(inGBuffer2.a, 4.0, metallic, octTangentSign);
inGBuffer2.r = (octTangentSign & 1) ? -inGBuffer2.r : inGBuffer2.r;
inGBuffer2.g = (octTangentSign & 2) ? -inGBuffer2.g : inGBuffer2.g;
tangentWS = UnpackNormalOctEncode(inGBuffer2.rg);
anisotropy = inGBuffer2.b * 2 - 1;
FillMaterialIdStandardData(baseColor, metallic, bsdfData);
}
else
{
anisotropy = 0;
tangentWS = GetLocalFrame(bsdfData.normalWS)[0];
}
FillMaterialIdAnisoData(bsdfData.roughness, bsdfData.normalWS, tangentWS, anisotropy, bsdfData);
}
if (bsdfData.materialId == MATERIALID_LIT_STANDARD && HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_STANDARD))
{
float metallic;

int subsurfaceProfile = UnpackByte(inGBuffer2.w);
FillMaterialIdSSSData(baseColor, subsurfaceProfile, subsurfaceRadius, thickness, bsdfData);
}
else if (bsdfData.materialId == MATERIALID_LIT_ANISO && HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
{
float metallic;
int octTangentSign;
UnpackFloatInt8bit(inGBuffer2.a, 4.0, metallic, octTangentSign);
FillMaterialIdStandardData(baseColor, metallic, bsdfData);
inGBuffer2.r = (octTangentSign & 1) ? -inGBuffer2.r : inGBuffer2.r;
inGBuffer2.g = (octTangentSign & 2) ? -inGBuffer2.g : inGBuffer2.g;
float3 tangentWS = UnpackNormalOctEncode(inGBuffer2.rg);
float anisotropy = inGBuffer2.b * 2 - 1;
FillMaterialIdAnisoData(bsdfData.roughness, bsdfData.normalWS, tangentWS, anisotropy, bsdfData);
}
else if (bsdfData.materialId == MATERIALID_LIT_CLEAR_COAT && HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_CLEAR_COAT))
{

float3 iblR;
// GGX aniso
if (bsdfData.materialId == MATERIALID_LIT_ANISO && HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
// We avoid divergent evaluation of the GGX, as that nearly doubles the cost.
// If the tile has anisotropy, all the pixels within the tile are evaluated as anisotropic.
if (HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
{
preLightData.TdotV = dot(bsdfData.tangentWS, V);
preLightData.BdotV = dot(bsdfData.bitangentWS, V);

float3 anisoIblNormalWS = GetAnisotropicModifiedNormal(grainDirWS, N, V, stretch);
iblR = reflect(-V, anisoIblNormalWS);
}
else // GGX iso
else
{
preLightData.TdotV = 0;
preLightData.BdotV = 0;

float DV;
if (bsdfData.materialId == MATERIALID_LIT_ANISO && HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
// We avoid divergent evaluation of the GGX, as that nearly doubles the cost.
// If the tile has anisotropy, all the pixels within the tile are evaluated as anisotropic.
if (HasMaterialFeatureFlag(MATERIALFEATUREFLAGS_LIT_ANISO))
{
float3 H = (L + V) * invLenLV;
// For anisotropy we must not saturate these values

正在加载...
取消
保存