
Optimize area lights a bit

Evgenii Golubev 7 年前
共有 2 个文件被更改,包括 59 次插入和 61 次删除
  1. 102
  2. 18


float diffuseFGD;
// Area light
// NOTE(review): ltcXformGGX and ltcXformDisneyDiffuse are each declared twice in this listing.
// This looks like the before/after lines of a diff shown together - confirm which pair is current.
float3x3 ltcXformGGX; // TODO: make sure the compiler is not wasting VGPRs on constants
float3x3 ltcXformDisneyDiffuse; // TODO: make sure the compiler is not wasting VGPRs on constants
float3x3 orthoBasisVN; // Right-handed view-dependent orthogonal basis around the normal
float3x3 ltcXformGGX; // Sparse: should only use 4x VGPRs. Could be scalarized
float3x3 ltcXformDisneyDiffuse; // Sparse: should only use 4x VGPRs. Could be scalarized
float ltcGGXFresnelMagnitudeDiff; // The difference of magnitudes of GGX and Fresnel
float ltcGGXFresnelMagnitude;
float ltcDisneyDiffuseMagnitude;

// Convert the perceptual roughness to a mipmap level and store it as iblMipLevel.
preLightData.iblMipLevel = PerceptualRoughnessToMipmapLevel(bsdfData.perceptualRoughness);
// Area light
// Build the view-dependent basis around the normal, one vector per row:
//   row 0: V minus normalWS scaled by NdotV, normalized;
//   row 1: cross(normalWS, row 0), normalized;
//   row 2: normalWS itself.
// NOTE(review): assuming preLightData.NdotV == dot(normalWS, V), row 0 normalizes a zero-length
// vector when V is parallel to the normal - confirm NdotV is clamped upstream.
preLightData.orthoBasisVN[0] = normalize(V - bsdfData.normalWS * preLightData.NdotV);
preLightData.orthoBasisVN[1] = normalize(cross(bsdfData.normalWS, preLightData.orthoBasisVN[0]));
preLightData.orthoBasisVN[2] = bsdfData.normalWS;
// UVs for sampling the LUTs
// The LUT coordinates are (perceptualRoughness, theta * INV_HALF_PI), remapped by LTC_LUT_SCALE and LTC_LUT_OFFSET.
// NOTE(review): 'NdotV' here is a bare local not visible in this excerpt, while the basis above
// uses preLightData.NdotV - confirm both hold the same value.
float theta = FastACos(NdotV);
float2 uv = LTC_LUT_OFFSET + LTC_LUT_SCALE * float2(bsdfData.perceptualRoughness, theta * INV_HALF_PI);

// NOTE(review): this excerpt rotates P1/P2 twice - once with a locally built 'basis' and once with
// preLightData.orthoBasisVN. It looks like the before/after lines of a diff shown together;
// applying both in sequence would not be a no-op, so confirm which variant is current.
// Scale both the diffuse and the specular contribution by the light intensity.
lightData.diffuseScale *= intensity;
lightData.specularScale *= intensity;
// TODO: This could be precomputed.
// Translate the light s.t. the shaded point is at the origin of the coordinate system.
lightData.positionWS -= positionWS;
// TODO: some of this could be precomputed.
// Translate the endpoints s.t. the shaded point is at the origin of the coordinate system.
P1 -= positionWS;
P2 -= positionWS;
// Construct a view-dependent orthonormal basis around N.
// TODO: it could be stored in PreLightData, since all LTC lights compute it more than once.
float3x3 basis;
basis[0] = normalize(V - bsdfData.normalWS * preLightData.NdotV);
basis[1] = normalize(cross(bsdfData.normalWS, basis[0]));
basis[2] = bsdfData.normalWS;
// Rotate the endpoints into the local coordinate system (left-handed).
P1 = mul(P1, transpose(basis));
P2 = mul(P2, transpose(basis));
// Rotate the endpoints into the local coordinate system.
// This variant reads the basis from preLightData instead of rebuilding it per light.
P1 = mul(P1, transpose(preLightData.orthoBasisVN));
P2 = mul(P2, transpose(preLightData.orthoBasisVN));
// Compute the binormal.
// B is the normalized cross product of the two rotated endpoints.
float3 B = normalize(cross(P1, P2));

// NOTE(review): this excerpt interleaves two variants of the rectangle-light evaluation:
//   - one builds 'matL' from p0..p3 and calls LTCEvaluate(matL, ...);
//   - one builds 'lightVerts', rotates it with preLightData.orthoBasisVN, and calls
//     PolygonIrradiance(mul(lightVerts, ltcMatrix)).
// 'ltcValue' is declared twice and the [branch] bodies have lost their braces. This looks like the
// before/after lines of a diff shown together - confirm which variant is current.
// Scale both the diffuse and the specular contribution by the light intensity.
lightData.diffuseScale *= intensity;
lightData.specularScale *= intensity;
// TODO: store 4 points and save 12 cycles (24x MADs - 12x MOVs).
// The four corners are the light position offset by +/- halfWidth along 'right' and +/- halfHeight along 'up'.
float3 p0 = lightData.positionWS + lightData.right * halfWidth + lightData.up * halfHeight;
float3 p1 = lightData.positionWS + lightData.right * halfWidth + lightData.up * -halfHeight;
float3 p2 = lightData.positionWS + lightData.right * -halfWidth + lightData.up * -halfHeight;
float3 p3 = lightData.positionWS + lightData.right * -halfWidth + lightData.up * halfHeight;
// Translate the light s.t. the shaded point is at the origin of the coordinate system.
lightData.positionWS -= positionWS;
// matL holds the four corners relative to the shaded point, one corner per row.
float4x3 matL = float4x3(p0, p1, p2, p3) - float4x3(positionWS, positionWS, positionWS, positionWS);
float4x3 lightVerts;
float ltcValue;
// TODO: some of this could be precomputed.
// Same corner layout as p0..p3, built from the already translated lightData.positionWS.
lightVerts[0] = lightData.positionWS + lightData.right * halfWidth + lightData.up * halfHeight;
lightVerts[1] = lightData.positionWS + lightData.right * halfWidth + lightData.up * -halfHeight;
lightVerts[2] = lightData.positionWS + lightData.right * -halfWidth + lightData.up * -halfHeight;
lightVerts[3] = lightData.positionWS + lightData.right * -halfWidth + lightData.up * halfHeight;
// Rotate the endpoints into the local coordinate system.
lightVerts = mul(lightVerts, transpose(preLightData.orthoBasisVN));
float3x3 ltcMatrix;
float ltcValue;
// Diffuse term: the identity transform and the Disney-diffuse transform both appear here.
// NOTE(review): the condition selecting between them is not visible in this excerpt.
ltcValue = LTCEvaluate(matL, V, bsdfData.normalWS, preLightData.NdotV, k_identity3x3);
ltcMatrix = k_identity3x3;
ltcValue = LTCEvaluate(matL, V, bsdfData.normalWS, preLightData.NdotV, preLightData.ltcXformDisneyDiffuse);
ltcMatrix = preLightData.ltcXformDisneyDiffuse;
// Polygon irradiance in the transformed configuration.
ltcValue = PolygonIrradiance(mul(lightVerts, ltcMatrix));
[branch] if (bsdfData.enableTransmission)
// Flip the view vector and the normal. The bitangent stays the same.
// The matrix negates the first and third components and keeps the second.
float3x3 flipMatrix = float3x3(-1, 0, 0,
0, 1, 0,
0, 0, -1);
// Use the Lambertian approximation for performance reasons.
// The matrix multiplication should not generate any extra ALU on GCN.
ltcMatrix = mul(flipMatrix, k_identity3x3);
// Polygon irradiance in the transformed configuration.
ltcValue = PolygonIrradiance(mul(lightVerts, ltcMatrix));
// We use diffuse lighting for accumulation since it is going to be blurred during the SSS pass.
diffuseLighting += EvaluateTransmission(bsdfData, ltcValue, lightData.color, lightData.diffuseScale, 1);
// Specular term: uses the GGX transform and is scaled by specularScale.
ltcMatrix = preLightData.ltcXformGGX;
// Polygon irradiance in the transformed configuration.
ltcValue = PolygonIrradiance(mul(lightVerts, ltcMatrix));
ltcValue *= lightData.specularScale;
ltcValue = LTCEvaluate(matL, V, bsdfData.normalWS, preLightData.NdotV, preLightData.ltcXformGGX);
ltcValue *= lightData.specularScale;
[branch] if (bsdfData.enableTransmission)
#if 1 // Reference transmission implementation
// Evaluate the light from the back side by negating both the normal and the view vector.
float3 backN = -bsdfData.normalWS;
float3 backV = -V;
// Use the Lambertian model for performance and simplicity.
ltcValue = LTCEvaluate(matL, backV, backN, preLightData.NdotV, k_identity3x3);
#endif // Reference transmission implementation
// We use diffuse lighting for accumulation since it is going to be blurred during the SSS pass.
diffuseLighting += EvaluateTransmission(bsdfData, ltcValue, lightData.color, lightData.diffuseScale, 1);


// For polygonal lights.
// Transforms the light vertices into a local, LTC-transformed frame and returns their polygon irradiance.
//   L     - light vertices, one per row (4 rows of float3)
//   V, N  - vectors used to build the local basis; N becomes the third basis row
//   NdotV - scale applied to N before it is subtracted from V; presumably dot(N, V) - confirm with callers
//   invM  - 3x3 matrix applied after the rotation into the local basis
// Returns the result of PolygonIrradiance() on the transformed vertices.
float LTCEvaluate(float4x3 L, float3 V, float3 N, float NdotV, float3x3 invM)
// Construct a view-dependent orthonormal basis around N.
// TODO: it could be stored in PreLightData, since all LTC lights compute it more than once.
float3x3 basis;
basis[0] = normalize(V - N * NdotV);
basis[1] = normalize(cross(N, basis[0]));
basis[2] = N;
// Rotate the area light into the local basis.
// The rotation is folded into invM first, so the vertices need only a single matrix multiply.
invM = mul(transpose(basis), invM);
L = mul(L, invM);
// Polygon irradiance in the transformed configuration
return PolygonIrradiance(L);
float LineFpo(float tLDDL, float lrcpD, float rcpD)
// Compute: ((l / d) / (d * d + l * l)) + (1.0 / (d * d)) * atan(l / d).
