您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

496 行
22 KiB

//--------------------------------------------------------------------------------------------------
// Definitions
//--------------------------------------------------------------------------------------------------
// Each '#pragma kernel' line declares one compute kernel together with the macros it is built with.
// 'VolumetricLighting=<KernelName>' renames the single entry point of this file, so the same source
// is compiled into four kernels:
//   *AllLights* - LIGHTLOOP_SINGLE_PASS: punctual lights are iterated from index 0 to _PunctualLightCount.
//   *Clustered* - LIGHTLOOP_TILE_PASS + USE_CLUSTERED_LIGHTLIST: lights are looked up per light cluster.
//   *Reproj     - ENABLE_REPROJECTION=1: jittered sampling plus blending with the history buffer.
#pragma kernel VolumetricLightingAllLights VolumetricLighting=VolumetricLightingAllLights ENABLE_REPROJECTION=0 LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumetricLightingAllLightsReproj VolumetricLighting=VolumetricLightingAllLightsReproj ENABLE_REPROJECTION=1 LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumetricLightingClustered VolumetricLighting=VolumetricLightingClustered ENABLE_REPROJECTION=0 LIGHTLOOP_TILE_PASS USE_CLUSTERED_LIGHTLIST
#pragma kernel VolumetricLightingClusteredReproj VolumetricLighting=VolumetricLightingClusteredReproj ENABLE_REPROJECTION=1 LIGHTLOOP_TILE_PASS USE_CLUSTERED_LIGHTLIST
// NOTE(review): debug symbols are requested for every kernel; presumably a debugging leftover
// (it is expected to disable optimizations) - confirm before shipping.
#pragma enable_d3d11_debug_symbols
// Set to 1 to paint voxels whose freshly computed radiance differs from the reprojected radiance
// by more than 0.1 in bright red (see FillVolumetricLightingBuffer).
#define DEBUG_REPROJECTION 0
// SHADERPASS is defined after this include; presumably the header declares
// SHADERPASS_VOLUMETRIC_LIGHTING - TODO confirm.
#include "../../../ShaderPass/ShaderPass.cs.hlsl"
#define SHADERPASS SHADERPASS_VOLUMETRIC_LIGHTING
// A thread group processes a GROUP_SIZE_1D x GROUP_SIZE_1D block of voxel columns,
// dispatched as a 1D group of GROUP_SIZE_2D threads.
#define GROUP_SIZE_1D 16
#define GROUP_SIZE_2D (GROUP_SIZE_1D * GROUP_SIZE_1D)
//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------
#include "../../../../Core/ShaderLibrary/Common.hlsl"
#include "../../../../Core/ShaderLibrary/SpaceFillingCurves.hlsl"
#include "../../../../Core/ShaderLibrary/VolumeRendering.hlsl"
#include "../../../ShaderVariables.hlsl"
#include "../VolumetricLighting.cs.hlsl"
#define UNITY_MATERIAL_VOLUMETRIC // Define before including Lighting.hlsl and Material.hlsl
#include "../../../Lighting/Lighting.hlsl" // Includes Material.hlsl
#include "../../../Lighting/LightEvaluation.hlsl"
//--------------------------------------------------------------------------------------------------
// Inputs & outputs
//--------------------------------------------------------------------------------------------------
// Output: per-voxel integral accumulated from the near plane along the ray.
RW_TEXTURE3D(float4, _VBufferLightingIntegral); // RGB = radiance, A = optical depth
// Output (ENABLE_REPROJECTION only): temporally blended per-voxel radiance written for later reuse.
RW_TEXTURE3D(float4, _VBufferLightingFeedback); // RGB = radiance, A = interval length
// Input (ENABLE_REPROJECTION only): sampled at the reprojected voxel center.
// A zero alpha (interval length) is treated as "no history available".
TEXTURE3D(_VBufferLightingHistory); // RGB = radiance, A = interval length
// TODO: avoid creating another Constant Buffer...
CBUFFER_START(UnityVolumetricLighting)
// xy = offset added to the voxel center coordinate to obtain the (stratified) sample coordinate,
// z  = sample position passed to the importance sampling routines, w = unused.
// Only read when ENABLE_REPROJECTION is set.
float4 _VBufferSampleOffset; // {x, y, z}, w = unused
// Transforms float3(coord.xy, 1) into a ray direction (only the upper-left 3x3 part is used).
float4x4 _VBufferCoordToViewDirWS; // Actually just 3x3, but Unity can only set 4x4
CBUFFER_END
//--------------------------------------------------------------------------------------------------
// Implementation
//--------------------------------------------------------------------------------------------------
// Ray cast from the camera through a column of voxels.
// Distances 't' used with this struct are measured along 'directionWS'.
struct Ray
{
float3 originWS; // Set to the current view position by the kernel
float3 directionWS; // Normalized, stratified (passes through the jittered sample coordinate)
float ratioLenToZ; // Length of the stratified direction whose ViewSpaceZ = 1, i.e. t = ratioLenToZ * ViewSpaceZ
float3 centerDirWS; // Not normalized, centered: voxel-center direction scaled by the reciprocal length of the stratified one
};
// Returns the world-space point reached by travelling the distance 't'
// from the ray origin along the (stratified) ray direction.
float3 GetPointAtDistance(Ray ray, float t)
{
    float3 offsetWS = t * ray.directionWS;

    return ray.originWS + offsetWS;
}
// Same as GetPointAtDistance(), but follows the direction which passes
// through the center of the voxel ('centerDirWS') rather than the stratified one.
float3 GetCenterAtDistance(Ray ray, float t)
{
    float3 offsetWS = t * ray.centerDirWS;

    return ray.originWS + offsetWS;
}
// Computes the light integral (in-scattered radiance) within the voxel.
// Multiplication by the scattering coefficient and the phase function is performed outside.
//
// context      - light loop context forwarded to the EvaluateLight_* helpers.
// featureFlags - LIGHTFEATUREFLAGS_* mask selecting the light categories to evaluate.
// posInput     - position inputs; 'positionWS' is overwritten locally with each sample position.
// ray          - ray through the voxel; all 't' values are distances along 'ray.directionWS'.
// t0, t1       - distances along the ray to the front and to the back of the voxel.
// dt           - length of the interval (the caller passes t1 - t0).
// rndVal       - sample position forwarded to the importance sampling routines.
// extinction   - extinction coefficient of the medium, treated as constant within the voxel.
// clusterIndices, clusterDepths (LIGHTLOOP_TILE_PASS only)
//              - index and min. linear depth of the light cluster at the front [0]
//                and at the back [1] of the voxel.
//
// Returns the sum of the contributions of all evaluated lights.
float3 EvaluateVoxelLighting(LightLoopContext context, uint featureFlags, PositionInputs posInput,
                             Ray ray, float t0, float t1, float dt, float rndVal, float extinction
#ifdef LIGHTLOOP_TILE_PASS
                             , uint clusterIndices[2], float clusterDepths[2])
#else
                             )
#endif
{
    float3 voxelRadiance = 0;

    BakeLightingData unused; // Unused for now, so define once

    if (featureFlags & LIGHTFEATUREFLAGS_DIRECTIONAL)
    {
        // All directional lights share a single sample position within [t0, t1].
        float tOffset, weight;
        ImportanceSampleHomogeneousMedium(rndVal, extinction, dt, tOffset, weight);

        float t = t0 + tOffset;
        posInput.positionWS = GetPointAtDistance(ray, t);

        for (uint i = 0; i < _DirectionalLightCount; ++i)
        {
            // Fetch the light.
            DirectionalLightData light = _DirectionalLightDatas[i];
            float3 L = -light.forward; // Lights point backwards in Unity

            float3 color; float attenuation;
            EvaluateLight_Directional(context, posInput, light, unused, 0, L,
                                      color, attenuation);

            // Note: the 'weight' accounts for transmittance from 't0' to 't'.
            float intensity = attenuation * weight;

            // Compute the amount of in-scattered radiance.
            voxelRadiance += intensity * color;
        }
    }

#ifdef LIGHTLOOP_TILE_PASS
    // Loop over 1 or 2 light clusters.
    int cluster = 0;
    do
    {
        // Restrict the integration interval to the part of the voxel covered by the current cluster.
        float tMin = max(t0, ray.ratioLenToZ * clusterDepths[cluster]);
        float tMax = t1;

        if (cluster == 0 && (clusterIndices[0] != clusterIndices[1]))
        {
            tMax = min(t1, ray.ratioLenToZ * clusterDepths[1]);
        }
#else
        float tMin = t0;
        float tMax = t1;
#endif // LIGHTLOOP_TILE_PASS

        if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
        {
            uint lightCount, lightStart;

#ifdef LIGHTLOOP_TILE_PASS
            GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
                                    lightStart, lightCount);
#else
            lightCount = _PunctualLightCount;
            lightStart = 0;
#endif // LIGHTLOOP_TILE_PASS

            if (lightCount > 0)
            {
                // Invariant for both loops below: 'light' holds the light at index 'i'
                // (the index is clamped to 'last' to keep the final fetch in bounds).
                LightData light = FetchLight(lightStart, 0);

                uint i = 0, last = lightCount - 1;

                // Box lights require special handling (see the next while loop).
                while (i <= last && light.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
                {
                    float tEntr = tMin;
                    float tExit = tMax;

                    bool sampleLight = true;

                    // Perform ray-cone intersection for pyramid and spot lights.
                    if (light.lightType != GPULIGHTTYPE_POINT)
                    {
                        float lenMul = 1;

                        if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
                        {
                            // 'light.right' and 'light.up' vectors are pre-scaled on the CPU
                            // s.t. if you were to place them at the distance of 1 directly in front
                            // of the light, they would give you the "footprint" of the light.
                            // For spot lights, the cone fit is exact.
                            // For pyramid lights, however, this is the "inscribed" cone
                            // (contained within the pyramid), and we want to intersect
                            // the "escribed" cone (which contains the pyramid).
                            // Therefore, we have to scale the radii by the sqrt(2).
                            // NOTE(review): the axes are multiplied by rsqrt(2), which enlarges the cone
                            // only if the axes are scaled by the reciprocal of the footprint size
                            // (as stated for box lights below) - confirm against the CPU-side code.
                            lenMul = rsqrt(2);
                        }

                        float3 coneAxisX = lenMul * light.right;
                        float3 coneAxisY = lenMul * light.up;

                        sampleLight = IntersectRayCone(ray.originWS, ray.directionWS,
                                                       light.positionWS, light.forward,
                                                       coneAxisX, coneAxisY,
                                                       tMin, tMax, tEntr, tExit);
                    }

                    if (sampleLight)
                    {
                        float t, distSq, rcpPdf;
                        ImportanceSamplePunctualLight(rndVal, light.positionWS,
                                                      ray.originWS, ray.directionWS,
                                                      tEntr, tExit, t, distSq, rcpPdf);

                        posInput.positionWS = GetPointAtDistance(ray, t);

                        float3 lightToSample = posInput.positionWS - light.positionWS;
                        float  dist          = sqrt(distSq);
                        float3 L             = -lightToSample * rsqrt(distSq);

                        float3 color; float attenuation;
                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, dist, distSq,
                                               color, attenuation);

                        float intensity = attenuation * rcpPdf;

                        // Compute transmittance from 't0' to 't'.
                        intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);

                        // Compute the amount of in-scattered radiance.
                        voxelRadiance += color * intensity;
                    }

                    // Fetch the next light so that the loop condition can inspect its type.
                    light = FetchLight(lightStart, min(++i, last));
                }

                // The remaining lights [i, last] are treated as box lights.
                // NOTE(review): this assumes box lights are sorted to the end of the list - confirm.
                while (i <= last) // GPULIGHTTYPE_PROJECTOR_BOX
                {
                    // 'light' already holds the light at index 'i': it was fetched either by
                    // the loop above or by the previous iteration of this loop.
                    // Fetching here (before evaluation) would skip the first box light
                    // and evaluate the last one twice.
                    // Overwriting the type presumably lets the compiler resolve the light type
                    // statically inside EvaluateLight_Punctual() - TODO confirm.
                    light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;

                    // Convert the box light from OBB to AABB.
                    // 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
                    float3x3 rotMat = float3x3(light.right, light.up, light.forward);

                    float3 o = mul(rotMat, ray.originWS - light.positionWS);
                    float3 d = mul(rotMat, ray.directionWS);

                    float range = light.size.x;

                    float3 boxPt0 = float3(-1, -1, 0);
                    float3 boxPt1 = float3( 1,  1, range);

                    float tEntr, tExit;

                    if (IntersectRayAABB(o, d, boxPt0, boxPt1, tMin, tMax, tEntr, tExit))
                    {
                        float tOffset, weight;
                        ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);

                        float t = tEntr + tOffset;
                        posInput.positionWS = GetPointAtDistance(ray, t);

                        float3 L = -light.forward;

                        float3 color; float attenuation;
                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, 1, 1,
                                               color, attenuation);

                        // Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
                        float intensity = attenuation * weight;

                        // Compute transmittance from 't0' to 'tEntr'.
                        intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);

                        // Compute the amount of in-scattered radiance.
                        voxelRadiance += intensity * color;
                    }

                    // Advance to the next box light.
                    light = FetchLight(lightStart, min(++i, last));
                }
            }
        }

#ifdef LIGHTLOOP_TILE_PASS
        cluster++;
        // Check whether the voxel is completely inside the light cluster.
    } while ((cluster < 2) && (clusterIndices[0] != clusterIndices[1]));
#endif // LIGHTLOOP_TILE_PASS

    return voxelRadiance;
}
// Samples the linearly interpolated V-Buffer. Out-of-bounds loads return 0.
//
// VBufferLighting, trilinearSampler - 3D texture to sample and the sampler to use.
// positionNDC                       - 2D position, multiplied by 'VBufferScale' to obtain the UV.
// linearDepth                       - linear depth, converted to the W coordinate
//                                     using the logarithmic depth encoding.
// VBufferScale                      - scale accounting for the visible area of the V-Buffer.
// VBufferDepthEncodingParams        - parameters forwarded to EncodeLogarithmicDepth().
//
// The bounds test is exclusive: a coordinate exactly equal to 0 or 1 is considered out of bounds.
float4 SampleVBuffer(TEXTURE3D_ARGS(VBufferLighting, trilinearSampler),
                     float2 positionNDC, float linearDepth,
                     float2 VBufferScale,
                     float4 VBufferDepthEncodingParams)
{
    float d = EncodeLogarithmicDepth(linearDepth, VBufferDepthEncodingParams);

    // Account for the visible area of the V-Buffer.
    float2 uv = positionNDC * VBufferScale;

    // TODO: Unity doesn't support samplers clamping to border, so we have to do it ourselves.
    bool isInBounds = Min3(uv.x, uv.y, d) > 0 && Max3(uv.x, uv.y, d) < 1;

    [branch] if (isInBounds)
    {
        // We use hardware trilinear filtering.
        // In theory, this is wrong, since the distance between slices is log-encoded.
        // In practice, doing the right thing in a loop is simply too expensive.
        return SAMPLE_TEXTURE3D_LOD(VBufferLighting, trilinearSampler, float3(uv, d), 0);
    }
    else
    {
        return 0;
    }
}
// Computes the in-scattered radiance along the ray.
//
// Marches the ray slice by slice, starting from the near plane. For each voxel, it
// - evaluates the lighting within the voxel (EvaluateVoxelLighting),
// - optionally blends the result with the reprojected history and stores it
//   in '_VBufferLightingFeedback' (ENABLE_REPROJECTION only),
// - accumulates the radiance attenuated by the transmittance, and stores the running total
//   together with the optical depth at the center of the voxel in '_VBufferLightingIntegral'.
//
// context      - light loop context forwarded to EvaluateVoxelLighting().
// featureFlags - LIGHTFEATUREFLAGS_* mask forwarded to EvaluateVoxelLighting().
// posInput     - position inputs; 'positionSS' addresses the voxel column, 'tileCoord' the light cluster tile.
// ray          - ray passing through the voxel column.
void FillVolumetricLightingBuffer(LightLoopContext context, uint featureFlags,
                                  PositionInputs posInput, Ray ray)
{
    float z0 = _VBufferDepthEncodingParams.x; // Start integration from the near plane
    float t0 = ray.ratioLenToZ * z0;
    float de = rcp(VBUFFER_SLICE_COUNT);      // Log-encoded distance between slices

    float3 totalRadiance = 0;
    float  opticalDepth  = 0;

    uint sliceCountHack = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.x); // Prevent unrolling...

#ifdef LIGHTLOOP_TILE_PASS
    // Our voxel is not necessarily completely inside a single light cluster.
    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
    // to compute min/max light indices, and then use this range for the entire slice.
    uint  clusterIndices[2];
    float clusterDepths[2];
    clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, z0);
    clusterDepths[0]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
#endif // LIGHTLOOP_TILE_PASS

    // TODO: replace 'sliceCountHack' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
    for (uint slice = 0; slice < sliceCountHack; slice++)
    {
        float e1 = slice * de + de; // (slice + 1) / sliceCount
        float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthEncodingParams);
        float t1 = ray.ratioLenToZ * z1;
        float dt = t1 - t0;

#ifdef LIGHTLOOP_TILE_PASS
        clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, z1);
        clusterDepths[1]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);
#endif

        // Compute the -exact- position of the center of the voxel.
        // It's important since the accumulated value of the integral is stored at the center.
        // We will use it for participating media sampling and reprojection.
        float  tc       = t0 + 0.5 * dt;
        float3 centerWS = GetCenterAtDistance(ray, tc);

        // Sample the participating medium at 'tc' (or 'centerWS').
        // We consider it to be constant along the interval [t0, t1] (within the voxel).
        // TODO: piecewise linear.
        float3 scattering = _GlobalFog_Scattering;
        float  extinction = _GlobalFog_Extinction;

#if ENABLE_REPROJECTION
        // This is a sequence of 7 equidistant numbers from 1/14 to 13/14.
        // Each of them is the centroid of the interval of length 2/14.
        float rndVal = _VBufferSampleOffset.z;
#else
        float rndVal = 0.5;
#endif

        float3 voxelRadiance = EvaluateVoxelLighting(context, featureFlags, posInput,
                                                     ray, t0, t1, dt, rndVal, extinction
#ifdef LIGHTLOOP_TILE_PASS
                                                     , clusterIndices, clusterDepths);
#else
                                                     );
#endif

#if ENABLE_REPROJECTION
        // Reproject the history at 'centerWS'.
        float2 reprojPosNDC = ComputeNormalizedDeviceCoordinates(centerWS, _PrevViewProjMatrix);
        float  reprojZ      = mul(_PrevViewProjMatrix, float4(centerWS, 1)).w;
        float4 reprojValue  = SampleVBuffer(TEXTURE3D_PARAM(_VBufferLightingHistory, s_trilinear_clamp_sampler),
                                            reprojPosNDC, reprojZ,
                                            _VBufferResolutionAndScale.zw,
                                            _VBufferDepthEncodingParams);

        // Compute the exponential moving average over 'n' frames:
        // X = (1 - a) * ValueAtFrame[n] + a * AverageOverPreviousFrames.
        // We want each sample to be uniformly weighted by (1 / n):
        // X = (1 / n) * Sum{i from 1 to n}{ValueAtFrame[i]}.
        // Therefore, we get:
        // (1 - a) = (1 / n) => a = (1 - 1 / n) = (n - 1) / n,
        // X = (1 / n) * ValueAtFrame[n] + (1 - 1 / n) * AverageOverPreviousFrames.
        // Why does it work? We need to make the following assumption:
        // AverageOverPreviousFrames ≈ AverageOverFrames[n - 1].
        // AverageOverFrames[n - 1] = (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // This implies that the reprojected (accumulated) value has mostly converged.
        // X = (1 / n) * ValueAtFrame[n] + ((n - 1) / n) * (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // X = (1 / n) * ValueAtFrame[n] + (1 / n) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // X = Sum{i from 1 to n}{ValueAtFrame[i] / n}.
        float numFrames     = 7;
        float frameWeight   = 1 / numFrames;
        float historyWeight = 1 - frameWeight;

        // The accuracy of the integral linearly decreases with the length of the interval.
        // Therefore, reprojecting longer intervals should result in a lower confidence.
        // TODO: doesn't seem to be worth it, removed for now.

        // Perform temporal blending.
        // Both radiance values are obtained by integrating over line segments of different length.
        // Blending only makes sense if the length of both intervals is the same.
        // Therefore, the reprojected radiance needs to be rescaled by (frame_dt / reproj_dt).
        // A zero interval length means that no history is available (e.g. an out-of-bounds sample).
        bool   reprojSuccess   = reprojValue.a != 0;
        float  blendFactor     = reprojSuccess ? historyWeight : 0;
        float  reprojRcpLen    = reprojSuccess ? rcp(reprojValue.a) : 0;
        float  lengthScale     = dt * reprojRcpLen;
        float3 reprojRadiance  = reprojValue.rgb;
        float3 blendedRadiance = (1 - blendFactor) * voxelRadiance + blendFactor * lengthScale * reprojRadiance;

        // Store the feedback for the voxel.
        _VBufferLightingFeedback[uint3(posInput.positionSS, slice)] = float4(blendedRadiance, dt);
#else
        float3 blendedRadiance = voxelRadiance;
#endif

        // The debug view compares against 'reprojValue', which only exists when reprojection
        // is enabled. Requiring both macros keeps the non-reprojection kernels compiling
        // when DEBUG_REPROJECTION is turned on.
        // The override is applied after the feedback has been stored, so it is not fed back.
#if DEBUG_REPROJECTION && ENABLE_REPROJECTION
        if (distance(voxelRadiance, reprojValue.rgb) > 0.1) blendedRadiance = float3(1000, 0, 0);
#endif

        // Compute the transmittance from the camera to 't0'.
        float transmittance = Transmittance(opticalDepth);

        // Integral{a, b}{Transmittance(0, t) * L_s(t) dt} = Transmittance(0, a) * Integral{a, b}{Transmittance(0, t - a) * L_s(t) dt}.
        totalRadiance += (transmittance * IsotropicPhaseFunction()) * scattering * blendedRadiance;

        // Compute the optical depth up to the center of the interval.
        opticalDepth += 0.5 * extinction * dt;

        // Store the voxel data.
        _VBufferLightingIntegral[uint3(posInput.positionSS, slice)] = float4(totalRadiance, opticalDepth);

        // Compute the optical depth up to the end of the interval.
        opticalDepth += 0.5 * extinction * dt;

        // The back of this voxel is the front of the next one.
        t0 = t1;

#ifdef LIGHTLOOP_TILE_PASS
        clusterIndices[0] = clusterIndices[1];
        clusterDepths[0]  = clusterDepths[1];
#endif // LIGHTLOOP_TILE_PASS
    }
}
// Kernel entry point ('VolumetricLighting' is renamed per kernel by the '#pragma kernel' lines).
// Each thread builds the ray passing through one voxel column of the V-Buffer
// and fills all slices of that column.
[numthreads(GROUP_SIZE_2D, 1, 1)]
void VolumetricLighting(uint2 groupId       : SV_GroupID,
                        uint  groupThreadId : SV_GroupThreadID)
{
    // Perform compile-time checks: both tile sizes must be powers of 2.
    if (!(IsPower2(VBUFFER_TILE_SIZE) && IsPower2(TILE_SIZE_CLUSTERED))) return;

    // Note: any factor of 64 is a suitable wave size for our algorithm.
    // (These three values are currently not consumed below.)
    uint waveIndex = WaveReadFirstLane(groupThreadId / 64);
    uint laneIndex = groupThreadId % 64;
    uint quadIndex = laneIndex / 4;

    // Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
    uint2 threadCoord = DecodeMorton2D(groupThreadId);
    uint2 groupOrigin = groupId * GROUP_SIZE_1D;
    uint2 voxelCoord  = groupOrigin + threadCoord;

    // Coordinate of the light cluster tile which contains the voxel.
    uint2 tileCoord = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;

    uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

    if (voxelsPerClusterTile >= 64)
    {
        // TODO: this is a compile-time test, make sure the compiler actually scalarizes.
        tileCoord = WaveReadFirstLane(tileCoord);
    }

    // Discard the threads which fall outside of the V-Buffer.
    [branch] if (any(voxelCoord >= (uint2)_VBufferResolutionAndScale.xy))
    {
        return;
    }

    float2 centerCoord = voxelCoord + 0.5;
    float2 sampleCoord = centerCoord;

#if ENABLE_REPROJECTION
    // Jitter the sample position.
    sampleCoord += _VBufferSampleOffset.xy;
#endif

    // Compute the (stratified) ray direction s.t. its ViewSpaceZ = 1.
    float3 rayDir = mul(-float3(sampleCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
    float  lenSq  = dot(rayDir, rayDir);
    float  lenRcp = rsqrt(lenSq);
    float  len    = lenSq * lenRcp; // Length of 'rayDir'

#if ENABLE_REPROJECTION
    // Compute the ray direction which passes through the center of the voxel s.t. its ViewSpaceZ = 1.
    float3 rayCenterDir = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
#else
    // Without jittering, both directions coincide.
    float3 rayCenterDir = rayDir;
#endif

    Ray ray;
    ray.originWS    = GetCurrentViewPosition();
    ray.directionWS = rayDir * lenRcp;
    ray.centerDirWS = rayCenterDir * lenRcp;
    ray.ratioLenToZ = len;

    // TODO
    LightLoopContext context;
    context.shadowContext = InitShadowContext();

    // Evaluate every light category.
    uint featureFlags = 0xFFFFFFFF;

    PositionInputs posInput = GetPositionInput(voxelCoord, rcp(_VBufferResolutionAndScale.xy), tileCoord);

    FillVolumetricLightingBuffer(context, featureFlags, posInput, ray);
}