您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
321 行
14 KiB
321 行
14 KiB
//--------------------------------------------------------------------------------------------------
|
|
// Definitions
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
// Two variants of the same entry point: each '#pragma kernel' line renames 'VolumetricLighting'
// and supplies the light-loop defines that the '#ifdef LIGHTLOOP_TILE_PASS' sections below test.
#pragma kernel VolumetricLightingAllLights VolumetricLighting=VolumetricLightingAllLights LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumetricLightingClustered VolumetricLighting=VolumetricLightingClustered LIGHTLOOP_TILE_PASS USE_CLUSTERED_LIGHTLIST

// Debug symbols are meant for local shader debugging only; re-enable temporarily when needed.
// #pragma enable_d3d11_debug_symbols

#include "../../../ShaderPass/ShaderPass.cs.hlsl"
#define SHADERPASS SHADERPASS_VOLUMETRIC_LIGHTING

// Thread group edge length; the kernel is dispatched with GROUP_SIZE_2D threads in a 1D group.
#define GROUP_SIZE_1D 16
#define GROUP_SIZE_2D (GROUP_SIZE_1D * GROUP_SIZE_1D)
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
// Included headers
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
#include "../../../../Core/ShaderLibrary/Common.hlsl"
|
|
#include "../../../../Core/ShaderLibrary/SpaceFillingCurves.hlsl"
|
|
#include "../../../../Core/ShaderLibrary/VolumeRendering.hlsl"
|
|
|
|
#include "../../../ShaderVariables.hlsl"
|
|
#include "../VolumetricLighting.cs.hlsl"
|
|
#define UNITY_MATERIAL_VOLUMETRIC // Define before including Lighting.hlsl and Material.hlsl
|
|
#include "../../../Lighting/Lighting.hlsl" // Includes Material.hlsl
|
|
#include "../../../Lighting/LightEvaluation.hlsl"
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
// Inputs & outputs
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
// Output volume written by this pass. RGB = radiance, A = optical depth.
RW_TEXTURE3D(float4, _VBufferLighting);

// Same channel layout as the output volume: RGB = radiance, A = optical depth.
TEXTURE3D(_VBufferLightingPrev);

CBUFFER_START(UnityVolumetricLighting)
    // Only the upper-left 3x3 part is meaningful; it is stored as 4x4 because Unity
    // can only set 4x4 matrices.
    float4x4 _VBufferCoordToViewDirWS;
CBUFFER_END
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
// Implementation
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
// A world-space ray together with the factor needed to convert view-space depth
// into a distance along the ray.
struct Ray
{
    // World-space starting point of the ray.
    float3 originWS;

    // World-space direction; expected to be unit length.
    float3 directionWS;

    // Multiplier that turns a view-space Z value into a distance along the ray
    // (the kernel sets it to the length of the direction vector whose ViewSpaceZ = 1).
    float  ratioLenToZ;
};
|
|
|
|
// Returns the world-space point located 't' units along 'ray', measured from its origin.
float3 GetPointAtDistance(Ray ray, float t)
{
    float3 offsetWS = t * ray.directionWS;

    return ray.originWS + offsetWS;
}
|
|
|
|
// Marches along 'ray' through every V-buffer slice (front to back), accumulating the
// in-scattered radiance and the optical depth, and stores the running totals into
// '_VBufferLighting' at (posInput.positionSS, slice) for each slice.
//
//   context      - light loop context forwarded to the EvaluateLight_*() calls.
//   featureFlags - LIGHTFEATUREFLAGS_* bit mask selecting which light categories are evaluated.
//   posInput     - position inputs of the voxel column; 'positionWS' is overwritten
//                  with each light sample position before evaluating a light.
//   ray          - camera ray passing through the voxel column.
void FillVolumetricLightingBuffer(LightLoopContext context, uint featureFlags,
                                  PositionInputs posInput, Ray ray)
{
    // Placeholder argument for the EvaluateLight_*() calls below; declared once up front.
    BakeLightingData bakeDataUnused;

    // The march begins at the origin of the ray.
    float zBegin = 0;
    float tBegin = 0;

    // Distance between two consecutive slices in log-encoded depth.
    float encodedStep = rcp(VBUFFER_SLICE_COUNT);

    float3 accumRadiance     = 0;
    float  accumOpticalDepth = 0;

    // The slice count is routed through a runtime value to prevent loop unrolling.
    uint sliceLimit = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.x);

#ifdef LIGHTLOOP_TILE_PASS
    // A voxel may straddle two light clusters along the ray, so both the cluster at
    // the near end [0] and the cluster at the far end [1] of the voxel are tracked.
    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
    // to compute min/max light indices, and then use this range for the entire slice.
    uint  clusterIndices[2];
    float clusterDepths[2];

    clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, zBegin);
    clusterDepths[0]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
#endif // LIGHTLOOP_TILE_PASS

    // TODO: replace 'sliceLimit' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
    for (uint sliceIdx = 0; sliceIdx < sliceLimit; sliceIdx++)
    {
        // Far boundary of the slice: encoded depth (slice + 1) / sliceCount, then linear
        // depth, then distance along the ray.
        float encodedEnd = sliceIdx * encodedStep + encodedStep;
        float zEnd       = DecodeLogarithmicDepth(encodedEnd, _VBufferDepthEncodingParams);
        float tEnd       = ray.ratioLenToZ * zEnd;
        float segLen     = tEnd - tBegin;

        // Center of the voxel; intended for participating media sampling and reprojection
        // (not consumed yet, see the TODO next to the buffer write).
        float  tMid          = tBegin + 0.5 * segLen;
        float3 voxelCenterWS = GetPointAtDistance(ray, tMid);

        // The participating medium is treated as constant over [tBegin, tEnd] (within the voxel).
        float3 scattering = _GlobalFog_Scattering;
        float  extinction = _GlobalFog_Extinction;

        // TODO: use a low-discrepancy point set.
        float rndVal = 0.5;

        // Radiance gathered from all lights for this slice.
        float3 sliceRadiance = 0;

        if (featureFlags & LIGHTFEATUREFLAGS_DIRECTIONAL)
        {
            // One sample position per slice, shared by all directional lights.
            float tOffset, weight;
            ImportanceSampleHomogeneousMedium(rndVal, extinction, segLen, tOffset, weight);

            posInput.positionWS = GetPointAtDistance(ray, tBegin + tOffset);

            for (uint dirIdx = 0; dirIdx < _DirectionalLightCount; ++dirIdx)
            {
                DirectionalLightData dirLight = _DirectionalLightDatas[dirIdx];

                // Lights point backwards in Unity.
                float3 L = -dirLight.forward;

                float3 color;
                float  attenuation;
                EvaluateLight_Directional(context, posInput, dirLight, bakeDataUnused, 0, L,
                                          color, attenuation);

                // Sample weight times the transmittance from the slice start to the sample.
                float intensity = (attenuation * weight)
                                * TransmittanceHomogeneousMedium(extinction, tOffset);

                // Amount of in-scattered radiance contributed by this light.
                sliceRadiance += intensity * color;
            }
        }

    #ifdef LIGHTLOOP_TILE_PASS
        clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, zEnd);
        clusterDepths[1]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);

        // Visit 1 or 2 light clusters, depending on whether the voxel straddles a boundary.
        for (int cluster = 0; cluster < 2; cluster++)
        {
            // Restrict the first cluster's interval to end where the second cluster begins.
            bool stopAtNextCluster = (cluster == 0) && (clusterIndices[0] != clusterIndices[1]);

            float tMin = max(tBegin, ray.ratioLenToZ * clusterDepths[cluster]);
            float tMax = stopAtNextCluster ? min(tEnd, ray.ratioLenToZ * clusterDepths[1])
                                           : tEnd;
    #else
            float tMin = tBegin;
            float tMax = tEnd;
    #endif // LIGHTLOOP_TILE_PASS

            if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
            {
                uint lightCount, lightStart;

            #ifdef LIGHTLOOP_TILE_PASS
                GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
                                        lightStart, lightCount);
            #else
                lightCount = _PunctualLightCount;
                lightStart = 0;
            #endif // LIGHTLOOP_TILE_PASS

                // The guard also keeps 'lightCount - 1' below from wrapping around.
                if (lightCount > 0)
                {
                    LightData lightData = FetchLight(lightStart, 0);

                    uint i    = 0;
                    uint last = lightCount - 1;

                    // First pass: every light up to the first box light.
                    // Box lights require special handling (see the second loop).
                    while (i <= last && lightData.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
                    {
                        float tEntr = tMin;
                        float tExit = tMax;

                        bool sampleLight = true;

                        // Pyramid and spot lights: clip the interval against the light's cone.
                        if (lightData.lightType != GPULIGHTTYPE_POINT)
                        {
                            // 'lightData.right' and 'lightData.up' vectors are pre-scaled on the CPU
                            // s.t. if you were to place them at the distance of 1 directly in front
                            // of the light, they would give you the "footprint" of the light.
                            float3 coneAxisX = lightData.right;
                            float3 coneAxisY = lightData.up;

                            if (lightData.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
                            {
                                // For spot lights, the cone fit is exact.
                                // For pyramid lights, however, this is the "inscribed" cone
                                // (contained within the pyramid), and we want to intersect
                                // the "escribed" cone (which contains the pyramid),
                                // which differs from the inscribed one by a factor of sqrt(2).
                                // NOTE(review): the axes are multiplied by rsqrt(2), i.e. 1 / sqrt(2);
                                // confirm this matches the axis convention of ConeRayIntersect().
                                // TODO: pre-scale this on the CPU.
                                coneAxisX *= rsqrt(2);
                                coneAxisY *= rsqrt(2);
                            }

                            sampleLight = ConeRayIntersect(ray.originWS, ray.directionWS,
                                                           lightData.positionWS, lightData.forward,
                                                           coneAxisX, coneAxisY,
                                                           tMin, tMax, tEntr, tExit);
                        }

                        if (sampleLight)
                        {
                            float t, distSq, rcpPdf;
                            ImportanceSamplePunctualLight(rndVal, lightData.positionWS,
                                                          ray.originWS, ray.directionWS,
                                                          tEntr, tExit, t, distSq, rcpPdf);

                            posInput.positionWS = GetPointAtDistance(ray, t);

                            float3 lightToSample = posInput.positionWS - lightData.positionWS;
                            float  dist          = sqrt(distSq);
                            float3 L             = -lightToSample * rsqrt(distSq);

                            float3 color;
                            float  attenuation;
                            EvaluateLight_Punctual(context, posInput, lightData, bakeDataUnused, 0, L, dist, distSq,
                                                   color, attenuation);

                            // Sample weight times the transmittance from the slice start to 't'.
                            float intensity = (attenuation * rcpPdf)
                                            * TransmittanceHomogeneousMedium(extinction, t - tBegin);

                            // Amount of in-scattered radiance contributed by this light.
                            sliceRadiance += color * intensity;
                        }

                        // Advance; the fetch index is clamped so it never runs past the last light.
                        ++i;
                        lightData = FetchLight(lightStart, min(i, last));
                    }

                    // Second pass: the remaining lights are only walked over here;
                    // they contribute nothing to 'sliceRadiance'.
                    while (i <= last)
                    {
                        ++i;
                        lightData = FetchLight(lightStart, min(i, last));
                        lightData.lightType = GPULIGHTTYPE_PROJECTOR_BOX;
                    }
                }
            }

    #ifdef LIGHTLOOP_TILE_PASS
            // A voxel lying completely inside one light cluster needs a single iteration.
            if (clusterIndices[0] == clusterIndices[1]) break;
        }

        // The far cluster of this slice becomes the near cluster of the next one.
        clusterIndices[0] = clusterIndices[1];
        clusterDepths[0]  = clusterDepths[1];
    #endif // LIGHTLOOP_TILE_PASS

        // Transmittance from the camera to the start of the slice.
        float transmittance = Transmittance(accumOpticalDepth);

        // Integral{a, b}{Transmittance(0, t) * Li(t) dt} = Transmittance(0, a) * Integral{a, b}{Transmittance(0, t - a) * Li(t) dt}.
        accumRadiance += (transmittance * IsotropicPhaseFunction()) * scattering * sliceRadiance;

        // Optical depth of half the slice; added once before and once after the store,
        // so that the stored value is the optical depth up to the center of the interval.
        float halfSliceDepth = 0.5 * extinction * segLen;

        accumOpticalDepth += halfSliceDepth;

        // Store the voxel data. TODO: reprojection of 'tMid' (or 'voxelCenterWS').
        _VBufferLighting[uint3(posInput.positionSS, sliceIdx)] = float4(accumRadiance, accumOpticalDepth);

        // Optical depth up to the end of the interval.
        accumOpticalDepth += halfSliceDepth;

        tBegin = tEnd;
    }
}
|
|
|
|
// Kernel entry point. Each thread handles one voxel column (one XY coordinate of the
// V-buffer): it builds the camera ray through that column and lets
// FillVolumetricLightingBuffer() march it through all slices.
//
//   groupId       - 2D index of the thread group.
//   groupThreadId - 1D index of the thread within the group, in [0, GROUP_SIZE_2D).
[numthreads(GROUP_SIZE_2D, 1, 1)]
void VolumetricLighting(uint2 groupId       : SV_GroupID,
                        uint  groupThreadId : SV_GroupThreadID)
{
    // Perform compile-time checks.
    if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;

    // Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
    uint2 groupCoord  = DecodeMorton2D(groupThreadId);
    uint2 groupOffset = groupId * GROUP_SIZE_1D;
    uint2 voxelCoord  = groupOffset + groupCoord;
    uint2 tileCoord   = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;

    uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

    if (voxelsPerClusterTile >= 64)
    {
        // TODO: this is a compile-time test, make sure the compiler actually scalarizes.
        tileCoord = WaveReadFirstLane(tileCoord);
    }

    // Threads that fall outside of the V-buffer have nothing to do.
    [branch] if (voxelCoord.x >= (uint)_VBufferResolutionAndScale.x ||
                 voxelCoord.y >= (uint)_VBufferResolutionAndScale.y)
    {
        return;
    }

    // Sample at the voxel center. TODO: use a low-discrepancy point set.
    float2 sampleCoord = voxelCoord + 0.5;

    // Compute the ray direction s.t. its ViewSpaceZ = 1.
    float3 dir    = -mul(float3(sampleCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
    float  lenSq  = dot(dir, dir);
    float  lenRcp = rsqrt(lenSq);
    float  len    = lenSq * lenRcp; // Length computed as lenSq / sqrt(lenSq)

    Ray ray;
    ray.originWS    = GetCurrentViewPosition();
    ray.ratioLenToZ = len;          // 'dir' has ViewSpaceZ = 1, so its length is the length-to-Z ratio
    ray.directionWS = dir * lenRcp; // Normalized

    // TODO
    LightLoopContext context;
    context.shadowContext = InitShadowContext();
    uint featureFlags = 0xFFFFFFFF; // All feature bits set

    PositionInputs posInput = GetPositionInput(voxelCoord, rcp(_VBufferResolutionAndScale.xy), tileCoord);

    FillVolumetricLightingBuffer(context, featureFlags, posInput, ray);
}
|