您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 

239 行
9.4 KiB

#define VSM 0
#define EVSM_2 1
#define EVSM_4 2
#define MSM 3
#define THREADS 16
#define MAX_BLUR_SIZE 17
#pragma kernel main_VSM_3 KERNEL_MAIN=main_VSM_3 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=3
#pragma kernel main_VSM_5 KERNEL_MAIN=main_VSM_5 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=5
#pragma kernel main_VSM_7 KERNEL_MAIN=main_VSM_7 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=7
#pragma kernel main_VSM_9 KERNEL_MAIN=main_VSM_9 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=9
#pragma kernel main_VSM_11 KERNEL_MAIN=main_VSM_11 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=11
#pragma kernel main_VSM_13 KERNEL_MAIN=main_VSM_13 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=13
#pragma kernel main_VSM_15 KERNEL_MAIN=main_VSM_15 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=15
#pragma kernel main_VSM_17 KERNEL_MAIN=main_VSM_17 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=17
#pragma kernel main_EVSM_2_3 KERNEL_MAIN=main_EVSM_2_3 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=3
#pragma kernel main_EVSM_2_5 KERNEL_MAIN=main_EVSM_2_5 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=5
#pragma kernel main_EVSM_2_7 KERNEL_MAIN=main_EVSM_2_7 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=7
#pragma kernel main_EVSM_2_9 KERNEL_MAIN=main_EVSM_2_9 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=9
#pragma kernel main_EVSM_2_11 KERNEL_MAIN=main_EVSM_2_11 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=11
#pragma kernel main_EVSM_2_13 KERNEL_MAIN=main_EVSM_2_13 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=13
#pragma kernel main_EVSM_2_15 KERNEL_MAIN=main_EVSM_2_15 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=15
#pragma kernel main_EVSM_2_17 KERNEL_MAIN=main_EVSM_2_17 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=17
#pragma kernel main_EVSM_4_3 KERNEL_MAIN=main_EVSM_4_3 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=3
#pragma kernel main_EVSM_4_5 KERNEL_MAIN=main_EVSM_4_5 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=5
#pragma kernel main_EVSM_4_7 KERNEL_MAIN=main_EVSM_4_7 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=7
#pragma kernel main_EVSM_4_9 KERNEL_MAIN=main_EVSM_4_9 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=9
#pragma kernel main_EVSM_4_11 KERNEL_MAIN=main_EVSM_4_11 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=11
#pragma kernel main_EVSM_4_13 KERNEL_MAIN=main_EVSM_4_13 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=13
#pragma kernel main_EVSM_4_15 KERNEL_MAIN=main_EVSM_4_15 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=15
#pragma kernel main_EVSM_4_17 KERNEL_MAIN=main_EVSM_4_17 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=17
#pragma kernel main_MSM_3 KERNEL_MAIN=main_MSM_3 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=3
#pragma kernel main_MSM_5 KERNEL_MAIN=main_MSM_5 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=5
#pragma kernel main_MSM_7 KERNEL_MAIN=main_MSM_7 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=7
#pragma kernel main_MSM_9 KERNEL_MAIN=main_MSM_9 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=9
#pragma kernel main_MSM_11 KERNEL_MAIN=main_MSM_11 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=11
#pragma kernel main_MSM_13 KERNEL_MAIN=main_MSM_13 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=13
#pragma kernel main_MSM_15 KERNEL_MAIN=main_MSM_15 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=15
#pragma kernel main_MSM_17 KERNEL_MAIN=main_MSM_17 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=17
#include "../../common.hlsl"
#include "../ShadowMoments.hlsl"
#define BLUR_BORDER (BLUR_SIZE / 2)
#define LDS_SIZE (THREADS + BLUR_BORDER + BLUR_BORDER)
#if MAX_MSAA == 1
Texture2D<float> depthTex;
#else
Texture2DMS<float> depthTex;
#endif
uniform uint4 srcRect; // .xy = offset, .zw = width/height
uniform uint4 dstRect; // .xy = offset, .z = array slice , .w = Flags: 1 := 16bpp, 2 := 2 channels pp, 4:= reversed z
uniform float4 blurWeightsStorage[3]; // Unity expects float arrays to be tightly packed
static float blurWeights[12] = (float[12])blurWeightsStorage;
static const int kBits_16 = 1; // 16 bits per channel
static const int kChannels_2 = 2; // 2 channels per pixel
static const int kReversed_z = 4; // depth buffer contains reversed z
#if (SHADOW_MOMENT_ALGORITHM == VSM)
# define SHADOW_MOMENTS 2
float2 DepthToMoments( float depth )
{
return float2( depth, depth * depth );
}
#elif SHADOW_MOMENT_ALGORITHM == EVSM_2
# define SHADOW_MOMENTS 2
uniform float evsmExponent;
float2 DepthToMoments( float depth )
{
float2 moments = ShadowMoments_WarpDepth( depth, evsmExponent.xx );
return float2( moments.x, moments.x * moments.x );
}
#elif SHADOW_MOMENT_ALGORITHM == EVSM_4
# define SHADOW_MOMENTS 4
uniform float2 evsmExponents;
float4 DepthToMoments( float depth )
{
float2 moments = ShadowMoments_WarpDepth( depth, evsmExponents );
return float4( moments.xy, moments.xy * moments.xy );
}
#elif SHADOW_MOMENT_ALGORITHM == MSM
# define SHADOW_MOMENTS 4
float4 DepthToMoments( float depth )
{
[branch]
if( (dstRect.w & kBits_16) != 0 )
return ShadowMoments_Encode16MSM( depth );
else
{
float dsq = depth * depth;
return float4( depth, dsq, depth * dsq, dsq * dsq );
}
}
#else
# error "No valid shadow moment algorithm has been set to the define SHADOW_MOMENT_ALGORITHM."
#endif
#define moment_t MERGE_NAME( float, SHADOW_MOMENTS )
RWTexture2DArray<moment_t> outputTex;
groupshared moment_t moments[LDS_SIZE][LDS_SIZE];
[numthreads( THREADS, THREADS, 1 )]
void KERNEL_MAIN(uint3 dispatchId : SV_DispatchThreadID, uint3 groupThreadId : SV_GroupThreadID)
{
uint i, j; // because the compiler scopes like its 1999.
#if MAX_MSAA > 1
uint width, height, sampleCnt;
depthTex.GetDimensions(width, height, sampleCnt);
sampleCnt = Clamp(0, MAX_MSAA, sampleCnt);
float sampleCntRcp = 1.0 / sampleCnt;
// calculate weights based on sample positions
float sumWeights = 0;
float sampleWeights[MAX_MSAA];
for (i = 0; i < sampleCnt; i++)
{
float2 spos = depthTex.GetSamplePosition( i );
sampleWeights[i] = sampleCntRcp; // TODO: find a better weight filter
sumWeights += sampleWeights[i];
}
sumWeights = 1.0 / sumWeights;
#endif
// load moments into LDS
// each workgroup works on THREADS * THREADS tiles, but the blur filter requires
// us to fetch enough data around the border of the current tile.
// We assume that the blur filter's support does not exceed THREADS, so we fetch
// the data in 4 blocks.
const int blurBorder = BLUR_BORDER;
const int2 ldsSize = int2( LDS_SIZE, LDS_SIZE );
const int2 threadsCnt = int2( THREADS, THREADS );
const int4 validSrc = int4( srcRect.xy, srcRect.xy + srcRect.zw - 1 );
int2 srcIdx = ((int2) dispatchId.xy) - blurBorder.xx + srcRect.xy;
int2 ldsIdx = groupThreadId.xy;
const bool reverse_z = (dstRect.w & kReversed_z) != 0;
// calculate an average moment over all samples for a given pixel and load the result into LDS
uint iw, ih, is;
[unroll]
for( ih = 0; ih < 2; ih++ )
{
[branch]
if (ldsIdx.y >= ldsSize.y)
continue;
[unroll]
for( iw = 0; iw < 2; iw++ )
{
[branch]
if (ldsIdx.x >= ldsSize.x)
continue;
moment_t avgMoments = 0.0;
#if MAX_MSAA > 1
for( is = 0; is < sampleCnt; is++ )
{
float depth = depthTex.Load( int3( Clamp( srcIdx, validSrc.xy, validSrc.zw ), is ) ).x;
depth = reverse_z ? (1.0 - depth) : depth;
moment_t moments = DepthToMoments( depth );
avgMoments += sampleWeights[is] * moments;
}
avgMoments *= sumWeights;
#else
float depth = depthTex.Load( int3( Clamp( srcIdx, validSrc.xy, validSrc.zw ), 0 ) ).x;
avgMoments = DepthToMoments( reverse_z ? (1.0-depth) : depth );
#endif
moments[ldsIdx.y][ldsIdx.x] = avgMoments;
ldsIdx.x += threadsCnt.x;
srcIdx.x += threadsCnt.x;
}
ldsIdx.x = groupThreadId.x;
srcIdx.x = (int) dispatchId.x - blurBorder + srcRect.x;
ldsIdx.y += threadsCnt.y;
srcIdx.y += threadsCnt.y;
}
// sync across all threads so LDS contains the moments for each pixel that we need for the blur
GroupMemoryBarrierWithGroupSync();
// first pass blurs horizontally
ldsIdx = groupThreadId.xy + int2( blurBorder, 0 );
moment_t hblurredMoment = 0.0, hblurredMoment2 = 0.0;
int blurOffset;
for( blurOffset = -blurBorder; blurOffset <= blurBorder; blurOffset++ )
{
hblurredMoment += moments[ldsIdx.y][ldsIdx.x + blurOffset] * blurWeights[abs( blurOffset )];
}
ldsIdx.y += threadsCnt.y;
[branch]
if( ldsIdx.y < ldsSize.y )
{
for( blurOffset = -blurBorder; blurOffset <= blurBorder; blurOffset++ )
{
hblurredMoment2 += moments[ldsIdx.y][ldsIdx.x + blurOffset] * blurWeights[abs(blurOffset)];
}
}
// make sure all reads/writes are done
GroupMemoryBarrierWithGroupSync();
// replace LDS values with the horizontally blurred values
moments[groupThreadId.y][ldsIdx.x] = hblurredMoment;
[branch]
if( ldsIdx.y < ldsSize.y )
moments[ldsIdx.y][ldsIdx.x] = hblurredMoment2;
GroupMemoryBarrierWithGroupSync();
// second pass blurs vertically
ldsIdx = groupThreadId.xy + blurBorder.xx;
moment_t vblurredMoment = 0.0;
for( blurOffset = -blurBorder; blurOffset <= blurBorder; blurOffset++ )
{
vblurredMoment += moments[ldsIdx.y + blurOffset][ldsIdx.x] * blurWeights[abs( blurOffset )];
}
// and write out the result
if( all( dispatchId.xy < srcRect.zw ) )
{
dispatchId.z = dstRect.z;
dispatchId.xy += dstRect.xy;
outputTex[dispatchId] = vblurredMoment;
}
}