您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
309 行
14 KiB
309 行
14 KiB
// Identifiers of the supported shadow moment algorithms. Each kernel variant sets
// SHADOW_MOMENT_ALGORITHM to one of these values, which is then tested with #if / #elif.
#define VSM 0
|
|
#define EVSM_2 1
|
|
#define EVSM_4 2
|
|
#define MSM 3
|
|
// Edge length of a thread group; the kernel is declared with THREADS x THREADS x 1 threads.
#define THREADS 16
|
|
// Evaluates to 17, which equals the largest BLUR_SIZE among the kernel variants declared in this file.
#define MAX_BLUR_SIZE (THREADS+1)
|
|
|
|
// Kernel variant table. Every line declares one kernel entry point and sets the macros that
// specialize the shared kernel body:
//   KERNEL_MAIN            - name of the entry point (same as the kernel name)
//   SHADOW_MOMENT_ALGORITHM - VSM, EVSM_2, EVSM_4 or MSM
//   MAX_MSAA               - 1 for a regular depth texture, 8 for a multisampled depth texture
//   BLUR_SIZE              - blur kernel size in texels (odd values 3..17)
// Naming scheme: main[_MSAA]_<algorithm>_<blur size>.

// VSM, no MSAA
#pragma kernel main_VSM_3 KERNEL_MAIN=main_VSM_3 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=3
|
|
#pragma kernel main_VSM_5 KERNEL_MAIN=main_VSM_5 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=5
|
|
#pragma kernel main_VSM_7 KERNEL_MAIN=main_VSM_7 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=7
|
|
#pragma kernel main_VSM_9 KERNEL_MAIN=main_VSM_9 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=9
|
|
#pragma kernel main_VSM_11 KERNEL_MAIN=main_VSM_11 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=11
|
|
#pragma kernel main_VSM_13 KERNEL_MAIN=main_VSM_13 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=13
|
|
#pragma kernel main_VSM_15 KERNEL_MAIN=main_VSM_15 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=15
|
|
#pragma kernel main_VSM_17 KERNEL_MAIN=main_VSM_17 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=1 BLUR_SIZE=17
|
|
|
|
// EVSM with 2 moments, no MSAA
#pragma kernel main_EVSM_2_3 KERNEL_MAIN=main_EVSM_2_3 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=3
|
|
#pragma kernel main_EVSM_2_5 KERNEL_MAIN=main_EVSM_2_5 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=5
|
|
#pragma kernel main_EVSM_2_7 KERNEL_MAIN=main_EVSM_2_7 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=7
|
|
#pragma kernel main_EVSM_2_9 KERNEL_MAIN=main_EVSM_2_9 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=9
|
|
#pragma kernel main_EVSM_2_11 KERNEL_MAIN=main_EVSM_2_11 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=11
|
|
#pragma kernel main_EVSM_2_13 KERNEL_MAIN=main_EVSM_2_13 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=13
|
|
#pragma kernel main_EVSM_2_15 KERNEL_MAIN=main_EVSM_2_15 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=15
|
|
#pragma kernel main_EVSM_2_17 KERNEL_MAIN=main_EVSM_2_17 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=1 BLUR_SIZE=17
|
|
|
|
// EVSM with 4 moments, no MSAA
#pragma kernel main_EVSM_4_3 KERNEL_MAIN=main_EVSM_4_3 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=3
|
|
#pragma kernel main_EVSM_4_5 KERNEL_MAIN=main_EVSM_4_5 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=5
|
|
#pragma kernel main_EVSM_4_7 KERNEL_MAIN=main_EVSM_4_7 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=7
|
|
#pragma kernel main_EVSM_4_9 KERNEL_MAIN=main_EVSM_4_9 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=9
|
|
#pragma kernel main_EVSM_4_11 KERNEL_MAIN=main_EVSM_4_11 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=11
|
|
#pragma kernel main_EVSM_4_13 KERNEL_MAIN=main_EVSM_4_13 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=13
|
|
#pragma kernel main_EVSM_4_15 KERNEL_MAIN=main_EVSM_4_15 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=15
|
|
#pragma kernel main_EVSM_4_17 KERNEL_MAIN=main_EVSM_4_17 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=1 BLUR_SIZE=17
|
|
|
|
// MSM, no MSAA
#pragma kernel main_MSM_3 KERNEL_MAIN=main_MSM_3 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=3
|
|
#pragma kernel main_MSM_5 KERNEL_MAIN=main_MSM_5 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=5
|
|
#pragma kernel main_MSM_7 KERNEL_MAIN=main_MSM_7 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=7
|
|
#pragma kernel main_MSM_9 KERNEL_MAIN=main_MSM_9 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=9
|
|
#pragma kernel main_MSM_11 KERNEL_MAIN=main_MSM_11 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=11
|
|
#pragma kernel main_MSM_13 KERNEL_MAIN=main_MSM_13 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=13
|
|
#pragma kernel main_MSM_15 KERNEL_MAIN=main_MSM_15 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=15
|
|
#pragma kernel main_MSM_17 KERNEL_MAIN=main_MSM_17 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=1 BLUR_SIZE=17
|
|
|
|
// VSM, multisampled depth input
#pragma kernel main_MSAA_VSM_3 KERNEL_MAIN=main_MSAA_VSM_3 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=3
|
|
#pragma kernel main_MSAA_VSM_5 KERNEL_MAIN=main_MSAA_VSM_5 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=5
|
|
#pragma kernel main_MSAA_VSM_7 KERNEL_MAIN=main_MSAA_VSM_7 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=7
|
|
#pragma kernel main_MSAA_VSM_9 KERNEL_MAIN=main_MSAA_VSM_9 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=9
|
|
#pragma kernel main_MSAA_VSM_11 KERNEL_MAIN=main_MSAA_VSM_11 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=11
|
|
#pragma kernel main_MSAA_VSM_13 KERNEL_MAIN=main_MSAA_VSM_13 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=13
|
|
#pragma kernel main_MSAA_VSM_15 KERNEL_MAIN=main_MSAA_VSM_15 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=15
|
|
#pragma kernel main_MSAA_VSM_17 KERNEL_MAIN=main_MSAA_VSM_17 SHADOW_MOMENT_ALGORITHM=VSM MAX_MSAA=8 BLUR_SIZE=17
|
|
|
|
// EVSM with 2 moments, multisampled depth input
#pragma kernel main_MSAA_EVSM_2_3 KERNEL_MAIN=main_MSAA_EVSM_2_3 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=3
|
|
#pragma kernel main_MSAA_EVSM_2_5 KERNEL_MAIN=main_MSAA_EVSM_2_5 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=5
|
|
#pragma kernel main_MSAA_EVSM_2_7 KERNEL_MAIN=main_MSAA_EVSM_2_7 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=7
|
|
#pragma kernel main_MSAA_EVSM_2_9 KERNEL_MAIN=main_MSAA_EVSM_2_9 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=9
|
|
#pragma kernel main_MSAA_EVSM_2_11 KERNEL_MAIN=main_MSAA_EVSM_2_11 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=11
|
|
#pragma kernel main_MSAA_EVSM_2_13 KERNEL_MAIN=main_MSAA_EVSM_2_13 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=13
|
|
#pragma kernel main_MSAA_EVSM_2_15 KERNEL_MAIN=main_MSAA_EVSM_2_15 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=15
|
|
#pragma kernel main_MSAA_EVSM_2_17 KERNEL_MAIN=main_MSAA_EVSM_2_17 SHADOW_MOMENT_ALGORITHM=EVSM_2 MAX_MSAA=8 BLUR_SIZE=17
|
|
|
|
// EVSM with 4 moments, multisampled depth input
#pragma kernel main_MSAA_EVSM_4_3 KERNEL_MAIN=main_MSAA_EVSM_4_3 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=3
|
|
#pragma kernel main_MSAA_EVSM_4_5 KERNEL_MAIN=main_MSAA_EVSM_4_5 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=5
|
|
#pragma kernel main_MSAA_EVSM_4_7 KERNEL_MAIN=main_MSAA_EVSM_4_7 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=7
|
|
#pragma kernel main_MSAA_EVSM_4_9 KERNEL_MAIN=main_MSAA_EVSM_4_9 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=9
|
|
#pragma kernel main_MSAA_EVSM_4_11 KERNEL_MAIN=main_MSAA_EVSM_4_11 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=11
|
|
#pragma kernel main_MSAA_EVSM_4_13 KERNEL_MAIN=main_MSAA_EVSM_4_13 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=13
|
|
#pragma kernel main_MSAA_EVSM_4_15 KERNEL_MAIN=main_MSAA_EVSM_4_15 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=15
|
|
#pragma kernel main_MSAA_EVSM_4_17 KERNEL_MAIN=main_MSAA_EVSM_4_17 SHADOW_MOMENT_ALGORITHM=EVSM_4 MAX_MSAA=8 BLUR_SIZE=17
|
|
|
|
// MSM, multisampled depth input
#pragma kernel main_MSAA_MSM_3 KERNEL_MAIN=main_MSAA_MSM_3 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=3
|
|
#pragma kernel main_MSAA_MSM_5 KERNEL_MAIN=main_MSAA_MSM_5 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=5
|
|
#pragma kernel main_MSAA_MSM_7 KERNEL_MAIN=main_MSAA_MSM_7 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=7
|
|
#pragma kernel main_MSAA_MSM_9 KERNEL_MAIN=main_MSAA_MSM_9 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=9
|
|
#pragma kernel main_MSAA_MSM_11 KERNEL_MAIN=main_MSAA_MSM_11 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=11
|
|
#pragma kernel main_MSAA_MSM_13 KERNEL_MAIN=main_MSAA_MSM_13 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=13
|
|
#pragma kernel main_MSAA_MSM_15 KERNEL_MAIN=main_MSAA_MSM_15 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=15
|
|
#pragma kernel main_MSAA_MSM_17 KERNEL_MAIN=main_MSAA_MSM_17 SHADOW_MOMENT_ALGORITHM=MSM MAX_MSAA=8 BLUR_SIZE=17
|
|
|
|
#include "../../common.hlsl"
|
|
#include "../ShadowMoments.hlsl"
|
|
|
|
// Source depth buffer. Variants compiled with MAX_MSAA > 1 read a multisampled texture,
// all other variants read a regular 2D texture.
#if MAX_MSAA > 1
|
|
Texture2DMS<float> depthTex;
|
|
#else
|
|
Texture2D<float> depthTex;
|
|
#endif
|
|
|
|
// Region of depthTex that is converted and blurred.
uniform uint4 srcRect; // .xy = offset, .zw = width/height
|
|
// Destination of the blurred moments inside the output texture array, plus flags.
uniform uint4 dstRect; // .xy = offset, .z = array slice , .w = Flags: 1 := 16bpp, 2 := 2 channels pp, 4:= reversed z
|
|
|
|
// Blur weights; the kernel indexes them by the absolute offset from the center texel.
uniform float4 blurWeightsStorage[3]; // Unity expects float arrays to be tightly packed
|
|
// Flat 12-element view of the 3 x float4 storage above.
static float blurWeights[12] = (float[12])blurWeightsStorage;
|
|
|
|
// Bit flags tested against dstRect.w.
static const int kBits_16 = 1; // 16 bits per channel
|
|
static const int kChannels_2 = 2; // 2 channels per pixel
|
|
static const int kReversed_z = 4; // depth buffer contains reversed z
|
|
|
|
// Per-algorithm conversion of a depth value into the moment vector that gets blurred.
// Each branch also defines SHADOW_MOMENTS, the number of moments stored per texel.
#if SHADOW_MOMENT_ALGORITHM == VSM
#define SHADOW_MOMENTS 2

// VSM: the depth and the squared depth.
float2 DepthToMoments( float depth )
{
    float2 result;
    result.x = depth;
    result.y = depth * depth;
    return result;
}
#elif SHADOW_MOMENT_ALGORITHM == EVSM_2
#define SHADOW_MOMENTS 2

// Exponent handed to ShadowMoments_WarpDepth (used for both of its components).
uniform float evsmExponent;

// EVSM, 2 moments: the first warped depth component and its square.
float2 DepthToMoments( float depth )
{
    const float warped = ShadowMoments_WarpDepth( depth, evsmExponent.xx ).x;
    return float2( warped, warped * warped );
}
#elif SHADOW_MOMENT_ALGORITHM == EVSM_4
#define SHADOW_MOMENTS 4

// Exponents handed to ShadowMoments_WarpDepth.
uniform float2 evsmExponents;

// EVSM, 4 moments: both warped depth components followed by their squares.
float4 DepthToMoments( float depth )
{
    const float2 warped = ShadowMoments_WarpDepth( depth, evsmExponents );
    return float4( warped, warped * warped );
}
#elif SHADOW_MOMENT_ALGORITHM == MSM
#define SHADOW_MOMENTS 4

// MSM: the first four powers of the depth. When the 16 bit flag is set in dstRect.w
// the encoding is delegated to ShadowMoments_Encode16MSM instead.
float4 DepthToMoments( float depth )
{
    if( (dstRect.w & kBits_16) == 0 )
    {
        const float depthSq = depth * depth;
        return float4( depth, depthSq, depth * depthSq, depthSq * depthSq );
    }

    return ShadowMoments_Encode16MSM( depth );
}
#else
#error "No valid shadow moment algorithm has been set to the define SHADOW_MOMENT_ALGORITHM."
#endif
|
|
|
|
|
|
// Number of texels the blur reaches to each side of the center texel.
#define BLUR_BORDER (BLUR_SIZE / 2)
|
|
// Extent of a thread group's tile including the blur border on both sides.
#define LDS_STRIDE (THREADS + BLUR_BORDER + BLUR_BORDER)
|
|
|
|
// Vector type holding all moments of one texel. MERGE_NAME is not defined in this file;
// presumably it pastes its arguments into float2 / float4 - confirm against the include.
#define moment_t MERGE_NAME( float, SHADOW_MOMENTS )
|
|
// Receives the blurred moments; written at dstRect.xy + thread position, slice dstRect.z.
RWTexture2DArray<moment_t> outputTex;
|
|
|
|
// Group shared storage, one array per moment component. Arrays 3 and 4 are only
// accessed when SHADOW_MOMENTS == 4.
groupshared float moments1[THREADS * LDS_STRIDE]; // contains the blurred first moment
|
|
groupshared float moments2[THREADS * LDS_STRIDE]; // contains the blurred second moment
|
|
groupshared float moments3[THREADS * LDS_STRIDE]; // contains the blurred third moment
|
|
groupshared float moments4[THREADS * LDS_STRIDE]; // contains the blurred fourth moment
|
|
|
|
// Per-sample weights used when averaging multisampled depth (MAX_MSAA > 1 variants only).
groupshared float sampleWeights[MAX_MSAA];
|
|
// Reciprocal of the sum of all sample weights (MAX_MSAA > 1 variants only).
groupshared float sumWeights;
|
|
|
|
// Maps a 2D tile position to a flat index into the group shared moment arrays.
//   pos    - tile position (x = column, y = row)
//   stride - number of columns per tile row
// Rows 2k and 2k+1 share one segment of 2 * stride floats and are interleaved element by
// element (even row on even slots, odd row on odd slots) to avoid bank conflicts.
int getLDSIdx( int2 pos, int stride )
{
    const int rowPair     = pos.y >> 1;
    const int rowInPair   = pos.y & 1;
    const int segmentSize = stride << 1;

    return rowPair * segmentSize + (pos.x << 1) + rowInPair;
}
|
|
|
|
// Stores the moments of one texel in group shared memory.
//   val    - moments to store (2 or 4 components, depending on SHADOW_MOMENTS)
//   pos    - tile position of the texel
//   stride - number of columns per tile row, forwarded to getLDSIdx
void writeToShared( moment_t val, int2 pos, int stride )
{
    const int slot = getLDSIdx( pos, stride );

    // every moment component lives in its own array
    moments1[slot] = val.x;
    moments2[slot] = val.y;
#if SHADOW_MOMENTS == 4
    moments3[slot] = val.z;
    moments4[slot] = val.w;
#endif
}
|
|
|
|
// Fetches the moments of one texel from group shared memory.
//   pos    - tile position of the texel
//   stride - number of columns per tile row, forwarded to getLDSIdx
// Returns the 2 or 4 moment components (depending on SHADOW_MOMENTS) stored at that position.
moment_t readFromShared( int2 pos, int stride )
{
    const int slot = getLDSIdx( pos, stride );

    moment_t texelMoments;
    texelMoments.x = moments1[slot];
    texelMoments.y = moments2[slot];
#if SHADOW_MOMENTS == 4
    texelMoments.z = moments3[slot];
    texelMoments.w = moments4[slot];
#endif

    return texelMoments;
}
|
|
|
|
// Converts one raw depth sample into the moment vector of the active algorithm.
//   depth     - value fetched from the depth buffer
//   reverse_z - true when the depth buffer stores reversed z; the value is flipped to 1 - depth
moment_t BlurMoments_DepthSampleToMoments( float depth, bool reverse_z )
{
    depth = reverse_z ? (1.0 - depth) : depth;
#if SHADOW_MOMENT_ALGORITHM == MSM
    // We're pancaking triangles to znear in the depth pass so depth and subsequently all moments can end up being zero.
    // The solver ShadowMoments_SolveMSM then ends up calculating infinities and NaNs, which produces different results
    // on different vendors' GPUs. So we're adding a small safety margin here.
    depth = Clamp( depth, 0.001, 0.999 );
#endif
    return DepthToMoments( depth );
}

// Converts the depth values inside srcRect into shadow moments, blurs them with a separable
// filter of BLUR_SIZE taps (weights taken from blurWeights) and writes the result to outputTex
// at dstRect.xy, array slice dstRect.z.
//
// Thread layout: THREADS x THREADS threads per group, one output texel per thread. Threads whose
// dispatchId.xy lies outside srcRect.zw still take part in loading and in all barriers, but do
// not write a result.
// Group shared memory: moments1..moments4 (plus sampleWeights / sumWeights for MAX_MSAA > 1).
//
// Source coordinates are clamped to srcRect on all four sides, so the blur repeats the edge
// texels instead of reading texels that lie outside of the source rectangle.
[numthreads( THREADS, THREADS, 1 )]
void KERNEL_MAIN( uint3 dispatchId : SV_DispatchThreadID, uint3 groupThreadId : SV_GroupThreadID )
{
#if MAX_MSAA > 1
    uint width, height, sampleCnt;
    depthTex.GetDimensions( width, height, sampleCnt );
    sampleCnt = Clamp( sampleCnt, 2, MAX_MSAA );
    float sampleCntRcp = 1.0 / sampleCnt;

    // Per-sample weights. Every sample currently gets the same weight; the sample
    // positions are not taken into account. Only the first row of threads writes.
    if( groupThreadId.y == 0 && groupThreadId.x < sampleCnt )
    {
        sampleWeights[groupThreadId.x] = sampleCntRcp;
    }

    // The weights are summed up by a different thread than the ones that wrote them.
    GroupMemoryBarrierWithGroupSync();

    if( all( groupThreadId.xy == 0 ) )
    {
        float sum = 0.0;
        for( uint i = 0; i < sampleCnt; i++ )
            sum += sampleWeights[i];
        sumWeights = 1.0 / sum;
    }

    // sumWeights is consumed by every thread of the group in the load loop below.
    GroupMemoryBarrierWithGroupSync();
#endif

    // load moments into LDS
    // each workgroup works on THREADS * THREADS tiles, but the blur filter requires
    // us to fetch enough data around the border of the current tile.
    // We assume that the blur filter's support does not exceed THREADS, so we fetch
    // the data in 4 blocks.
    const bool reverse_z   = (dstRect.w & kReversed_z) != 0;
    const int  blurBorder  = BLUR_BORDER;
    const int2 validSrcMin = (int2) srcRect.xy;
    const int2 validSrcMax = (int2) (srcRect.xy + srcRect.zw - 1);

    int2 srcIdx = ((int2) dispatchId.xy) - blurBorder.xx + (int2) srcRect.xy;
    int2 ldsIdx = (int2) groupThreadId.xy;
    moment_t hblurredMoments[2];

    [unroll]
    for( int ih = 0; ih < 2; ih++ )
    {
        [unroll]
        for( int iw = 0; iw < 2; iw++ )
        {
            if( ldsIdx.x < LDS_STRIDE )
            {
                // Clamp to the source rectangle on both ends: threads in the left / top border
                // start blurBorder texels before srcRect.xy.
                const int2 loadIdx = max( min( srcIdx, validSrcMax ), validSrcMin );
#if MAX_MSAA > 1
                moment_t avgMoments = 0.0;
                [loop]
                for( uint sampleIdx = 0; sampleIdx < sampleCnt; sampleIdx++ )
                {
                    float depth = depthTex.Load( loadIdx, sampleIdx ).x;
                    avgMoments += sampleWeights[sampleIdx] * BlurMoments_DepthSampleToMoments( depth, reverse_z );
                }
                avgMoments *= sumWeights;

                writeToShared( avgMoments, int2( ldsIdx.x, groupThreadId.y ), LDS_STRIDE );
#else
                float depth = depthTex.Load( int3( loadIdx, 0 ) ).x;
                writeToShared( BlurMoments_DepthSampleToMoments( depth, reverse_z ), int2( ldsIdx.x, groupThreadId.y ), LDS_STRIDE );
#endif
                ldsIdx.x += THREADS;
                srcIdx.x += THREADS;
            }
        }

        // all texels of the current THREADS rows must be in LDS before they are blurred
        GroupMemoryBarrierWithGroupSync();

        // first pass blurs horizontally
        hblurredMoments[ih] = 0;
        int2 idx = { groupThreadId.x + blurBorder, groupThreadId.y };
        [loop]
        for( int hOffset = -blurBorder; hOffset <= blurBorder; hOffset++ )
        {
            hblurredMoments[ih] += readFromShared( int2( idx.x + hOffset, idx.y ), LDS_STRIDE ) * blurWeights[abs( hOffset )];
        }

        // the LDS contents are overwritten next, so every thread has to be done reading
        GroupMemoryBarrierWithGroupSync();

        // rewind to the first column and advance to the next block of THREADS rows
        ldsIdx.x = groupThreadId.x;
        srcIdx.x = (int) dispatchId.x - blurBorder + (int) srcRect.x;
        srcIdx.y += THREADS;
    }

    // update LDS with horizontally blurred values
    writeToShared( hblurredMoments[0], (int2) groupThreadId.xy, THREADS );
    if( (groupThreadId.y + THREADS) < LDS_STRIDE )
        writeToShared( hblurredMoments[1], int2( groupThreadId.x, groupThreadId.y + THREADS ), THREADS );

    // sync threads
    GroupMemoryBarrierWithGroupSync();

    // second pass blurs vertically
    ldsIdx = (int2) groupThreadId.xy + int2( 0, blurBorder );
    moment_t vblurredMoment = 0.0;

    [unroll]
    for( int vOffset = -blurBorder; vOffset <= blurBorder; vOffset++ )
    {
        vblurredMoment += readFromShared( int2( ldsIdx.x, ldsIdx.y + vOffset ), THREADS ) * blurWeights[abs( vOffset )];
    }

    // and write out the result
    if( all( dispatchId.xy < srcRect.zw ) )
    {
        dispatchId.xy += dstRect.xy;
        dispatchId.z = dstRect.z;
        outputTex[dispatchId] = vblurredMoment;
    }
}
|