// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7. // https://github.com/wolfgangfengel/GPU-Pro-7 #pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen #pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING #include "..\common\ShaderBase.h" #include "LightDefinitions.cs.hlsl" #include "LightingConvexHullUtils.hlsl" #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) #include "SortingComputeUtils.hlsl" #endif #define FINE_PRUNING_ENABLED #define PERFORM_SPHERICAL_INTERSECTION_TESTS uniform int g_iNrVisibLights; uniform uint2 g_viDimensions; uniform float4x4 g_mInvScrProjection; uniform float4x4 g_mScrProjection; Texture2D g_depth_tex : register( t0 ); StructuredBuffer g_vBoundsBuffer : register( t1 ); StructuredBuffer g_vLightData : register( t2 ); StructuredBuffer g_data : register( t3 ); #ifdef USE_TWO_PASS_TILED_LIGHTING Buffer g_vBigTileLightList : register( t4 ); #endif #define NR_THREADS 64 // output buffer RWBuffer g_vLightList : register( u0 ); #define MAX_NR_COARSE_ENTRIES 64 #define MAX_NR_PRUNED_ENTRIES 24 groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES]; groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS groupshared uint ldsZMin; groupshared uint ldsZMax; groupshared uint lightOffs; #ifdef FINE_PRUNING_ENABLED groupshared uint ldsDoesLightIntersect[2]; #endif groupshared int ldsNrLightsFinal; groupshared int ldsModelListCount[NR_LIGHT_MODELS]; // since NR_LIGHT_MODELS is 2 #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS groupshared uint lightOffsSph; #endif //float GetLinearDepth(float3 vP) //{ // float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); // return v4Pres.z / v4Pres.w; //} float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far { float3 vP = float3(0.0f,0.0f,zDptBufSpace); float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); return v4Pres.z / v4Pres.w; } float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth) { float fSx = g_mScrProjection[0].x; float fCx = g_mScrProjection[0].z; float fSy = g_mScrProjection[1].y; float fCy = g_mScrProjection[1].z; #if USE_LEFTHAND_CAMERASPACE return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 ); #else return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 ); #endif } float GetOnePixDiagWorldDistAtDepthOne() { float fSx = g_mScrProjection[0].x; float fSy = g_mScrProjection[1].y; return length( float2(1.0/fSx,1.0/fSy) ); } #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); #endif #ifdef FINE_PRUNING_ENABLED void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths); #endif [numthreads(NR_THREADS, 1, 1)] void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) { uint2 tileIDX = u3GroupID.xy; uint t=threadID; if(t>4)), uint2(iWidth-1, iHeight-1) ); const float fDepth = FetchDepth(g_depth_tex, uCrd); vLinDepths[i] = GetLinearDepth(fDepth); if(fDepth>2; const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0]; for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS) { int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1]; #else for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS) { #endif const float3 vMi = g_vBoundsBuffer[l]; const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights]; if( all(vMa>vTileLL) && all(vMi>1; for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS) { uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs]; uint uHigh = prunedList[2*l+0+localOffs]; g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16); } localOffs += nrLightsFinal; offs += (nrTilesX*nrTilesY); } } #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate) { lightOffsSph = 0; // make a copy of coarseList in prunedList. for(int l=threadID; l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 fromLight = vVPos-lightData.lightPos.xyz; float distSq = dot(fromLight,fromLight); const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) ); float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y); if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1; } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 vLp = lightData.lightPos.xyz; float3 toLight = vLp - vVPos; float distSq = dot(toLight,toLight); if(lightData.radiusSq>distSq) uVal = 1; } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 toLight = lightData.lightPos.xyz - vVPos; float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) ); dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l=MAX_TYPES) ++l; } InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]); InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]); if(t==0) ldsNrLightsFinal = 0; #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 ) { unsigned int uInc = 1; unsigned int uIndex; InterlockedAdd(ldsNrLightsFinal, uInc, uIndex); if(uIndex