// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7. // https://github.com/wolfgangfengel/GPU-Pro-7 #pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen #pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING #include "ShaderBase.h" #include "LightDefinitions.cs.hlsl" #include "LightingConvexHullUtils.hlsl" #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) #include "SortingComputeUtils.hlsl" #endif #define FINE_PRUNING_ENABLED #define PERFORM_SPHERICAL_INTERSECTION_TESTS uniform int g_isOrthographic; uniform int g_iNrVisibLights; uniform uint2 g_viDimensions; uniform float4x4 g_mInvScrProjection; uniform float4x4 g_mScrProjection; Texture2D g_depth_tex : register( t0 ); StructuredBuffer g_vBoundsBuffer : register( t1 ); StructuredBuffer g_vLightData : register( t2 ); StructuredBuffer g_data : register( t3 ); #ifdef USE_TWO_PASS_TILED_LIGHTING StructuredBuffer g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity #endif #define NR_THREADS 64 // output buffer RWStructuredBuffer g_vLightList : register( u0 ); // don't support RWBuffer yet in unity #define MAX_NR_COARSE_ENTRIES 64 #define MAX_NR_PRUNED_ENTRIES 24 groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES]; groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS groupshared uint ldsZMin; groupshared uint ldsZMax; groupshared uint lightOffs; #ifdef FINE_PRUNING_ENABLED groupshared uint ldsDoesLightIntersect[2]; #endif groupshared int ldsNrLightsFinal; groupshared int ldsModelListCount[NR_LIGHT_MODELS]; // since NR_LIGHT_MODELS is 2 #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS groupshared uint lightOffsSph; #endif //float GetLinearDepth(float3 vP) //{ // float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); // return v4Pres.z / v4Pres.w; //} float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far { float3 vP = float3(0.0f,0.0f,zDptBufSpace); float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); return v4Pres.z / v4Pres.w; } float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth) { bool isOrthographic = g_isOrthographic!=0; float fSx = g_mScrProjection[0].x; float fSy = g_mScrProjection[1].y; float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z; float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z; #if USE_LEFTHAND_CAMERASPACE bool useLeftHandVersion = true; #else bool useLeftHandVersion = isOrthographic; #endif float s = useLeftHandVersion ? 1 : (-1); float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy); return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth); } float GetOnePixDiagWorldDistAtDepthOne() { float fSx = g_mScrProjection[0].x; float fSy = g_mScrProjection[1].y; return length( float2(1.0/fSx,1.0/fSy) ); } #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); #endif #ifdef FINE_PRUNING_ENABLED void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths); #endif [numthreads(NR_THREADS, 1, 1)] void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) { uint2 tileIDX = u3GroupID.xy; uint t=threadID; if(t>4)), uint2(iWidth-1, iHeight-1) ); const float fDepth = FetchDepth(g_depth_tex, uCrd); vLinDepths[i] = GetLinearDepth(fDepth); if(fDepth>log2BigTileToTileRatio; const int bigTileIdx = (tileIDX.y>>log2BigTileToTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToTileRatio); // map the idx to 64x64 tiles int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0]; for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS) { int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1]; #else for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS) { #endif const float3 vMi = g_vBoundsBuffer[l]; const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights]; if( all(vMa>vTileLL) && all(vMi>1; for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS) { uint uLow = l==0 ? nrLightsFinalClamped : prunedList[max(0,2 * l - 1 + localOffs)]; uint uHigh = prunedList[2 * l + 0 + localOffs]; g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16); } localOffs += nrLightsFinal; offs += (nrTilesX*nrTilesY); } } #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate) { if(threadID==0) lightOffsSph = 0; // make a copy of coarseList in prunedList. int l; for(l=threadID; l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 fromLight = vVPos-lightData.lightPos.xyz; float distSq = dot(fromLight,fromLight); const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) ); float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y); if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1; } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 vLp = lightData.lightPos.xyz; float3 toLight = vLp - vVPos; float distSq = dot(toLight,toLight); if(lightData.radiusSq>distSq) uVal = 1; } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l>4)), uint2(iWidth-1, iHeight-1)); float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]); // check pixel float3 toLight = lightData.lightPos.xyz - vVPos; float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) ); dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists } uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31)); ++l; idxCoarse = l=MAX_TYPES) ++l; } InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]); InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]); if(t==0) ldsNrLightsFinal = 0; #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 ) { unsigned int uInc = 1; unsigned int uIndex; InterlockedAdd(ldsNrLightsFinal, uInc, uIndex); if(uIndex