#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT #pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE #pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED #pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING #pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING #pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING #pragma kernel ClearAtomic #include "CoreRP/ShaderLibrary/common.hlsl" #include "ShaderBase.hlsl" #include "LightLoop.cs.hlsl" #include "LightingConvexHullUtils.hlsl" #include "LightCullUtils.hlsl" #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) #include "SortingComputeUtils.hlsl" #endif #pragma only_renderers d3d11 ps4 xboxone vulkan metal //#define EXACT_EDGE_TESTS #define PERFORM_SPHERICAL_INTERSECTION_TESTS #define CONV_HULL_TEST_ENABLED CBUFFER_START(UnityLightListClustered) int g_iNrVisibLights; float4x4 g_mInvScrProjectionArr[2]; float4x4 g_mScrProjectionArr[2]; uint g_isOrthographic; int _EnvLightIndexShift; int _DecalIndexShift; float g_fClustScale; float g_fClustBase; float g_fNearPlane; float g_fFarPlane; int g_iLog2NumClusters; // numClusters = (1< g_depth_tex : register( t0 ); #else Texture2D g_depth_tex : register( t0 ); #endif StructuredBuffer g_vBoundsBuffer : register( t1 ); StructuredBuffer _LightVolumeData : register(t2); StructuredBuffer g_data : register( t3 ); #ifdef USE_TWO_PASS_TILED_LIGHTING StructuredBuffer g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity #endif #define NR_THREADS 64 RWStructuredBuffer 
g_vLayeredLightList : register( u0 );       // don't support RWBuffer yet in unity
// NOTE(review): the buffer declarations in this section carry no element type
// (e.g. RWStructuredBuffer<...>); it looks like the template arguments were lost
// when this file was extracted. Restore them from the upstream file - TODO confirm.
RWStructuredBuffer g_LayeredOffset : register( u1 );        // don't support RWBuffer yet in unity
RWStructuredBuffer g_LayeredSingleIdxBuffer : register( u2 );   // don't support RWBuffer yet in unity

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
RWStructuredBuffer g_logBaseBuffer : register( u3 );            // don't support RWBuffer yet in unity
#endif


#define MAX_NR_COARSE_ENTRIES       128

// Per-group scratch storage.
// coarseList  : light indices that survived the coarse per-tile culling.
// clusterIdxs : packed cluster ranges, two coarse lights per entry (16 bits each:
//               low byte = first cluster index, high byte = last cluster index).
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
groupshared float4 lightPlanes[4*6]; // Each plane is defined by a float4. 6 planes per light, 4 lights (24 planes)

groupshared uint lightOffs;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
groupshared uint ldsZMax;
#endif

#ifdef EXACT_EDGE_TESTS
groupshared uint ldsIsLightInvisible;
groupshared uint lightOffs2;
#endif

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;
#endif


// Converts a depth-buffer value into linear depth for the given eye.
//
// zDptBufSpace : depth as stored in the depth buffer.
// eyeIndex     : selects the entry of g_mInvScrProjectionArr to use.
//
// The result is z/w of (0, 0, zDptBufSpace, 1) transformed by the inverse
// screen projection; only the four matrix entries that contribute to z and w
// are read, so no full matrix multiply is needed.
float GetLinearDepth(float zDptBufSpace, uint eyeIndex) // 0 is near 1 is far
{
    float4x4 g_mInvScrProjection = g_mInvScrProjectionArr[eyeIndex];

    // for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
    // however this function must also work for orthographic projection so we keep it like this.
    float m22 = g_mInvScrProjection[2].z, m23 = g_mInvScrProjection[2].w;
    float m32 = g_mInvScrProjection[3].z, m33 = g_mInvScrProjection[3].w;

    return (m22*zDptBufSpace+m23) / (m32*zDptBufSpace+m33);
}


// Reconstructs a view-space position from a screen-space coordinate and a
// linear depth.
//
// v2ScrPos  : screen-space x/y (same units the screen projection maps to).
// fLinDepth : linear depth; returned unchanged as the z component.
// eyeIndex  : selects the entry of g_mScrProjectionArr to use.
//
// For a perspective projection the un-projected x/y are scaled by fLinDepth;
// for an orthographic projection (g_isOrthographic != 0) they are used as-is
// and the translation is read from the w column instead of the z column.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)
{
    float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];

    bool isOrthographic = g_isOrthographic!=0;
    float fSx = g_mScrProjection[0].x;
    float fSy = g_mScrProjection[1].y;
    float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
    float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;

#if USE_LEFT_HAND_CAMERA_SPACE
    bool useLeftHandVersion = true;
#else
    bool useLeftHandVersion = isOrthographic;
#endif

    // The sign flips the screen coordinate for the right-handed perspective case.
    float s = useLeftHandVersion ? 1 : (-1);
    float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);

    return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
}

// Returns length(1/fSx, 1/fSy), where fSx and fSy are the x and y scale terms
// of the screen projection for the given eye. Going by the name, this is the
// view-space diagonal of one pixel at depth one - TODO confirm for orthographic.
float GetOnePixDiagWorldDistAtDepthOne(uint eyeIndex)
{
    float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];
    float fSx = g_mScrProjection[0].x;
    float fSy = g_mScrProjection[1].y;

    return length( float2(1.0/fSx,1.0/fSy) );
}

// SphericalIntersectionTests and CullByExactEdgeTests are close to the versions
// in lightlistbuild-bigtile.compute.  But would need more re-factoring than needed
// right now.
#ifdef EXACT_EDGE_TESTS
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex);
#endif
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate, uint eyeIndex);
#endif

float4 FetchPlane(int l, int p, uint eyeIndex);


// Cheap depth-range test: does coarse light l overlap cluster k?
//
// l is the coarse light index, k is the cluster index.
// Each clusterIdxs entry packs two lights; the 16-bit half selected by (l&1)
// holds the first cluster index in its low byte and the last in its high byte.
// Returns true when k lies inside that inclusive range.
bool CheckIntersectionBasic(int l, int k)
{
    unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
    return ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
}

// Tests coarse light l against cluster k of the tile [viTilLL, viTilUR].
// Returns true for intersection and false for none.
//
// l             : coarse light index; its six planes are read from
//                 lightPlanes[6*(l&3) .. 6*(l&3)+5], so the caller must have
//                 loaded the planes of this light into that slot beforehand.
// k             : cluster index along depth.
// viTilLL/UR    : screen-space corners of the tile.
// suggestedBase : base passed through to ClusterIdxToZ.
// eyeIndex      : eye used for the view-space reconstruction.
//
// The depth-range test is shared with CheckIntersectionBasic. When
// CONV_HULL_TEST_ENABLED is defined, a hit is additionally rejected if all
// eight corners of the cluster lie on the positive side of any single plane
// of the light's bounding volume.
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase, uint eyeIndex)
{
    // If this light's screen space depth bounds intersect this cluster...simple cluster test
    bool bIsHit = CheckIntersectionBasic(l, k);
    if(bIsHit)
    {
#ifdef CONV_HULL_TEST_ENABLED
        float depthAtNearZ = ClusterIdxToZ(k, suggestedBase);
        float depthAtFarZ = ClusterIdxToZ(k+1, suggestedBase);

        for(int p=0; p<6; p++)
        {
            float4 plane = lightPlanes[6*(l&3)+p];

            bool bAllInvisib = true;

            // Bits 0, 1 and 2 of i select the x, y and z extreme of the corner.
            for(int i=0; i<8; i++)
            {
                float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
                float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
                float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
                float3 vP = GetViewPosFromLinDepth( float2(x, y), z, eyeIndex);

                // Test each corner of the cluster against the light bounding box planes
                bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
            }

            if(bAllInvisib) bIsHit = false;
        }
#endif
    }

    return bIsHit;
}


[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint eyeIndex = u3GroupID.z;

    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    const uint log2TileSize = firstbithigh(TILE_SIZE_CLUSTERED);
    uint nrTilesX = ((uint)g_screenSize.x +(TILE_SIZE_CLUSTERED-1))>>log2TileSize;
    uint nrTilesY = ((uint)g_screenSize.y +(TILE_SIZE_CLUSTERED-1))>>log2TileSize;

    // Screen space coordinates of clustered tile
    uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(g_screenSize.x, g_screenSize.y) );       // not width and height minus 1 since viTilUR represents the end of the tile corner.

    if(t==0)
    {
        lightOffs = 0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
        ldsZMax = 0;
#endif
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    float dpt_ma=1.0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    // establish min and max depth first
    dpt_ma=0.0;

    for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
    {
        // XRTODO: We need to stereo-ize access to g_depth_tex for texture arrays.
        uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(g_screenSize.x-1, g_screenSize.y-1) );

        // TODO: For stereo double-wide, I need a proper way to insert the second eye width offset.  Right now, I can just
        // use g_screenSize.x, but that's kinda cheating.
// Additionally, we're going to have a method to select between a doublewide texture or texture array. Doubling // the kernels seems like a bad idea. We could branch our texture read to switch between different texture declarations. uint stereoDWOffset = eyeIndex * g_screenSize.x; uPixCrd.x += stereoDWOffset; #ifdef MSAA_ENABLED for(int i=0; i> log2BigTileToClustTileRatio; int NrBigTilesY = (nrTilesY + ((1<> log2BigTileToClustTileRatio; const int bigTileBase = eyeIndex * NrBigTilesX * NrBigTilesY; const int bigTileIdx = bigTileBase + ((tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX) + (tileIDX.x>>log2BigTileToClustTileRatio); // map the idx to 64x64 tiles int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+0]; for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS) { int l = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+l0+1]; #else for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS) { #endif // TODO: Seems kinda funny that we repeat this exact code here, bigtile, and FPTL... 
const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex); const float2 vMi = g_vBoundsBuffer[boundsIndices.min].xy; const float2 vMa = g_vBoundsBuffer[boundsIndices.max].xy; if( all(vMa>vTileLL) && all(vMi>1); l += NR_THREADS) { const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-1)]; const ScreenSpaceBoundsIndices l0Bounds = GenerateScreenSpaceBoundsIndices(l0, g_iNrVisibLights, eyeIndex); const ScreenSpaceBoundsIndices l1Bounds = GenerateScreenSpaceBoundsIndices(l1, g_iNrVisibLights, eyeIndex); const unsigned int clustIdxMi0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.min].z, eyeIndex), suggestedBase)); const unsigned int clustIdxMa0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.max].z, eyeIndex), suggestedBase)); const unsigned int clustIdxMi1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.min].z, eyeIndex), suggestedBase)); const unsigned int clustIdxMa1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.max].z, eyeIndex), suggestedBase)); clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0); } } #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif int nrClusters = (1<>2; int m = i&3; if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p, eyeIndex); #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif for(int l=ll; l>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges int iSwizzle = e0&0x3; int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2 vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane, eyeIndex); #if USE_LEFT_HAND_CAMERA_SPACE float3 edgeSectionZero = g_isOrthographic==0 ? 
vP0 : float3(0.0,0.0,1.0); #else float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,-1.0); #endif vE0 = iSection == 0 ? edgeSectionZero : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0))); } int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex) { if(threadID==0) lightOffs2 = 0; const bool bOnlyNeedFrustumSideEdges = true; const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull. const int totNrEdgePairs = 12*nrFrustEdges; for(int l=0; l0) ++positive; else if(fSignDist<0) ++negative; } int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0)); positive=0; negative=0; for(int j=0; j<8; j++) { float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane, eyeIndex); float fSignDist = dot(vN, vPf-vP0); if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative; } int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0)); bool bFoundSepPlane = (resh*resf)<0; if(bFoundSepPlane) InterlockedOr(ldsIsLightInvisible, 1); } } #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif if(threadID==0 && ldsIsLightInvisible==0) { coarseList[lightOffs2++] = coarseList[l]; } } #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) GroupMemoryBarrierWithGroupSync(); #endif return lightOffs2; } #endif [numthreads(1, 1, 1)] void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) { g_LayeredSingleIdxBuffer[0]=0; }