/*
 * Compute kernel BigTileLightListGen and its helpers. Each thread group works on one 64x64-pixel
 * screen tile and records candidate light indices in the group-shared array lightsListLDS; later
 * passes overwrite rejected entries with 0xffffffff.
 *
 * NOTE(review): this copy of the file is collapsed onto four physical lines, so every `//` comment
 * swallows the code that follows it on the same line, and several spans appear to be truncated
 * (flagged inline below). Restore line breaks and the missing text from the original file before
 * attempting to compile.
 */
#pragma kernel BigTileLightListGen #include "..\common\ShaderBase.h" #include "LightDefinitions.cs.hlsl" #include "LightingConvexHullUtils.hlsl" #define EXACT_EDGE_TESTS #define PERFORM_SPHERICAL_INTERSECTION_TESTS #define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1) uniform int g_iNrVisibLights; uniform uint2 g_viDimensions; uniform float4x4 g_mInvScrProjection; uniform float4x4 g_mScrProjection; uniform float g_fNearPlane; uniform float g_fFarPlane; /* NOTE(review): the buffer declarations below carry no element type; the template arguments appear to have been lost - confirm against the original file. */ StructuredBuffer g_vBoundsBuffer : register( t1 ); StructuredBuffer g_vLightData : register( t2 ); StructuredBuffer g_data : register( t3 ); #define NR_THREADS 64 // output buffer RWStructuredBuffer g_vLightList : register( u0 ); // 2kB (room for roughly 30 wavefronts) groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE]; groupshared uint lightOffs; /* Transforms (0,0,zDptBufSpace,1) by g_mInvScrProjection and returns z/w of the result. */ float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far { float3 vP = float3(0.0f,0.0f,zDptBufSpace); float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); return v4Pres.z / v4Pres.w; } /* Returns fLinDepth times a direction derived from v2ScrPos and the g_mScrProjection terms [0].x, [0].z, [1].y, [1].z; the signs differ when LEFT_HAND_COORDINATES is not defined. */ float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth) { float fSx = g_mScrProjection[0].x; float fCx = g_mScrProjection[0].z; float fSy = g_mScrProjection[1].y; float fCy = g_mScrProjection[1].z; #ifdef LEFT_HAND_COORDINATES return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 ); #else return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 ); #endif } /* Returns length(float2(1/fSx, 1/fSy)), with fSx and fSy read from g_mScrProjection. */ float GetOnePixDiagWorldDistAtDepthOne() { float fSx = g_mScrProjection[0].x; float fSy = g_mScrProjection[1].y; return length( float2(1.0/fSx,1.0/fSy) ); } /* Forward declarations of helpers defined further down. */ void sortLightList(int localThreadID, int n); #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); #endif #ifdef EXACT_EDGE_TESTS void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR); #endif /* Kernel entry point, NR_THREADS threads per group. Each group handles the 64x64-pixel tile indexed by u3GroupID.xy; every thread starts at light index threadID and strides by NR_THREADS. */ [numthreads(NR_THREADS, 1, 1)] void BigTileLightListGen(uint threadID : 
SV_GroupIndex, uint3 u3GroupID : SV_GroupID) { uint2 tileIDX = u3GroupID.xy; uint t=threadID; uint iWidth = g_viDimensions.x; uint iHeight = g_viDimensions.y; uint nrBigTilesX = (iWidth+63)/64; uint nrBigTilesY = (iHeight+63)/64; /* thread 0 resets the group-shared counter before the group synchronises */ if(t==0) lightOffs = 0; #if !defined(XBONE) && !defined(PLAYSTATION4) GroupMemoryBarrierWithGroupSync(); #endif uint2 viTilLL = 64*tileIDX; uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner. float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight); float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight); // build coarse list using AABB for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS) { const float2 vMi = g_vBoundsBuffer[l].xy; const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy; if( all(vMa>vTileLL) && all(vMi /* NOTE(review): text appears to be missing here - the rest of this kernel and the head of the sort routine (presumably sortLightList) are absent from this copy. */ >1; j>0; j=j>>1) { /* NOTE(review): the loop header below is garbled. The surviving body compares lightsListLDS[i] with lightsListLDS[ixj] and swaps them when mustSwap is true. */ for(int i=localThreadID; ii) { const unsigned int Avalue = lightsListLDS[i]; const unsigned int Bvalue = lightsListLDS[ixj]; const bool mustSwap = ((i&k)!=0^(Avalue>Bvalue)) && Avalue!=Bvalue; if(mustSwap) { lightsListLDS[i]=Bvalue; lightsListLDS[ixj]=Avalue; } } } GroupMemoryBarrierWithGroupSync(); } } } #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS /* Overwrites lightsListLDS[l] with 0xffffffff for lights whose enlarged bounding sphere is missed by the ray V obtained from screenCoordinate. */ void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate) { #ifdef LEFT_HAND_COORDINATES float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0); #else float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0); #endif float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne(); float worldDistAtDepthOne = 32*onePixDiagDist; // scale by half a tile int iNrVisib = 0; /* NOTE(review): the loop header below is truncated; the code that loads center, fRad, maxZdir and len is missing from this copy. */ for(int l=threadID; l0.0001 ? 
(maxZdir.z/len) : len; // since len>=(maxZdir.z/len) we can use len as an approximate value when len<=epsilon float fOffs = scalarProj*fRad; #else float fOffs = fRad; // more false positives due to larger radius but works too #endif #ifdef LEFT_HAND_COORDINATES fRad = fRad + (center.z+fOffs)*worldDistAtDepthOne; #else fRad = fRad - (center.z-fOffs)*worldDistAtDepthOne; #endif /* Ray/sphere test terms: the light is kept when the origin lies inside the sphere (c<0) or when fDescDivFour>0 and CdotV>0. */ float a = dot(V,V); float CdotV = dot(center,V); float c = dot(center,center) - fRad*fRad; float fDescDivFour = CdotV*CdotV - a*c; if(!(c<0 || (fDescDivFour>0 && CdotV>0))) // if ray misses bounding sphere lightsListLDS[l]=0xffffffff; } #if !defined(XBONE) && !defined(PLAYSTATION4) GroupMemoryBarrierWithGroupSync(); #endif } #endif #ifdef EXACT_EDGE_TESTS /* Returns the position produced by GetViewPosFromLinDepth for tile corner i: bit 0 picks x from viTilLL/viTilUR, bit 1 picks y, bit 2 picks g_fNearPlane or fTileFarPlane as depth (negated when LEFT_HAND_COORDINATES is not defined). */ float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane) { float x = (i&1)==0 ? viTilLL.x : viTilUR.x; float y = (i&2)==0 ? viTilLL.y : viTilUR.y; float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane; #ifndef LEFT_HAND_COORDINATES z = -z; #endif return GetViewPosFromLinDepth( float2(x, y), z); } /* Outputs the start point vP0 and direction vE0 of frustum edge e0. Section 0 edges use vP0 itself as the direction; other sections use a signed unit vector along x or y. Note that the y components of the two tile corners are swapped in the GetTileVertex call. */ void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane) { int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges int iSwizzle = e0&0x3; int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2 vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane); vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0))); } /* Overwrites lightsListLDS[l] with 0xffffffff when a separating plane is found (resh*resf < 0), presumably between the light hull and the tile frustum - confirm against the original file. */ void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR) { const bool bOnlyNeedFrustumSideEdges = true; const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull. 
const int totNrEdgePairs = 12*nrFrustEdges; /* NOTE(review): the loop header below is truncated; the edge-pair iteration, the construction of vN and vP0, and the first classification loop head are missing from this copy. */ for(int l=0; l0) ++positive; else if(fSignDist<0) ++negative; } int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0)); /* Classify the 8 tile corners against the plane through vP0 with normal vN: resf is 1 or -1 when all non-zero distances share a sign, 0 when they straddle the plane or all lie on it. */ positive=0; negative=0; for(int j=0; j<8; j++) { float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane); float fSignDist = dot(vN, vPf-vP0); if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative; } int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0)); /* opposite non-zero signs of resh and resf mean the plane separates the two point sets, so the light is flagged */ bool bFoundSepPlane = (resh*resf)<0; if(bFoundSepPlane) lightsListLDS[l]=0xffffffff; } } } #if !defined(XBONE) && !defined(PLAYSTATION4) GroupMemoryBarrierWithGroupSync(); #endif } #endif