// ---------------------------------------------------------------------------------------------------
// ScreenBoundsAABB: compute kernel that writes a screen-space bounding box (min and max corner) for
// each light.
//
// Thread layout, as given by the index math in the kernel: one group is NR_THREADS (64) threads.
// Thread t handles sub-light t/8 of its group (light index = t/8 + 8*groupID) and hull side t%8.
// Only sides 0..5 of lights whose index is below g_iNrVisibLights do per-face work
// ("mask 2 out of 8 threads").
//
// Read:    g_data[lgtIndex] (vBoxAxisX/Y/Z, vCen, fRadius, vScaleXY), g_mProjection, g_mInvProjection.
// Written: g_vBoundsBuffer[lgtIndex]                    = min corner
//          g_vBoundsBuffer[lgtIndex + g_iNrVisibLights] = max corner
//          with x and y remapped as 0.5*v + 0.5 and z multiplied by VIEWPORT_SCALE_Z.
//
// Flow that is visible in this listing:
//   1. The corners of one hull face (GetQuad) are projected with g_mProjection and classified with
//      GetClip().
//   2. If the OR of all collected clip flags is zero ("all visible"), the per-face values kept in
//      posX/posY/posZ are merged with min/max. Otherwise ("need true clipping") each of the six
//      faces is clipped plane by plane with ClipAgainstPlane(), alternating between two
//      MAX_PNTS-sized regions of posX/posY/posZ/posW, stopping early when fewer than 3 or at least
//      MAX_PNTS vertices remain.
//   3. The box is tightened with CalcBound() for the sphere (vCen, fRadius) in xy, and with the
//      projected depth of vCen.z -/+ fRadius in z (sign convention selected by LEFT_HAND_COORDINATES).
//
// NOTE(review): this listing appears to have lost text in several places, e.g. the element types of
// g_data and g_vBoundsBuffer, the statements between "fW = fS * (fWabs" and "(i*6))&0x3f", the body of
// the "for(int k=0; k..." loop, and most of ClipAgainstPlane() together with the header of GetClip().
// The description above covers only what is still visible; confirm against the upstream file.
// ---------------------------------------------------------------------------------------------------
#pragma kernel ScreenBoundsAABB #include "..\common\ShaderBase.h" #include "LightDefinitions.cs.hlsl" uniform int g_iNrVisibLights; uniform float4x4 g_mInvProjection; uniform float4x4 g_mProjection; StructuredBuffer g_data : register( t0 ); #define FLT_EPSILON 1.192092896e-07F // smallest such that 1.0+FLT_EPSILON != 1.0 #define NR_THREADS 64 // output buffer RWStructuredBuffer g_vBoundsBuffer : register( u0 ); #define MAX_PNTS 9 // strictly this should be 10=6+4 but we get more wavefronts and 10 seems to never hit (fingers crossed) // However, worst case the plane that would be skipped if such an extreme case ever happened would be backplane // clipping gets skipped which doesn't cause any errors. // LDS (2496 bytes) groupshared float posX[MAX_PNTS*8*2]; groupshared float posY[MAX_PNTS*8*2]; groupshared float posZ[MAX_PNTS*8*2]; groupshared float posW[MAX_PNTS*8*2]; groupshared unsigned int clipFlags[48]; unsigned int GetClip(const float4 P); int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p); void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r); void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex); void GetPlane(out float3 p0, out float3 vN, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex); [numthreads(NR_THREADS, 1, 1)] void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID) { uint groupID = u3GroupID.x; //uint vindex = groupID * NR_THREADS + threadID; unsigned int g = groupID; unsigned int t = threadID; const int subLigt = (int) (t/8); const int lgtIndex = subLigt+(int) g*8; const int sideIndex = (int) (t%8); SFiniteLightBound lgtDat = g_data[lgtIndex]; const float3 vBoxX = 
lgtDat.vBoxAxisX.xyz; const float3 vBoxY = lgtDat.vBoxAxisY.xyz; const float3 vBoxZ = -lgtDat.vBoxAxisZ.xyz; // flip an axis to make it right handed since Determinant(worldToView)<0 const float3 vCen = lgtDat.vCen.xyz; const float fRadius = lgtDat.fRadius; const float2 vScaleXY = lgtDat.vScaleXY; { if(sideIndex<6 && lgtIndex<(int) g_iNrVisibLights) // mask 2 out of 8 threads { float3 q0, q1, q2, q3; GetQuad(q0, q1, q2, q3, vBoxX, vBoxY, vBoxZ, vCen, vScaleXY, sideIndex); const float4 vP0 = mul(g_mProjection, float4(q0, 1)); const float4 vP1 = mul(g_mProjection, float4(q1, 1)); const float4 vP2 = mul(g_mProjection, float4(q2, 1)); const float4 vP3 = mul(g_mProjection, float4(q3, 1)); // test vertices of one quad (of the convex hull) for intersection const unsigned int uFlag0 = GetClip(vP0); const unsigned int uFlag1 = GetClip(vP1); const unsigned int uFlag2 = GetClip(vP2); const unsigned int uFlag3 = GetClip(vP3); const float4 vPnts[] = {vP0, vP1, vP2, vP3}; // screen-space AABB of one quad (assuming no intersection) float3 vMin, vMax; for(int k=0; k<4; k++) { float fW = vPnts[k].w; float fS = fW<0 ? -1 : 1; float fWabs = fW<0 ? (-fW) : fW; fW = fS * (fWabs>(i*6))&0x3f; uFlagAnd &= uClipBits; uFlagOr |= uClipBits; } uCollectiveAnd &= uFlagAnd; uCollectiveOr |= uFlagOr; } bool bSetBoundYet = false; float3 vMin=0.0, vMax=0.0; if(uCollectiveAnd!=0 || uCollectiveOr==0) // all invisible or all visible (early out) { if(uCollectiveOr==0) // all visible { for(f=0; f<6; f++) { const int sideIndex = f; float3 vFaceMi = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 0], posY[subLigt*MAX_PNTS*2 + sideIndex + 0], posZ[subLigt*MAX_PNTS*2 + sideIndex + 0]); float3 vFaceMa = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 6], posY[subLigt*MAX_PNTS*2 + sideIndex + 6], posZ[subLigt*MAX_PNTS*2 + sideIndex + 6]); for(int k=0; k<2; k++) { float3 vP = k==0 ? 
vFaceMi : vFaceMa; if(f==0 && k==0) { vMin=vP; vMax=vP; } vMax = max(vMax, vP); vMin = min(vMin, vP); } } bSetBoundYet=true; } } else // :( need true clipping { for(f=0; f<6; f++) { float3 q0, q1, q2, q3; GetQuad(q0, q1, q2, q3, vBoxX, vBoxY, vBoxZ, vCen, vScaleXY, f); // 4 vertices to a quad of the convex hull in post projection space const float4 vP0 = mul(g_mProjection, float4(q0, 1)); const float4 vP1 = mul(g_mProjection, float4(q1, 1)); const float4 vP2 = mul(g_mProjection, float4(q2, 1)); const float4 vP3 = mul(g_mProjection, float4(q3, 1)); int iSrcIndex = 0; int offs = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; // fill up source clip buffer with the quad posX[offs+0]=vP0.x; posX[offs+1]=vP1.x; posX[offs+2]=vP2.x; posX[offs+3]=vP3.x; posY[offs+0]=vP0.y; posY[offs+1]=vP1.y; posY[offs+2]=vP2.y; posY[offs+3]=vP3.y; posZ[offs+0]=vP0.z; posZ[offs+1]=vP1.z; posZ[offs+2]=vP2.z; posZ[offs+3]=vP3.z; posW[offs+0]=vP0.w; posW[offs+1]=vP1.w; posW[offs+2]=vP2.w; posW[offs+3]=vP3.w; int iNrSrcVerts = 4; // do true clipping for(int p=0; p<6; p++) { const int nrVertsDst = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p); iSrcIndex = 1-iSrcIndex; iNrSrcVerts = nrVertsDst; if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break; } // final clipped convex primitive is in src buffer if(iNrSrcVerts>2) { int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; for(int k=0; kfRadius) { float2 vMi, vMa; bool2 bMi, bMa; CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, vCen, fRadius); vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy; vMax.xy = bMa ? 
min(vMax.xy, vMa) : vMax.xy; } #ifdef LEFT_HAND_COORDINATES if((vCen.z-fRadius)>0.0) { float4 vPosF = mul(g_mProjection, float4(0,0,vCen.z-fRadius,1)); vMin.z = max(vMin.z, vPosF.z/vPosF.w); } if((vCen.z+fRadius)>0.0) { float4 vPosB = mul(g_mProjection, float4(0,0,vCen.z+fRadius,1)); vMax.z = min(vMax.z, vPosB.z/vPosB.w); } #else if((vCen.z+fRadius)<0.0) { float4 vPosF = mul(g_mProjection, float4(0,0,vCen.z+fRadius,1)); vMin.z = max(vMin.z, vPosF.z/vPosF.w); } if((vCen.z-fRadius)<0.0) { float4 vPosB = mul(g_mProjection, float4(0,0,vCen.z-fRadius,1)); vMax.z = min(vMax.z, vPosB.z/vPosB.w); } #endif else { vMin = float3(-3,-3,-3); vMax = float3(-2,-2,-2); } } // we should consider doing a look-up here into a max depth mip chain // to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth //g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z); //g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z); // changed for unity g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, 0.5*vMin.y+0.5, vMin.z*VIEWPORT_SCALE_Z); g_vBoundsBuffer[lgtIndex+(int) g_iNrVisibLights] = float3(0.5*vMax.x+0.5, 0.5*vMax.y+0.5, vMax.z*VIEWPORT_SCALE_Z); } } } float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p); int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p) { int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2; int offs_dst = (1-iSrcIndex)*MAX_PNTS+subLigt*MAX_PNTS*2; float4 vPrev = float4(posX[offs_src+(iNrSrcVerts-1)], posY[offs_src+(iNrSrcVerts-1)], posZ[offs_src+(iNrSrcVerts-1)], posW[offs_src+(iNrSrcVerts-1)]); int nrVertsDst = 0; unsigned int uMask = (1<P.w)?2:0) | ((P.y<-P.w)?4:0) | ((P.y>P.w)?8:0) | ((P.z<0)?16:0) | ((P.z>P.w)?32:0); } float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p) { const float fS = p==4 ? 0 : ((p&1)==0 ? 
-1 : 1);    // end of GenNewVert's first statement: fS is 0 for p==4, otherwise -1 for even p and +1 for odd p

    // Planes come in pairs per coordinate: p = 0,1 -> x, 2,3 -> y, 4,5 -> z.
    const int index = ((uint) p)/2;
    float x1 = index==0 ? vVisib.x : (index==1 ? vVisib.y : vVisib.z);
    float x0 = index==0 ? vInvisib.x : (index==1 ? vInvisib.y : vInvisib.z);

    // Solve  fS*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (x1-x0)*t + x0  for t.
    const float fT = (fS*vInvisib.w-x0)/((x1-x0) - fS*(vVisib.w-vInvisib.w));
    float4 vNew = vVisib*fT + vInvisib*(1-fT);

    // Overwrite the coordinate that was clipped against so it sits exactly on the plane.
    if(index==0) vNew.x = fS*vNew.w;
    else if(index==1) vNew.y = fS*vNew.w;
    else vNew.z = fS*vNew.w;

    return vNew;
}

// Produces the four corners of one face of a light's bounding hull.
//   p0..p3      (out) face corners, delivered counterclockwise in right hand space and
//               clockwise in left hand space.
//   vBoxX/Y/Z   hull axes; vCen is the hull centre.
//   vScaleXY    scale factors: applied to p2/p3 only for the side faces, and to all four
//               corners for the odd face of the third pair.
//   sideIndex   face selector: 0,1 -> first pair, 2,3 -> second pair, any other value -> third pair.
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex)
{
    // Which pair of opposite faces this side belongs to.
    int axisPair = 2;
    if (sideIndex == 0 || sideIndex == 1)
        axisPair = 0;
    else if (sideIndex == 2 || sideIndex == 3)
        axisPair = 1;

    const bool oddSide = (sideIndex & 1) != 0;
    const float sideSign = oddSide ? 1 : (-1);

    // outDir: centre -> face (vCen + outDir is the face centre when vScaleXY is 1.0).
    // edgeDir: spans the face. depthDir: separates corners p0/p1 (-) from p2/p3 (+).
    float3 outDir   = vBoxZ;
    float3 edgeDir  = -vBoxY;
    float3 depthDir = -vBoxX;
    if (axisPair == 0)
    {
        outDir   = vBoxX;
        edgeDir  = -vBoxY;
        depthDir = vBoxZ;
    }
    else if (axisPair == 1)
    {
        outDir   = -vBoxY;
        edgeDir  = -vBoxX;
        depthDir = vBoxZ;
    }
    outDir  = sideSign * outDir;
    edgeDir = sideSign * edgeDir;

    // Odd face of the third pair: every corner is scaled.
    if (axisPair == 2 && oddSide)
    {
        edgeDir  *= vScaleXY.y;
        depthDir *= vScaleXY.x;
    }

    // Side faces (first and second pair): only the p2/p3 end is scaled.
    float3 outDirScaled  = outDir;
    float3 edgeDirScaled = edgeDir;
    if (axisPair == 0)
    {
        outDirScaled  *= vScaleXY.x;
        edgeDirScaled *= vScaleXY.y;
    }
    else if (axisPair == 1)
    {
        outDirScaled  *= vScaleXY.y;
        edgeDirScaled *= vScaleXY.x;
    }

    p0 = vCen + (outDir + edgeDir - depthDir);
    p1 = vCen + (outDir - edgeDir - depthDir);
    p2 = vCen + (outDirScaled - edgeDirScaled + depthDir);
    p3 = vCen + (outDirScaled + edgeDirScaled + depthDir);
}

// Produces a point on one hull face plus a (non-normalized) normal direction for it.
//   p0          (out) same corner expression GetQuad uses for its p0.
//   vN          (out) cross product of the face's scaled edge direction with its slanted
//               depth direction; negated when LEFT_HAND_COORDINATES is defined.
//   remaining parameters: identical meaning to GetQuad.
void GetPlane(out float3 p0, out float3 vN, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex)
{
    // Which pair of opposite faces this side belongs to.
    int axisPair = 2;
    if (sideIndex == 0 || sideIndex == 1)
        axisPair = 0;
    else if (sideIndex == 2 || sideIndex == 3)
        axisPair = 1;

    const bool oddSide = (sideIndex & 1) != 0;
    const float sideSign = oddSide ? 1 : (-1);

    float3 outDir   = vBoxZ;
    float3 edgeDir  = -vBoxY;
    float3 depthDir = -vBoxX;
    if (axisPair == 0)
    {
        outDir   = vBoxX;
        edgeDir  = -vBoxY;
        depthDir = vBoxZ;
    }
    else if (axisPair == 1)
    {
        outDir   = -vBoxY;
        edgeDir  = -vBoxX;
        depthDir = vBoxZ;
    }
    outDir  = sideSign * outDir;
    edgeDir = sideSign * edgeDir;

    // Odd face of the third pair: every corner is scaled.
    if (axisPair == 2 && oddSide)
    {
        edgeDir  *= vScaleXY.y;
        depthDir *= vScaleXY.x;
    }

    // Side faces (first and second pair): only the far end is scaled.
    float3 outDirScaled  = outDir;
    float3 edgeDirScaled = edgeDir;
    if (axisPair == 0)
    {
        outDirScaled  *= vScaleXY.x;
        edgeDirScaled *= vScaleXY.y;
    }
    else if (axisPair == 1)
    {
        outDirScaled  *= vScaleXY.y;
        edgeDirScaled *= vScaleXY.x;
    }

    p0 = vCen + (outDir + edgeDir - depthDir);    // vCen + outDir is center of face when vScaleXY is 1.0

    float3 faceNormal = cross( edgeDirScaled, 0.5*(outDir-outDirScaled) - depthDir );
#ifdef LEFT_HAND_COORDINATES
    faceNormal = -faceNormal;
#endif
    vN = faceNormal;
}

// Multiplies the plane, as the first (vector) argument of mul, by InvProjection and returns the result.
float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)
{
    const float4 planePost = mul(plane, InvProjection);
    return planePost;
}

float4 EvalPlanePair(float2 posXY_in, float r)
{
    // rotate by 90 degrees to avoid potential division by zero
    bool bMustFlip = abs(posXY_in.y)