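// NOTE: the lines below are residue from the web page this file was copied from; they are not part of the shader.
//   "You may select at most 25 topics."
//   "Topics must start with a Chinese character, a letter or a digit, may contain hyphens (-), and must not exceed 35 characters."
//   537 lines, 21 KiB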
// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
// https://github.com/wolfgangfengel/GPU-Pro-7

// Two kernels are generated from the same entry point; the oblique variant is compiled with USE_OBLIQUE_MODE defined.
#pragma kernel ScreenBoundsAABB SCRAABBGEN=ScreenBoundsAABB
#pragma kernel ScreenBoundsAABB_Oblique SCRAABBGEN=ScreenBoundsAABB_Oblique USE_OBLIQUE_MODE

#include "CoreRP/ShaderLibrary/Common.hlsl"
#include "LightLoop.cs.hlsl"
#include "LightCullUtils.hlsl"

#pragma only_renderers d3d11 ps4 xboxone vulkan metal switch

uniform int g_isOrthographic;   // non-zero selects the orthographic path when tightening the bound with the sphere
uniform int g_iNrVisibLights;   // number of lights to process (per eye)

// Indexed by eye (u3GroupID.y in the kernel).
uniform float4x4 g_mInvProjectionArr[2];
uniform float4x4 g_mProjectionArr[2];

// input buffer: one bounding volume description per light
StructuredBuffer<SFiniteLightBound> g_data : register( t0 );

#define NR_THREADS 64

// output buffer
RWStructuredBuffer<float4> g_vBoundsBuffer : register( u0 );

// Strictly this should be 10=6+4, but with 9 we get more wavefronts and 10 seems to never be hit (fingers crossed).
// Worst case, if such an extreme case ever happened, the plane that would be skipped is the back plane:
// its clipping gets skipped, which doesn't cause any errors.
#define MAX_PNTS 9

// LDS (2496 bytes): 4 arrays * (MAX_PNTS*8*2) floats * 4 bytes + 48 uints * 4 bytes.
// Each of the 8 lights handled by a thread group owns a slice of MAX_PNTS*2 entries in posX/Y/Z/W.
groupshared float posX[MAX_PNTS*8*2];
groupshared float posY[MAX_PNTS*8*2];
groupshared float posZ[MAX_PNTS*8*2];
groupshared float posW[MAX_PNTS*8*2];
groupshared unsigned int clipFlags[48];     // 8 lights * 6 hull faces

// Forward declarations (defined further down in this file).
unsigned int GetClip(const float4 P);
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p);
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r);

#include "LightingConvexHullUtils.hlsl"

// Computes a screen-space AABB (plus a linear camera-space depth range) for every visible light
// and writes it to g_vBoundsBuffer as two float4s (min and max).
//
// Work distribution (as established by the index math below):
//   * a thread group of NR_THREADS=64 threads handles 8 lights, 8 threads per light;
//   * threads with sideIndex 0..5 each process one quad of the light's 6-sided convex hull,
//     the remaining 2 threads per light are masked out;
//   * after the group barrier, the thread with sideIndex==0 merges the per-face results,
//     performs true clipping if required and writes the final bound.
//
// threadID  : flattened thread index inside the group (SV_GroupIndex).
// u3GroupID : x = group index (selects the batch of 8 lights), y = eye index (0 or 1).
[numthreads(NR_THREADS, 1, 1)]
void SCRAABBGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint groupID = u3GroupID.x;
    uint eyeIndex = u3GroupID.y; // currently, can only be 0 or 1

    // The g_ is preserved in order to make cross-pipeline (FPTL) updates easier
    float4x4 g_mInvProjection = g_mInvProjectionArr[eyeIndex];
    float4x4 g_mProjection = g_mProjectionArr[eyeIndex];

    //uint vindex = groupID * NR_THREADS + threadID;
    unsigned int g = groupID;
    unsigned int t = threadID;

    const int subLigt = (int) (t/8);            // light slot inside this group (0..7)
    const int lgtIndex = subLigt+(int) g*8;     // global light index
    const int sideIndex = (int) (t%8);          // hull face handled by this thread (only 0..5 are used)

    const int eyeAdjustedLgtIndex = GenerateLightCullDataIndex(lgtIndex, g_iNrVisibLights, eyeIndex);
    SFiniteLightBound lgtDat = g_data[eyeAdjustedLgtIndex];

    const float3 boxX = lgtDat.boxAxisX.xyz;
    const float3 boxY = lgtDat.boxAxisY.xyz;
    const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
    const float3 center = lgtDat.center.xyz;
    const float radius = lgtDat.radius;
    const float2 scaleXY = lgtDat.scaleXY;

    // ---- Phase 1: one thread per hull face; compute clip flags and an unclipped per-face AABB ----
    {
        if(sideIndex<6 && lgtIndex<(int) g_iNrVisibLights) // mask 2 out of 8 threads
        {
            float3 q0, q1, q2, q3;
            GetHullQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, sideIndex);

            const float4 vP0 = mul(g_mProjection, float4(q0, 1));
            const float4 vP1 = mul(g_mProjection, float4(q1, 1));
            const float4 vP2 = mul(g_mProjection, float4(q2, 1));
            const float4 vP3 = mul(g_mProjection, float4(q3, 1));

            // test vertices of one quad (of the convex hull) for intersection
            const unsigned int uFlag0 = GetClip(vP0);
            const unsigned int uFlag1 = GetClip(vP1);
            const unsigned int uFlag2 = GetClip(vP2);
            const unsigned int uFlag3 = GetClip(vP3);

            const float4 vPnts[] = {vP0, vP1, vP2, vP3};

            // screen-space AABB of one quad (assuming no intersection)
            float3 vMin, vMax;
            for(int k=0; k<4; k++)
            {
                // keep the sign of w but clamp its magnitude away from zero before dividing
                float fW = vPnts[k].w;
                float fS = fW<0 ? -1 : 1;
                float fWabs = fW<0 ? (-fW) : fW;
                fW = fS * (fWabs<FLT_EPS ? FLT_EPS : fWabs);
                float3 vP = float3(vPnts[k].x/fW, vPnts[k].y/fW, vPnts[k].z/fW);
                if(k==0) { vMin=vP; vMax=vP; }

                vMax = max(vMax, vP); vMin = min(vMin, vP);
            }

            // pack the four 6-bit outcodes of this face into one word
            clipFlags[subLigt*6+sideIndex] = (uFlag0<<0) | (uFlag1<<6) | (uFlag2<<12) | (uFlag3<<18);

            // store in clip buffer (only use these vMin and vMax if light is 100% visible in which case clipping isn't needed)
            posX[subLigt*MAX_PNTS*2 + sideIndex] = vMin.x;
            posY[subLigt*MAX_PNTS*2 + sideIndex] = vMin.y;
            posZ[subLigt*MAX_PNTS*2 + sideIndex] = vMin.z;

            posX[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.x;
            posY[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.y;
            posZ[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.z;
        }
    }

    // if not XBONE and not PLAYSTATION4 we need a memorybarrier here
    // since we can't rely on the gpu cores being 64 wide.
    // We need a pound define around this.
    GroupMemoryBarrierWithGroupSync();

    // ---- Phase 2: one thread per light merges the faces and writes the final bound ----
    {
        int f=0;

        if(sideIndex==0 && lgtIndex<(int) g_iNrVisibLights)
        {
            // quick acceptance or rejection
            unsigned int uCollectiveAnd = (unsigned int) -1;
            unsigned int uCollectiveOr = 0;
            for(f=0; f<6; f++)
            {
                unsigned int uFlagAnd = clipFlags[subLigt*6+f]&0x3f;
                unsigned int uFlagOr = uFlagAnd;
                for(int i=1; i<4; i++)
                {
                    unsigned int uClipBits = (clipFlags[subLigt*6+f]>>(i*6))&0x3f;
                    uFlagAnd &= uClipBits;
                    uFlagOr |= uClipBits;
                }

                uCollectiveAnd &= uFlagAnd;
                uCollectiveOr |= uFlagOr;
            }

            bool bSetBoundYet = false;
            float3 vMin=0.0, vMax=0.0;
            if(uCollectiveAnd!=0 || uCollectiveOr==0) // all invisible or all visible (early out)
            {
                if(uCollectiveOr==0) // all visible
                {
                    // union of the six per-face AABBs stored in phase 1
                    for(f=0; f<6; f++)
                    {
                        const int sideIndex = f;

                        float3 vFaceMi = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 0], posY[subLigt*MAX_PNTS*2 + sideIndex + 0], posZ[subLigt*MAX_PNTS*2 + sideIndex + 0]);
                        float3 vFaceMa = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 6], posY[subLigt*MAX_PNTS*2 + sideIndex + 6], posZ[subLigt*MAX_PNTS*2 + sideIndex + 6]);

                        for(int k=0; k<2; k++)
                        {
                            float3 vP = k==0 ? vFaceMi : vFaceMa;
                            if(f==0 && k==0) { vMin=vP; vMax=vP; }

                            vMax = max(vMax, vP); vMin = min(vMin, vP);
                        }
                    }
                    bSetBoundYet=true;
                }
            }
            else // :( need true clipping
            {

                for(f=0; f<6; f++)
                {
                    float3 q0, q1, q2, q3;
                    GetHullQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, f);

                    // 4 vertices to a quad of the convex hull in post projection space
                    const float4 vP0 = mul(g_mProjection, float4(q0, 1));
                    const float4 vP1 = mul(g_mProjection, float4(q1, 1));
                    const float4 vP2 = mul(g_mProjection, float4(q2, 1));
                    const float4 vP3 = mul(g_mProjection, float4(q3, 1));

                    // the light's LDS slice is used as two ping-pong buffers of MAX_PNTS vertices each
                    int iSrcIndex = 0;

                    int offs = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;

                    // fill up source clip buffer with the quad
                    posX[offs+0]=vP0.x; posX[offs+1]=vP1.x; posX[offs+2]=vP2.x; posX[offs+3]=vP3.x;
                    posY[offs+0]=vP0.y; posY[offs+1]=vP1.y; posY[offs+2]=vP2.y; posY[offs+3]=vP3.y;
                    posZ[offs+0]=vP0.z; posZ[offs+1]=vP1.z; posZ[offs+2]=vP2.z; posZ[offs+3]=vP3.z;
                    posW[offs+0]=vP0.w; posW[offs+1]=vP1.w; posW[offs+2]=vP2.w; posW[offs+3]=vP3.w;

                    int iNrSrcVerts = 4;

                    // do true clipping
                    for(int p=0; p<6; p++)
                    {
                        const int nrVertsDst = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p);

                        iSrcIndex = 1-iSrcIndex;
                        iNrSrcVerts = nrVertsDst;

                        // stop when the polygon degenerated or the vertex budget is exhausted
                        if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break;
                    }

                    // final clipped convex primitive is in src buffer
                    if(iNrSrcVerts>2)
                    {
                        int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
                        for(int k=0; k<iNrSrcVerts; k++)
                        {
                            float4 vCur = float4(posX[offs_src+k], posY[offs_src+k], posZ[offs_src+k], posW[offs_src+k]);

                            // project and apply toward AABB
                            float3 vP = float3(vCur.x/vCur.w, vCur.y/vCur.w, vCur.z/vCur.w);
                            if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }

                            vMax = max(vMax, vP); vMin = min(vMin, vP);
                        }
                    }

                }

                ////////////////////// look for camera frustum verts that need to be included. That is frustum vertices inside the convex hull for the light
#ifdef USE_OBLIQUE_MODE
                bool bIsObliqueClipPlane = true;
#else
                bool bIsObliqueClipPlane = false;
#endif
                // in oblique mode only the 4 vertices with z=0 (i&4 == 0) are tested
                const int nrFrustVertsToTest = bIsObliqueClipPlane ? 4 : 8;

                int i=0;
                for(i=0; i<nrFrustVertsToTest; i++) // establish 8 camera frustum vertices
                {
                    float3 vVertPSpace = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);

                    float4 v4ViewSpace = mul(g_mInvProjection, float4(vVertPSpace,1));
                    float3 vViewSpace = float3(v4ViewSpace.x/v4ViewSpace.w, v4ViewSpace.y/v4ViewSpace.w, v4ViewSpace.z/v4ViewSpace.w);

                    posX[subLigt*MAX_PNTS*2 + i] = vViewSpace.x;
                    posY[subLigt*MAX_PNTS*2 + i] = vViewSpace.y;
                    posZ[subLigt*MAX_PNTS*2 + i] = vViewSpace.z;
                }

                // determine which camera frustum vertices are inside the convex hull
                uint uVisibFl = 0xff;
                for(f=0; f<6; f++)
                {
                    float3 vP0, vN;
                    GetHullPlane(vP0, vN, boxX, boxY, boxZ, center, scaleXY, f);

                    for(i=0; i<nrFrustVertsToTest; i++)
                    {
                        float3 vViewSpace = float3(posX[subLigt*MAX_PNTS*2 + i], posY[subLigt*MAX_PNTS*2 + i], posZ[subLigt*MAX_PNTS*2 + i]);
                        // clear bit i as soon as vertex i is not on the negative side of a hull plane
                        uVisibFl &= ( dot(vViewSpace-vP0, vN)<0 ? 0xff : (~(1<<i)) );
                    }
                }

                // apply camera frustum vertices inside the convex hull to the AABB
                for(i=0; i<nrFrustVertsToTest; i++)
                {
                    if((uVisibFl&(1<<i))!=0)
                    {
                        float3 vP = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);

                        if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }

                        vMax = max(vMax, vP); vMin = min(vMin, vP);
                    }
                }
            }

            // determine AABB bound in [-1;1]x[-1;1] screen space using bounding sphere.
            // Use the result to make our already established AABB from the convex hull
            // potentially tighter.
            if(!bSetBoundYet)
            {
                // set the AABB off-screen
                vMin = float3(-3,-3,-3);
                vMax = float3(-2,-2,-2);
            }
            else
            {
                //if((center.z+radius)<0.0)
                if(g_isOrthographic==0 && length(center)>radius)
                {
                    float2 vMi, vMa;
                    bool2 bMi, bMa;
                    CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, center, radius);

                    vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy;
                    vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy;
                }
                else if(g_isOrthographic!=0)
                {
                    float2 vMi = mul(g_mProjection, float4(center.xyz-radius,1)).xy; // no division needed for ortho
                    float2 vMa = mul(g_mProjection, float4(center.xyz+radius,1)).xy; // no division needed for ortho
                    vMin.xy = max(vMin.xy, vMi);
                    vMax.xy = min(vMax.xy, vMa);
                }
#ifndef USE_OBLIQUE_MODE
                // Tighten the depth range with the sphere. NOTE: the trailing 'else' below pairs with the
                // second 'if' of whichever preprocessor branch is active.
#if USE_LEFT_HAND_CAMERA_SPACE
                if((center.z-radius)>0.0)
                {
                    float4 vPosF = mul(g_mProjection, float4(0,0,center.z-radius,1));
                    vMin.z = max(vMin.z, vPosF.z/vPosF.w);
                }
                if((center.z+radius)>0.0)
                {
                    float4 vPosB = mul(g_mProjection, float4(0,0,center.z+radius,1));
                    vMax.z = min(vMax.z, vPosB.z/vPosB.w);
                }
#else
                if((center.z+radius)<0.0)
                {
                    float4 vPosF = mul(g_mProjection, float4(0,0,center.z+radius,1));
                    vMin.z = max(vMin.z, vPosF.z/vPosF.w);
                }
                if((center.z-radius)<0.0)
                {
                    float4 vPosB = mul(g_mProjection, float4(0,0,center.z-radius,1));
                    vMax.z = min(vMax.z, vPosB.z/vPosB.w);
                }
#endif
                else
                {
                    // set the AABB off-screen
                    vMin = float3(-3,-3,-3);
                    vMax = float3(-2,-2,-2);
                }
#endif
            }

            // we should consider doing a look-up here into a max depth mip chain
            // to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth
            //g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
            //g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z);

            // changed for unity

            // Each light's AABB is represented by two float3s, the min and max of the box.
            // And for stereo, we have two sets of lights. Therefore, each eye has a set of mins, followed by
            // a set of maxs, and each set is equal to g_iNrVisibLights.
            const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(lgtIndex, g_iNrVisibLights, eyeIndex);

            // build a linear (in camera space) min/max Z for the aabb. This is needed for clustered when oblique is active
            float linMiZ, linMaZ;
#ifndef USE_OBLIQUE_MODE
            float2 vMiZW = mul(g_mInvProjection, float4(vMin,1)).zw;
            float2 vMaZW = mul(g_mInvProjection, float4(vMax,1)).zw;
            linMiZ = vMiZW.x/vMiZW.y; linMaZ = vMaZW.x/vMaZW.y;
#else
            for(int i=0; i<8; i++) // establish 8 aabb points in camera space.
            {
                float3 vP = float3((i&1)!=0 ? vMax.x : vMin.x, (i&2)!=0 ? vMax.y : vMin.y, (i&4)!=0 ? vMax.z : vMin.z);

                float2 v2Pc = mul(g_mInvProjection, float4(vP,1)).zw;
                float linZ = v2Pc.x/v2Pc.y;

                if(i==0) { linMiZ=linZ; linMaZ=linZ; }
#if USE_LEFT_HAND_CAMERA_SPACE
                linMiZ = min(linMiZ, linZ); linMaZ = max(linMaZ, linZ);
#else
                linMiZ = max(linMiZ, linZ); linMaZ = min(linMaZ, linZ);
#endif
            }

            // tighten the linear depth range with the sphere extent along z
            float z0 = center.z-radius, z1 = center.z+radius;
#if USE_LEFT_HAND_CAMERA_SPACE
            linMiZ = max(linMiZ, z0); linMaZ = min(linMaZ, z1);
#else
            linMiZ = min(linMiZ, z1); linMaZ = max(linMaZ, z0);
#endif

#endif

            // xy are remapped from [-1;1] to [0;1]; w carries the linear depth
            g_vBoundsBuffer[boundsIndices.min] = float4(0.5*vMin.x + 0.5, 0.5*vMin.y + 0.5, vMin.z*VIEWPORT_SCALE_Z, linMiZ);
            g_vBoundsBuffer[boundsIndices.max] = float4(0.5*vMax.x + 0.5, 0.5*vMax.y + 0.5, vMax.z*VIEWPORT_SCALE_Z, linMaZ);
        }
    }
}

float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p);

// Clips the convex polygon stored in one half of a light's LDS slice against a single clip plane
// and writes the result to the other half (one pass of Sutherland-Hodgman-style polygon clipping).
//
// iSrcIndex   : 0 or 1, selects which MAX_PNTS-sized half of the light's slice holds the source polygon;
//               the destination is the other half.
// iNrSrcVerts : number of vertices in the source polygon.
// subLigt     : light slot inside the thread group; selects the MAX_PNTS*2 slice in posX/Y/Z/W.
// p           : clip plane index; bit (1<<p) of GetClip()'s result is the plane tested.
// Returns the number of vertices written to the destination buffer. Vertices beyond MAX_PNTS are silently dropped.
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p)
{
    int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
    int offs_dst = (1-iSrcIndex)*MAX_PNTS+subLigt*MAX_PNTS*2;

    // start with the last vertex so the first processed edge is (last -> first)
    float4 vPrev = float4(posX[offs_src+(iNrSrcVerts-1)], posY[offs_src+(iNrSrcVerts-1)], posZ[offs_src+(iNrSrcVerts-1)], posW[offs_src+(iNrSrcVerts-1)]);

    int nrVertsDst = 0;

    unsigned int uMask = (1<<p);
    bool bIsPrevVisib = (GetClip(vPrev)&uMask)==0;
    for(int i=0; i<iNrSrcVerts; i++)
    {
        float4 vCur = float4(posX[offs_src+i], posY[offs_src+i], posZ[offs_src+i], posW[offs_src+i]);
        bool bIsCurVisib = (GetClip(vCur)&uMask)==0;
        // the edge crosses the plane: emit the intersection point
        if( (bIsCurVisib && !bIsPrevVisib) || (!bIsCurVisib && bIsPrevVisib) )
        {
            //assert(nrVertsDst<MAX_PNTS);
            if(nrVertsDst<MAX_PNTS)
            {
                // generate new vertex
                float4 vNew = GenNewVert(bIsCurVisib ? vCur : vPrev, bIsCurVisib ? vPrev : vCur, p);
                posX[offs_dst+nrVertsDst]=vNew.x; posY[offs_dst+nrVertsDst]=vNew.y; posZ[offs_dst+nrVertsDst]=vNew.z; posW[offs_dst+nrVertsDst]=vNew.w;
                ++nrVertsDst;
            }
        }

        // the current vertex is on the visible side: keep it
        if(bIsCurVisib)
        {
            //assert(nrVertsDst<MAX_PNTS);
            if(nrVertsDst<MAX_PNTS)
            {
                posX[offs_dst+nrVertsDst]=vCur.x; posY[offs_dst+nrVertsDst]=vCur.y; posZ[offs_dst+nrVertsDst]=vCur.z; posW[offs_dst+nrVertsDst]=vCur.w;
                ++nrVertsDst;
            }
        }

        vPrev = vCur;
        bIsPrevVisib = bIsCurVisib;
    }

    return nrVertsDst;
}

// Returns a 6-bit outcode for the homogeneous point P; a set bit means P is outside that plane:
//   bit 0 (1)  : P.x < -P.w
//   bit 1 (2)  : P.x >  P.w
//   bit 2 (4)  : P.y < -P.w
//   bit 3 (8)  : P.y >  P.w
//   bit 4 (16) : P.z <  0
//   bit 5 (32) : P.z >  P.w
// When USE_OBLIQUE_MODE is defined, bit 5 is masked off (result & 0x1f), so that plane is never reported.
unsigned int GetClip(const float4 P)
{
#ifdef USE_OBLIQUE_MODE
    bool bIsObliqueClipPlane = true;
#else
    bool bIsObliqueClipPlane = false;
#endif

    //-P.w <= P.x <= P.w
    return (((P.x<-P.w)?1:0) | ((P.x>P.w)?2:0) | ((P.y<-P.w)?4:0) | ((P.y>P.w)?8:0) | ((P.z<0)?16:0) | ((P.z>P.w)?32:0)) & (bIsObliqueClipPlane ? 0x1f : 0x3f);
}

// Computes the point where the edge between a visible and an invisible vertex crosses clip plane p.
//
// vVisib   : homogeneous vertex on the visible side of plane p.
// vInvisib : homogeneous vertex on the invisible side of plane p.
// p        : clip plane index, matching the bit order of GetClip(): p/2 selects the coordinate
//            (0 = x, 1 = y, otherwise z); the plane equation is coord = fS*w with
//            fS = -1 for even p, +1 for odd p, except p==4 where fS = 0 (i.e. z = 0).
// Returns the interpolated homogeneous vertex lying on the plane.
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p)
{
    const float fS = p==4 ? 0 : ((p&1)==0 ? -1 : 1);
    const int index = ((uint) p)/2;
    float x1 = index==0 ? vVisib.x : (index==1 ? vVisib.y : vVisib.z);
    float x0 = index==0 ? vInvisib.x : (index==1 ? vInvisib.y : vInvisib.z);

    // Solve for t in:
    //fS*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (x1-x0)*t + x0;

    const float fT = (fS*vInvisib.w-x0)/((x1-x0) - fS*(vVisib.w-vInvisib.w));
    float4 vNew = vVisib*fT + vInvisib*(1-fT);

    // to be extra careful, make sure the clipped-against coordinate lies exactly on the plane
    if(index==0) vNew.x = fS*vNew.w;
    else if(index==1) vNew.y = fS*vNew.w;
    else vNew.z = fS*vNew.w;

    return vNew;
}

// Transforms a plane (given as float4 coefficients) by multiplying it, as a row vector,
// with InvProjection: result = plane * InvProjection.
// CalcBound() uses this to take view-space planes to post-projection space.
float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)
{
    return mul(plane, InvProjection);
}

// Computes a pair of 2D plane normals for a circle of radius r centered at posXY_in.
//
// validPlanes : set to true only when dot(posXY_in, posXY_in) > r*r, i.e. the origin is strictly
//               outside the circle; the returned values should only be used in that case.
// posXY_in    : 2D center of the circle.
// r           : radius of the circle.
// Returns float4(n0.x, n0.y, n1.x, n1.y). Each normal n satisfies dot(n, posXY_in) = -r
// (see how res.y / res.w are derived from res.x / res.z below); the two solutions differ by the sign of D.
float4 EvalPlanePair(out bool validPlanes, float2 posXY_in, float r)
{
    // rotate by 90 degrees to avoid potential division by zero
    // (after the optional rotation, |posXY.y| >= |posXY.x|)
    bool bMustFlip = abs(posXY_in.y)<abs(posXY_in.x);
    float2 posXY = bMustFlip ? float2(-posXY_in.y, posXY_in.x) : posXY_in;

    float fLenSQ = dot(posXY, posXY);
    float diffSq = fLenSQ - r*r;
    float D = posXY.y * sqrt(max(0.0, diffSq));    // clamp so sqrt never sees a negative value

    float4 res;
    res.x = (-r*posXY.x - D) / fLenSQ;
    res.z = (-r*posXY.x + D) / fLenSQ;
    res.y = (-r-res.x*posXY.x) / posXY.y;
    res.w = (-r-res.z*posXY.x) / posXY.y;

    // rotate back by 90 degrees
    res = bMustFlip ? float4(res.y, -res.x, res.w, -res.z) : res;

    validPlanes = diffSq>0.0;

    return res;
}

// Computes a 2D bound in post-projection space for a sphere given in view space.
//
// bIsMinValid    : per-axis (x,y) flags telling whether vMin may be used.
// bIsMaxValid    : per-axis (x,y) flags telling whether vMax may be used.
// vMin, vMax     : the resulting 2D bound; only components flagged valid are meaningful.
// InvProjection  : inverse projection matrix, used to transform the planes (see TransformPlaneToPostSpace()).
// pos_view_space : sphere center in view space.
// r              : sphere radius.
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r)
{
    // plane pairs are evaluated independently in the XZ and the YZ plane
    bool validX, validY;
    float4 planeX = EvalPlanePair(validX, float2(pos_view_space.x, pos_view_space.z), r);
    float4 planeY = EvalPlanePair(validY, float2(pos_view_space.y, pos_view_space.z), r);

#if USE_LEFT_HAND_CAMERA_SPACE
    planeX = planeX.zwxy; // need to swap left/right and top/bottom planes when using left hand system
    planeY = planeY.zwxy;
#endif

    bIsMinValid = bool2(planeX.z<0, planeY.z<0) && bool2(validX,validY);
    bIsMaxValid = bool2((-planeX.x)<0, (-planeY.x)<0) && bool2(validX,validY);

    // hopefully the compiler takes zeros into account
    // should be the case since the transformation in TransformPlaneToPostSpace()
    // is done using multiply-adds and not dot product instructions.
    float4 planeX0 = TransformPlaneToPostSpace(InvProjection, float4(planeX.x, 0, planeX.y, 0));
    float4 planeX1 = TransformPlaneToPostSpace(InvProjection, float4(planeX.z, 0, planeX.w, 0));
    float4 planeY0 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.x, planeY.y, 0));
    float4 planeY1 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.z, planeY.w, 0));

    // convert planes to the forms (1,0,0,D) and (0,1,0,D)
    // 2D bound is given by -D components
    float2 A = -float2(planeX0.w / planeX0.x, planeY0.w / planeY0.y);
    float2 B = -float2(planeX1.w / planeX1.x, planeY1.w / planeY1.y);

    // Bound is complete
    vMin = B;
    vMax = A;
}