您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
573 行
18 KiB
573 行
18 KiB
#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT
|
|
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE
|
|
#pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED
|
|
#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
|
|
#pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING
|
|
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
|
|
#pragma kernel ClearAtomic
|
|
|
|
#include "ShaderBase.h"
|
|
#include "LightDefinitions.cs.hlsl"
|
|
#include "LightingConvexHullUtils.hlsl"
|
|
|
|
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
|
|
#include "SortingComputeUtils.hlsl"
|
|
#endif
|
|
|
|
//#define EXACT_EDGE_TESTS
|
|
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
|
|
#define CONV_HULL_TEST_ENABLED
|
|
|
|
// ---- camera / projection constants ----
// NOTE: declaration order is kept exactly as in the original.
uniform int g_isOrthographic;               // non-zero selects the orthographic code paths
uniform int g_iNrVisibLights;               // light count; also the offset from a light's min bound to its max bound in g_vBoundsBuffer
uniform float4x4 g_mInvScrProjection;       // used by GetLinearDepth to un-project a depth buffer value
uniform float4x4 g_mScrProjection;          // used by GetViewPosFromLinDepth (scale and offset terms)

// ---- clustering constants ----
uniform float g_fClustScale;
uniform float g_fClustBase;                 // log base used when no per-tile base is derived from the depth texture
uniform float g_fNearPlane;
uniform float g_fFarPlane;
uniform int g_iLog2NumClusters;             // numClusters = (1<<g_iLog2NumClusters)
|
|
|
|
#include "ClusteredUtils.h"
|
|
|
|
|
|
// ---- read-only inputs ----
// Depth texture: multisampled variant when MSAA_ENABLED is defined. It is also the source of the screen dimensions.
#ifdef MSAA_ENABLED
Texture2DMS<float> g_depth_tex : register( t0 );
#else
Texture2D g_depth_tex : register( t0 );
#endif

// Per-light bounds: entry [l] is read as the minimum and entry [l+g_iNrVisibLights] as the maximum.
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

#ifdef USE_TWO_PASS_TILED_LIGHTING
// Per big tile: element 0 is the light count, followed by that many light indices.
StructuredBuffer<uint> g_vBigTileLightList : register( t4 );      // don't support Buffer yet in unity
#endif
|
|
|
|
|
|
// Thread-group width of the light-list kernels.
#define NR_THREADS          64

// ---- output buffers ----
RWStructuredBuffer<uint> g_vLayeredLightList : register( u0 );      // light indices of every cluster list; don't support RWBuffer yet in unity
RWStructuredBuffer<uint> g_LayeredOffset : register( u1 );          // per cluster: list start offset packed with the light count; don't support RWBuffer yet in unity
RWStructuredBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 ); // element 0 is the global allocation counter; don't support RWBuffer yet in unity

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
RWStructuredBuffer<float> g_logBaseBuffer : register( u3 );         // per tile log base; don't support RWBuffer yet in unity
#endif
|
|
|
|
|
|
// Upper limit on the number of lights kept per tile after coarse culling.
#define MAX_NR_COARSE_ENTRIES       128

// ---- thread-group shared state ----
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];         // light indices that survived coarse culling
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];      // two lights per entry, 16 bits each: min cluster in the low byte, max cluster in the high byte
groupshared float4 lightPlanes[4*6];                                // hull planes of the four lights currently being tested (6 planes per light)

groupshared uint lightOffs;                                         // number of lights appended to coarseList (may exceed the capacity)

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
groupshared uint ldsZMax;                                           // bit pattern of the largest depth found in the tile
#endif

#ifdef EXACT_EDGE_TESTS
groupshared uint ldsIsLightInvisible;
groupshared uint lightOffs2;
#endif

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;                                      // light count after the spherical intersection tests
#endif
|
|
|
|
|
|
// Converts a depth buffer value (0 is near, 1 is far) into a linear depth by
// un-projecting the point (0, 0, zDptBufSpace, 1) with g_mInvScrProjection
// and performing the perspective divide on z.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
    return unprojected.z / unprojected.w;
}
|
|
|
|
|
|
// Reconstructs a position from a screen position and a linear depth.
//   v2ScrPos  : screen position, in the units g_mScrProjection maps to.
//   fLinDepth : linear depth, returned unchanged as the z component.
// For a perspective camera the xy result is scaled by fLinDepth; for an
// orthographic camera it is independent of depth.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const bool ortho = g_isOrthographic!=0;

    // scale terms come from the matrix diagonal
    const float scaleX = g_mScrProjection[0].x;
    const float scaleY = g_mScrProjection[1].y;

    // the offset terms live in a different column depending on the projection type
    const float offsX = ortho ? g_mScrProjection[0].w : g_mScrProjection[0].z;
    const float offsY = ortho ? g_mScrProjection[1].w : g_mScrProjection[1].z;

#if USE_LEFTHAND_CAMERASPACE
    const bool leftHanded = true;
#else
    const bool leftHanded = ortho;
#endif

    const float sgn = leftHanded ? 1 : (-1);
    const float2 p = float2( (sgn*v2ScrPos.x-offsX)/scaleX, (sgn*v2ScrPos.y-offsY)/scaleY );

    return float3(ortho ? p.xy : (fLinDepth*p.xy), fLinDepth);
}
|
|
|
|
// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the
// diagonal scale terms of g_mScrProjection, i.e. the diagonal extent of one
// pixel at depth one.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 invScale = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
    return length(invScale);
}
|
|
|
|
// Forward declarations of helpers that are defined after the main kernel.
#ifdef EXACT_EDGE_TESTS
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
#endif
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif

// Fetches hull plane p of the light stored at coarseList[l].
float4 FetchPlane(int l, int p);

// NOTE(review): the original remark "returns 1 for intersection and 0 for none"
// sat above FetchPlane but presumably describes CheckIntersection - confirm.
|
|
|
|
|
|
// Tests whether coarse light l touches cluster k of the current tile.
//   l             : index into coarseList / clusterIdxs; the hull planes of the light
//                   must already be loaded into lightPlanes at slot (l&3).
//   k             : cluster index.
//   viTilLL/UR    : tile corners in pixels.
//   suggestedBase : log base handed to ClusterIdxToZ.
// Returns true when k lies inside the light's [min,max] cluster range and, with
// CONV_HULL_TEST_ENABLED, none of the six hull planes has all eight corners of
// the cluster cell on its positive side.
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
{
    // unpack the 16 bit [min,max] cluster range of this light
    const unsigned int packedRange = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
    const unsigned int firstCluster = (packedRange>>0)&0xff;
    const unsigned int lastCluster = (packedRange>>8)&0xff;

    bool bIsHit = firstCluster<=((uint) k) && ((uint) k)<=lastCluster;

#ifdef CONV_HULL_TEST_ENABLED
    if(bIsHit)
    {
        const float depthAtNearZ = ClusterIdxToZ(k, suggestedBase);
        const float depthAtFarZ = ClusterIdxToZ(k+1, suggestedBase);

        for(int p=0; p<6; p++)
        {
            const float4 plane = lightPlanes[6*(l&3)+p];

            // stays true only if every cell corner is on the positive side of this plane
            bool allOutside = true;

            for(int c=0; c<8; c++)
            {
                const float cx = (c&1)==0 ? viTilLL.x : viTilUR.x;
                const float cy = (c&2)==0 ? viTilLL.y : viTilUR.y;
                const float cz = (c&4)==0 ? depthAtNearZ : depthAtFarZ;
                const float3 corner = GetViewPosFromLinDepth( float2(cx, cy), cz);

                allOutside = allOutside && dot(plane, float4(corner,1.0))>0;
            }

            if(allOutside) bIsHit = false;
        }
    }
#endif

    return bIsHit;
}
|
|
|
|
// Range-only variant of CheckIntersection: returns true when cluster k lies
// inside the [min,max] cluster range stored for coarse light l in clusterIdxs.
bool CheckIntersectionBasic(int l, int k)
{
    const unsigned int packedRange = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
    const unsigned int firstCluster = (packedRange>>0)&0xff;
    const unsigned int lastCluster = (packedRange>>8)&0xff;

    return firstCluster<=((uint) k) && ((uint) k)<=lastCluster;
}
|
|
|
|
|
|
// Light-list generation kernel. LIGHTLISTGEN is substituted per variant by the
// #pragma kernel lines at the top of the file.
//
// One thread group of NR_THREADS threads processes one screen tile
// (u3GroupID.xy is the tile coordinate). The steps are:
//   1. optionally find the largest non-sky depth of the tile,
//   2. build a coarse light list from the screen-space light bounds,
//   3. optionally prune that list (spherical test, exact edge test) and sort it,
//   4. compute the [min,max] cluster range of every surviving light,
//   5. let thread i handle cluster i: count its lights, allocate list space
//      from g_LayeredSingleIdxBuffer[0] and write the light indices,
//   6. write the per-cluster offset/count words and, when available, the log base of the tile.
//
// All group barriers are compiled out for SHADER_API_XBOXONE and SHADER_API_PSSL.
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    // the screen size is taken from the depth texture
    uint iWidth;
    uint iHeight;
#ifdef MSAA_ENABLED
    uint iNumSamplesMSAA;
    g_depth_tex.GetDimensions(iWidth, iHeight, iNumSamplesMSAA);
#else
    g_depth_tex.GetDimensions(iWidth, iHeight);
#endif
    const uint log2TileSize = firstbithigh(TILE_SIZE_CLUSTERED);
    uint nrTilesX = (iWidth+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;     // rounded up
    uint nrTilesY = (iHeight+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;

    // tile rectangle in pixels
    uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(iWidth, iHeight) );      // not width and height minus 1 since viTilUR represents the end of the tile corner.

    // reset the shared counters
    if(t==0)
    {
        lightOffs = 0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
        ldsZMax = 0;
#endif
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    float dpt_ma=1.0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    // find the largest non-sky depth value of the tile
    dpt_ma=0.0;

    for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
    {
        // pixels past the screen edge are clamped onto the last row/column
        uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
        for(uint i=0; i<iNumSamplesMSAA; i++)
        {
            const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
#else
            const float fDpth = FetchDepth(g_depth_tex, uPixCrd);
#endif
            if(fDpth<VIEWPORT_SCALE_Z)      // if not skydome
            {
                dpt_ma = max(fDpth, dpt_ma);
            }
#ifdef MSAA_ENABLED
        }
#endif
    }

    // reduce across the group through the bit pattern of the float
    InterlockedMax(ldsZMax, asuint(dpt_ma) );

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
    dpt_ma = asfloat(ldsZMax);
    if(dpt_ma<=0.0) dpt_ma = VIEWPORT_SCALE_Z;      // assume sky pixel
#endif

    // tile rectangle as a fraction of the screen size
    float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
    float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

    // build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
    // candidates come from the list of the 64x64 big tile that contains this tile
    const uint log2BigTileToClustTileRatio = firstbithigh(64) - log2TileSize;

    int NrBigTilesX = (nrTilesX+((1<<log2BigTileToClustTileRatio)-1))>>log2BigTileToClustTileRatio;
    const int bigTileIdx = (tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToClustTileRatio);      // map the idx to 64x64 tiles
    int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
    for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
    {
        int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
    // candidates are all visible lights
    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
    {
#endif
        const float2 vMi = g_vBoundsBuffer[l].xy;
        const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;

        // keep the light when its 2D bounds overlap the tile rectangle
        if( all(vMa>vTileLL) && all(vMi<vTileUR))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;        // add to light list
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // lightOffs keeps counting past the capacity, so clamp it
    int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(iWidth-1, iHeight-1))) );
#endif

    // far plane and log base of the tile
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE

#if USE_LEFTHAND_CAMERASPACE
    float fTileFarPlane = GetLinearDepth(dpt_ma);
#else
    float fTileFarPlane = -GetLinearDepth(dpt_ma);
#endif
    float suggestedBase = SuggestLogBase50(fTileFarPlane);
#else
    float fTileFarPlane = g_fFarPlane;
    float suggestedBase = g_fClustBase;
#endif

#ifdef EXACT_EDGE_TESTS
    iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
#endif

    // sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif

    //////////// cell specific code
    {
        // every iteration packs the cluster ranges of two lights into one uint of clusterIdxs
        for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
        {
            // for an odd light count the last pair repeats the final light
            const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-1)];
            const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
            const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
            const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
            const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));

            clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    int nrClusters = (1<<g_iLog2NumClusters);

    //////////////////////////////////////////////////////////

    // from here on thread i is responsible for cluster i of this tile
    uint start = 0;
    int i=(int) t;
    int iSpaceAvail = 0;
    int iSum = 0;
    if(i<nrClusters)
    {
        // count by cluster range only; the list written below can only be shorter
        for(int l=0; l<iNrCoarseLights; l++)
        {
            if(CheckIntersectionBasic(l, i)) ++iSum;
        }

        iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES);                              // combined storage for both direct lights and reflection
        InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start);     // alloc list memory
    }

    int modelListCount[NR_LIGHT_MODELS]={0,0};      // direct light count and reflection lights
    uint offs = start;
    for(int ll=0; ll<iNrCoarseLights; ll+=4)
    {
        // the first 24 threads each load one of the 6 planes of one of the next 4 lights
        int p = i>>2;
        int m = i&3;
        if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif

        for(int l=ll; l<min(iNrCoarseLights,(ll+4)); l++)
        {
            if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
            {
                uint lightModel = g_vLightData[coarseList[l]].lightModel;
                ++modelListCount[lightModel==REFLECTION_LIGHT ? 1 : 0];
                g_vLayeredLightList[offs++] = coarseList[l];            // reflection lights will be last since we sorted
            }
        }

        // do not overwrite lightPlanes before every thread is done with it
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
    }

    // One word per (light model, cluster, tile): list start in the low bits and
    // the light count in the top 5 bits.
    uint localOffs=0;
    offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
    if(i<nrClusters)
    {
        for(int category=0; category<NR_LIGHT_MODELS; category++)
        {
            int numLights = min(modelListCount[category],31);       // only allow 5 bits

            g_LayeredOffset[offs] = (start+localOffs) | (((uint) numLights)<<27);
            offs += (nrClusters*nrTilesX*nrTilesY);
            localOffs += modelListCount[category];                  // use unclamped count for localOffs
        }
    }

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    if(threadID==0) g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
#endif
}
|
|
|
|
|
|
// Returns the equation of plane p of the convex hull that bounds a light.
//   l : index into coarseList; the light index stored there selects the entry of g_data.
//   p : plane number handed to GetHullPlaneEq (the kernel in this file passes 0..5).
float4 FetchPlane(int l, int p)
{
    SFiniteLightBound lgtDat = g_data[coarseList[l]];

    const float3 boxX = lgtDat.boxAxisX.xyz;
    const float3 boxY = lgtDat.boxAxisY.xyz;
    const float3 boxZ = -lgtDat.boxAxisZ.xyz;       // flip axis (so it points away from the light direction for a spot-light)
    const float3 center = lgtDat.center.xyz;
    const float2 scaleXY = lgtDat.scaleXY;

    // the bounding sphere radius is not needed for the hull planes, so it is not read here
    return GetHullPlaneEq(boxX, boxY, boxZ, center, scaleXY, p);
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
|
|
// Removes from coarseList every light whose bounding sphere does not overlap the tile.
//   threadID         : index of the calling thread inside the group.
//   iNrCoarseLights  : number of valid entries in coarseList.
//   screenCoordinate : screen position passed on to GetViewPosFromLinDepth (the kernel passes the tile center).
// Returns the number of lights left in coarseList. Must be called by the whole group.
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
    // view vector through screenCoordinate at unit distance
#if USE_LEFTHAND_CAMERASPACE
    float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
    float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif

    float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
    float halfTileSizeAtZDistOne = (TILE_SIZE_CLUSTERED/2)*onePixDiagDist;      // scale by half a tile

    // phase 1: all threads test lights in parallel and tag rejected entries
    for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
    {
        SFiniteLightBound lgtDat = g_data[coarseList[l]];

        const bool overlaps = DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0);
        if( !overlaps )
            coarseList[l]=0xffffffff;
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // phase 2: coarseList is not double buffered (too greedy on lds), so a
    // single thread closes the gaps in place.
    if(threadID==0)
    {
        int writeIdx = 0;
        for(int l=0; l<iNrCoarseLights; l++)
        {
            if(coarseList[l]!=0xffffffff)
            {
                coarseList[writeIdx] = coarseList[l];
                ++writeIdx;
            }
        }
        lightOffsSph = writeIdx;
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    return lightOffsSph;
}
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef EXACT_EDGE_TESTS
|
|
|
|
// Returns corner i (0..7) of the tile volume.
//   bit 0 of i selects viTilLL.x or viTilUR.x,
//   bit 1 of i selects viTilLL.y or viTilUR.y,
//   bit 2 of i selects g_fNearPlane or fTileFarPlane as the depth.
// The depth is negated unless USE_LEFTHAND_CAMERASPACE is set.
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
    const float cornerX = (i&1)==0 ? viTilLL.x : viTilUR.x;
    const float cornerY = (i&2)==0 ? viTilLL.y : viTilUR.y;
    float depth = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
#if !USE_LEFTHAND_CAMERASPACE
    depth = -depth;
#endif
    return GetViewPosFromLinDepth( float2(cornerX, cornerY), depth);
}
|
|
|
|
// Returns edge e0 of the tile volume as a point vP0 on the edge and a direction vE0.
//   e0>>2 is the section: 0 is side edges, 1 is near edges and 2 is far edges.
//   e0&3  selects the edge inside the section.
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    const int iSection = e0>>2;
    const int iSwizzle = e0&0x3;

    // start vertex of the edge; far edges start on the far plane
    const int iVertex = iSwizzle + (2*(iSection&0x2));      // offset by 4 at section 2
    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), iVertex, fTileFarPlane);

    // Side edges: with a perspective projection the direction is the start vertex
    // itself, with an orthographic projection it is the depth axis.
#if USE_LEFTHAND_CAMERASPACE
    float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,1.0);
#else
    float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,-1.0);
#endif

    // near and far edges run along +-x or +-y
    const float edgeSign = (iSwizzle&0x2)==0 ? 1.0f : (-1.0f);
    const float3 edgeAxis = (iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0);

    vE0 = iSection==0 ? edgeSectionZero : (edgeSign*edgeAxis);
}
|
|
|
|
// Prunes coarseList with separating-plane tests between light hull edges and tile volume edges.
// For each light the threads of the group share out all (hull edge, tile edge)
// pairs. The cross product of a pair is a candidate separating plane normal: if
// the hull vertices and the tile vertices lie strictly on opposite sides, the
// light is dropped.
//   threadID        : index of the calling thread inside the group.
//   iNrCoarseLights : number of valid entries in coarseList.
//   viTilLL/UR      : tile corners in pixels.
//   fTileFarPlane   : far depth of the tile volume.
// Returns the number of lights left in coarseList. Must be called by the whole group.
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    if(threadID==0) lightOffs2 = 0;

    const bool bOnlyNeedFrustumSideEdges = true;
    const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8;     // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.

    const int totNrEdgePairs = 12*nrFrustEdges;
    for(int l=0; l<iNrCoarseLights; l++)
    {
        if(threadID==0) ldsIsLightInvisible=0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
        const int idxCoarse = coarseList[l];
        [branch]if(g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT)        // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
        {
            SFiniteLightBound lgtDat = g_data[idxCoarse];

            const float3 boxX = lgtDat.boxAxisX.xyz;
            const float3 boxY = lgtDat.boxAxisY.xyz;
            const float3 boxZ = -lgtDat.boxAxisZ.xyz;       // flip axis (so it points away from the light direction for a spot-light)
            const float3 center = lgtDat.center.xyz;
            const float2 scaleXY = lgtDat.scaleXY;

            for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
            {
                // split the pair index into a hull edge (e0) and a tile edge (e1)
                int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
                int e1 = i - e0*nrFrustEdges;

                int idx_cur=0, idx_twin=0;
                float3 vP0, vE0;
                GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);

                float3 vP1, vE1;
                GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);

                // potential separation plane
                float3 vN = cross(vE0, vE1);

                // classify the hull vertices against the plane through vP0
                int nrInFront=0, nrBehind=0;
                for(int k=1; k<8; k++)      // only need to test 7 verts (technically just 6).
                {
                    int j = (idx_cur+k)&0x7;
                    float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
                    float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
                    if(fSignDist>0) ++nrInFront; else if(fSignDist<0) ++nrBehind;
                }
                // +1 / -1 when all vertices are on one side, 0 when they straddle or all lie on the plane
                int resh = (nrInFront>0 && nrBehind>0) ? 0 : (nrInFront>0 ? 1 : (nrBehind>0 ? (-1) : 0));

                // classify the tile vertices against the same plane
                nrInFront=0; nrBehind=0;
                for(int j=0; j<8; j++)
                {
                    float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
                    float fSignDist = dot(vN, vPf-vP0);
                    if(fSignDist>0) ++nrInFront; else if(fSignDist<0) ++nrBehind;
                }
                int resf = (nrInFront>0 && nrBehind>0) ? 0 : (nrInFront>0 ? 1 : (nrBehind>0 ? (-1) : 0));

                // separated only when the two sets are strictly on opposite sides
                bool bFoundSepPlane = (resh*resf)<0;

                if(bFoundSepPlane) InterlockedOr(ldsIsLightInvisible, 1);
            }
        }
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
        // a single thread keeps the surviving lights packed at the front of the list
        if(threadID==0 && ldsIsLightInvisible==0)
        {
            coarseList[lightOffs2++] = coarseList[l];
        }
    }
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
    return lightOffs2;
}
|
|
#endif
|
|
|
|
|
|
|
|
// Single-thread kernel that resets the global list allocation counter
// (g_LayeredSingleIdxBuffer[0]) which the light-list kernels advance with
// InterlockedAdd. Neither parameter is used.
[numthreads(1, 1, 1)]
void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    g_LayeredSingleIdxBuffer[0] = 0;
}
|