#include "SortingComputeUtils.hlsl"
#endif
// NARROW_MOBILE_ENABLED is defined unconditionally on the next line, so the
// #ifdef that follows is always taken in the visible code and
// EMUL_LOCAL_ATOMICS is always defined along with it.
#define NARROW_MOBILE_ENABLED
#ifdef NARROW_MOBILE_ENABLED
// Selects the #else side of the "#ifndef EMUL_LOCAL_ATOMICS" blocks further
// down, i.e. the InterlockedADD / InterlockedADDAndPrev macro code paths.
#define EMUL_LOCAL_ATOMICS
#endif
// Non-zero selects the orthographic code path (tested as g_isOrthographic!=0).
uniform int g_isOrthographic;
// Light count; used as the number of input lights (nrLightsIn) when building the coarse list.
uniform int g_iNrVisibLights;
// x and y are read as iWidth and iHeight; they drive the tile count and the normalized tile bounds.
uniform uint2 g_viDimensions;
// NOTE(review): not referenced in the visible part of this file; presumably the
// inverse of g_mScrProjection - confirm against the host code.
uniform float4x4 g_mInvScrProjection;
// Read in strides of MAX_NR_BIGTILE_LIGHTS_PLUSONE per big tile; element 0 of
// each stride is read as that big tile's light count.
StructuredBuffer<uint> g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity
#endif
// NR_THREADS is the stride of the per-thread loops in this file:
// 32 when NARROW_MOBILE_ENABLED is defined, 64 otherwise.
#ifdef NARROW_MOBILE_ENABLED
#define NR_THREADS 32
#else
#define NR_THREADS 64
#endif
#include "LocalAtomics.hlsl"
// NOTE(review): NR_THREADS is defined a second time here, unconditionally,
// after the conditional definition above. When the 32 branch was taken this is
// a conflicting macro redefinition. It looks like a leftover from an earlier
// revision - confirm which value is intended and drop the other definition.
#define NR_THREADS 64
// output buffer
RWStructuredBuffer<uint> g_vLightList : register( u0 ); // don't support RWBuffer yet in unity
// Builds a view-space position from a screen position and a linear depth.
// The centre offsets come from g_mScrProjection: the [0].w / [1].w elements
// when g_isOrthographic is non-zero, the [0].z / [1].z elements otherwise.
//   v2ScrPos  - screen position (callers in this file pass a pixel location + 0.5).
//   fLinDepth - linear depth; it becomes the z component of the result.
//
// NOTE(review): the body below appears to contain several overlapping
// revisions of the same code: fCx/fCy and useLeftHandVersion are each declared
// twice, there are statements after an unconditional return, and fSx/fSy are
// not declared anywhere in the visible lines. Presumably preprocessor guards
// and the fSx/fSy declarations were lost - restore them from source control
// before relying on this function.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
bool isOrthographic = g_isOrthographic!=0;
float fCx = g_mScrProjection[0].z;
float fCy = g_mScrProjection[1].z;
float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
bool useLeftHandVersion = true;
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
bool useLeftHandVersion = isOrthographic;
// s negates the screen coordinates when the left-hand version is not used.
float s = useLeftHandVersion ? 1 : (-1);
float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);
// Orthographic: xy is used as-is. Otherwise xy is scaled by the linear depth.
return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
}
float GetOnePixDiagWorldDistAtDepthOne()
#endif
#ifdef FINE_PRUNING_ENABLED
// Forward declaration of FinePruneLights: it is called before its definition,
// which appears further down in this file.
// Only the NARROW_MOBILE_ENABLED signature (two float4 depth sets) is visible
// here; the #ifndef branch is empty in this view.
#ifndef NARROW_MOBILE_ENABLED
#else
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths1, float4 vLinDepths2);
#endif
#endif
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
#ifndef NARROW_MOBILE_ENABLED
#else
for(int i=(int) t; i<MAX_NR_COARSE_ENTRIES; i+=NR_THREADS)
prunedList[i]=0;
#endif
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrTilesX = (iWidth+15)/16;
float4 vLinDepths;
#ifdef NARROW_MOBILE_ENABLED
float4 vLinDepths2;
#endif
#ifndef NARROW_MOBILE_ENABLED
#else
for(int i = 0; i < 8; i++)
#endif
const float linDepth = GetLinearDepth(fDepth);
#ifndef NARROW_MOBILE_ENABLED
vLinDepths[i] = linDepth;
#else
if(i<4) vLinDepths[i] = linDepth;
else vLinDepths2[i-4] = linDepth;
#endif
vLinDepths[i] = GetLinearDepth(fDepth);
if(fDepth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDepth, dpt_mi);
InterlockedMAX(ldsZMax, asuint(dpt_ma), threadID );
InterlockedMIN(ldsZMin, asuint(dpt_mi), threadID );
InterlockedMax(ldsZMax, asuint(dpt_ma) );
InterlockedMin(ldsZMin, asuint(dpt_mi) );
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
const int bigTileIdx = (tileIDX.y>>log2BigTileToTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToTileRatio); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
int nrLightsIn = nrBigTileLights;
int nrLightsIn = (int) g_iNrVisibLights;
#ifndef EMUL_LOCAL_ATOMICS
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
}
#else
unsigned int uInc = (all(vMa>vTileLL) && all(vMi<vTileUR)) ? 1 : 0;
unsigned int uIndex;
InterlockedADDAndPrev(lightOffs, uInc, uIndex, t, l, nrLightsIn);
if(uIndex<MAX_NR_COARSE_ENTRIES && uInc!=0) coarseList[uIndex] = l; // add to light list
#endif
}
#ifdef FINE_PRUNING_ENABLED
#ifndef FINE_PRUNING_ENABLED
{
// Path taken when FINE_PRUNING_ENABLED is not defined: every entry of the
// coarse list is accepted as-is, so coarseList[] is copied into prunedList[],
// each thread handling the entries t, t+NR_THREADS, t+2*NR_THREADS, ...
#ifndef NARROW_MOBILE_ENABLED
#else
// The loop has to be bounded by the running index i. Testing the
// loop-invariant thread index t instead never terminates for any thread with
// t<iNrCoarseLights and walks i past the end of both lists.
for(int i=(int) t; i<iNrCoarseLights; i+=NR_THREADS) prunedList[i] = coarseList[i];
#endif
// A single thread stores the final light count.
if(t==0) ldsNrLightsFinal=iNrCoarseLights;
}
#else
#ifndef NARROW_MOBILE_ENABLED
#else
FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths, vLinDepths2);
#endif
}
#endif
int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
#ifndef EMUL_LOCAL_ATOMICS
#else
uint model = g_vLightData[ prunedList[i] ].lightModel;
for(int m=0; m<NR_LIGHT_MODELS; m++)
{
uint uInc = model==m ? 1 : 0;
InterlockedADD(ldsModelListCount[m], uInc, threadID, i, nrLightsCombinedList);
}
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) && !defined(NARROW_MOBILE_ENABLED)
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
#endif
{
SFiniteLightBound lightData = g_data[prunedList[l]];
bool bHit = DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius);
#ifndef EMUL_LOCAL_ATOMICS
if( bHit )
if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius, g_isOrthographic!=0) )
#else
unsigned int uInc = bHit ? 1 : 0;
unsigned int uIndex;
InterlockedADDAndPrev(lightOffsSph, uInc, uIndex, threadID, l, iNrCoarseLights);
if(bHit) coarseList[uIndex]=prunedList[l]; // read from the original copy of coarseList which is backed up in prunedList
#endif
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#ifdef FINE_PRUNING_ENABLED
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
//   threadID        - index of the calling thread; copied to t and used as the loop start / stride offset.
//   iNrCoarseLights - number of valid entries in coarseList[]; entries at or beyond it are never accepted.
//   viTilLL         - NOTE(review): not referenced in the visible lines; presumably the tile's
//                     lower-left pixel used to derive uPixLoc - confirm.
//   vLinDepths1/2   - linear depths for the pixels this thread checks: vLinDepths1 for i<4,
//                     vLinDepths2 for i>=4, indexed with i&0x3.
// NOTE(review): the body below appears to contain overlapping revisions of the same code
// (doubled loop headers, doubled vVPos declarations, both InterlockedOR and InterlockedOr
// calls) and the opening brace and several enclosing statements are not visible. Restore
// the missing preprocessor guards from source control before editing the logic.
#ifndef NARROW_MOBILE_ENABLED
#else
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths1, float4 vLinDepths2)
#endif
// Number of pixels each thread checks one after another: 4, or 8 when NARROW_MOBILE_ENABLED is defined.
#ifndef NARROW_MOBILE_ENABLED
const int numPixSerial = 4;
#else
const int numPixSerial = 8;
#endif
uint t = threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
// NOTE(review): the next three near-identical segments presumably belong to separate
// per-light-type branches whose enclosing statements are not visible - confirm.
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<numPixSerial; i++)
for(int i=0; i<4; i++)
#ifdef NARROW_MOBILE_ENABLED
// Pick the depth set that holds pixel i; the component is then selected with i&0x3.
float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
#endif
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lightData.lightPos.xyz;
float distSq = dot(fromLight,fromLight);
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<numPixSerial; i++)
for(int i=0; i<4; i++)
#ifdef NARROW_MOBILE_ENABLED
float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
#endif
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lightData.lightPos.xyz;
float3 toLight = vLp - vVPos;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<numPixSerial; i++)
for(int i=0; i<4; i++)
#ifdef NARROW_MOBILE_ENABLED
float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
#endif
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lightData.lightPos.xyz - vVPos;
// Step past an entry whose light type is not below MAX_TYPES.
if(uLgtType>=MAX_TYPES) ++l;
}
// Combine this thread's two flag words into ldsDoesLightIntersect[0..1].
// Both the macro form (InterlockedOR, which also takes threadID) and the
// intrinsic form (InterlockedOr) are present here - see the review note at the top.
InterlockedOR(ldsDoesLightIntersect[0], uLightsFlags[0], threadID );
InterlockedOR(ldsDoesLightIntersect[1], uLightsFlags[1], threadID );
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0] );
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1] );
// Thread 0 resets the accepted-light counter before the compaction below.
if(t==0) ldsNrLightsFinal = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#ifndef NARROW_MOBILE_ENABLED
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
}
#else
// Compaction: each thread visits entries t, t+NR_THREADS, ... up to MAX_NR_COARSE_ENTRIES.
for(uint i=t; i<MAX_NR_COARSE_ENTRIES; i+=NR_THREADS)
{
// uInc is 1 only for a valid entry (i<iNrCoarseLights) whose bit is set in the
// two-word mask: word 0 for i<32, word 1 otherwise, bit i&31.
unsigned int uInc = (i<(uint) iNrCoarseLights && (ldsDoesLightIntersect[i<32 ? 0 : 1]&(1<<(i&31)))!=0) ? 1 : 0;
unsigned int uIndex;
// NOTE(review): InterlockedADDAndPrev is a macro defined elsewhere; presumably it adds uInc
// to ldsNrLightsFinal and returns the previous value in uIndex - confirm.
InterlockedADDAndPrev(ldsNrLightsFinal, uInc, uIndex, t, i, MAX_NR_COARSE_ENTRIES);
if(uInc!=0 && uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[i]; // we allow up to 64 pruned lights while stored in LDS.
}
#endif
}
#endif