if(t==0) lightOffs = 0;
int i;
int i;
for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
iNrCoarseLights = lightOffs;

int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)

const uint idxCoarse = lightsListLDS[l];
bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;


#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"

dpt_ma = asfloat(ldsZMax);
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa>vTileLL) && all(vMi<vTileUR))
if( all(vMa>vTileLL) && all(vMi<vTileUR))
unsigned int uInc = 1;
unsigned int uIndex;

InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
int categoryListCount[LIGHTCATEGORY_COUNT]={0,0,0}; // direct light count and reflection lights
uint offs = start;

if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
g_vLayeredLightList[offs++] = coarseList[l] - shiftIndex[lightCategory]; // reflection lights will be last since we sorted

const int idxCoarse = coarseList[l];
[branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
[branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
SFiniteLightBound lgtDat = g_data[idxCoarse];


#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"

uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform int _EnvLightIndexShift;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );

int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);

int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);

if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
int l;
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)

// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;

uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;

uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;


uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
#define NR_THREADS 64


float FetchDepthMSAA(Texture2DMS<float> depthTexture, uint2 pixCoord, uint sampleIdx)
float zdpth = depthTexture.Load(uint3(pixCoord.xy, 0), sampleIdx).x;
float zdpth = depthTexture.Load(pixCoord.xy, sampleIdx).x;
zdpth = 1.0 - zdpth;


#define FLT_EPSILON 1.192092896e-07f
// Using pow often results in a warning like this:
// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
// PositivePow removes this warning when you know the value is positive, and avoids inf/NaN.
// Scalar overload: takes abs(base), clamps it to at least FLT_EPSILON, then raises the result to 'power'.
// The sign of 'base' is discarded, so the value handed to pow() is always >= FLT_EPSILON.
float PositivePow(float base, float power)
return pow(max(abs(base), float(FLT_EPSILON)), power);
// float2 overload: same abs / clamp-to-FLT_EPSILON / pow sequence on a two-component vector.
float2 PositivePow(float2 base, float2 power)
return pow(max(abs(base), float2(FLT_EPSILON, FLT_EPSILON)), power);
// float3 overload: same abs / clamp-to-FLT_EPSILON / pow sequence on a three-component vector.
float3 PositivePow(float3 base, float3 power)
return pow(max(abs(base), float3(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power);
// float4 overload: same abs / clamp-to-FLT_EPSILON / pow sequence on a four-component vector.
float4 PositivePow(float4 base, float4 power)
return pow(max(abs(base), float4(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power);
const float geomSeries = (1.0 - pow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
const float geomSeries = (1.0 - PositivePow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
return geomSeries / (g_fFarPlane - g_fNearPlane);

if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
float dist = (pow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
float dist = (PositivePow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
res = dist + g_fNearPlane;


if(t==0) lightOffs = 0;
for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
int i;
for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
for(i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
for(i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = i==0 ? iNrCoarseLights : lightsListLDS[i-1];

int i
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)

for(int l=0; l<iNrCoarseLights; l++)
const uint idxCoarse = lightsListLDS[l];
bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
if(canEnter) canEnter = g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT; // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.

const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;



#pragma kernel ClearAtomic
#include "LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

groupshared uint lightOffs;
groupshared int ldsZMax;
groupshared uint ldsZMax;

uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
for(int i=0; i<iNumSamplesMSAA; i++)
for(uint i=0; i<iNumSamplesMSAA; i++)
const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);

dpt_ma = asfloat(ldsZMax);
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
if( all(vMa>vTileLL) && all(vMi<vTileUR))
unsigned int uInc = 1;
unsigned int uIndex;

if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
uint lightModel = g_vLightData[coarseList[l]].lightModel;
++modelListCount[lightModel==REFLECTION_LIGHT ? 1 : 0];
g_vLayeredLightList[offs++] = coarseList[l]; // reflection lights will be last since we sorted

uint localOffs=0;
offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
for(int category=0; category<NR_LIGHT_MODELS; category++)
int numLights = min(modelListCount[m],31); // only allow 5 bits
int numLights = min(modelListCount[category],31); // only allow 5 bits
localOffs += modelListCount[m]; // use unclamped count for localOffs
localOffs += modelListCount[category]; // use unclamped count for localOffs

void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)


#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING

// write lights to global buffers
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
for(int category=0; category<NR_LIGHT_MODELS; category++)
int nrLightsFinal = ldsModelListCount[ m ];
int nrLightsFinal = ldsModelListCount[category];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
uint uHigh = prunedList[2*l+0+localOffs];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs];
uint uHigh = prunedList[2 * l + 0 + localOffs];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);


if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
SFiniteLightBound lightData = g_data[prunedList[l]];

if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.


uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
#define FLT_EPSILON 1.192092896e-07F // smallest such that 1.0+FLT_EPSILON != 1.0
#define NR_THREADS 64
