#include "ShaderBase.hlsl"
#include "LightLoop.cs.hlsl"
#include "LightingConvexHullUtils.hlsl"
#include "LightCullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "SortingComputeUtils.hlsl"
// Constants consumed by the clustered light-list build code in this file.
CBUFFER_START(UnityLightListClustered)
// Number of visible lights. Used in this file as the upper bound when looping over lights and as the
// offset from a light's min-bounds entry to its max-bounds entry in g_vBoundsBuffer.
int g_iNrVisibLights;
// XRTODO: Stereo-ize these
// Non-zero selects the orthographic code paths (tested as g_isOrthographic!=0).
uint g_isOrthographic;
// Index shift for env lights: all cull data live in one list, but envLights are separated at render time.
int _EnvLightIndexShift;
// NOTE(review): presumably the analogous index shift for decals - confirm against the code that reads it.
int _DecalIndexShift;
CBUFFER_END
// ClusteredUtils.hlsl is dependent on the constants declared in UnityLightListClustered :/
// g_fClustBase
// g_fNearPlane
// g_fFarPlane
// g_iLog2NumClusters
// XRTODO: Reading from these textures and buffers must be stereo-ized
#ifdef MSAA_ENABLED
Texture2DMS<float> g_depth_tex : register( t0 );
#else
#define NR_THREADS 64
// XRTODO: Stereo-ize writes to these buffers (except g_LayeredSingleIdxBuffer)
// XRTODO: Stereo-ize writes to g_logBaseBuffer
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
RWStructuredBuffer<float> g_logBaseBuffer : register( u3 ); // don't support RWBuffer yet in unity
#endif
// Thread-group shared scratch storage used while building the light list for one tile.

// Coarse list of light indices associated with the screen tile being processed.
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
// Packed cluster index ranges, two coarse lights per uint (16 bits each):
// within each 16-bit half, bits 0..7 hold the min cluster index and bits 8..15 the max cluster index.
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
// Declared exactly once: a second identical declaration of lightPlanes is a redefinition error.
groupshared float4 lightPlanes[4*6]; // Each plane is defined by a float4. 6 planes per light, 4 lights (24 planes)
// Running count of entries appended to coarseList; clamped to MAX_NR_COARSE_ENTRIES when it is read back.
groupshared uint lightOffs;
// NOTE(review): presumably the matching counter for the spherical intersection pass - confirm at its use site.
groupshared uint lightOffsSph;
#endif
// XRTODO: Stereo-ize access to g_mInvScrProjection, pass in eyeIndex
// Converts a depth-buffer value into a linear depth by unprojecting it with g_mInvScrProjection.
//   zDptBufSpace: depth value in depth-buffer space.
//   returns:      z/w of the unprojected point.
// NOTE(review): the function previously had no return statement; this body is rebuilt from the
// commented-out formula that was left inside it - confirm against the intended implementation.
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
    // Only the z and w rows of the inverse projection matter, so x and y are set to zero.
    // for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
    float4 v4Pres = mul(g_mInvScrProjection, float4(0.0, 0.0, zDptBufSpace, 1.0));
    return v4Pres.z / v4Pres.w;
}
// XRTODO: Stereo-ize access to g_mScrProjection, pass in eyeIndex
// Returns a float3 whose z component is fLinDepth and whose xy components are p.xy when the
// projection is orthographic, or fLinDepth*p.xy otherwise.
//   v2ScrPos:  screen position of the point.
//   fLinDepth: linear depth of the point.
// NOTE(review): 'p' is not declared anywhere in this function as shown, and v2ScrPos is never
// read - the lines that derive p from v2ScrPos appear to be missing and must be restored.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
// g_isOrthographic is a uint flag; any non-zero value means orthographic.
bool isOrthographic = g_isOrthographic!=0;
// Orthographic: xy does not scale with depth. Otherwise: xy scales linearly with depth.
return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
}
// XRTODO: Stereo-ize access to g_mScrProjection, pass in eyeIndex
// Reads the x scale term of g_mScrProjection.
// NOTE(review): the function is declared to return float but no return statement is present in
// the body as shown, and fSx is never used - the remainder of the computation appears to be
// missing. Judging by the name it presumably yields the diagonal size of one pixel at depth 1;
// confirm and restore before use.
float GetOnePixDiagWorldDistAtDepthOne()
{
// Horizontal scale factor taken from the first row of the screen projection matrix.
float fSx = g_mScrProjection[0].x;
}
// SphericalIntersectionTests and CullByExactEdgeTests are close to the versions
// in lightlistbuild-bigtile.compute. But would need more re-factoring than needed
// right now.
// XRTODO: Stereo-ize these functions with eyeIndex
#ifdef EXACT_EDGE_TESTS
// Forward declaration only; the definition appears later in this file, also under EXACT_EDGE_TESTS.
// Takes the calling thread's ID, the current coarse light count, the tile's lower-left and
// upper-right screen corners, and the tile's far plane; returns an int.
// NOTE(review): presumably the returned int is the coarse light count after culling - confirm at the definition.
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
#endif
// returns 1 for intersection and 0 for none
// XRTODO: Pass in eyeIndex
// XRTODO: Stereo-ize due to access to GetViewPosFromLinDepth
// We need eyeIndex
// If this light's screen space depth bounds intersect this cluster...simple cluster test
// TODO: Unify this code with the code in CheckIntersectionBasic...
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
if(bIsHit)
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
// XRTODO: Stereo-ize this by passing in eyeIndex
// Test each corner of the cluster against the light bounding box planes
bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
}
return bIsHit;
}
// l is the coarse light index, k is the cluster index
bool CheckIntersectionBasic(int l, int k)
{
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
// XRTODO: Generate eyeIndex from u3GroupID.z
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
// Screen space coordinates of clustered tile
uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(g_screenSize.x, g_screenSize.y) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
{
// XRTODO: We need to stereo-ize access to g_depth_tex. This is the only time we use viTilLL
// to generate a screen-space texture coordinate, so we can localize our stereo texture access here.
// For double-wide, we need to sample the correct half.
// For instancing/multi-view, the right texture layer. When we add support for that, we need to pass in
// an extra uint2 for layer index.
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(g_screenSize.x-1, g_screenSize.y-1) );
#ifdef MSAA_ENABLED
for(int i=0; i<g_iNumSamplesMSAA; i++)
#endif
}
// Max across TG
// Why is this a uint? Can't we save floats in shared mem?
InterlockedMax(ldsZMax, asuint(dpt_ma) );
if(dpt_ma<=0.0) dpt_ma = VIEWPORT_SCALE_Z; // assume sky pixel
#endif
// 'Normalized' coordinates of tile, for use with AABB bounds in g_vBoundsBuffer
float2 vTileLL = float2(viTilLL.x/g_screenSize.x, viTilLL.y/g_screenSize.y);
float2 vTileUR = float2(viTilUR.x/g_screenSize.x, viTilUR.y/g_screenSize.y);
// XRTODO: Properly stereo-ize access to g_vBigTileLightList
// All of this code is localized here, so I don't really have to worry about side-effects further on down
// I need to generate NrBigTilesY, so I can generate a per-eye offset/base into g_vBigTileLightList
// Since bigTileIdx is used twice (once for count, once for the light list), I should probably fix that one.
// Would be worth function-alizing, because this code is shared with FPTL/lightlistbuild.compute
int NrBigTilesX = (nrTilesX+((1<<log2BigTileToClustTileRatio)-1))>>log2BigTileToClustTileRatio;
const int bigTileIdx = (tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToClustTileRatio); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+0];
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
// XRTODO: Once we have our light index (l), we need to make sure it indexes the
// correct portion of g_vBoundsBuffer. I have that code in GenerateScreenSpaceBoundsIndices
// TODO: Seems kinda funny that we repeat this exact code here, bigtile, and FPTL...
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
// XRTODO: Stereo-ize by passing in eyeIndex
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(g_screenSize.x-1, g_screenSize.y-1))) );
#endif
// XRTODO: Stereo-ize access to GetLinearDepth with eyeIndex
#else
#else // USE_LEFT_HAND_CAMERA_SPACE
#else
#else // ENABLE_DEPTH_TEXTURE_BACKPLANE
// XRTODO: Stereo-ize by passing in eyeIndex
// TODO: Why not sort on console?
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif
// This code is a little tricky. For each light in our coarse list (and the list is associated with the screen tile),
// we find a cluster associated with the lights AABB Z value. For each light, we have a min and max cluster index.
// And because the cluster index is a max of 255, we are able to encode 4 cluster indices per 32-bit DWORD.
// Therefore, we can encode 2 lights per uint (light 0 min idx, light 0 max idx, light 1...)
// Each iteration of the loop goes over two neighboring lights in the coarseList, unfortunate name choice of 'l'
// TODO: We should write some encode/decode functions to help put cluster indices into the shared mem buffer,
// and extract them later. The code that reads from clusterIdx is hairy.
// XRTODO: Stereo-ize access to g_vBoundsBuffer, run l0 and l1 through GenerateScreenSpaceBoundsIndices
// The logic here is a bit confusing. We seem to process pairs of lights, and
// It's for the cluster in the tile (the depth layer slice whatever)
// XRTODO: Stereo-ize GetLinearDepth with eyeIndex
const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
int i=(int) t;
int iSpaceAvail = 0;
int iSum = 0;
// Each thread in the TG represents a cluster index, and tests all coarse lights against that cluster.
// It should be noted that nrClusters can never be greater than the TG size, otherwise, this code doesn't work!
// Each thread checks its respective cluster against all coarse lights for intersection.
// At the end, 'iSum' represents the number of lights that intersect this cluster!
// We have a limit to the number of lights we will track in a cluster (128). This is how much memory we
// want to allocate out of g_LayeredSingleIdxBuffer.
// All the light lists live in g_vLayeredLightList. They aren't sorted in any manner, and it's tightly packed.
// The allocation can handle the max lights per tile, but it likely won't use all the memory.
// g_LayeredSingleIdxBuffer is recording the allocations out of the buffer.
// 'start' indicates the offset into g_vLayeredLightList for the _cluster_ being
// processed by this thread. And the TG is processing the tile.
// 'iSpaceAvail' is how many total lights are in this cluster.
// This allocation might be roughly over, because CheckIntersectionBasic is a very basic check.
// XRTODO: For stereo, we don't have to adjust anything into g_LayeredSingleIdxBuffer. Each thread is processing its own
// cluster, so we just need to make sure there is enough memory allocated in g_vLayeredLightList for two eyes worth of
// lists. The offset we get from start is good enough. We do have to store the offset into the
// stereo-corrected half of g_LayeredOffset.
}
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
// TODO: Why is this indexed like this?
// Process in chunks of 4 lights from the coarse tile list
// XRTODO: Pass in eyeIndex to FetchPlane, as it looks into g_data (SFiniteLightBound)
// The first 24 threads in the TG each generate 1 plane equation. There are
// 6 planes per light, and we process 4 lights at a time, hence, 24 threads.
// We could do more, but this might be all the LDS that can be spared...
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// XRTODO: Stereo-ize CheckIntersection by passing in eyeIndex to GetViewPosFromLinDepth
// XRTODO: Stereo-ize index into _LightVolumeData, run coarseList[l] thru GenerateLightCullDataIndex
}
// TODO: I think there _might_ be a potential bug here. The way this code seems to work is that
// as each light comes in, they are added to the allocated list in g_vLayeredLightList.
// As each light is added, the respective category count is incremented, and the raw light index is
// altered by subtracting the shift associated with the category. So these light indices are category
// dependent. Since they are category dependent, these indices have to be grouped into category sub-lists
// inside the cluster's allocation. But...when the coarseList is generated, there's no guarantee it is sorted
// unless it runs through the SORTLIST routine above.
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
// We now fill information about each light type into g_LayeredOffset
// The offset into g_vLayeredLightList, encoded with the number of lights in the category
// The encoded offsets are assuming the lights are sorted by category in the cluster list
// XRTODO: Stereo-ize this initial 'offs' to jump into the correct half of g_LayeredOffs.
// The offsets are organized Category/Cluster/Row/Column.
// For stereo, we just add eyeIndex*LIGHTCATEGORY_COUNT*nrClusters*nrTilesX*nrTilesY
offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
{
}
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
// XRTODO: Stereo-ize this, as this is per-eye set of tiles
// Add eyeIndex*nrTilesY*nrTilesX to this index
// XRTODO: Must be stereo-ized because it fetches from g_data
// Pass in eyeIndex
// XRTODO: Use eyeIndex and coarseList[l] to index into g_data
// use GenerateLightCullDataIndex
SFiniteLightBound lgtDat = g_data[coarseList[l]];
const float3 boxX = lgtDat.boxAxisX.xyz;
// XRTODO: Stereo-ize for a bunch of use cases! Pass in eyeIndex!
// Needed for GetViewPosFromLinDepth, GetOnePixDiagWorldDistAtDepthOne, and g_data
// XRTODO: Stereo-ize access with eyeIndex
#if USE_LEFT_HAND_CAMERA_SPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
// XRTODO: Stereo-ize access with eyeIndex
// XRTODO: Stereo-ize access with eyeIndex and GenerateLightCullDataIndex
// Use it on coarseList[l], use result to index g_data
SFiniteLightBound lgtDat = g_data[coarseList[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
#ifdef EXACT_EDGE_TESTS
// XRTODO: Stereo-ize for access to GetViewPosFromLinDepth, use eyeIndex
// Returns the view-space position (via GetViewPosFromLinDepth) of one corner of the tile volume.
//   viTilLL, viTilUR: lower-left and upper-right screen coordinates of the tile.
//   i:                corner selector; bit 0 picks x from viTilLL (clear) or viTilUR (set).
//   fTileFarPlane:    far plane of the tile.
// NOTE(review): 'y' and 'z' are used in the return statement but are not declared in the body
// as shown, and fTileFarPlane is never read - the lines selecting y and z (presumably from the
// remaining bits of i) appear to be missing and must be restored.
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
// Bit 0 of the corner index selects the left or right edge of the tile.
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
return GetViewPosFromLinDepth( float2(x, y), z);
}
// XRTODO: Stereo-ize for access to GetTileVertex, use eyeIndex
// Produces the edge with index e0 of the tile volume as an origin (vP0) and a direction (vE0).
//   vP0, vE0:         outputs; edge origin and edge direction.
//   e0:               edge index; e0>>2 selects the section (see below).
//   viTilLL, viTilUR: lower-left and upper-right screen coordinates of the tile.
//   fTileFarPlane:    far plane of the tile.
// NOTE(review): 'edgeSectionZero' and 'iSwizzle' are not declared in the body as shown, and the
// out parameter vP0 is never assigned - the lines computing them appear to be missing and must
// be restored.
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
// Section 0 uses edgeSectionZero. Other sections use an axis-aligned direction:
// the sign is +1 when bit 1 of iSwizzle is clear and -1 otherwise; the axis is X when
// (iSwizzle & 1) equals (iSwizzle >> 1) and Y otherwise.
vE0 = iSection == 0 ? edgeSectionZero : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
}
// XRTODO: Stereo-ize with eyeIndex, used for indexing _LightVolumeData and g_data,
// and with GetFrustEdge and GetTileVertex functions
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
if(threadID==0) lightOffs2 = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// XRTODO: stereo-ize index used to access _LightVolumeData (and g_data), use GenerateLightCullDataIndex and eyeIndex
// XRTODO: stereo-ize index used to access g_data, use the same index generated above from idxCoarse
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
float3 vP1, vE1;
// XRTODO: Stereo-ize to use GetFrustEdge, use eyeIndex
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);
// potential separation plane
positive=0; negative=0;
for(int j=0; j<8; j++)
{
// XRTODO: Stereo-ize to use GetTileVertex, use eyeIndex
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;