Merge pull request #1043 from Unity-Technologies/hdrp-xr-clustered

XR HDRP Clustered Lighting support
7 年前 · 736e0ff8
--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/ClusteredUtils.hlsl
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/ClusteredUtils.hlsl
    return max(g_fClustBase, suggested_base);
 }

+// Flattens a (eye, tileY, tileX) coordinate into a linear index for the per-tile log-base buffer.
+// Each eye owns a contiguous run of numTilesX * numTilesY entries; within an eye the tiles are
+// stored row by row (tileIndex.y selects the row, tileIndex.x the column).
+//   tileIndex  - 2D tile coordinate inside one eye's tile grid
+//   numTilesX  - number of tiles per row
+//   numTilesY  - number of tile rows
+//   eyeIndex   - which eye's run of entries to address (0 for the first eye)
+uint GenerateLogBaseBufferIndex(uint2 tileIndex, uint numTilesX, uint numTilesY, uint eyeIndex)
+{
+    const uint tilesPerEye  = numTilesX * numTilesY;
+    const uint tileInEye    = tileIndex.y * numTilesX + tileIndex.x;
+
+    return eyeIndex * tilesPerEye + tileInEye;
+}
+
+// Returns the linear index of the entry for one (eye, light category, cluster, tile) combination
+// in the layered offsets buffer.
+//
+// Layout, from slowest to fastest varying: [eye][category][cluster][tileY][tileX].
+//   lightCategory - light category slice to address
+//   tileIndex     - 2D tile coordinate inside one eye's tile grid
+//   clusterIndex  - depth cluster within the tile
+//   numTilesX     - number of tiles per row
+//   numTilesY     - number of tile rows
+//   numClusters   - number of depth clusters per tile (kept as int for existing callers)
+//   eyeIndex      - which eye's region to address (0 for the first eye)
+uint GenerateLayeredOffsetBufferIndex(uint lightCategory, uint2 tileIndex, uint clusterIndex, uint numTilesX, uint numTilesY, int numClusters, uint eyeIndex)
+{
+    // Convert the signed cluster count once so that all of the index math below stays unsigned
+    // and no implicit int <-> uint conversions are needed.
+    const uint clusterCount = (uint)numClusters;
+
+    // Each eye occupies LIGHTCATEGORY_COUNT * clusterCount * numTilesX * numTilesY entries.
+    const uint eyeOffset = eyeIndex * LIGHTCATEGORY_COUNT * clusterCount * numTilesX * numTilesY;
+
+    // Offset inside the eye's region: category, then cluster, then tile row, then tile column.
+    const uint lightOffset = ((lightCategory * clusterCount + clusterIndex) * numTilesY + tileIndex.y) * numTilesX + tileIndex.x;
+
+    return (eyeOffset + lightOffset);
+}
+
 #endif
--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoop.cs
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoop.cs
            return (uint)logVolume << 20 | (uint)lightVolumeType << 17 | listType << 16 | ((uint)probeIndex & 0xFFFF);
        }

-        void VoxelLightListGeneration(CommandBuffer cmd, HDCamera hdCamera, Matrix4x4 projscr, Matrix4x4 invProjscr, RenderTargetIdentifier cameraDepthBufferRT)
+        void VoxelLightListGeneration(CommandBuffer cmd, HDCamera hdCamera, Matrix4x4[] projscrArr, Matrix4x4[] invProjscrArr, RenderTargetIdentifier cameraDepthBufferRT)
        {
            Camera camera = hdCamera.camera;
            // clear atomic offset index
            cmd.SetComputeIntParam(buildPerVoxelLightListShader, HDShaderIDs._EnvLightIndexShift, m_lightList.lights.Count);
            cmd.SetComputeIntParam(buildPerVoxelLightListShader, HDShaderIDs._DecalIndexShift, m_lightList.lights.Count + m_lightList.envLights.Count);
            cmd.SetComputeIntParam(buildPerVoxelLightListShader, HDShaderIDs.g_iNrVisibLights, m_lightCount);
-            cmd.SetComputeMatrixParam(buildPerVoxelLightListShader, HDShaderIDs.g_mScrProjection, projscr);
-            cmd.SetComputeMatrixParam(buildPerVoxelLightListShader, HDShaderIDs.g_mInvScrProjection, invProjscr);
+            cmd.SetComputeMatrixArrayParam(buildPerVoxelLightListShader, HDShaderIDs.g_mScrProjectionArr, projscrArr);
+            cmd.SetComputeMatrixArrayParam(buildPerVoxelLightListShader, HDShaderIDs.g_mInvScrProjectionArr, invProjscrArr);

            cmd.SetComputeIntParam(buildPerVoxelLightListShader, HDShaderIDs.g_iLog2NumClusters, k_Log2NumClusters);


            var numTilesX = GetNumTileClusteredX(hdCamera);
            var numTilesY = GetNumTileClusteredY(hdCamera);
-            cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
+            int numEyes = m_FrameSettings.enableStereo ? 2 : 1;
+            //cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
+            cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, numEyes);
        }

        public void BuildGPULightListsCommon(HDCamera hdCamera, CommandBuffer cmd, RenderTargetIdentifier cameraDepthBufferRT, RenderTargetIdentifier stencilTextureRT, bool skyEnabled)
            var invProjscrArr = new Matrix4x4[2];
            if (m_FrameSettings.enableStereo)
            {
+                // XRTODO: If possible, we could generate a non-oblique stereo projection
+                // matrix.  It's ok if it's not the exact same matrix, as long as it encompasses
+                // the same FOV as the original projection matrix (which would mean padding each half
+                // of the frustum with the max half-angle). We don't need the light information in 
+                // real projection space.  We just use screen space to figure out what is proximal
+                // to a cluster or tile.
+                // Once we generate this non-oblique projection matrix, it can be shared across both eyes (un-array)
                for (int eyeIndex = 0; eyeIndex < 2; eyeIndex++)
                {
                    projArr[eyeIndex] = CameraProjectionStereoLHS(hdCamera.camera, (Camera.StereoscopicEye)eyeIndex);
            }

            // Cluster
-            VoxelLightListGeneration(cmd, hdCamera, projscrArr[0], invProjscrArr[0], cameraDepthBufferRT);
+            VoxelLightListGeneration(cmd, hdCamera, projscrArr, invProjscrArr, cameraDepthBufferRT);

            if (enableFeatureVariants)
            {
--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoopDef.hlsl
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoopDef.hlsl
 // these uniforms are only needed for when OPAQUES_ONLY is NOT defined
 // but there's a problem with our front-end compilation of compute shaders with multiple kernels causing it to error
 //#ifdef USE_CLUSTERED_LIGHTLIST
-float4x4 g_mInvScrProjection;
+float4x4 g_mInvScrProjection; // TODO: remove, unused in HDRP

 float g_fClustScale;
 float g_fClustBase;
 TEXTURE2D_ARRAY(_Env2DTextures);
 float4x4 _Env2DCaptureVP[MAX_ENV2D_LIGHT];

+// XRTODO: Need to stereo-ize access
 TEXTURE2D(_DeferredShadowTexture);

 CBUFFER_START(UnityPerLightLoop)
    float logBase = g_fClustBase;
    if (g_isLogBaseBufferEnabled)
    {
+        // XRTODO: Stereo-ize access to g_logBaseBuffer
        logBase = g_logBaseBuffer[tileIndex.y * _NumTileClusteredX + tileIndex.x];
    }

    float logBase = g_fClustBase;
    if (g_isLogBaseBufferEnabled)
    {
-        logBase = g_logBaseBuffer[tileIndex.y * _NumTileClusteredX + tileIndex.x];
+        const uint logBaseIndex = GenerateLogBaseBufferIndex(tileIndex, _NumTileClusteredX, _NumTileClusteredY, unity_StereoEyeIndex);
+        logBase = g_logBaseBuffer[logBaseIndex];
    }

    return SnapToClusterIdxFlex(linearDepth, logBase, g_isLogBaseBufferEnabled != 0);
 {
    int nrClusters = (1 << g_iLog2NumClusters);
-    const int idx = ((lightCategory * nrClusters + clusterIndex) * _NumTileClusteredY + tileIndex.y) * _NumTileClusteredX + tileIndex.x;
+
+    const int idx = GenerateLayeredOffsetBufferIndex(lightCategory, tileIndex, clusterIndex, _NumTileClusteredX, _NumTileClusteredY, nrClusters, unity_StereoEyeIndex);
+
    uint dataPair = g_vLayeredOffsetsBuffer[idx];
    start = dataPair & 0x7ffffff;
    lightCount = (dataPair >> 27) & 31;
 {
+    // Note: XR depends on unity_StereoEyeIndex already being defined,
+    // which means ShaderVariables.hlsl needs to be included ahead of this!
+
    uint2 tileIndex    = posInput.tileCoord;
    uint  clusterIndex = GetLightClusterIndex(tileIndex, posInput.linearDepth);

--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-bigtile.compute
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-bigtile.compute
 	uint2 viTilLL = 64*tileIDX;
 	uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );			// not width and height minus 1 since viTilUR represents the end of the tile corner.

-    // 'Normalized' coordinates of tile
+    // 'Normalized' coordinates of tile, for use with AABB bounds in g_vBoundsBuffer
 	float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
 	float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute
 #include "ShaderBase.hlsl"
 #include "LightLoop.cs.hlsl"
 #include "LightingConvexHullUtils.hlsl"
+#include "LightCullUtils.hlsl"

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 #include "SortingComputeUtils.hlsl"

 CBUFFER_START(UnityLightListClustered)
 int g_iNrVisibLights;
-float4x4 g_mInvScrProjection;
-float4x4 g_mScrProjection;
+
+float4x4 g_mInvScrProjectionArr[2];
+float4x4 g_mScrProjectionArr[2];
+
 uint g_isOrthographic;
 int _EnvLightIndexShift;
 int _DecalIndexShift;

 CBUFFER_END

+// ClusteredUtils.hlsl is dependent on the constants declared in UnityLightListClustered :/
+// g_fClustBase, g_fNearPlane, g_fFarPlane, g_iLog2NumClusters
-

 #ifdef MSAA_ENABLED
 Texture2DMS<float> g_depth_tex : register( t0 );

 #define NR_THREADS			64

-// output buffer
 RWStructuredBuffer<uint> g_vLayeredLightList : register( u0 );			// don't support RWBuffer yet in unity
 RWStructuredBuffer<uint> g_LayeredOffset : register( u1 );				// don't support RWBuffer yet in unity
 RWStructuredBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 );		// don't support RWBuffer yet in unity

 groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
 groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
-groupshared float4 lightPlanes[4*6];
+groupshared float4 lightPlanes[4*6]; // Each plane is defined by a float4. 6 planes per light, 4 lights (24 planes)

 groupshared uint lightOffs;

 groupshared uint lightOffsSph;
 #endif

-float GetLinearDepth(float zDptBufSpace)    // 0 is near 1 is far
+float GetLinearDepth(float zDptBufSpace, uint eyeIndex)    // 0 is near 1 is far
+    float4x4 g_mInvScrProjection = g_mInvScrProjectionArr[eyeIndex];
+
    // for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
 	// however this function must also work for orthographic projection so we keep it like this.
    float m22 = g_mInvScrProjection[2].z, m23 = g_mInvScrProjection[2].w;
    //return v4Pres.z / v4Pres.w;
 }

-float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
+float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)
+    float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];
+
 	bool isOrthographic = g_isOrthographic!=0;
 	float fSx = g_mScrProjection[0].x;
 	float fSy = g_mScrProjection[1].y;
 	return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
 }

-float GetOnePixDiagWorldDistAtDepthOne()
+float GetOnePixDiagWorldDistAtDepthOne(uint eyeIndex)
+    float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];
 	float fSx = g_mScrProjection[0].x;
 	float fSy = g_mScrProjection[1].y;

+// SphericalIntersectionTests and CullByExactEdgeTests are close to the versions
+// in lightlistbuild-bigtile.compute, but unifying them would require more
+// refactoring than is warranted right now.
+
-int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
+int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex);
-int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate, uint eyeIndex);
-float4 FetchPlane(int l, int p);
+float4 FetchPlane(int l, int p, uint eyeIndex);
-
-bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
+bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase, uint eyeIndex)
+    // If this light's screen space depth bounds intersect this cluster...simple cluster test
+    // TODO: Unify this code with the code in CheckIntersectionBasic...
 	unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
 	bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
 	if(bIsHit)
 				float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
 				float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
 				float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
-				float3 vP = GetViewPosFromLinDepth( float2(x, y), z);
+                float3 vP = GetViewPosFromLinDepth( float2(x, y), z, eyeIndex);
+                // Test each corner of the cluster against the light bounding box planes
 				bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
 			}

 	return bIsHit;
 }

+// l is the coarse light index, k is the cluster index
 bool CheckIntersectionBasic(int l, int k)
 {
 	unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
 [numthreads(NR_THREADS, 1, 1)]
 void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
 {
+    uint eyeIndex = u3GroupID.z;
+
 	uint2 tileIDX = u3GroupID.xy;
 	uint t=threadID;


+    // Screen space coordinates of clustered tile
 	uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
 	uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(g_screenSize.x, g_screenSize.y) );		// not width and height minus 1 since viTilUR represents the end of the tile corner.


 	for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
 	{
+        // XRTODO: We need to stereo-ize access to g_depth_tex for texture arrays.
+
+        // TODO: For stereo double-wide, I need a proper way to insert the second eye width offset. Right now, I can just
+        // use g_screenSize.x, but that's kinda cheating.
+        // Additionally, we're going to have a method to select between a doublewide texture or texture array. Doubling
+        // the kernels seems like a bad idea.  We could branch our texture read to switch between different texture declarations.
+        uint stereoDWOffset = eyeIndex * g_screenSize.x;
+        uPixCrd.x += stereoDWOffset;
 #ifdef MSAA_ENABLED
 		for(int i=0; i<g_iNumSamplesMSAA; i++)
 		{
 #endif
 	}

+    // Why is this a uint? Doesn't InterlockedMax support shared mem floats?
 	InterlockedMax(ldsZMax, asuint(dpt_ma) );


 	if(dpt_ma<=0.0) dpt_ma = VIEWPORT_SCALE_Z;		// assume sky pixel
 #endif

+    // 'Normalized' coordinates of tile, for use with AABB bounds in g_vBoundsBuffer
 	float2 vTileLL = float2(viTilLL.x/g_screenSize.x, viTilLL.y/g_screenSize.y);
 	float2 vTileUR = float2(viTilUR.x/g_screenSize.x, viTilUR.y/g_screenSize.y);


-	int NrBigTilesX = (nrTilesX+((1<<log2BigTileToClustTileRatio)-1))>>log2BigTileToClustTileRatio;
-	const int bigTileIdx = (tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToClustTileRatio);		// map the idx to 64x64 tiles
-	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+0];
+    int NrBigTilesX = (nrTilesX + ((1<<log2BigTileToClustTileRatio)-1)) >> log2BigTileToClustTileRatio;
+    int NrBigTilesY = (nrTilesY + ((1<<log2BigTileToClustTileRatio)-1)) >> log2BigTileToClustTileRatio;
+    const int bigTileBase = eyeIndex * NrBigTilesX * NrBigTilesY;
+	const int bigTileIdx = bigTileBase + ((tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX) + (tileIDX.x>>log2BigTileToClustTileRatio);		// map the idx to 64x64 tiles
+	
+    int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+0];
 	for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
 	{
 		int l = g_vBigTileLightList[MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE*bigTileIdx+l0+1];
 #endif
-		const float2 vMi = g_vBoundsBuffer[l].xy;
-		const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
+        // TODO: Seems kinda funny that we repeat this exact code here, bigtile, and FPTL...
+
+        const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex);
+        const float2 vMi = g_vBoundsBuffer[boundsIndices.min].xy;
+        const float2 vMa = g_vBoundsBuffer[boundsIndices.max].xy;

 		if( all(vMa>vTileLL) && all(vMi<vTileUR))
 		{
 	int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);

 #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
-	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(g_screenSize.x-1, g_screenSize.y-1))) );
+	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(g_screenSize.x-1, g_screenSize.y-1))), eyeIndex );
-	float fTileFarPlane = GetLinearDepth(dpt_ma);
-#else
-	float fTileFarPlane = -GetLinearDepth(dpt_ma);
+    float fTileFarPlane = GetLinearDepth(dpt_ma, eyeIndex);
+#else // USE_LEFT_HAND_CAMERA_SPACE
+    float fTileFarPlane = -GetLinearDepth(dpt_ma, eyeIndex);
-#else
+#else // ENABLE_DEPTH_TEXTURE_BACKPLANE
 	float fTileFarPlane = g_fFarPlane;
 	float suggestedBase = g_fClustBase;
 #endif
-	iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
+    iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane, eyeIndex);
+    // NOTE: Why not sort on console?
 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 	SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
 #endif
+        // TODO: We should write some encode/decode functions to help put cluster indices into the shared mem buffer,
+        // and extract them later.  The code that reads from clusterIdx is hairy.
+
-			const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
-			const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
-			const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
-			const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));
+            const ScreenSpaceBoundsIndices l0Bounds = GenerateScreenSpaceBoundsIndices(l0, g_iNrVisibLights, eyeIndex);
+            const ScreenSpaceBoundsIndices l1Bounds = GenerateScreenSpaceBoundsIndices(l1, g_iNrVisibLights, eyeIndex);
+            const unsigned int clustIdxMi0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.min].z, eyeIndex), suggestedBase));
+            const unsigned int clustIdxMa0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.max].z, eyeIndex), suggestedBase));
+            const unsigned int clustIdxMi1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.min].z, eyeIndex), suggestedBase));
+            const unsigned int clustIdxMa1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.max].z, eyeIndex), suggestedBase));
 			clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
 		}
 	}
 	int iSum = 0;
 	if(i<nrClusters)
 	{
+        // Each thread checks its respective cluster against all coarse lights for intersection.
+        // At the end, 'iSum' represents the number of lights that intersect this cluster!
+        // We have a limit to the number of lights we will track in a cluster (128). This is how much memory we
+        // want to allocate out of g_LayeredSingleIdxBuffer.
 		iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES);							// combined storage for both direct lights and reflection
 		InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start);		// alloc list memory
 	}
 	int shiftIndex[LIGHTCATEGORY_COUNT];
 	ZERO_INITIALIZE_ARRAY(int, shiftIndex, LIGHTCATEGORY_COUNT);
+    // NOTE: Why is this indexed like this?
+
-		if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
+		if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p, eyeIndex);
+
 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 		GroupMemoryBarrierWithGroupSync();
 #endif
-			if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
+			if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase, eyeIndex) )
-				uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
+                const int lightVolIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
+                uint lightCategory = _LightVolumeData[lightVolIndex].lightCategory;
-			}
+            }
 		}

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

 	uint localOffs=0;
-	offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
+
+    offs = GenerateLayeredOffsetBufferIndex(0, tileIDX, i, nrTilesX, nrTilesY, nrClusters, eyeIndex);
 	for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
 	{
 		int numLights = min(categoryListCount[category],31);		// only allow 5 bits
 	}

 #ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
-	if(threadID==0) g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
+    const uint logBaseIndex = GenerateLogBaseBufferIndex(tileIDX, nrTilesX, nrTilesY, eyeIndex);
+	if(threadID==0) g_logBaseBuffer[logBaseIndex] = suggestedBase;
-float4 FetchPlane(int l, int p)
+float4 FetchPlane(int l, int p, uint eyeIndex)
-	SFiniteLightBound lgtDat = g_data[coarseList[l]];
+    const int lightBoundIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
+    SFiniteLightBound lgtDat = g_data[lightBoundIndex];

 	const float3 boxX = lgtDat.boxAxisX.xyz;
 	const float3 boxY = lgtDat.boxAxisY.xyz;



-
-int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate, uint eyeIndex)
-	float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
+    float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0, eyeIndex);
-	float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
+	float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0, eyeIndex);
-	float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
+    float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne(eyeIndex);
-		SFiniteLightBound lgtDat = g_data[coarseList[l]];
+        const int lightBoundIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
+        SFiniteLightBound lgtDat = g_data[lightBoundIndex];

 		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
 			coarseList[l]=UINT_MAX;

 #ifdef EXACT_EDGE_TESTS

-float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
+float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane, uint eyeIndex)
 {
 	float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
 	float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
 #endif
-	return GetViewPosFromLinDepth( float2(x, y), z);
+    return GetViewPosFromLinDepth( float2(x, y), z, eyeIndex);
-void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
+void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex)
-	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
+    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane, eyeIndex);

 #if USE_LEFT_HAND_CAMERA_SPACE
 	float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,1.0);
 	vE0 = iSection == 0 ? edgeSectionZero : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
 }

-int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
+int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex)
 {
 	if(threadID==0) lightOffs2 = 0;

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 		GroupMemoryBarrierWithGroupSync();
 #endif
-		const int idxCoarse = coarseList[l];
-		UNITY_BRANCH if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
+        const int lightCullIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
+		UNITY_BRANCH if (_LightVolumeData[lightCullIndex].lightVolume != LIGHTVOLUMETYPE_SPHERE)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
-			SFiniteLightBound lgtDat = g_data[idxCoarse];
+            SFiniteLightBound lgtDat = g_data[lightCullIndex];

 			const float3 boxX = lgtDat.boxAxisX.xyz;
 			const float3 boxY = lgtDat.boxAxisY.xyz;


 				float3 vP1, vE1;
-				GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);
+                GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane, eyeIndex);

 				// potential separation plane
 				float3 vN = cross(vE0, vE1);
 				positive=0; negative=0;
 				for(int j=0; j<8; j++)
 				{
-					float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
+                    float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane, eyeIndex);
 					float fSignDist = dot(vN, vPf-vP0);
 					if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
 				}