HDRenderLoop: Intermediate state

8 年前 · 31e4c8ca
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs
                    colorMRTs[index] = RTIDs[index];
                }

+
+            
            /*
            public void BindBuffers(Material mat)
            {
                }
            }
            */
+            


            public int gbufferCount { get; set; }

        GBufferManager m_gbufferManager = new GBufferManager();

-        private int s_CameraColorBuffer;
-        private int s_CameraDepthBuffer;
-        private int s_VelocityBuffer;
-        private int s_DistortionBuffer;
+        int s_CameraColorBuffer;
+        int s_CameraDepthBuffer;
+        int s_VelocityBuffer;
+        int s_DistortionBuffer;
-        private ComputeBuffer s_punctualLightList;
-        private ComputeBuffer s_envLightList;
-        private ComputeBuffer s_areaLightList;
-        private ComputeBuffer s_punctualShadowList;
+        ComputeBuffer s_punctualLightList;
+        ComputeBuffer s_envLightList;
+        ComputeBuffer s_areaLightList;
+        ComputeBuffer s_punctualShadowList;
-        private TextureCacheCubemap m_cubeReflTexArray;
+        TextureCacheCubemap m_cubeReflTexArray;
+        TextureCache2D m_CookieTexArray;
+        TextureCacheCubemap m_CubeCookieTexArray;
-        private static int s_WidthOnRecord;
-        private static int s_HeightOnRecord;
+        static int s_WidthOnRecord;
+        static int s_HeightOnRecord;

        void OnEnable()
        {

            m_cubeReflTexArray = new TextureCacheCubemap();
            m_cubeReflTexArray.AllocTextureArray(32, (int)m_TextureSettings.reflectionCubemapSize, TextureFormat.BC6H, true);
+            m_CookieTexArray = new TextureCache2D();
+            m_CookieTexArray.AllocTextureArray(8, (int)m_TextureSettings.spotCookieSize, (int)m_TextureSettings.spotCookieSize, TextureFormat.RGBA32, true);
+            m_CubeCookieTexArray = new TextureCacheCubemap();            
+            m_CubeCookieTexArray.AllocTextureArray(4, (int)m_TextureSettings.pointCookieSize, TextureFormat.RGBA32, true);
+            m_TilePassLightLoop.Rebuild();

            // Init Gbuffer description
            m_LitRenderLoop = new Lit.RenderLoop(); // Our object can be garbacge collected, so need to be allocate here
        void OnDisable()
        {
            m_LitRenderLoop.OnDisable();
+            m_TilePassLightLoop.OnDisable();

            s_punctualLightList.Release();
            s_areaLightList.Release();
            if (m_FinalPassMaterial) DestroyImmediate(m_FinalPassMaterial);

            m_cubeReflTexArray.Release();
+            m_CookieTexArray.Release();
+            m_CubeCookieTexArray.Release();
        }

        void InitAndClearBuffer(Camera camera, RenderLoop renderLoop)
                    UpdatePunctualLights(cullResults.visibleLights, ref shadows);
                    UpdateReflectionProbes(cullResults.visibleReflectionProbes);

-                    if (true)
-                        var numLights = GenerateSourceLightBuffers(camera, cullResults);
-                        BuildPerTileLightLists(camera, loop, numLights, projscr, invProjscr);
+                        var numLights = 0; // GenerateSourceLightBuffers(camera, cullResults);
+                        m_tilePassLightLoop.BuildPerTileLightLists(camera, loop, numLights, projscr, invProjscr);
+                        /*
-                        PushGlobalParams(camera, loop, CameraToWorld(camera), projscr, invProjscr, numDirLights);
+                        m_tilePassLightLoop.PushGlobalParams(camera, loop, CameraToWorld(camera), projscr, invProjscr, numDirLights);
-                        DoTiledDeferredLighting(camera, loop, numLights, numDirLights);
+                        m_tilePassLightLoop.DoTiledDeferredLighting(camera, loop, numLights, numDirLights);
+                        */
                    }

                    RenderDeferredLighting(camera, renderLoop);
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/Resources/Deferred.shader
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/Resources/Deferred.shader
 Shader "Hidden/HDRenderLoop/Deferred"
 {
-    Properties
-    {
-        _SrcBlend("", Float) = 1
-        _DstBlend("", Float) = 1
-    }
-
    SubShader
    {

--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs
                return "LIGHTLOOP_SINGLE_PASS";
            }

+            public const int MaxNumLights = 1024;
+            public const int MaxNumDirLights = 2;
+            public const float FltMax = 3.402823466e+38F;
+
+            ComputeShader buildScreenAABBShader;
+            ComputeShader buildPerTileLightListShader;     // FPTL
+            ComputeShader buildPerBigTileLightListShader;
+            ComputeShader buildPerVoxelLightListShader;    // clustered
+
            private static int s_GenAABBKernel;
            private static int s_GenListPerTileKernel;
            private static int s_GenListPerVoxelKernel;
            public bool enableDrawLightBoundsDebug = false;
            public bool enableDrawTileDebug = false;
            public bool enableComputeLightEvaluation = false;
-            const bool k_UseDepthBuffer = true;//      // only has an impact when EnableClustered is true (requires a depth-prepass)
+            const bool k_UseDepthBuffer = true;      // only has an impact when EnableClustered is true (requires a depth-prepass)
            const bool k_UseAsyncCompute = true;        // should not use on mobile

            const int k_Log2NumClusters = 6;     // accepted range is from 0 to 6. NumClusters is 1<<g_iLog2NumClusters
            // clustered light list specific buffers and data end

            const int k_TileSize = 16;
+
+            // True when the FPTL (per-tile) light list should be built, i.e. unless FPTL is
+            // disabled because clustered lighting replaces it or because MSAA is active.
+            bool usingFptl
+            {
+                get
+                {
+                    // MSAA is hard-wired off here; the assert records that MSAA would need clustered lighting.
+                    bool msaaActive = false;
+                    Debug.Assert(!msaaActive || enableClustered);
+
+                    bool replacedByClustered = disableFptlWhenClustered && enableClustered;
+                    return !replacedByClustered && !msaaActive;
+                }
+            }
+
+            // Local function
+            // Releases every compute buffer this pass may currently hold. Each buffer is
+            // checked for null first, so this is safe to call before anything was allocated.
+            void ClearComputeBuffers()
+            {
+                ReleaseResolutionDependentBuffers();
+
+                if (s_AABBBoundsBuffer != null)
+                {
+                    s_AABBBoundsBuffer.Release();
+                }
+
+                if (s_ConvexBoundsBuffer != null)
+                {
+                    s_ConvexBoundsBuffer.Release();
+                }
+
+                if (s_LightDataBuffer != null)
+                {
+                    s_LightDataBuffer.Release();
+                }
+
+                if (s_DirLightList != null)
+                {
+                    s_DirLightList.Release();
+                }
+
+                // The atomic counter is only released when clustered lighting is enabled.
+                if (enableClustered && s_GlobalLightListAtomic != null)
+                {
+                    s_GlobalLightListAtomic.Release();
+                }
+            }
+
+            // (Re)creates the resolution-independent GPU resources of the tile pass:
+            // releases any previous buffers, loads the light-list compute shaders, looks up
+            // their kernels, allocates the light bound/data buffers and binds the buffers
+            // that stay constant across frames.
+            // Public because the owning render loop calls it from its own OnEnable.
+            public void Rebuild()
+            {
+                ClearComputeBuffers();
+
+                // Resources.Load paths are relative to a folder named "Resources" and carry no
+                // extension, so the folder name itself must not be part of the path.
+                // NOTE(review): confirm an asset named "srcbound" exists under a Resources folder.
+                buildScreenAABBShader = Resources.Load<ComputeShader>("srcbound");
+                buildPerTileLightListShader = Resources.Load<ComputeShader>("lightlistbuild");
+                buildPerBigTileLightListShader = Resources.Load<ComputeShader>("lightlistbuild-bigtile");
+                buildPerVoxelLightListShader = Resources.Load<ComputeShader>("lightlistbuild-clustered");
+
+                s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB");
+                s_GenListPerTileKernel = buildPerTileLightListShader.FindKernel(enableBigTilePrepass ? "TileLightListGen_SrcBigTile" : "TileLightListGen");
+
+                // Two float3 entries per light in the AABB buffer (the shaders read entry l and entry l + light count).
+                s_AABBBoundsBuffer = new ComputeBuffer(2 * MaxNumLights, 3 * sizeof(float));
+                s_ConvexBoundsBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
+                s_LightDataBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightData)));
+                s_DirLightList = new ComputeBuffer(MaxNumDirLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(DirectionalLight)));
+
+                // "g_vBoundsBuffer" of the AABB kernel is bound per frame in BuildPerTileLightLists.
+                buildScreenAABBShader.SetBuffer(s_GenAABBKernel, "g_data", s_ConvexBoundsBuffer);
+
+                buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
+                buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vLightData", s_LightDataBuffer);
+                buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_data", s_ConvexBoundsBuffer);
+
+                if (enableClustered)
+                {
+                    // Kernel variant depends on whether a big-tile pre-pass feeds it and whether depth is used.
+                    var kernelName = enableBigTilePrepass ? (k_UseDepthBuffer ? "TileLightListGen_DepthRT_SrcBigTile" : "TileLightListGen_NoDepthRT_SrcBigTile") : (k_UseDepthBuffer ? "TileLightListGen_DepthRT" : "TileLightListGen_NoDepthRT");
+                    s_GenListPerVoxelKernel = buildPerVoxelLightListShader.FindKernel(kernelName);
+                    s_ClearVoxelAtomicKernel = buildPerVoxelLightListShader.FindKernel("ClearAtomic");
+                    buildPerVoxelLightListShader.SetBuffer(s_GenListPerVoxelKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
+                    buildPerVoxelLightListShader.SetBuffer(s_GenListPerVoxelKernel, "g_vLightData", s_LightDataBuffer);
+                    buildPerVoxelLightListShader.SetBuffer(s_GenListPerVoxelKernel, "g_data", s_ConvexBoundsBuffer);
+
+                    s_GlobalLightListAtomic = new ComputeBuffer(1, sizeof(uint));
+                }
+
+                if (enableBigTilePrepass)
+                {
+                    s_GenListPerBigTileKernel = buildPerBigTileLightListShader.FindKernel("BigTileLightListGen");
+                    buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
+                    buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_vLightData", s_LightDataBuffer);
+                    buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_data", s_ConvexBoundsBuffer);
+                }
+            }
+
+            // Releases all compute buffers owned by the tile pass.
+            // Delegates to ClearComputeBuffers, which releases the same set of buffers but
+            // null-checks each one, so disabling before Rebuild has run no longer throws.
+            // Public because the owning render loop calls it from its own OnDisable.
+            public void OnDisable()
+            {
+                ClearComputeBuffers();
+            }

            public bool NeedResize()
            {
            }


+            // Records and executes the command buffer that builds the light lists for one camera:
+            //   1. screen-space AABBs for the first numLights lights,
+            //   2. optional coarse pass on 64x64 big tiles (enableBigTilePrepass),
+            //   3. FPTL per-tile list (usingFptl),
+            //   4. clustered list (enableClustered).
+            // projscr / invProjscr are forwarded unchanged to the shaders as
+            // g_mScrProjection / g_mInvScrProjection.
+            // Public because the owning render loop invokes it every frame.
+            public void BuildPerTileLightLists(Camera camera, RenderLoop loop, int numLights, Matrix4x4 projscr, Matrix4x4 invProjscr)
+            {
+                // The 64-pixel big-tile size is also hard-coded in lightlistbuild-bigtile.compute.
+                const int bigTileSize = 64;
+
+                var w = camera.pixelWidth;
+                var h = camera.pixelHeight;
+                var numTilesX = (w + k_TileSize - 1) / k_TileSize;
+                var numTilesY = (h + k_TileSize - 1) / k_TileSize;
+                var numBigTilesX = (w + bigTileSize - 1) / bigTileSize;
+                var numBigTilesY = (h + bigTileSize - 1) / bigTileSize;
+
+                // Shared by both list-building passes below; never modified after creation.
+                var screenSize = new int[2] { w, h };
+
+                var cmd = new CommandBuffer() { name = "Build light list" };
+
+                // generate screen-space AABBs (used for both fptl and clustered).
+                {
+                    var proj = CameraProjection(camera);
+
+                    // Post-multiplication that replaces z by 0.5 * z + 0.5 * w and leaves x, y, w untouched.
+                    var temp = new Matrix4x4();
+                    temp.SetRow(0, new Vector4(1.0f, 0.0f, 0.0f, 0.0f));
+                    temp.SetRow(1, new Vector4(0.0f, 1.0f, 0.0f, 0.0f));
+                    temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
+                    temp.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));
+                    var projh = temp * proj;
+                    var invProjh = projh.inverse;
+
+                    cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", numLights);
+                    SetMatrixCS(cmd, buildScreenAABBShader, "g_mProjection", projh);
+                    SetMatrixCS(cmd, buildScreenAABBShader, "g_mInvProjection", invProjh);
+                    cmd.SetComputeBufferParam(buildScreenAABBShader, s_GenAABBKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
+                    // one thread group per 8 lights, rounded up
+                    cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (numLights + 7) / 8, 1, 1);
+                }
+
+                // enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).
+                if (enableBigTilePrepass)
+                {
+                    cmd.SetComputeIntParams(buildPerBigTileLightListShader, "g_viDimensions", screenSize);
+                    cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", numLights);
+                    SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mScrProjection", projscr);
+                    SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mInvScrProjection", invProjscr);
+                    cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fNearPlane", camera.nearClipPlane);
+                    cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fFarPlane", camera.farClipPlane);
+                    cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, "g_vLightList", s_BigTileLightList);
+                    cmd.DispatchCompute(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, numBigTilesX, numBigTilesY, 1);
+                }
+
+                if (usingFptl)       // optimized for opaques only
+                {
+                    cmd.SetComputeIntParams(buildPerTileLightListShader, "g_viDimensions", screenSize);
+                    cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", numLights);
+                    SetMatrixCS(cmd, buildPerTileLightListShader, "g_mScrProjection", projscr);
+                    SetMatrixCS(cmd, buildPerTileLightListShader, "g_mInvScrProjection", invProjscr);
+                    cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(s_CameraDepthTexture));
+                    cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vLightList", s_LightList);
+                    if (enableBigTilePrepass)
+                    {
+                        // the per-tile kernel then reads its candidates from the big-tile list
+                        cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vBigTileLightList", s_BigTileLightList);
+                    }
+                    cmd.DispatchCompute(buildPerTileLightListShader, s_GenListPerTileKernel, numTilesX, numTilesY, 1);
+                }
+
+                if (enableClustered)        // works for transparencies too.
+                {
+                    VoxelLightListGeneration(cmd, camera, numLights, projscr, invProjscr);
+                }
+
+                loop.ExecuteCommandBuffer(cmd);
+                cmd.Dispose();
+            }

        }
    }
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources.meta
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources.meta
+fileFormatVersion: 2
+guid: 10637537837597a41861afbe118b246a
+folderAsset: yes
+timeCreated: 1479306736
+licenseType: Pro
+DefaultImporter:
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
+#pragma kernel BigTileLightListGen
+
+#include "..\common\ShaderBase.h"
+#include "LightDefinitions.cs.hlsl"
+
+#include "LightingConvexHullUtils.hlsl"
+#include "SortingComputeUtils.hlsl"
+
+#define EXACT_EDGE_TESTS
+#define PERFORM_SPHERICAL_INTERSECTION_TESTS
+
+#define MAX_NR_BIGTILE_LIGHTS				(MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)
+
+
+uniform int g_iNrVisibLights;
+uniform uint2 g_viDimensions;
+uniform float4x4 g_mInvScrProjection;
+uniform float4x4 g_mScrProjection;
+uniform float g_fNearPlane;
+uniform float g_fFarPlane;
+
+StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
+StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
+StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
+
+
+#define NR_THREADS			64
+
+// output buffer
+RWBuffer<uint> g_vLightList : register( u0 );
+
+
+// 2kB (room for roughly 30 wavefronts)
+groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];
+groupshared uint lightOffs;
+
+
+// Unprojects the point (0, 0, zDptBufSpace, 1) through g_mInvScrProjection and returns z/w
+// of the result. Input convention: 0 is near, 1 is far.
+float GetLinearDepth(float zDptBufSpace)
+{
+	const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
+	return unprojected.z / unprojected.w;
+}
+
+
+// Reconstructs a position from a screen coordinate and a linear depth, using the scale
+// and offset terms stored in the first two rows of g_mScrProjection.
+float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
+{
+	const float2 scale = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
+	const float2 offs = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);
+
+	// x/y at depth one; sign and offset handling depend on the camera-space handedness
+#if USE_LEFTHAND_CAMERASPACE
+	const float2 xyAtDepthOne = (v2ScrPos - offs) / scale;
+#else
+	const float2 xyAtDepthOne = -((v2ScrPos + offs) / scale);
+#endif
+
+	return fLinDepth * float3(xyAtDepthOne, 1.0);
+}
+
+// Length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x/y scale terms of g_mScrProjection.
+float GetOnePixDiagWorldDistAtDepthOne()
+{
+	const float2 invScale = float2(1.0 / g_mScrProjection[0].x, 1.0 / g_mScrProjection[1].y);
+	return length(invScale);
+}
+
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
+#endif
+
+#ifdef EXACT_EDGE_TESTS
+void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
+#endif
+
+
+
+
+// Builds the light list of one 64x64 "big tile"; one thread group of NR_THREADS threads per tile.
+// Steps: coarse AABB overlap test -> optional sphere / exact edge culling (culled entries are
+// overwritten with 0xffffffff) -> sort -> count survivors -> write to g_vLightList.
+// Output layout: MAX_NR_BIGTILE_LIGHTS_PLUSONE entries per tile, entry 0 holds the light count,
+// entries 1..count hold the light indices.
+[numthreads(NR_THREADS, 1, 1)]
+void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	uint2 tileIDX = u3GroupID.xy;
+	uint t=threadID;
+
+	uint iWidth = g_viDimensions.x;
+	uint iHeight = g_viDimensions.y;
+	uint nrBigTilesX = (iWidth+63)/64;
+
+	if(t==0) lightOffs = 0;
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+
+	uint2 viTilLL = 64*tileIDX;
+	uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );			// not width and height minus 1 since viTilUR represents the end of the tile corner.
+
+	// tile corners normalised by the screen dimensions, for comparison against the light AABBs
+	float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
+	float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
+
+	// build coarse list using AABB (min corner at index l, max corner at index l+g_iNrVisibLights)
+	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
+	{
+		const float2 vMi = g_vBoundsBuffer[l].xy;
+		const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
+
+		if( all(vMa>vTileLL) && all(vMi<vTileUR))
+		{
+			unsigned int uInc = 1;
+			unsigned int uIndex;
+			InterlockedAdd(lightOffs, uInc, uIndex);
+			if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;		// add to light list
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// lightOffs may exceed the list capacity since the counter is incremented unconditionally
+	int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+	SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
+#endif
+
+#ifdef EXACT_EDGE_TESTS
+	CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
+#endif
+
+
+	// sort lights
+	// NOTE(review): presumably this also moves the 0xffffffff "culled" markers to the end,
+	// which the compaction below relies on -- confirm against SORTLIST.
+	SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIGTILE_LIGHTS_PLUSONE, t, NR_THREADS);
+
+	// count the entries that still hold a valid light index
+	lightOffs = 0;
+	GroupMemoryBarrierWithGroupSync();
+	for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
+	GroupMemoryBarrierWithGroupSync();
+	iNrCoarseLights = lightOffs;
+
+	int offs = tileIDX.y*nrBigTilesX + tileIDX.x;
+
+	// Element 0 stores the count, elements 1..count the light indices. The selector must be the
+	// element index (not the thread id): thread 0 also writes elements NR_THREADS, 2*NR_THREADS, ...
+	// max() keeps the unused read for element 0 inside the array.
+	for(int j=t; j<(iNrCoarseLights+1); j+=NR_THREADS)
+		g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + j] = j==0 ? iNrCoarseLights : lightsListLDS[max(j-1, 0)];
+}
+
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// Culls coarse-list lights whose bounding sphere does not overlap the tile.
+//   threadID         - index of the calling thread within the group
+//   iNrCoarseLights  - number of valid entries in lightsListLDS
+//   screenCoordinate - screen position the tile is tested around (the caller passes the tile centre)
+// Rejected entries are overwritten with 0xffffffff; the list is not compacted here.
+void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
+{
+	// position through screenCoordinate at depth one (negated depth for right-handed camera space)
+#if USE_LEFTHAND_CAMERASPACE
+	float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
+#else
+	float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
+#endif
+
+	float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
+	float halfTileSizeAtZDistOne = 32*onePixDiagDist;		// scale by half a tile
+	
+	// each thread handles every NR_THREADS-th coarse light
+	for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
+	{
+		SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];
+	
+		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
+			lightsListLDS[l]=0xffffffff;
+	}
+
+	// group barrier so all threads see the updated list; compiled out on XBOXONE / PSSL
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+}
+#endif
+
+
+
+
+
+
+
+#ifdef EXACT_EDGE_TESTS
+// Returns corner i (0..7) of the tile frustum: bit 0 of i selects the x corner, bit 1 the
+// y corner, and bit 2 selects g_fNearPlane versus fTileFarPlane as depth.
+float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
+{
+	const float2 corner = float2((i&1)!=0 ? viTilUR.x : viTilLL.x,
+	                             (i&2)!=0 ? viTilUR.y : viTilLL.y);
+	float depth = (i&4)!=0 ? fTileFarPlane : g_fNearPlane;
+#if !USE_LEFTHAND_CAMERASPACE
+	// right-handed camera space: depth is negated
+	depth = -depth;
+#endif
+	return GetViewPosFromLinDepth(corner, depth);
+}
+
+// Returns edge e0 of the tile frustum as a point vP0 on the edge and a direction vE0.
+//   e0 >> 2 selects the section, e0 & 3 the edge within that section.
+// For section 0 the direction equals the vertex position itself; for the other sections
+// it is a signed unit vector along x or y.
+void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
+{
+	int iSection = e0>>2;		// section 0 is side edges, section 1 is near edges and section 2 is far edges
+	int iSwizzle = e0&0x3;
+
+	int i=iSwizzle + (2*(iSection&0x2));	// offset by 4 at section 2
+	// NOTE(review): the corners are passed with their y components swapped relative to
+	// (viTilLL, viTilUR) -- confirm this is the intended vertex ordering.
+	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
+	vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
+}
+
+// Culls coarse-list lights by searching for a separating plane between the light's hull and
+// the tile frustum. Candidate plane normals are cross products of one hull edge and one
+// frustum edge. Lights are visited one after another; the threads of the group split the
+// edge pairs of the current light between them.
+//   threadID         - index of the calling thread within the group
+//   iNrCoarseLights  - number of valid entries in lightsListLDS
+//   viTilLL, viTilUR - tile corners in pixels
+// Rejected entries are overwritten with 0xffffffff; the list is not compacted here.
+void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
+{
+	const bool bOnlyNeedFrustumSideEdges = true;
+	const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8;	// max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
+
+	// 12 hull edges paired with every frustum edge
+	const int totNrEdgePairs = 12*nrFrustEdges;
+	for(int l=0; l<iNrCoarseLights; l++)
+	{
+		const int idxCoarse = lightsListLDS[l];
+		// the index check also skips entries already marked 0xffffffff by an earlier test
+		[branch]if(idxCoarse<(uint) g_iNrVisibLights && g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
+		{
+			SFiniteLightBound lgtDat = g_data[idxCoarse];
+	
+			const float3 boxX = lgtDat.boxAxisX.xyz;
+			const float3 boxY = lgtDat.boxAxisY.xyz;
+			const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+			const float3 center = lgtDat.center.xyz;
+			const float2 scaleXY = lgtDat.scaleXY;
+
+			// each thread handles every NR_THREADS-th edge pair of this light
+			for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
+			{
+				// e0 = hull edge index, e1 = frustum edge index
+				int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
+				int e1 = i - e0*nrFrustEdges;
+
+				int idx_cur=0, idx_twin=0;
+				float3 vP0, vE0;
+				GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
+				
+			
+				float3 vP1, vE1;
+				GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, g_fFarPlane);
+				
+				// potential separation plane
+				float3 vN = cross(vE0, vE1);
+			
+				// classify the hull vertices against the plane through vP0 with normal vN
+				int positive=0, negative=0;
+				for(int k=1; k<8; k++)		// only need to test 7 verts (technically just 6).
+				{
+					int j = (idx_cur+k)&0x7;
+					float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
+					float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
+					if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
+				}
+				// +1 / -1 when all hull vertices lie on one side of the plane, 0 when they straddle it
+				int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
+
+				// classify the eight tile-frustum vertices against the same plane
+				positive=0; negative=0;
+				for(int j=0; j<8; j++)
+				{
+					float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane);
+					float fSignDist = dot(vN, vPf-vP0);
+					if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
+				}
+				int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
+
+				// hull and frustum strictly on opposite sides: the plane separates them
+				bool bFoundSepPlane = (resh*resf)<0;
+				if(bFoundSepPlane) lightsListLDS[l]=0xffffffff;
+			}
+		}
+	}
+	// group barrier so all threads see the updated list; compiled out on XBOXONE / PSSL
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+}
+#endif
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute.meta
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute.meta
+fileFormatVersion: 2
+guid: d190937525dcb3949942a0a93295d2fe
+timeCreated: 1479306737
+licenseType: Pro
+ComputeShaderImporter:
+  currentAPIMask: 4
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
+#pragma kernel TileLightListGen_NoDepthRT					LIGHTLISTGEN=TileLightListGen_NoDepthRT
+#pragma kernel TileLightListGen_DepthRT						LIGHTLISTGEN=TileLightListGen_DepthRT			ENABLE_DEPTH_TEXTURE_BACKPLANE
+#pragma kernel TileLightListGen_DepthRT_MSAA				LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA		ENABLE_DEPTH_TEXTURE_BACKPLANE		MSAA_ENABLED
+#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile		LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile																	USE_TWO_PASS_TILED_LIGHTING
+#pragma kernel TileLightListGen_DepthRT_SrcBigTile			LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile			ENABLE_DEPTH_TEXTURE_BACKPLANE							USE_TWO_PASS_TILED_LIGHTING
+#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile		LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile		ENABLE_DEPTH_TEXTURE_BACKPLANE		MSAA_ENABLED		USE_TWO_PASS_TILED_LIGHTING
+#pragma kernel ClearAtomic
+
+
+#include "..\common\ShaderBase.h"
+#include "LightDefinitions.cs.hlsl"
+
+#include "LightingConvexHullUtils.hlsl"
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+#include "SortingComputeUtils.hlsl"
+#endif
+
+//#define EXACT_EDGE_TESTS
+#define PERFORM_SPHERICAL_INTERSECTION_TESTS
+#define CONV_HULL_TEST_ENABLED
+
+uniform int g_iNrVisibLights;
+uniform float4x4 g_mInvScrProjection;
+uniform float4x4 g_mScrProjection;
+
+uniform float g_fClustScale;
+uniform float g_fClustBase;
+uniform float g_fNearPlane;
+uniform float g_fFarPlane;
+uniform int	  g_iLog2NumClusters;		// numClusters = (1<<g_iLog2NumClusters)
+
+#include "ClusteredUtils.h"
+
+
+#ifdef MSAA_ENABLED
+Texture2DMS<float> g_depth_tex : register( t0 );
+#else
+Texture2D g_depth_tex : register( t0 );
+#endif
+StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
+StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
+StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
+
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+Buffer<uint> g_vBigTileLightList : register( t4 );
+#endif
+
+
+#define NR_THREADS			64
+
+// output buffer
+RWBuffer<uint> g_vLayeredLightList : register( u0 );
+RWBuffer<uint> g_LayeredOffset : register( u1 );
+RWBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 );
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+RWBuffer<float> g_logBaseBuffer : register( u3 );
+#endif
+
+
+#define MAX_NR_COARSE_ENTRIES		128
+
+groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
+groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
+groupshared float4 lightPlanes[4*6];
+
+groupshared uint lightOffs;
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+groupshared int ldsZMax;
+#endif
+
+#ifdef EXACT_EDGE_TESTS
+groupshared uint ldsIsLightInvisible;
+groupshared uint lightOffs2;
+#endif
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+groupshared uint lightOffsSph;
+#endif
+
+
+// Unprojects the point (0, 0, zDptBufSpace, 1) through g_mInvScrProjection and returns z/w
+// of the result. Input convention: 0 is near, 1 is far.
+float GetLinearDepth(float zDptBufSpace)
+{
+	const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
+	return unprojected.z / unprojected.w;
+}
+
+
+// Reconstructs a position from a screen coordinate and a linear depth, using the scale
+// and offset terms stored in the first two rows of g_mScrProjection.
+float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
+{
+	const float2 scale = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
+	const float2 offs = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);
+
+	// x/y at depth one; sign and offset handling depend on the camera-space handedness
+#if USE_LEFTHAND_CAMERASPACE
+	const float2 xyAtDepthOne = (v2ScrPos - offs) / scale;
+#else
+	const float2 xyAtDepthOne = -((v2ScrPos + offs) / scale);
+#endif
+
+	return fLinDepth * float3(xyAtDepthOne, 1.0);
+}
+
+// Length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x/y scale terms of g_mScrProjection.
+float GetOnePixDiagWorldDistAtDepthOne()
+{
+	const float2 invScale = float2(1.0 / g_mScrProjection[0].x, 1.0 / g_mScrProjection[1].y);
+	return length(invScale);
+}
+
+#ifdef EXACT_EDGE_TESTS
+int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
+#endif
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
+#endif
+
+
+// returns 1 for intersection and 0 for none
+
+float4 FetchPlane(int l, int p);
+
+
+// Tests whether coarse-list light l touches cluster k of the tile.
+//   l                - index into the coarse light list
+//   k                - cluster index along depth
+//   viTilLL, viTilUR - tile corners in pixels
+//   suggestedBase    - base passed to ClusterIdxToZ to obtain the cluster's depth bounds
+// First checks k against the light's packed [min, max] cluster range; with
+// CONV_HULL_TEST_ENABLED it additionally rejects the light when all eight corners of the
+// cluster cell lie on the positive side of any one of its six planes.
+bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
+{
+	// two 16-bit entries per clusterIdxs word: low byte = first cluster, high byte = last cluster
+	unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
+	bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
+	if(bIsHit)
+	{
+#ifdef CONV_HULL_TEST_ENABLED
+		float depthAtNearZ = ClusterIdxToZ(k, suggestedBase);
+		float depthAtFarZ = ClusterIdxToZ(k+1, suggestedBase);
+
+		for(int p=0; p<6; p++)
+		{
+			// lightPlanes holds 6 planes for each of 4 lights, hence the (l&3) slot
+			float4 plane = lightPlanes[6*(l&3)+p];
+		
+			bool bAllInvisib = true;
+
+			// the eight corners of the cluster cell: bits 0/1 of i pick x/y, bit 2 picks near/far depth
+			for(int i=0; i<8; i++)
+			{
+				float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
+				float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
+				float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
+				float3 vP = GetViewPosFromLinDepth( float2(x, y), z);
+
+				bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
+			}
+
+			if(bAllInvisib) bIsHit = false;
+		}
+#endif
+	}
+
+	return bIsHit;
+}
+
+// Range-only test: true when cluster k lies inside the [first, last] cluster range packed
+// for coarse-list light l (two 16-bit entries per clusterIdxs word).
+bool CheckIntersectionBasic(int l, int k)
+{
+	const unsigned int entry = (clusterIdxs[l>>1] >> (16*(l&1))) & 0xffff;
+	const unsigned int firstCluster = (entry>>0) & 0xff;
+	const unsigned int lastCluster = (entry>>8) & 0xff;
+	const unsigned int cluster = (uint) k;
+
+	return firstCluster<=cluster && cluster<=lastCluster;
+}
+
+
+// Clustered light list build. One thread group processes one 16x16 pixel tile (u3GroupID.xy).
+// Stages, in order:
+//   1. optionally find the tile's max depth (ENABLE_DEPTH_TEXTURE_BACKPLANE),
+//   2. gather lights whose screen-space AABB overlaps the tile into coarseList,
+//   3. optionally cull by sphere test and by exact edge tests,
+//   4. sort coarseList and pack each light's [min,max] cluster slice range into clusterIdxs,
+//   5. thread i (i<nrClusters) reserves space in g_vLayeredLightList for cluster i,
+//      writes the lights passing CheckIntersection and records offset/count in g_LayeredOffset.
+// All group barriers are compiled out for SHADER_API_XBOXONE and SHADER_API_PSSL.
+[numthreads(NR_THREADS, 1, 1)]
+void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	uint2 tileIDX = u3GroupID.xy;
+	uint t=threadID;
+
+	uint iWidth;
+	uint iHeight;
+#ifdef MSAA_ENABLED
+	uint iNumSamplesMSAA;
+	g_depth_tex.GetDimensions(iWidth, iHeight, iNumSamplesMSAA);
+#else
+	g_depth_tex.GetDimensions(iWidth, iHeight);
+#endif
+	uint nrTilesX = (iWidth+15)/16;
+	uint nrTilesY = (iHeight+15)/16;
+
+	uint2 viTilLL = 16*tileIDX;
+	uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) );		// not width and height minus 1 since viTilUR represents the end of the tile corner.
+
+	// thread 0 resets the group-shared counters before anyone uses them
+	if(t==0)
+	{
+		lightOffs = 0;
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+		ldsZMax = 0;
+#endif
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	float dpt_ma=1.0;
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+	// establish min and max depth first
+	dpt_ma=0.0;
+
+	// the 256 pixels of the tile are distributed over the NR_THREADS threads
+	for(int idx=t; idx<256; idx+=NR_THREADS)
+	{
+		uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
+#ifdef MSAA_ENABLED
+		for(int i=0; i<iNumSamplesMSAA; i++)
+		{
+		const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
+#else
+		const float fDpth = FetchDepth(g_depth_tex, uPixCrd);
+#endif
+		if(fDpth<VIEWPORT_SCALE_Z)		// if not skydome
+		{
+			dpt_ma = max(fDpth, dpt_ma);
+		}
+#ifdef MSAA_ENABLED
+		}
+#endif
+	}
+
+	// reduce the per-thread maxima through the float's bit pattern
+	InterlockedMax(ldsZMax, asuint(dpt_ma) );
+
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+	dpt_ma = asfloat(ldsZMax);
+#endif
+
+	// tile corners normalized by the depth texture dimensions, matching the xy range used by g_vBoundsBuffer below
+	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
+	float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
+
+
+	// build coarse list using AABB
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+	int NrBigTilesX = (nrTilesX+3)>>2;
+	const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);		// map the idx to 64x64 tiles
+	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
+	for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
+	{
+		int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
+#else
+	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
+	{
+#endif
+		// min corners are stored first, max corners g_iNrVisibLights entries later
+		const float3 vMi = g_vBoundsBuffer[l];
+		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
+
+		if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
+		{
+			unsigned int uInc = 1;
+			unsigned int uIndex;
+			InterlockedAdd(lightOffs, uInc, uIndex);
+			if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;		// add to light list
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// lightOffs keeps counting past the capacity, so clamp it to what was actually stored
+	int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
+#endif
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+
+#if USE_LEFTHAND_CAMERASPACE
+	float fTileFarPlane = GetLinearDepth(dpt_ma);
+#else
+	float fTileFarPlane = -GetLinearDepth(dpt_ma);
+#endif
+	float suggestedBase = SuggestLogBase50(fTileFarPlane);
+#else
+	float fTileFarPlane = g_fFarPlane;
+	float suggestedBase = g_fClustBase;
+#endif
+
+
+#ifdef EXACT_EDGE_TESTS
+	iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
+#endif
+
+	// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
+#endif
+
+	//////////// cell specific code
+	{
+		// each iteration packs the cluster ranges of two consecutive coarse entries into one dword
+		for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
+		{
+			// With an odd light count the last pair has no second entry: clamp to the last valid slot
+			// (iNrCoarseLights-1) so a stale coarseList slot is never used to index g_vBoundsBuffer.
+			const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-1)];
+			const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
+			const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
+			const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
+			const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));
+
+			clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	int nrClusters = (1<<g_iLog2NumClusters);
+
+
+
+	//////////////////////////////////////////////////////////
+
+	// from here on thread i is responsible for cluster slice i of this tile
+	uint start = 0;
+	int i=(int) t;
+	int iSpaceAvail = 0;
+	int iSum = 0;
+	if(i<nrClusters)
+	{
+		// upper bound on the list length using the cheap range test only
+		for(int l=0; l<iNrCoarseLights; l++)
+		{
+			iSum += (CheckIntersectionBasic(l, i) ? 1 : 0);
+		}
+
+		iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES);							// combined storage for both direct lights and reflection
+		InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start);		// alloc list memory
+	}
+
+	int modelListCount[NR_LIGHT_MODELS]={0,0};		// direct light count and reflection lights
+	uint offs = start;
+	// lights are processed four at a time: the first 24 threads fetch 6 planes for each of the 4 lights
+	for(int ll=0; ll<iNrCoarseLights; ll+=4)
+	{
+		int p = i>>2;
+		int m = i&3;
+		if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+
+		for(int l=ll; l<min(iNrCoarseLights,(ll+4)); l++)
+		{
+			if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
+			{
+				uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
+				++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
+				g_vLayeredLightList[offs++] = coarseList[l];			// reflection lights will be last since we sorted
+			}
+		}
+
+		// lightPlanes is overwritten by the next batch, so wait until every thread is done reading it
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+	}
+
+	// publish, per light model, the start offset (low 27 bits) and light count (top 5 bits) of this cluster's list
+	uint localOffs=0;
+	offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
+	for(int m=0; m<NR_LIGHT_MODELS; m++)
+	{
+		int numLights = min(modelListCount[m],31);		// only allow 5 bits
+		if(i<nrClusters)
+		{
+			g_LayeredOffset[offs] = (start+localOffs) | (((uint) numLights)<<27);
+			offs += (nrClusters*nrTilesX*nrTilesY);
+			localOffs += modelListCount[m];		// use unclamped count for localOffs
+		}
+	}
+
+#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
+	g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
+#endif
+}
+
+
+// Returns the plane equation number p of the bounding hull of coarse-list entry l,
+// as computed by GetPlaneEq from the light's box axes, center and XY scale.
+// The only caller in this file passes p in [0,5].
+float4 FetchPlane(int l, int p)
+{
+	SFiniteLightBound lgtDat = g_data[coarseList[l]];
+
+	const float3 boxX = lgtDat.boxAxisX.xyz;
+	const float3 boxY = lgtDat.boxAxisY.xyz;
+	const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+	const float3 center = lgtDat.center.xyz;
+	const float2 scaleXY = lgtDat.scaleXY;
+
+	return GetPlaneEq(boxX, boxY, boxZ, center, scaleXY, p);
+}
+
+
+
+
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// Removes from coarseList every light whose bounding sphere fails DoesSphereOverlapTile for the
+// tile around screenCoordinate. Returns the number of lights left; survivors keep their relative order.
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
+{
+	// view-space position of the given screen coordinate at linear depth +/-1 (sign depends on handedness)
+#if USE_LEFTHAND_CAMERASPACE
+	float3 vTileCenterDir = GetViewPosFromLinDepth( screenCoordinate, 1.0);
+#else
+	float3 vTileCenterDir = GetViewPosFromLinDepth( screenCoordinate, -1.0);
+#endif
+
+	// scale by half a tile (8 pixels)
+	float halfTileSizeAtZDistOne = 8*GetOnePixDiagWorldDistAtDepthOne();
+
+	// pass 1: threads mark rejected entries in place with the sentinel 0xffffffff
+	for(int iEntry=threadID; iEntry<iNrCoarseLights; iEntry+=NR_THREADS)
+	{
+		SFiniteLightBound bound = g_data[coarseList[iEntry]];
+		const bool bOverlapsTile = DoesSphereOverlapTile(vTileCenterDir, halfTileSizeAtZDistOne, bound.center.xyz, bound.radius);
+
+		if(!bOverlapsTile)
+		{
+			coarseList[iEntry]=0xffffffff;
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// pass 2: too greedy to double buffer coarseList in LDS, so a single thread closes the gaps serially.
+	if(threadID==0)
+	{
+		int nrKept = 0;
+		for(int iSrc=0; iSrc<iNrCoarseLights; iSrc++)
+		{
+			if(coarseList[iSrc]!=0xffffffff)
+			{
+				coarseList[nrKept] = coarseList[iSrc];
+				++nrKept;
+			}
+		}
+		lightOffsSph = nrKept;
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	return lightOffsSph;
+}
+#endif
+
+
+
+
+
+
+
+#ifdef EXACT_EDGE_TESTS
+
+// Returns one of the 8 corners of the tile's frustum, selected by the low three bits of i:
+// bit 0 picks x from viTilLL (0) or viTilUR (1), bit 1 does the same for y,
+// bit 2 picks the depth g_fNearPlane (0) or fTileFarPlane (1).
+// The depth is negated unless USE_LEFTHAND_CAMERASPACE is set.
+float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
+{
+	const bool bUseUpperX = (i&1)!=0;
+	const bool bUseUpperY = (i&2)!=0;
+	const bool bUseFar = (i&4)!=0;
+
+	float2 v2Scr;
+	v2Scr.x = bUseUpperX ? viTilUR.x : viTilLL.x;
+	v2Scr.y = bUseUpperY ? viTilUR.y : viTilLL.y;
+
+	float fLinDepth = bUseFar ? fTileFarPlane : g_fNearPlane;
+#if !USE_LEFTHAND_CAMERASPACE
+	fLinDepth = -fLinDepth;
+#endif
+
+	return GetViewPosFromLinDepth( v2Scr, fLinDepth);
+}
+
+// Produces edge e0 of the tile frustum as a start point vP0 and a vector vE0.
+// e0>>2 selects the section (0 = side edges, 1 = near edges, 2 = far edges), e0&3 the edge within it.
+// vP0 is fetched through GetTileVertex with the y of the two tile corners exchanged.
+// For section 0 the vector is vP0 itself; otherwise it is a signed unit X or Y axis.
+void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
+{
+	const int iSwizzle = e0&0x3;
+	const int iSection = e0>>2;		// section 0 is side edges, section 1 is near edges and section 2 is far edges
+
+	// vertex index: offset by 4 (far plane vertices) at section 2
+	const int iVertex = iSwizzle + (2*(iSection&0x2));
+
+	const uint2 viCornerA = uint2(viTilLL.x, viTilUR.y);
+	const uint2 viCornerB = uint2(viTilUR.x, viTilLL.y);
+	vP0 = GetTileVertex(viCornerA, viCornerB, iVertex, fTileFarPlane);
+
+	if(iSection==0)
+	{
+		vE0 = vP0;
+	}
+	else
+	{
+		const float fSign = (iSwizzle&0x2)==0 ? 1.0f : (-1.0f);
+		const bool bAlongX = (iSwizzle&0x1)==(iSwizzle>>1);
+		vE0 = fSign*(bAlongX ? float3(1,0,0) : float3(0,1,0));
+	}
+}
+
+// Culls coarseList in place with a separating-plane test between each light's hull and the tile frustum.
+// For every pair (hull edge, frustum edge) the cross product of the two edge vectors is tried as a plane normal;
+// if the hull vertices and the tile frustum vertices end up strictly on opposite sides, the light is dropped.
+// Lights of type SPHERE_LIGHT skip the test and are always kept.
+// Returns the number of lights kept; survivors are compacted to the front of coarseList in their original order.
+int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
+{
+	// lightOffs2 is the write cursor for the compacted list; only thread 0 touches it inside the loop
+	if(threadID==0) lightOffs2 = 0;
+
+	const bool bOnlyNeedFrustumSideEdges = true;
+	const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8;	// max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
+
+	// 12 hull edges per light, each paired with every frustum edge
+	const int totNrEdgePairs = 12*nrFrustEdges;
+	// lights are visited one at a time; the edge pairs of a light are spread over the threads of the group
+	for(int l=0; l<iNrCoarseLights; l++)
+	{
+		if(threadID==0) ldsIsLightInvisible=0;
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+		const int idxCoarse = coarseList[l];
+		[branch]if(g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
+		{
+			SFiniteLightBound lgtDat = g_data[idxCoarse];
+	
+			const float3 boxX = lgtDat.boxAxisX.xyz;
+			const float3 boxY = lgtDat.boxAxisY.xyz;
+			const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+			const float3 center = lgtDat.center.xyz;
+			const float2 scaleXY = lgtDat.scaleXY;
+
+			for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
+			{
+				// split the pair index: e0 = hull edge, e1 = frustum edge
+				int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
+				int e1 = i - e0*nrFrustEdges;
+
+				int idx_cur=0, idx_twin=0;
+				float3 vP0, vE0;
+				GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
+				
+			
+				float3 vP1, vE1;
+				GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);
+				
+				// potential separation plane
+				float3 vN = cross(vE0, vE1);
+			
+				// classify the hull vertices against the plane through vP0 with normal vN;
+				// vertex idx_cur is skipped by starting at k=1 and idx_twin is forced to distance 0
+				int positive=0, negative=0;
+				for(int k=1; k<8; k++)		// only need to test 7 verts (technically just 6).
+				{
+					int j = (idx_cur+k)&0x7;
+					float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
+					float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
+					if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
+				}
+				// resh: +1 / -1 when all hull vertices are on one side, 0 when they straddle the plane or all lie on it
+				int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
+
+				// same classification for the 8 vertices of the tile frustum
+				positive=0; negative=0;
+				for(int j=0; j<8; j++)
+				{
+					float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
+					float fSignDist = dot(vN, vPf-vP0);
+					if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
+				}
+				int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
+
+				// separated only when both sets are one-sided and on opposite sides
+				bool bFoundSepPlane = (resh*resf)<0;
+
+				if(bFoundSepPlane) InterlockedOr(ldsIsLightInvisible, 1);
+			}
+		}
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+		// thread 0 appends the surviving light; the write index never exceeds l, so unread entries are not overwritten
+		if(threadID==0 && ldsIsLightInvisible==0)
+		{
+			coarseList[lightOffs2++] = coarseList[l];
+		}
+	}
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+	return lightOffs2;
+}
+#endif
+
+
+
+// Single-thread kernel that resets element 0 of g_LayeredSingleIdxBuffer to zero.
+// LIGHTLISTGEN advances that element with InterlockedAdd to allocate space for its light lists.
+// Both parameters are unused.
+[numthreads(1, 1, 1)]
+void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	g_LayeredSingleIdxBuffer[0]=0;
+}
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute.meta
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute.meta
+fileFormatVersion: 2
+guid: a19ed36b92650254397f2a566ed76d36
+timeCreated: 1479306737
+licenseType: Pro
+ComputeShaderImporter:
+  currentAPIMask: 4
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute
+// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
+// https://github.com/wolfgangfengel/GPU-Pro-7
+
+#pragma kernel TileLightListGen					LIGHTLISTGEN=TileLightListGen
+#pragma kernel TileLightListGen_SrcBigTile		LIGHTLISTGEN=TileLightListGen_SrcBigTile		USE_TWO_PASS_TILED_LIGHTING
+
+
+#include "..\common\ShaderBase.h"
+#include "LightDefinitions.cs.hlsl"
+
+#include "LightingConvexHullUtils.hlsl"
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+#include "SortingComputeUtils.hlsl"
+#endif
+
+
+#define FINE_PRUNING_ENABLED
+#define PERFORM_SPHERICAL_INTERSECTION_TESTS
+
+
+// number of lights in g_vBoundsBuffer / g_vLightData / g_data; also the offset from a light's min bound to its max bound
+uniform int g_iNrVisibLights;
+// render target width (x) and height (y) in pixels, used to derive the 16x16 tile grid
+uniform uint2 g_viDimensions;
+// used by GetLinearDepth to turn a depth buffer value into linear depth
+uniform float4x4 g_mInvScrProjection;
+// used by GetViewPosFromLinDepth and GetOnePixDiagWorldDistAtDepthOne (scale and center terms of rows 0 and 1)
+uniform float4x4 g_mScrProjection;
+
+
+// depth buffer read through FetchDepth
+Texture2D g_depth_tex : register( t0 );
+// per light AABB: element l is the min corner, element l+g_iNrVisibLights the max corner
+StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
+// per light shading data (lightType, lightModel, position, axes, ...)
+StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
+// per light bounding volume (box axes, center, radius, scaleXY)
+StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
+
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+// per 64x64 big tile: MAX_NR_BIGTILE_LIGHTS_PLUSONE entries, the first being the light count followed by the light indices
+Buffer<uint> g_vBigTileLightList : register( t4 );
+#endif
+
+#define NR_THREADS			64
+
+// output buffer
+// 16 dwords per tile and light model; each dword holds two 16-bit entries, the very first entry being the light count
+RWBuffer<uint> g_vLightList : register( u0 );
+
+
+// capacity of the in-LDS lists, and the maximum number of lights written out per tile and light model
+#define MAX_NR_COARSE_ENTRIES		64
+#define MAX_NR_PRUNED_ENTRIES		24
+
+groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
+groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES];		// temporarily support room for all 64 while in LDS
+
+// tile depth range, stored as the bit pattern of a float so InterlockedMin/InterlockedMax can be used
+groupshared uint ldsZMin;
+groupshared uint ldsZMax;
+// number of lights that passed the AABB test (keeps counting past MAX_NR_COARSE_ENTRIES)
+groupshared uint lightOffs;
+#ifdef FINE_PRUNING_ENABLED
+// 64-bit mask, one bit per coarseList entry, set when any pixel of the tile is touched by that light
+groupshared uint ldsDoesLightIntersect[2];
+#endif
+// number of entries delivered in prunedList
+groupshared int ldsNrLightsFinal;
+
+groupshared int ldsModelListCount[NR_LIGHT_MODELS];		// per light model count of pruned lights (NR_LIGHT_MODELS is 2)
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// number of lights that survived SphericalIntersectionTests
+groupshared uint lightOffsSph;
+#endif
+
+
+//float GetLinearDepth(float3 vP)
+//{
+//	float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
+//	return v4Pres.z / v4Pres.w;
+//}
+
+// Converts a depth buffer value (0 is near, 1 is far) to linear depth: the point (0, 0, zDptBufSpace, 1)
+// is transformed by g_mInvScrProjection and the resulting z is divided by w.
+float GetLinearDepth(float zDptBufSpace)
+{
+	const float4 v4Unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
+	return v4Unprojected.z / v4Unprojected.w;
+}
+
+
+// Reconstructs a view-space position from a screen position and a linear depth.
+// The xy direction at unit depth is derived from the scale ([0].x, [1].y) and center ([0].z, [1].z)
+// terms of g_mScrProjection, then the whole vector (xy, 1) is scaled by fLinDepth.
+float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
+{
+	const float2 v2Scale = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
+	const float2 v2Center = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);
+
+#if USE_LEFTHAND_CAMERASPACE
+	const float2 v2DirXY = (v2ScrPos-v2Center)/v2Scale;
+#else
+	const float2 v2DirXY = -((v2ScrPos+v2Center)/v2Scale);
+#endif
+
+	return fLinDepth*float3( v2DirXY.x, v2DirXY.y, 1.0 );
+}
+
+// Length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x and y scale terms of g_mScrProjection.
+// Callers multiply it by 8 to get a half-tile extent at depth one.
+float GetOnePixDiagWorldDistAtDepthOne()
+{
+	const float2 v2InvScale = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
+	return length( v2InvScale );
+}
+
+// Forward declarations: both helpers are defined after the kernel that calls them.
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// compacts coarseList to the lights whose bounding sphere overlaps the tile and returns the new count
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
+#endif
+
+#ifdef FINE_PRUNING_ENABLED
+// per-pixel pruning of coarseList; delivers the accepted lights in prunedList and their count in ldsNrLightsFinal
+void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
+#endif
+
+
+// Tiled light list build. One thread group processes one 16x16 pixel tile (u3GroupID.xy).
+// Stages, in order:
+//   1. fetch the 256 tile depths (4 per thread) and reduce them to the tile's min/max depth,
+//   2. gather lights whose screen-space AABB overlaps the tile (xy and depth) into coarseList,
+//   3. optionally cull by sphere test, then prune per pixel (FinePruneLights) into prunedList,
+//   4. count lights per light model, sort prunedList,
+//   5. write, per light model, the clamped count followed by 16-bit light indices to g_vLightList
+//      (16 dwords per tile and model).
+// All group barriers are compiled out for SHADER_API_XBOXONE and SHADER_API_PSSL.
+[numthreads(NR_THREADS, 1, 1)]
+void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	uint2 tileIDX = u3GroupID.xy;
+	uint t=threadID;
+
+	if(t<MAX_NR_COARSE_ENTRIES)
+		prunedList[t]=0;
+
+	uint iWidth = g_viDimensions.x;
+	uint iHeight = g_viDimensions.y;
+	uint nrTilesX = (iWidth+15)/16;
+	uint nrTilesY = (iHeight+15)/16;
+
+	// build tile scr boundary
+	const uint uFltMax = 0x7f7fffff;  // FLT_MAX as a uint
+	if(t==0)
+	{
+		ldsZMin = uFltMax;
+		ldsZMax = 0;
+		lightOffs = 0;
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+
+	uint2 viTilLL = 16*tileIDX;
+
+	// establish min and max depth first
+	float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
+
+
+	// linear depth of the 4 pixels owned by this thread, reused later by FinePruneLights
+	float4 vLinDepths;
+	{
+		// Fetch depths and calculate min/max
+		[unroll]
+		for(int i = 0; i < 4; i++)
+		{
+			int idx = i * NR_THREADS + t;
+			uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
+			const float fDepth = FetchDepth(g_depth_tex, uCrd);
+			vLinDepths[i] = GetLinearDepth(fDepth);
+			if(fDepth<VIEWPORT_SCALE_Z)		// if not skydome
+			{
+				dpt_mi = min(fDepth, dpt_mi);
+				dpt_ma = max(fDepth, dpt_ma);
+			}
+		}
+
+		// reduce across the group through the floats' bit patterns
+		InterlockedMax(ldsZMax, asuint(dpt_ma));
+		InterlockedMin(ldsZMin, asuint(dpt_mi));
+
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+	}
+
+
+	// tile bounds: xy normalized by the target dimensions, z taken from the depth range found above
+	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
+	float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
+	vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
+
+
+	// build coarse list using AABB
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+	int NrBigTilesX = (nrTilesX+3)>>2;
+	const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);		// map the idx to 64x64 tiles
+	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
+	for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
+	{
+		int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
+#else
+	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
+	{
+#endif
+		// min corners are stored first, max corners g_iNrVisibLights entries later
+		const float3 vMi = g_vBoundsBuffer[l];
+		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
+
+		if( all(vMa>vTileLL) && all(vMi<vTileUR))
+		{
+			unsigned int uInc = 1;
+			unsigned int uIndex;
+			InterlockedAdd(lightOffs, uInc, uIndex);
+			if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;		// add to light list
+		}
+	}
+
+#ifdef FINE_PRUNING_ENABLED
+	if(t<2) ldsDoesLightIntersect[t] = 0;
+#endif
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// lightOffs keeps counting past the capacity, so clamp it to what was actually stored
+	int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
+#endif
+
+#ifndef FINE_PRUNING_ENABLED
+	{
+		if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
+		if(t==0) ldsNrLightsFinal=iNrCoarseLights;
+	}
+#else
+	{
+		// initializes ldsNrLightsFinal with the number of accepted lights.
+		// all accepted entries delivered in prunedList[].
+		FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
+	}
+#endif
+
+	//
+	if(t<NR_LIGHT_MODELS) ldsModelListCount[t]=0;
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+
+	// count the pruned lights per light model
+	int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
+	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
+	{
+		InterlockedAdd(ldsModelListCount[ g_vLightData[ prunedList[i] ].lightModel ], 1);
+	}
+
+
+	// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
+	//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
+#endif
+
+	// write lights to global buffers
+	int localOffs=0;
+	int offs = tileIDX.y*nrTilesX + tileIDX.x;
+	for(int m=0; m<NR_LIGHT_MODELS; m++)
+	{
+		int nrLightsFinal = ldsModelListCount[ m ];
+		int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
+
+
+		// 16-bit entries: the count first, then the light indices; two entries per dword
+		const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
+		for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
+		{
+			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
+			// When the clamped count is even, the high half of the last dword is padding past the stored count.
+			// Write 0 there instead of reading prunedList: with a full list (localOffs+count == MAX_NR_COARSE_ENTRIES)
+			// that read would land one element past the end of the groupshared array.
+			uint uHigh = (2*l)<nrLightsFinalClamped ? prunedList[2*l+0+localOffs] : 0;
+
+			g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
+		}
+
+		// the next model's lights follow in prunedList (unclamped count) and in the next per-model plane of g_vLightList
+		localOffs += nrLightsFinal;
+		offs += (nrTilesX*nrTilesY);
+	}
+
+}
+
+
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// Compacts coarseList to the lights whose bounding sphere passes DoesSphereOverlapTile for the tile
+// around screenCoordinate, and returns how many remain. prunedList is used as scratch storage for a
+// backup of coarseList; the order of the surviving lights is not preserved.
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
+{
+	// every thread stores the same value; the barrier below orders it before the atomic adds
+	lightOffsSph = 0;
+
+	// make a copy of coarseList in prunedList.
+	for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
+		prunedList[l]=coarseList[l];
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+#if USE_LEFTHAND_CAMERASPACE
+	float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
+#else
+	float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
+#endif
+
+	float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
+	float halfTileSizeAtZDistOne = 8*onePixDiagDist;		// scale by half a tile
+
+	for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
+	{
+		// Always read the light index from the prunedList backup: other threads are concurrently
+		// overwriting coarseList[uIndex] below, so coarseList[l] may already hold a different light.
+		const uint lightIndex = prunedList[l];
+		SFiniteLightBound lightData = g_data[lightIndex];
+
+		if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius) )
+		{
+			unsigned int uIndex;
+			InterlockedAdd(lightOffsSph, 1, uIndex);
+			coarseList[uIndex]=lightIndex;
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	return lightOffsSph;
+}
+#endif
+
+
+#ifdef FINE_PRUNING_ENABLED
+// Per-pixel pruning of the coarse light list.
+// Each thread tests its 4 pixels of the tile (pixel index t + i*NR_THREADS, view position rebuilt from vLinDepths[i])
+// against every light in coarseList using a test specific to the light type (spot, sphere, box).
+// A light is accepted when at least one pixel of any thread passes its test.
+// initializes ldsNrLightsFinal with the number of accepted lights.
+// all accepted entries delivered in prunedList[].
+// Expects ldsDoesLightIntersect[] to have been cleared by the caller. The order of prunedList is not deterministic.
+void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
+{
+	uint t = threadID;
+	uint iWidth = g_viDimensions.x;
+	uint iHeight = g_viDimensions.y;
+
+	// per-thread 64-bit mask: bit l is set when one of this thread's pixels is touched by coarse light l
+	uint uLightsFlags[2] = {0,0};
+	int l=0;
+	// need this outer loop even on xb1 and ps4 since direct lights and
+	// reflection lights are kept in separate regions.
+	while(l<iNrCoarseLights)
+	{
+		// fetch light
+		int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+		uint uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
+
+		// each inner loop consumes a run of consecutive lights of one type and pre-fetches the next light on exit
+		// spot
+		while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
+		{
+			SFiniteLightData lightData = g_vLightData[idxCoarse];
+			const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
+				
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+	
+				// check pixel
+				float3 fromLight = vVPos-lightData.lightPos.xyz;
+				float distSq = dot(fromLight,fromLight);
+				const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz);		// spotDir = lightData.lightAxisZ.xyz
+
+				float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
+
+				// disc spots use the radial distance, other spots the larger of the two axis distances
+				float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
+				// inside when within the light's radius and the projection on the spot axis exceeds fDist2D*cotan
+				if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
+			}
+
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
+		}
+
+		// sphere
+		while(l<iNrCoarseLights && uLgtType==SPHERE_LIGHT)
+		{
+			SFiniteLightData lightData = g_vLightData[idxCoarse];
+
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+	
+				// check pixel
+				float3 vLp = lightData.lightPos.xyz;
+				float3 toLight = vLp - vVPos; 
+				float distSq = dot(toLight,toLight);
+			
+				if(lightData.radiusSq>distSq) uVal = 1;
+			}
+
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
+		}
+
+		// Box
+		while(l<iNrCoarseLights && uLgtType==BOX_LIGHT)
+		{
+			SFiniteLightData lightData = g_vLightData[idxCoarse];
+
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+
+				// check pixel
+				float3 toLight  = lightData.lightPos.xyz - vVPos;
+
+				float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
+				dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange;		// not as efficient as it could be
+				if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1;						// but allows us to not write out OuterDists
+			}
+
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
+		}
+
+		// in case we have some corrupt data make sure we terminate
+		if(uLgtType>=MAX_TYPES) ++l;
+	}
+
+	// merge the per-thread masks into the group-wide mask
+	InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
+	InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
+	if(t==0) ldsNrLightsFinal = 0;
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// thread t owns coarse entry t and appends it to prunedList when its bit is set
+	if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
+	{
+		unsigned int uInc = 1;
+		unsigned int uIndex;
+		InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
+		if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t];		// we allow up to 64 pruned lights while stored in LDS.
+	}
+}
+#endif
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute.meta
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute.meta
+fileFormatVersion: 2
+guid: 65af3444cbf4b3747a4dead7ee00cfee
+timeCreated: 1479306737
+licenseType: Pro
+ComputeShaderImporter:
+  currentAPIMask: 4
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute
+// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
+// https://github.com/wolfgangfengel/GPU-Pro-7
+
+#pragma kernel ScreenBoundsAABB
+
+#include "..\common\ShaderBase.h"
+#include "LightDefinitions.cs.hlsl"
+
+// number of entries of g_data to process; threads mapped to a light index at or past this count are masked out
+uniform int g_iNrVisibLights;
+// inverse projection, used to bring the camera frustum corners into view space
+uniform float4x4 g_mInvProjection;
+// projection applied to the view-space hull vertices of each light
+uniform float4x4 g_mProjection;
+
+
+// per light bounding volume (box axes, center, radius, scaleXY)
+StructuredBuffer<SFiniteLightBound> g_data : register( t0 );
+
+
+
+#define FLT_EPSILON     1.192092896e-07F        // smallest such that 1.0+FLT_EPSILON != 1.0
+// 64 threads per group: 8 lights per group, 8 threads per light
+#define NR_THREADS			64
+
+// output buffer
+RWStructuredBuffer<float3> g_vBoundsBuffer : register( u0 );
+
+#define MAX_PNTS		9		// strictly this should be 10=6+4 but we get more wavefronts and 10 seems to never hit (fingers crossed)
+								// However, in the worst case the only consequence is that clipping stops early and the
+								// back plane is not clipped against, which doesn't cause any errors.
+
+
+// LDS (2496 bytes)
+// Each light owns MAX_PNTS*2 consecutive entries of posX/posY/posZ/posW (two clip buffers of MAX_PNTS vertices).
+groupshared float posX[MAX_PNTS*8*2];
+groupshared float posY[MAX_PNTS*8*2];
+groupshared float posZ[MAX_PNTS*8*2];
+groupshared float posW[MAX_PNTS*8*2];
+// 8 lights x 6 hull faces; each entry packs four 6-bit clip codes, one per vertex of the face
+groupshared unsigned int clipFlags[48];
+
+
+// helpers defined later in the file
+unsigned int GetClip(const float4 P);
+int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p);
+void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r);
+
+#include "LightingConvexHullUtils.hlsl"
+
+
+[numthreads(NR_THREADS, 1, 1)]
+void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	uint groupID = u3GroupID.x;
+	
+	//uint vindex = groupID * NR_THREADS + threadID;
+	unsigned int g = groupID;
+	unsigned int t = threadID;
+
+	const int subLigt = (int) (t/8);
+	const int lgtIndex = subLigt+(int) g*8;
+	const int sideIndex = (int) (t%8);
+	
+	SFiniteLightBound lgtDat = g_data[lgtIndex];
+	
+	const float3 boxX = lgtDat.boxAxisX.xyz;
+	const float3 boxY = lgtDat.boxAxisY.xyz;
+	const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+	const float3 center = lgtDat.center.xyz;
+	const float radius = lgtDat.radius;
+	const float2 scaleXY = lgtDat.scaleXY;
+
+	{
+		if(sideIndex<6 && lgtIndex<(int) g_iNrVisibLights)		// mask 2 out of 8 threads
+		{
+			float3 q0, q1, q2, q3;
+			GetQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, sideIndex);
+
+
+			const float4 vP0 = mul(g_mProjection, float4(q0, 1));
+			const float4 vP1 = mul(g_mProjection, float4(q1, 1));
+			const float4 vP2 = mul(g_mProjection, float4(q2, 1));
+			const float4 vP3 = mul(g_mProjection, float4(q3, 1));
+
+			// test vertices of one quad (of the convex hull) for intersection
+			const unsigned int uFlag0 = GetClip(vP0);
+			const unsigned int uFlag1 = GetClip(vP1);
+			const unsigned int uFlag2 = GetClip(vP2);
+			const unsigned int uFlag3 = GetClip(vP3);
+
+			const float4 vPnts[] = {vP0, vP1, vP2, vP3};
+				
+			// screen-space AABB of one quad (assuming no intersection)
+			float3 vMin, vMax;
+			for(int k=0; k<4; k++)
+			{
+				float fW = vPnts[k].w;
+				float fS = fW<0 ? -1 : 1;
+				float fWabs = fW<0 ? (-fW) : fW;
+				fW = fS * (fWabs<FLT_EPSILON ? FLT_EPSILON : fWabs);
+				float3 vP = float3(vPnts[k].x/fW, vPnts[k].y/fW, vPnts[k].z/fW);
+				if(k==0) { vMin=vP; vMax=vP; }
+				
+				vMax = max(vMax, vP); vMin = min(vMin, vP);
+			}
+
+			clipFlags[subLigt*6+sideIndex] = (uFlag0<<0) | (uFlag1<<6) | (uFlag2<<12) | (uFlag3<<18);
+
+			// store in clip buffer (only use these vMin and vMax if light is 100% visible in which case clipping isn't needed)
+			posX[subLigt*MAX_PNTS*2 + sideIndex] = vMin.x;
+			posY[subLigt*MAX_PNTS*2 + sideIndex] = vMin.y;
+			posZ[subLigt*MAX_PNTS*2 + sideIndex] = vMin.z;
+
+			posX[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.x;
+			posY[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.y;
+			posZ[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.z;
+		}
+	}
+
+	// if not XBONE and not PLAYSTATION4 we need a memorybarrier here
+	// since we can't rely on the gpu cores being 64 wide.
+	// We need a pound define around this.
+	GroupMemoryBarrierWithGroupSync();
+
+
+	{
+		int f=0;
+
+		if(sideIndex==0 && lgtIndex<(int) g_iNrVisibLights)
+		{
+			// quick acceptance or rejection
+			unsigned int uCollectiveAnd = (unsigned int) -1;
+			unsigned int uCollectiveOr = 0;
+			for(f=0; f<6; f++)
+			{
+				unsigned int uFlagAnd = clipFlags[subLigt*6+f]&0x3f;
+				unsigned int uFlagOr = uFlagAnd;
+				for(int i=1; i<4; i++)
+				{
+					unsigned int uClipBits = (clipFlags[subLigt*6+f]>>(i*6))&0x3f;
+					uFlagAnd &= uClipBits;
+					uFlagOr |= uClipBits;
+				}
+
+				uCollectiveAnd &= uFlagAnd;
+				uCollectiveOr |= uFlagOr;
+			}
+
+			bool bSetBoundYet = false;
+			float3 vMin=0.0, vMax=0.0;
+			if(uCollectiveAnd!=0 || uCollectiveOr==0)		// all invisible or all visible (early out)
+			{
+				if(uCollectiveOr==0)	// all visible
+				{
+					for(f=0; f<6; f++)
+					{
+						const int sideIndex = f;
+
+						float3 vFaceMi = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 0], posY[subLigt*MAX_PNTS*2 + sideIndex + 0], posZ[subLigt*MAX_PNTS*2 + sideIndex + 0]);
+						float3 vFaceMa = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 6], posY[subLigt*MAX_PNTS*2 + sideIndex + 6], posZ[subLigt*MAX_PNTS*2 + sideIndex + 6]);
+						
+						for(int k=0; k<2; k++)
+						{
+							float3 vP = k==0 ? vFaceMi : vFaceMa;
+							if(f==0 && k==0) { vMin=vP; vMax=vP; }
+							
+							vMax = max(vMax, vP); vMin = min(vMin, vP);
+						}
+					}
+					bSetBoundYet=true;
+				}
+			}
+			else		// :( need true clipping
+			{
+				
+				for(f=0; f<6; f++)
+				{
+					float3 q0, q1, q2, q3;
+					GetQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, f);
+			
+					// 4 vertices to a quad of the convex hull in post projection space
+					const float4 vP0 = mul(g_mProjection, float4(q0, 1));
+					const float4 vP1 = mul(g_mProjection, float4(q1, 1));
+					const float4 vP2 = mul(g_mProjection, float4(q2, 1));
+					const float4 vP3 = mul(g_mProjection, float4(q3, 1));
+
+					
+					int iSrcIndex = 0;
+
+					int offs = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
+
+					// fill up source clip buffer with the quad
+					posX[offs+0]=vP0.x; posX[offs+1]=vP1.x; posX[offs+2]=vP2.x; posX[offs+3]=vP3.x;
+					posY[offs+0]=vP0.y; posY[offs+1]=vP1.y; posY[offs+2]=vP2.y; posY[offs+3]=vP3.y;
+					posZ[offs+0]=vP0.z; posZ[offs+1]=vP1.z; posZ[offs+2]=vP2.z; posZ[offs+3]=vP3.z;
+					posW[offs+0]=vP0.w; posW[offs+1]=vP1.w; posW[offs+2]=vP2.w; posW[offs+3]=vP3.w;
+
+					int iNrSrcVerts = 4;
+
+					// do true clipping
+					for(int p=0; p<6; p++)
+					{
+						const int nrVertsDst = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p);
+
+						iSrcIndex = 1-iSrcIndex;
+						iNrSrcVerts = nrVertsDst;
+
+						if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break;
+					}
+
+					// final clipped convex primitive is in src buffer
+					if(iNrSrcVerts>2)
+					{
+						int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
+						for(int k=0; k<iNrSrcVerts; k++)
+						{
+							float4 vCur = float4(posX[offs_src+k], posY[offs_src+k], posZ[offs_src+k], posW[offs_src+k]);
+							
+							// project and apply toward AABB
+							float3 vP = float3(vCur.x/vCur.w, vCur.y/vCur.w, vCur.z/vCur.w);
+							if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }
+							
+							vMax = max(vMax, vP); vMin = min(vMin, vP);
+						}
+					}
+						
+				}
+
+				////////////////////// look for camera frustum verts that need to be included. That is frustum vertices inside the convex hull for the light
+				int i=0;
+				for(i=0; i<8; i++)	// establish 8 camera frustum vertices
+				{
+					float3 vVertPSpace = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);
+				
+					float4 v4ViewSpace = mul(g_mInvProjection, float4(vVertPSpace,1));
+					float3 vViewSpace = float3(v4ViewSpace.x/v4ViewSpace.w, v4ViewSpace.y/v4ViewSpace.w, v4ViewSpace.z/v4ViewSpace.w);
+
+					posX[subLigt*MAX_PNTS*2 + i] = vViewSpace.x;
+					posY[subLigt*MAX_PNTS*2 + i] = vViewSpace.y;
+					posZ[subLigt*MAX_PNTS*2 + i] = vViewSpace.z;
+				}
+
+				// determine which camera frustum vertices are inside the convex hull
+				uint uVisibFl = 0xff;
+				for(f=0; f<6; f++)
+				{
+					float3 vP0, vN;
+					GetPlane(vP0, vN, boxX, boxY, boxZ, center, scaleXY, f);
+
+					for(i=0; i<8; i++)
+					{
+						float3 vViewSpace = float3(posX[subLigt*MAX_PNTS*2 + i], posY[subLigt*MAX_PNTS*2 + i], posZ[subLigt*MAX_PNTS*2 + i]);
+						uVisibFl &= ( dot(vViewSpace-vP0, vN)<0 ? 0xff : (~(1<<i)) );
+					}
+				}
+
+				// apply camera frustum vertices inside the convex hull to the AABB
+				for(i=0; i<8; i++)
+				{
+					if((uVisibFl&(1<<i))!=0)
+					{
+						float3 vP = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);
+
+						if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }
+							
+						vMax = max(vMax, vP); vMin = min(vMin, vP);
+					}
+				}
+			}
+
+
+			
+
+
+			// determine AABB bound in [-1;1]x[-1;1] screen space using bounding sphere.
+			// Use the result to make our already established AABB from the convex hull
+			// potentially tighter.
+			if(!bSetBoundYet)
+			{
+				// set the AABB off-screen
+				vMin = float3(-3,-3,-3);
+				vMax = float3(-2,-2,-2);
+			}
+			else
+			{
+				//if((center.z+radius)<0.0)
+				if( length(center)>radius)
+				{
+					float2 vMi, vMa;
+					bool2 bMi, bMa;
+					CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, center, radius);
+		
+					vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy;
+					vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy;
+				}
+
+#if USE_LEFTHAND_CAMERASPACE
+				if((center.z-radius)>0.0)
+				{
+					float4 vPosF = mul(g_mProjection, float4(0,0,center.z-radius,1));
+					vMin.z = max(vMin.z, vPosF.z/vPosF.w);
+				}
+				if((center.z+radius)>0.0)
+				{
+					float4 vPosB = mul(g_mProjection, float4(0,0,center.z+radius,1));
+					vMax.z = min(vMax.z, vPosB.z/vPosB.w);
+				}
+#else
+				if((center.z+radius)<0.0)
+				{
+					float4 vPosF = mul(g_mProjection, float4(0,0,center.z+radius,1));
+					vMin.z = max(vMin.z, vPosF.z/vPosF.w);
+				}
+				if((center.z-radius)<0.0)
+				{
+					float4 vPosB = mul(g_mProjection, float4(0,0,center.z-radius,1));
+					vMax.z = min(vMax.z, vPosB.z/vPosB.w);
+				}
+#endif
+				else
+				{
+					vMin = float3(-3,-3,-3);
+					vMax = float3(-2,-2,-2);
+				}
+			}
+
+
+			// we should consider doing a look-up here into a max depth mip chain
+			// to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth
+			//g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
+			//g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
+			
+			// changed for unity
+			g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, 0.5*vMin.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
+			g_vBoundsBuffer[lgtIndex+(int) g_iNrVisibLights] = float3(0.5*vMax.x+0.5, 0.5*vMax.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
+		}
+	}
+}
+
+
+// Forward declaration: GenNewVert() is defined further down but is called by ClipAgainstPlane().
+float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p);
+
+// Clips the polygon held in one half of a light's scratch region of posX/posY/posZ/posW
+// against clip plane 'p' and writes the resulting polygon into the other half.
+//   iSrcIndex   - which half (0 or 1) holds the source polygon; the other half receives the output
+//   iNrSrcVerts - number of vertices in the source polygon
+//   subLigt     - light slot; each slot owns 2*MAX_PNTS consecutive entries in the scratch arrays
+//   p           - plane index; bit (1<<p) of GetClip() marks a vertex as outside this plane
+// Returns the number of vertices written to the destination half. Output is capped at
+// MAX_PNTS vertices; any vertex beyond that is silently dropped.
+int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p)
+{
+	const int lightBase = subLigt*MAX_PNTS*2;
+	const int srcBase = iSrcIndex*MAX_PNTS + lightBase;
+	const int dstBase = (1-iSrcIndex)*MAX_PNTS + lightBase;
+	const unsigned int uPlaneBit = (1<<p);
+
+	// seed the walk with the last vertex so the first edge handled is the closing edge (last -> first)
+	const int lastVert = iNrSrcVerts-1;
+	float4 vFrom = float4(posX[srcBase+lastVert], posY[srcBase+lastVert], posZ[srcBase+lastVert], posW[srcBase+lastVert]);
+	bool bFromInside = (GetClip(vFrom)&uPlaneBit)==0;
+
+	int outCount = 0;
+	for(int v=0; v<iNrSrcVerts; v++)
+	{
+		const float4 vTo = float4(posX[srcBase+v], posY[srcBase+v], posZ[srcBase+v], posW[srcBase+v]);
+		const bool bToInside = (GetClip(vTo)&uPlaneBit)==0;
+
+		// the edge crosses the plane: emit the intersection point first
+		if(bToInside!=bFromInside && outCount<MAX_PNTS)
+		{
+			float4 vInside = vFrom;
+			float4 vOutside = vTo;
+			if(bToInside) { vInside = vTo; vOutside = vFrom; }
+
+			const float4 vCut = GenNewVert(vInside, vOutside, p);
+			posX[dstBase+outCount]=vCut.x;
+			posY[dstBase+outCount]=vCut.y;
+			posZ[dstBase+outCount]=vCut.z;
+			posW[dstBase+outCount]=vCut.w;
+			++outCount;
+		}
+
+		// the end point of the edge is kept when it is on the visible side
+		if(bToInside && outCount<MAX_PNTS)
+		{
+			posX[dstBase+outCount]=vTo.x;
+			posY[dstBase+outCount]=vTo.y;
+			posZ[dstBase+outCount]=vTo.z;
+			posW[dstBase+outCount]=vTo.w;
+			++outCount;
+		}
+
+		vFrom = vTo;
+		bFromInside = bToInside;
+	}
+
+	return outCount;
+}
+
+
+
+// Builds a 6-bit outcode for the homogeneous point P; a set bit means P is outside that plane.
+//   1:  x < -w      2:  x > w
+//   4:  y < -w      8:  y > w
+//   16: z < 0       32: z > w
+// A result of zero means -w <= x <= w, -w <= y <= w and 0 <= z <= w all hold.
+unsigned int GetClip(const float4 P)
+{
+	unsigned int uOutcode = 0;
+
+	if(P.x<-P.w) uOutcode |= 1;
+	if(P.x>P.w)  uOutcode |= 2;
+	if(P.y<-P.w) uOutcode |= 4;
+	if(P.y>P.w)  uOutcode |= 8;
+	if(P.z<0)    uOutcode |= 16;
+	if(P.z>P.w)  uOutcode |= 32;
+
+	return uOutcode;
+}
+
+// Returns the homogeneous point where the segment between vVisib and vInvisib crosses
+// clip plane 'p'. The plane is selected as axis = p/2 (0: x, 1: y, 2: z) with the
+// plane equation coord = fSign*w, where fSign is -1 for even p, +1 for odd p and 0 for p==4.
+//   vVisib   - end point on the visible side of the plane
+//   vInvisib - end point on the invisible side of the plane
+float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p)
+{
+	float fSign = (p&1)==0 ? -1 : 1;
+	if(p==4) fSign = 0;
+
+	// pick the coordinate the plane constrains
+	const int axis = ((uint) p)/2;
+	float cVisib, cInvisib;
+	if(axis==0)      { cVisib = vVisib.x; cInvisib = vInvisib.x; }
+	else if(axis==1) { cVisib = vVisib.y; cInvisib = vInvisib.y; }
+	else             { cVisib = vVisib.z; cInvisib = vInvisib.z; }
+
+	// solve for t in:
+	// fSign*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (cVisib-cInvisib)*t + cInvisib
+	const float fNumer = fSign*vInvisib.w-cInvisib;
+	const float fDenom = (cVisib-cInvisib) - fSign*(vVisib.w-vInvisib.w);
+	const float fT = fNumer/fDenom;
+
+	float4 vNew = vVisib*fT + vInvisib*(1-fT);
+
+	// overwrite the clipped coordinate so it lies exactly on the plane
+	// instead of carrying interpolation round-off
+	const float fOnPlane = fSign*vNew.w;
+	if(axis==0)      vNew.x = fOnPlane;
+	else if(axis==1) vNew.y = fOnPlane;
+	else             vNew.z = fOnPlane;
+
+	return vNew;
+}
+
+
+// Multiplies 'plane' (treated as a row vector) by 'InvProjection' and returns the product.
+// Presumably this re-expresses a view-space plane in post-projection space, as the name
+// suggests -- confirm against the matrix convention used by the caller.
+float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)
+{
+	const float4 planePostSpace = mul(plane, InvProjection);
+	return planePostSpace;
+}
+
+// Computes two 2D vectors, packed as (res.xy, res.zw), from a 2D position and a radius.
+// Each returned pair (a,b) satisfies a*posXY_in.x + b*posXY_in.y = -r.
+// Presumably these are the normals of the two lines through the origin that are
+// tangent to the circle of radius r centered at posXY_in -- TODO confirm.
+// NOTE: when dot(posXY_in, posXY_in) < r*r the sqrt() argument below is negative, and
+// when posXY_in is (0,0) the divisions are by zero; the result is not meaningful then.
+float4 EvalPlanePair(float2 posXY_in, float r)
+{
+	// evaluate in a frame rotated by 90 degrees when |x| > |y|, so that the
+	// component we divide by is never the smaller one (avoids dividing by zero)
+	const bool bRotated = abs(posXY_in.y)<abs(posXY_in.x);
+	float2 c = posXY_in;
+	if(bRotated) c = float2(-posXY_in.y, posXY_in.x);
+
+	const float fDistSq = dot(c, c);
+	const float fD = c.y * sqrt(fDistSq - r*r);
+
+	float4 planes;
+	planes.x = (-r*c.x - fD) / fDistSq;
+	planes.z = (-r*c.x + fD) / fDistSq;
+	planes.y = (-r-planes.x*c.x) / c.y;
+	planes.w = (-r-planes.z*c.x) / c.y;
+
+	// undo the 90 degree rotation
+	if(bRotated) planes = float4(planes.y, -planes.x, planes.w, -planes.z);
+
+	return planes;
+}
+
+// Computes a 2D bound in post-projection space for a sphere given in view space.
+//   bIsMinValid / bIsMaxValid - per axis (x,y): true when the matching component of vMin / vMax may be used
+//   vMin / vMax               - lower / upper bound per axis; a component whose valid flag is false
+//                               must be ignored by the caller (it may not even be a finite number)
+//   InvProjection             - matrix handed to TransformPlaneToPostSpace()
+//   pos_view_space            - sphere center in view space
+//   r                         - sphere radius
+void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r)
+{
+	const float2 posXZ = float2(pos_view_space.x, pos_view_space.z);
+	const float2 posYZ = float2(pos_view_space.y, pos_view_space.z);
+
+	// EvalPlanePair() takes sqrt(dot(pos,pos) - r*r) and divides by components of pos, so its
+	// result is only meaningful while the origin is strictly outside the sphere's 2D projection.
+	// This can fail per axis even when the origin is outside the sphere in 3D. Test for it
+	// explicitly rather than relying on NaN comparisons evaluating to false, which is not
+	// guaranteed unless the shader is compiled with strict IEEE rules.
+	const float fRadSq = r*r;
+	const bool bValidX = dot(posXZ, posXZ)>fRadSq;
+	const bool bValidY = dot(posYZ, posYZ)>fRadSq;
+
+	float4 planeX = EvalPlanePair(posXZ, r);
+	float4 planeY = EvalPlanePair(posYZ, r);
+
+
+#if USE_LEFTHAND_CAMERASPACE
+	planeX = planeX.zwxy;		// need to swap left/right and top/bottom planes when using left hand system
+	planeY = planeY.zwxy;
+#endif
+
+	bIsMinValid = bool2(bValidX && planeX.z<0, bValidY && planeY.z<0);
+	bIsMaxValid = bool2(bValidX && (-planeX.x)<0, bValidY && (-planeY.x)<0);
+
+	// hopefully the compiler takes zeros into account
+	// should be the case since the transformation in TransformPlaneToPostSpace()
+	// is done using multiply-adds and not dot product instructions.
+	float4 planeX0 = TransformPlaneToPostSpace(InvProjection, float4(planeX.x, 0, planeX.y, 0));
+	float4 planeX1 = TransformPlaneToPostSpace(InvProjection, float4(planeX.z, 0, planeX.w, 0));
+	float4 planeY0 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.x, planeY.y, 0));
+	float4 planeY1 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.z, planeY.w, 0));
+
+	
+	// convert planes to the forms (1,0,0,D) and (0,1,0,D)
+	// 2D bound is given by -D components
+	float2 A = -float2(planeX0.w / planeX0.x, planeY0.w / planeY0.y);
+	float2 B = -float2(planeX1.w / planeX1.x, planeY1.w / planeY1.y);
+
+	// Bound is complete
+	vMin = B;
+	vMax = A;
+}
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute.meta
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute.meta
+fileFormatVersion: 2
+guid: 728dce960f8a9c44bbc3abb3b851d8f6
+timeCreated: 1479306737
+licenseType: Pro
+ComputeShaderImporter:
+  currentAPIMask: 4
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: