fixed tile debugging dropdown

Added tile classification with indirect dispatch (feature variant shaders). Added a debugging mode for feature variants.
8 年前 · 12d98082
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Debug/Resources/DebugViewTiles.shader
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Debug/Resources/DebugViewTiles.shader
-Shader "Hidden/HDRenderPipeline/DebugViewTiles"
-{
-    SubShader
-    {
-
-        Pass
-        {
-            ZWrite Off
-            Blend SrcAlpha OneMinusSrcAlpha
-
-            HLSLPROGRAM
-            #pragma target 4.5
-            #pragma only_renderers d3d11 ps4 metal // TEMP: until we go further in dev
-
-            #pragma vertex Vert
-            #pragma fragment Frag
-
-            #define LIGHTLOOP_TILE_PASS           
-            #define LIGHTLOOP_TILE_ALL
-
-            #pragma multi_compile USE_FPTL_LIGHTLIST USE_CLUSTERED_LIGHTLIST
-
-            //-------------------------------------------------------------------------------------
-            // Include
-            //-------------------------------------------------------------------------------------
-
-            #include "../../../ShaderLibrary/Common.hlsl"
-
-            // Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
-            // deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem), 
-            // the deferred shader will require to use multicompile.
-            #define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
-            #include "../../ShaderConfig.cs.hlsl"
-            #include "../../ShaderVariables.hlsl"
-            #include "../../Lighting/Lighting.hlsl" // This include Material.hlsl
-
-            //-------------------------------------------------------------------------------------
-            // variable declaration
-            //-------------------------------------------------------------------------------------
-
-            uint _ViewTilesFlags;
-            float2 _MousePixelCoord;
-
-
-            float4 Vert(float3 positionOS : POSITION): SV_POSITION
-            {
-                return TransformWorldToHClip(TransformObjectToWorld(positionOS));
-            }
-
-            float4 AlphaBlend(float4 c0, float4 c1)	// c1 over c0
-            {
-                return float4(lerp(c0.rgb, c1.rgb, c1.a), c0.a + c1.a - c0.a * c1.a);
-            }
-
-            float4 OverlayHeatMap(uint2 pixCoord, uint numLights)
-            {
-                const float4 kRadarColors[12] =
-                {
-                    float4(0.0, 0.0, 0.0, 0.0),   // black
-                    float4(0.0, 0.0, 0.6, 0.5),   // dark blue
-                    float4(0.0, 0.0, 0.9, 0.5),   // blue
-                    float4(0.0, 0.6, 0.9, 0.5),   // light blue
-                    float4(0.0, 0.9, 0.9, 0.5),   // cyan
-                    float4(0.0, 0.9, 0.6, 0.5),   // blueish green
-                    float4(0.0, 0.9, 0.0, 0.5),   // green
-                    float4(0.6, 0.9, 0.0, 0.5),   // yellowish green
-                    float4(0.9, 0.9, 0.0, 0.5),   // yellow
-                    float4(0.9, 0.6, 0.0, 0.5),   // orange
-                    float4(0.9, 0.0, 0.0, 0.5),   // red
-                    float4(1.0, 0.0, 0.0, 0.9)    // strong red
-                };
-
-                float maxNrLightsPerTile = 31; // TODO: setup a constant for that
-
-                int colorIndex = numLights == 0 ? 0 : (1 + (int)floor(10 * (log2((float)numLights) / log2(maxNrLightsPerTile))));
-                colorIndex = colorIndex < 0 ? 0 : colorIndex;
-                float4 col = colorIndex > 11 ? float4(1.0, 1.0, 1.0, 1.0) : kRadarColors[colorIndex];
-
-                int2 coord = pixCoord - int2(1, 1);
-
-                float4 color = float4(PositivePow(col.xyz, 2.2), 0.3 * col.w);
-                if (numLights > 0)
-                {
-                    if (SampleDebugFontNumber(coord, numLights))		// Shadow
-                        color = float4(0, 0, 0, 1);
-                    if (SampleDebugFontNumber(coord + 1, numLights))	// Text
-                        color = float4(1, 1, 1, 1);
-                }
-                return color;
-            }
-
-            float4 Frag(float4 positionCS : SV_POSITION) : SV_Target
-            {
-                // positionCS is SV_Position
-                PositionInputs posInput = GetPositionInput(positionCS.xy, _ScreenSize.zw, uint2(positionCS.xy) / GetTileSize());
-                float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
-                UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
- 
-                int2 pixelCoord = posInput.unPositionSS.xy;
-                int2 tileCoord = (float2)pixelCoord / TILE_SIZE;
-                int2 mouseTileCoord = _MousePixelCoord / TILE_SIZE;
-                int2 offsetInTile = pixelCoord - tileCoord * TILE_SIZE;
-
-                int n = 0;
-                for (int category = 0; category < LIGHTCATEGORY_COUNT; category++)
-                {
-                    uint mask = 1u << category;
-                    if (mask & _ViewTilesFlags)
-                    {
-                        uint start;
-                        uint count;
-                        GetCountAndStart(posInput, category, start, count);
-                        n += count;
-                    }
-                }
-                
-                float4 result = float4(0.0, 0.0, 0.0, 0.0);
-
-				// Tile overlap counter
-                if (n > 0)
-                {
-                    result = OverlayHeatMap(int2(posInput.unPositionSS.xy) & (TILE_SIZE - 1), n);
-                }
-
-				// Highlight selected tile
-                if (all(mouseTileCoord == tileCoord))
-                {
-                    bool border = any(offsetInTile == 0 || offsetInTile == TILE_SIZE - 1);
-                    float4 result2 = float4(1.0, 1.0, 1.0, border ? 1.0 : 0.5);
-                    result = AlphaBlend(result, result2);
-                }
-
-                // Print light lists for selected tile at the bottom of the screen
-                int maxLights = 32;
-                if (tileCoord.y < LIGHTCATEGORY_COUNT && tileCoord.x < maxLights + 3)
-                {
-                    PositionInputs mousePosInput = GetPositionInput(_MousePixelCoord, _ScreenSize.zw, uint2(0,0));
-                    float depthMouse = LOAD_TEXTURE2D(_MainDepthTexture, mousePosInput.unPositionSS).x;
-                    UpdatePositionInput(depthMouse, _InvViewProjMatrix, _ViewProjMatrix, mousePosInput);
-
-                    uint category = (LIGHTCATEGORY_COUNT - 1) - tileCoord.y;
-                    uint start;
-                    uint count;
-                    GetCountAndStart(mousePosInput, category, start, count);
-
-                    float4 result2 = float4(.1,.1,.1,.9);
-                    int2 fontCoord = int2(pixelCoord.x, offsetInTile.y);
-                    int lightListIndex = tileCoord.x - 2;
-
-                    int n = -1;
-                    if(tileCoord.x == 0)
-                    {
-                        n = (int)count;
-                    }
-                    else if(lightListIndex >= 0 && lightListIndex < (int)count)
-                    {
-                        n = FetchIndex(start, lightListIndex);
-                    }
-
-                    if (n >= 0)
-                    {
-                        if (SampleDebugFontNumber(offsetInTile, n))
-                            result2 = float4(0.0, 0.0, 0.0, 1.0);
-                        if (SampleDebugFontNumber(offsetInTile + 1, n))
-                            result2 = float4(1.0, 1.0, 1.0, 1.0);
-                    }
-
-                    result = AlphaBlend(result, result2);
-                }
-
-                return result;
-            }
-
-            ENDHLSL
-        }
-    }
-    Fallback Off
-}
+Shader "Hidden/HDRenderPipeline/DebugViewTiles"
+{
+    SubShader
+    {
+
+        Pass
+        {
+            ZWrite Off
+            Blend SrcAlpha OneMinusSrcAlpha
+
+            HLSLPROGRAM
+            #pragma target 4.5
+            #pragma only_renderers d3d11 ps4 metal // TEMP: until we go further in dev
+
+            #pragma vertex Vert
+            #pragma fragment Frag
+
+            #define LIGHTLOOP_TILE_PASS           
+            #define LIGHTLOOP_TILE_ALL
+
+            #pragma multi_compile USE_FPTL_LIGHTLIST USE_CLUSTERED_LIGHTLIST
+            #pragma multi_compile SHOW_LIGHT_CATEGORIES SHOW_FEATURE_VARIANTS
+
+            //-------------------------------------------------------------------------------------
+            // Include
+            //-------------------------------------------------------------------------------------
+
+            #include "../../../ShaderLibrary/Common.hlsl"
+
+            // Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
+            // deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem), 
+            // the deferred shader will require to use multicompile.
+            #define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
+            #include "../../ShaderConfig.cs.hlsl"
+            #include "../../ShaderVariables.hlsl"
+            #include "../../Lighting/Lighting.hlsl" // This include Material.hlsl
+
+            //-------------------------------------------------------------------------------------
+            // variable declaration
+            //-------------------------------------------------------------------------------------
+
+            uint _ViewTilesFlags;
+            uint _NumTiles;
+            float2 _MousePixelCoord;
+            
+            StructuredBuffer<uint> g_TileList;
+            Buffer<uint> g_DispatchIndirectBuffer;
+
+            // Vertex-to-fragment interface shared by both Vert() variants below.
+            struct VSOut
+            {
+                // Clip-space position produced by the vertex shader.
+                float4 Pos : SV_POSITION;
+                // Feature-variant index resolved in the SHOW_FEATURE_VARIANTS vertex shader;
+                // the mesh-based vertex shader always writes 0.
+                int Variant : TEXCOORD0;
+            };
+
+#if SHOW_FEATURE_VARIANTS
+            // Procedural vertex shader for the feature-variant view. No vertex buffer is read:
+            // every run of 6 consecutive vertex IDs forms one tile-sized quad (two triangles).
+            // The quad index is turned into a (variant, index-in-variant) pair by walking the
+            // per-variant counts stored at g_DispatchIndirectBuffer[variant * 3 + 0]; the tile
+            // itself is then looked up in g_TileList, which holds _NumTiles slots per variant.
+            //
+            // vertexID : SV_VertexID of the procedural draw.
+            // Returns the clip-space corner of the tile quad and the variant the tile belongs to.
+            VSOut Vert(uint vertexID : SV_VertexID)
+            {
+                uint quadIndex = vertexID / 6;
+                uint quadVertex = vertexID - quadIndex * 6;
+                // Each nibble of 0x312210 is the quad corner used by one of the 6 triangle vertices.
+                quadVertex = (0x312210 >> (quadVertex<<2)) & 3; //remap [0,5]->[0,3]
+
+                uint2 tileSize = GetTileSize();
+
+                // HLSL does not short-circuit '&&', so the bound has to be tested before the buffer
+                // is indexed; otherwise element [NUM_FEATURE_VARIANTS * 3] is read out of range.
+                uint variant = 0;
+                while (variant < NUM_FEATURE_VARIANTS)
+                {
+                    uint variantTileCount = g_DispatchIndirectBuffer[variant * 3 + 0];
+                    if (quadIndex < variantTileCount)
+                        break;
+
+                    quadIndex -= variantTileCount;
+                    variant++;
+                }
+
+                VSOut Out;
+                Out.Variant = variant;
+
+                if (variant < NUM_FEATURE_VARIANTS)
+                {
+                    // Tile coordinates are packed as x in the low 16 bits and y in the high 16 bits.
+                    uint tileIndex = g_TileList[variant * _NumTiles + quadIndex];
+                    uint2 tileCoord = uint2(tileIndex & 0xFFFF, tileIndex >> 16);
+                    uint2 pixelCoord = (tileCoord + uint2((quadVertex+1) & 1, (quadVertex >> 1) & 1)) * tileSize;
+
+                    // Pixel coordinates -> [-1, 1] clip space, with Y flipped.
+                    float2 clipCoord = (pixelCoord / _ScreenParams.xy) * 2.0 - 1.0;
+                    clipCoord.y *= -1;
+
+                    Out.Pos = float4(clipCoord, 0, 1.0);
+                }
+                else
+                {
+                    // The quad is not claimed by any variant: all 6 of its vertices land on the
+                    // same point, giving zero-area triangles, instead of reading g_TileList past
+                    // the last variant's list.
+                    Out.Pos = float4(0, 0, 0, 1.0);
+                }
+
+                return Out;
+            }
+#else
+            // Mesh-based vertex shader used when SHOW_FEATURE_VARIANTS is off: transforms the
+            // object-space vertex to clip space and reports variant 0.
+            VSOut Vert(float3 positionOS : POSITION)
+            {
+                float3 positionWS = TransformObjectToWorld(positionOS);
+
+                VSOut result;
+                result.Variant = 0;
+                result.Pos = TransformWorldToHClip(positionWS);
+                return result;
+            }
+#endif
+
+            // Composites c1 over c0: colour is interpolated by c1's alpha, and the two
+            // coverages are combined as a0 + a1 - a0 * a1.
+            float4 AlphaBlend(float4 c0, float4 c1)
+            {
+                float3 blendedRgb = lerp(c0.rgb, c1.rgb, c1.a);
+                float blendedAlpha = c0.a + c1.a - c0.a * c1.a;
+                return float4(blendedRgb, blendedAlpha);
+            }
+
+            // Returns the overlay colour of one pixel inside a tile: a heat-map tint chosen from
+            // n on a logarithmic scale, with the number n itself drawn on top (white text over a
+            // black drop shadow).
+            //
+            // pixCoord : pixel position relative to the tile origin.
+            // n        : value to display. It is unsigned, so callers must filter out negative
+            //            "nothing to show" values before calling.
+            float4 OverlayHeatMap(uint2 pixCoord, uint n)
+            {
+                const float4 kRadarColors[12] =
+                {
+                    float4(0.0, 0.0, 0.0, 0.0),   // black
+                    float4(0.0, 0.0, 0.6, 0.5),   // dark blue
+                    float4(0.0, 0.0, 0.9, 0.5),   // blue
+                    float4(0.0, 0.6, 0.9, 0.5),   // light blue
+                    float4(0.0, 0.9, 0.9, 0.5),   // cyan
+                    float4(0.0, 0.9, 0.6, 0.5),   // blueish green
+                    float4(0.0, 0.9, 0.0, 0.5),   // green
+                    float4(0.6, 0.9, 0.0, 0.5),   // yellowish green
+                    float4(0.9, 0.9, 0.0, 0.5),   // yellow
+                    float4(0.9, 0.6, 0.0, 0.5),   // orange
+                    float4(0.9, 0.0, 0.0, 0.5),   // red
+                    float4(1.0, 0.0, 0.0, 0.9)    // strong red
+                };
+
+                float maxNrLightsPerTile = 31; // TODO: setup a constant for that
+
+                // 0 maps to the transparent entry; other values are spread over entries 1..11 on a
+                // log2 scale, and anything beyond the table is drawn opaque white.
+                int colorIndex = n == 0 ? 0 : (1 + (int)floor(10 * (log2((float)n) / log2(maxNrLightsPerTile))));
+                colorIndex = colorIndex < 0 ? 0 : colorIndex;
+                // Both arms of ?: are evaluated in HLSL, so the table index is clamped to keep the
+                // discarded lookup inside the array.
+                float4 col = colorIndex > 11 ? float4(1.0, 1.0, 1.0, 1.0) : kRadarColors[min(colorIndex, 11)];
+
+                int2 coord = pixCoord - int2(1, 1);
+
+                float4 color = float4(PositivePow(col.xyz, 2.2), 0.3 * col.w);
+
+                // The former "if (n >= 0)" guard was always true for an unsigned n, so the number
+                // is drawn unconditionally.
+                if (SampleDebugFontNumber(coord, n))        // Shadow
+                    color = float4(0, 0, 0, 1);
+                if (SampleDebugFontNumber(coord + 1, n))    // Text
+                    color = float4(1, 1, 1, 1);
+
+                return color;
+            }
+
+            // Fragment shader of the tile debug overlay.
+            //
+            // SHOW_LIGHT_CATEGORIES: sums the light counts of every category enabled in
+            // _ViewTilesFlags for the current tile and draws them as a heat map, highlights the
+            // tile under the mouse, and prints that tile's light lists in the tile rows whose
+            // tileCoord.y is below LIGHTCATEGORY_COUNT.
+            // Otherwise (SHOW_FEATURE_VARIANTS): draws the variant index received from Vert.
+            //
+            // positionCS : SV_POSITION of the pixel.
+            // Variant    : feature-variant index written by the vertex shader.
+            // Returns the overlay colour; the pass blends it with SrcAlpha / OneMinusSrcAlpha.
+            float4 Frag(float4 positionCS : SV_POSITION, int Variant : TEXCOORD0) : SV_Target
+            {
+                // positionCS is SV_Position
+                PositionInputs posInput = GetPositionInput(positionCS.xy, _ScreenSize.zw, uint2(positionCS.xy) / GetTileSize());
+                float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
+                UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
+
+                int2 pixelCoord = posInput.unPositionSS.xy;
+                int2 tileCoord = (float2)pixelCoord / TILE_SIZE;
+                int2 mouseTileCoord = _MousePixelCoord / TILE_SIZE;
+                int2 offsetInTile = pixelCoord - tileCoord * TILE_SIZE;
+
+                // Value shown in the tile; a negative value means "draw nothing".
+                int n = 0;
+#ifdef SHOW_LIGHT_CATEGORIES
+                for (int category = 0; category < LIGHTCATEGORY_COUNT; category++)
+                {
+                    uint mask = 1u << category;
+                    if (mask & _ViewTilesFlags)
+                    {
+                        uint start;
+                        uint count;
+                        GetCountAndStart(posInput, category, start, count);
+                        n += count;
+                    }
+                }
+                // Tiles without any light stay untouched.
+                if(n == 0) n = -1;
+#else
+                n = Variant;
+#endif
+
+                float4 result = float4(0.0, 0.0, 0.0, 0.0);
+
+                // Tile overlap counter
+                if (n >= 0)
+                {
+                    result = OverlayHeatMap(int2(posInput.unPositionSS.xy) & (TILE_SIZE - 1), n);
+                }
+
+#ifdef SHOW_LIGHT_CATEGORIES
+                // Highlight selected tile
+                if (all(mouseTileCoord == tileCoord))
+                {
+                    bool border = any(offsetInTile == 0 || offsetInTile == TILE_SIZE - 1);
+                    float4 result2 = float4(1.0, 1.0, 1.0, border ? 1.0 : 0.5);
+                    result = AlphaBlend(result, result2);
+                }
+
+                // Print light lists for selected tile at the bottom of the screen
+                int maxLights = 32;
+                if (tileCoord.y < LIGHTCATEGORY_COUNT && tileCoord.x < maxLights + 3)
+                {
+                    PositionInputs mousePosInput = GetPositionInput(_MousePixelCoord, _ScreenSize.zw, uint2(0,0));
+                    float depthMouse = LOAD_TEXTURE2D(_MainDepthTexture, mousePosInput.unPositionSS).x;
+                    UpdatePositionInput(depthMouse, _InvViewProjMatrix, _ViewProjMatrix, mousePosInput);
+
+                    // One row of tiles per light category, in reverse category order.
+                    uint rowCategory = (LIGHTCATEGORY_COUNT - 1) - tileCoord.y;
+                    uint start;
+                    uint count;
+                    GetCountAndStart(mousePosInput, rowCategory, start, count);
+
+                    float4 result2 = float4(.1,.1,.1,.9);
+                    // Column 0 shows the list length, column 1 stays empty, columns 2.. show the entries.
+                    int lightListIndex = tileCoord.x - 2;
+
+                    int cellValue = -1;
+                    if(tileCoord.x == 0)
+                    {
+                        cellValue = (int)count;
+                    }
+                    else if(lightListIndex >= 0 && lightListIndex < (int)count)
+                    {
+                        cellValue = FetchIndex(start, lightListIndex);
+                    }
+
+                    if (cellValue >= 0)
+                    {
+                        if (SampleDebugFontNumber(offsetInTile, cellValue))
+                            result2 = float4(0.0, 0.0, 0.0, 1.0);
+                        if (SampleDebugFontNumber(offsetInTile + 1, cellValue))
+                            result2 = float4(1.0, 1.0, 1.0, 1.0);
+                    }
+
+                    result = AlphaBlend(result, result2);
+                }
+#endif
+
+                return result;
+            }
+
+            ENDHLSL
+        }
+    }
+    Fallback Off
+}
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TileLightLoopProducer.cs
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TileLightLoopProducer.cs
            [Range(0.0f, 1.0f)]
            public float specularGlobalDimmer = 1.0f;

-            public enum TileDebugByCategory : int { None = 0, Punctual = 1, Area = 2, AreaAndPunctual = 3, Environment = 4, EnvironmentAndPunctual = 5, EnvironmentAndArea = 6, EnvironmentAndAreaAndPunctual };
-            public TileDebugByCategory tileDebugByCategory;
+            // Tile debug view selector. Values 1..7 are bit combinations of
+            // Punctual (1), Area (2) and Environment (4); FeatureVariants (8) is a separate mode.
+            public enum TileDebug : int
+            {
+                None = 0,
+                Punctual = 1,
+                Area = 2,
+                AreaAndPunctual = 3,
+                Environment = 4,
+                EnvironmentAndPunctual = 5,
+                EnvironmentAndArea = 6,
+                EnvironmentAndAreaAndPunctual = 7,
+                FeatureVariants = 8
+            };
+            // Currently selected debug view.
+            public TileDebug tileDebugByCategory;

            public static TileSettings defaultSettings = new TileSettings
            {
                enableComputeFeatureVariants = false,

-                tileDebugByCategory = TileDebugByCategory.None,
+                tileDebugByCategory = TileDebug.None,
                enableClustered = true,
                enableFptlForOpaqueWhenClustered = true,
                enableBigTilePrepass = true,
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
-// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
-// https://github.com/wolfgangfengel/GPU-Pro-7
-
-#pragma kernel TileLightListGen					LIGHTLISTGEN=TileLightListGen
-#pragma kernel TileLightListGen_SrcBigTile		LIGHTLISTGEN=TileLightListGen_SrcBigTile		USE_TWO_PASS_TILED_LIGHTING
-
-#include "../../../../ShaderLibrary/common.hlsl"
-#include "../ShaderBase.hlsl"
-#include "../TilePass.cs.hlsl"
-#include "../LightingConvexHullUtils.hlsl"
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-#include "../SortingComputeUtils.hlsl"
-#endif
-
-#define FINE_PRUNING_ENABLED
-#define PERFORM_SPHERICAL_INTERSECTION_TESTS
-
-
-uniform int g_iNrVisibLights;
-uniform uint2 g_viDimensions;
-uniform float4x4 g_mInvScrProjection;
-uniform float4x4 g_mScrProjection;
-uniform int _EnvLightIndexShift;
-
-
-Texture2D g_depth_tex : register( t0 );
-StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
-StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
-StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
-
-#ifdef USE_TWO_PASS_TILED_LIGHTING
-StructuredBuffer<uint> g_vBigTileLightList : register( t4 );		// don't support Buffer yet in unity
-#endif
-
-#define NR_THREADS			64
-
-// output buffer
-RWStructuredBuffer<uint> g_vLightList : register( u0 );				// don't support RWBuffer yet in unity
-
-
-#define MAX_NR_COARSE_ENTRIES		64
-#define MAX_NR_PRUNED_ENTRIES		24
-
-groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
-groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES];		// temporarily support room for all 64 while in LDS
-
-groupshared uint ldsZMin;
-groupshared uint ldsZMax;
-groupshared uint lightOffs;
-#ifdef FINE_PRUNING_ENABLED
-groupshared uint ldsDoesLightIntersect[2];
-#endif
-groupshared int ldsNrLightsFinal;
-
-groupshared int ldsCategoryListCount[LIGHTCATEGORY_COUNT];		// since LIGHTCATEGORY_COUNT is 3
-
-#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
-groupshared uint lightOffsSph;
-#endif
-
-
-//float GetLinearDepth(float3 vP)
-//{
-//	float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
-//	return v4Pres.z / v4Pres.w;
-//}
-
-float GetLinearDepth(float zDptBufSpace)	// 0 is near 1 is far
-{
-	float3 vP = float3(0.0f,0.0f,zDptBufSpace);
-	float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
-	return v4Pres.z / v4Pres.w;
-}
-
-
-float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
-{
-	float fSx = g_mScrProjection[0].x;
-	float fCx = g_mScrProjection[0].z;
-	float fSy = g_mScrProjection[1].y;
-	float fCy = g_mScrProjection[1].z;
-
-#if USE_LEFTHAND_CAMERASPACE
-	return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
-#else
-	return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
-#endif
-}
-
-float GetOnePixDiagWorldDistAtDepthOne()
-{
-	float fSx = g_mScrProjection[0].x;
-	float fSy = g_mScrProjection[1].y;
-
-	return length( float2(1.0/fSx,1.0/fSy) );
-}
-
-#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
-int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
-#endif
-
-#ifdef FINE_PRUNING_ENABLED
-void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
-#endif
-
-
-[numthreads(NR_THREADS, 1, 1)]
-void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
-{
-	uint2 tileIDX = u3GroupID.xy;
-	uint t=threadID;
-
-	if(t<MAX_NR_COARSE_ENTRIES)
-		prunedList[t]=0;
-	
-	uint iWidth = g_viDimensions.x;
-	uint iHeight = g_viDimensions.y;
-	uint nrTilesX = (iWidth+15)/16;
-	uint nrTilesY = (iHeight+15)/16;
-
-	// build tile scr boundary
-	const uint uFltMax = 0x7f7fffff;  // FLT_MAX as a uint
-	if(t==0)
-	{
-		ldsZMin = uFltMax;
-		ldsZMax = 0;
-		lightOffs = 0;
-	}
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-
-	uint2 viTilLL = 16*tileIDX;
-
-	// establish min and max depth first
-	float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
-
-
-	float4 vLinDepths;
-	{
-		// Fetch depths and calculate min/max
-		[unroll]
-		for(int i = 0; i < 4; i++)
-		{
-			int idx = i * NR_THREADS + t;
-			uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
-			const float fDepth = FetchDepth(g_depth_tex, uCrd);
-			vLinDepths[i] = GetLinearDepth(fDepth);
-			if(fDepth<VIEWPORT_SCALE_Z)		// if not skydome
-			{
-				dpt_mi = min(fDepth, dpt_mi);
-				dpt_ma = max(fDepth, dpt_ma);
-			}
-		}
-
-		InterlockedMax(ldsZMax, asuint(dpt_ma));
-		InterlockedMin(ldsZMin, asuint(dpt_mi));
-
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-		GroupMemoryBarrierWithGroupSync();
-#endif
-	}
-
-
-	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
-	float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
-	vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
-	
-
-	// build coarse list using AABB
-#ifdef USE_TWO_PASS_TILED_LIGHTING
-	int NrBigTilesX = (nrTilesX+3)>>2;
-	const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);		// map the idx to 64x64 tiles
-	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
-	for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
-	{
-		int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
-#else
-	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
-	{
-#endif
-		const float3 vMi = g_vBoundsBuffer[l];
-		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
-
-		if( all(vMa>vTileLL) && all(vMi<vTileUR))
-		{
-			unsigned int uInc = 1;
-			unsigned int uIndex;
-			InterlockedAdd(lightOffs, uInc, uIndex);
-			if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;		// add to light list
-		}
-	}
-
-#ifdef FINE_PRUNING_ENABLED	
-	if(t<2) ldsDoesLightIntersect[t] = 0;
-#endif
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-	int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
-
-#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
-	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
-#endif
-
-#ifndef FINE_PRUNING_ENABLED	
-	{
-		if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
-		if(t==0) ldsNrLightsFinal=iNrCoarseLights;
-	}
-#else
-	{
-		// initializes ldsNrLightsFinal with the number of accepted lights.
-		// all accepted entries delivered in prunedList[].
-		FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
-	}
-#endif
-
-	//
-	if(t<LIGHTCATEGORY_COUNT) ldsCategoryListCount[t]=0;
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-	
-	int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
-	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS) 
-	{
-		InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
-	}
-
-
-	// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
-	//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
-#endif
-
-	// write lights to global buffers
-	int localOffs=0;
-	int offs = tileIDX.y*nrTilesX + tileIDX.x;
-
-	// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
-	// to make it work correctly
-	int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
-
-	for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
-	{
-		int nrLightsFinal = ldsCategoryListCount[category];
-		int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
-
-		const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
-		for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
-		{
-			// We remap the prunedList index to the original LightData / EnvLightData indices
-			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
-			uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
-
-			g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
-		}
-
-		localOffs += nrLightsFinal;
-		offs += (nrTilesX*nrTilesY);
-	}
-}
-
-
-
-#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
-int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
-{
-	if(threadID==0) lightOffsSph = 0;
-
-	// make a copy of coarseList in prunedList.
-	int l;
-	for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
-		prunedList[l]=coarseList[l];
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-#if USE_LEFTHAND_CAMERASPACE
-	float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
-#else
-	float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
-#endif
-
-	float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
-	float halfTileSizeAtZDistOne = 8*onePixDiagDist;		// scale by half a tile
-	
-	for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
-	{
-		SFiniteLightBound lightData = g_data[prunedList[l]];
-	
-		if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius) )
-		{
-			unsigned int uIndex;
-			InterlockedAdd(lightOffsSph, 1, uIndex);
-			coarseList[uIndex]=prunedList[l];		// read from the original copy of coarseList which is backed up in prunedList
-		}
-	}
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-	return lightOffsSph;
-}
-#endif
-
-
-#ifdef FINE_PRUNING_ENABLED
-// initializes ldsNrLightsFinal with the number of accepted lights.
-// all accepted entries delivered in prunedList[].
-void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
-{
-	uint t = threadID;
-	uint iWidth = g_viDimensions.x;
-	uint iHeight = g_viDimensions.y;
-
-	uint uLightsFlags[2] = {0,0};
-	int l=0;
-	// need this outer loop even on xb1 and ps4 since direct lights and
-	// reflection lights are kept in separate regions.
-	while(l<iNrCoarseLights)
-	{
-		// fetch light
-		int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-		uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-
-		// spot
-		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_CONE)
-		{
-			LightVolumeData lightData = _LightVolumeData[idxCoarse];
-			// TODO: Change by SebL
-			const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
-				
-			// serially check 4 pixels
-			uint uVal = 0;
-			for(int i=0; i<4; i++)
-			{
-				int idx = t + i*NR_THREADS;
-	
-				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
-	
-				// check pixel
-				float3 fromLight = vVPos-lightData.lightPos.xyz;
-				float distSq = dot(fromLight,fromLight);
-				const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz);		// spotDir = lightData.lightAxisZ.xyz
-
-				float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
-
-				float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
-				if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
-			}
-
-			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
-			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-		}
-
-		// sphere
-		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_SPHERE)
-		{
-			LightVolumeData lightData = _LightVolumeData[idxCoarse];
-
-			// serially check 4 pixels
-			uint uVal = 0;
-			for(int i=0; i<4; i++)
-			{
-				int idx = t + i*NR_THREADS;
-	
-				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
-	
-				// check pixel
-				float3 vLp = lightData.lightPos.xyz;
-				float3 toLight = vLp - vVPos; 
-				float distSq = dot(toLight,toLight);
-			
-				if(lightData.radiusSq>distSq) uVal = 1;
-			}
-
-			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
-			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-		}
-
-		// Box
-		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_BOX)
-		{
-			LightVolumeData lightData = _LightVolumeData[idxCoarse];
-
-			// serially check 4 pixels
-			uint uVal = 0;
-			for(int i=0; i<4; i++)
-			{
-				int idx = t + i*NR_THREADS;
-	
-				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
-
-				// check pixel
-				float3 toLight  = lightData.lightPos.xyz - vVPos;
-
-				float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
-				dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange;		// not as efficient as it could be
-				if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1;						// but allows us to not write out OuterDists
-			}
-
-			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
-			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-		}
-
-		// in case we have some corrupt data make sure we terminate
-		if(uLightVolume >=LIGHTVOLUMETYPE_COUNT) ++l;
-	}
-
-	InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
-	InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
-	if(t==0) ldsNrLightsFinal = 0;
-
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-	GroupMemoryBarrierWithGroupSync();
-#endif
-
-	if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
-	{
-		unsigned int uInc = 1;
-		unsigned int uIndex;
-		InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
-		if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t];		// we allow up to 64 pruned lights while stored in LDS.
-	}
-}
-#endif
+// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
+// https://github.com/wolfgangfengel/GPU-Pro-7
+
+#pragma kernel TileLightListGen                             LIGHTLISTGEN=TileLightListGen
+#pragma kernel TileLightListGen_SrcBigTile                  LIGHTLISTGEN=TileLightListGen_SrcBigTile                    USE_TWO_PASS_TILED_LIGHTING
+#pragma kernel TileLightListGen_FeatureFlags                LIGHTLISTGEN=TileLightListGen_FeatureFlags                  USE_FEATURE_FLAGS
+#pragma kernel TileLightListGen_SrcBigTile_FeatureFlags     LIGHTLISTGEN=TileLightListGen_SrcBigTile_FeatureFlags       USE_TWO_PASS_TILED_LIGHTING		USE_FEATURE_FLAGS
+
+#include "../../../../ShaderLibrary/common.hlsl"
+#include "../ShaderBase.hlsl"
+#include "../TilePass.cs.hlsl"
+#include "../LightingConvexHullUtils.hlsl"
+#include "../FeatureFlags.hlsl"
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+#include "../SortingComputeUtils.hlsl"
+#endif
+
+#define FINE_PRUNING_ENABLED
+#define PERFORM_SPHERICAL_INTERSECTION_TESTS
+
+// ---- constants set per dispatch ----
+uniform int g_iNrVisibLights;			// number of lights to cull; also the offset from a light's min bound to its max bound in g_vBoundsBuffer
+uniform uint2 g_viDimensions;			// width/height in pixels, used to size the tile grid and clamp pixel coordinates
+uniform float4x4 g_mInvScrProjection;	// used by GetLinearDepth to turn a depth-buffer value into a linear depth
+uniform float4x4 g_mScrProjection;		// GetViewPosFromLinDepth reads its scale/offset terms
+uniform int _EnvLightIndexShift;		// subtracted from the indices of the third light category when the lists are written out
+
+// ---- inputs ----
+Texture2D g_depth_tex : register( t0 );
+StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );				// per-light AABB: min corners first, max corners g_iNrVisibLights entries later
+StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);		// per-light volume data: read for fine pruning, lightCategory and featureFlags
+StructuredBuffer<SFiniteLightBound> g_data : register( t3 );			// per-light bounds; center/radius feed the sphere-vs-tile test
+
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+StructuredBuffer<uint> g_vBigTileLightList : register( t4 );		// don't support Buffer yet in unity
+#endif
+// threads per group; every thread covers 4 pixels of a 16x16 tile
+#define NR_THREADS			64
+
+// output buffer: 16 dwords per tile and light category, each dword packing two 16-bit entries (count first, then light indices)
+RWStructuredBuffer<uint> g_vLightList : register( u0 );				// don't support RWBuffer yet in unity
+
+// at most 64 lights survive coarse culling per tile; at most 24 per category are written to g_vLightList
+#define MAX_NR_COARSE_ENTRIES		64
+#define MAX_NR_PRUNED_ENTRIES		24
+
+groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];		// lights whose AABB overlaps the tile
+groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES];		// temporarily support room for all 64 while in LDS
+// tile depth range, stored as float bit patterns so it can be reduced with InterlockedMin/InterlockedMax
+groupshared uint ldsZMin;
+groupshared uint ldsZMax;
+groupshared uint lightOffs;				// number of lights that passed the AABB test (may exceed MAX_NR_COARSE_ENTRIES; clamped by the kernel)
+#ifdef FINE_PRUNING_ENABLED
+groupshared uint ldsDoesLightIntersect[2];	// 64-bit mask: bit l is set when coarse light l covers at least one pixel of the tile
+#endif
+groupshared int ldsNrLightsFinal;		// number of lights delivered in prunedList
+
+groupshared int ldsCategoryListCount[LIGHTCATEGORY_COUNT];		// since LIGHTCATEGORY_COUNT is 3
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+groupshared uint lightOffsSph;			// number of lights that passed the sphere-vs-tile test
+#endif
+
+// ---- feature-variant classification (only in the *_FeatureFlags kernels) ----
+#ifdef USE_FEATURE_FLAGS
+groupshared uint ldsFeatureFlags;			// OR of featureFlags over all lights kept for this tile
+RWBuffer<uint> g_DispatchIndirectBuffer;	// element variant*3 is incremented once per tile classified as that variant
+RWStructuredBuffer<uint> g_TileList;		// per variant, nrTiles slots holding (tileY << 16) + tileX
+#endif
+
+
+//float GetLinearDepth(float3 vP)
+//{
+//	float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
+//	return v4Pres.z / v4Pres.w;
+//}
+
+// Converts a depth-buffer value into a linear depth: the point (0, 0, zDptBufSpace, 1)
+// is transformed by g_mInvScrProjection and the resulting z is divided by w.
+float GetLinearDepth(float zDptBufSpace)	// 0 is near 1 is far
+{
+	const float4 unprojected = mul(g_mInvScrProjection, float4(float3(0.0f, 0.0f, zDptBufSpace), 1.0));
+	return unprojected.z / unprojected.w;
+}
+
+
+// Reconstructs a view-space position from a screen position and a linear depth.
+// Scale and offset terms come from g_mScrProjection; the sign convention is
+// selected at compile time by USE_LEFTHAND_CAMERASPACE.
+float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
+{
+	const float scaleX  = g_mScrProjection[0].x;
+	const float offsetX = g_mScrProjection[0].z;
+	const float scaleY  = g_mScrProjection[1].y;
+	const float offsetY = g_mScrProjection[1].z;
+
+	// direction through the pixel at unit depth
+#if USE_LEFTHAND_CAMERASPACE
+	const float3 unitDepthPos = float3( ((v2ScrPos.x-offsetX)/scaleX), ((v2ScrPos.y-offsetY)/scaleY), 1.0 );
+#else
+	const float3 unitDepthPos = float3( -((v2ScrPos.x+offsetX)/scaleX), -((v2ScrPos.y+offsetY)/scaleY), 1.0 );
+#endif
+
+	return fLinDepth*unitDepthPos;
+}
+
+// Length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x/y scale terms of g_mScrProjection:
+// the diagonal extent of one pixel at depth one.
+float GetOnePixDiagWorldDistAtDepthOne()
+{
+	const float2 onePixelExtent = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
+	return length( onePixelExtent );
+}
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
+#endif
+
+#ifdef FINE_PRUNING_ENABLED
+void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
+#endif
+
+
+[numthreads(NR_THREADS, 1, 1)]	// one thread group per 16x16 pixel tile; each of the 64 threads handles 4 pixels
+void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
+{
+	uint2 tileIDX = u3GroupID.xy;
+	uint t=threadID;
+
+	if(t<MAX_NR_COARSE_ENTRIES)
+		prunedList[t]=0;
+	// size in pixels and the resulting number of 16x16 tiles (rounded up)
+	uint iWidth = g_viDimensions.x;
+	uint iHeight = g_viDimensions.y;
+	uint nrTilesX = (iWidth+15)/16;
+	uint nrTilesY = (iHeight+15)/16;
+	uint nrTiles = nrTilesX * nrTilesY;	// Precompute?
+
+	// build tile scr boundary
+	const uint uFltMax = 0x7f7fffff;  // FLT_MAX as a uint
+	if(t==0)
+	{
+		ldsZMin = uFltMax;
+		ldsZMax = 0;
+		lightOffs = 0;
+	}
+	// NOTE(review): barriers are compiled out on XBOXONE/PSSL - presumably the 64-thread group runs as one wavefront there; confirm
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+
+	uint2 viTilLL = 16*tileIDX;	// pixel coordinate of the tile's origin corner
+
+	// establish min and max depth first
+	float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
+
+	// linear depth of this thread's 4 pixels, reused later by FinePruneLights
+	float4 vLinDepths;
+	{
+		// Fetch depths and calculate min/max
+		[unroll]
+		for(int i = 0; i < 4; i++)
+		{
+			int idx = i * NR_THREADS + t;	// pixel index inside the tile: x = idx&0xf, y = idx>>4
+			uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
+			const float fDepth = FetchDepth(g_depth_tex, uCrd);
+			vLinDepths[i] = GetLinearDepth(fDepth);
+			if(fDepth<VIEWPORT_SCALE_Z)		// if not skydome
+			{
+				dpt_mi = min(fDepth, dpt_mi);
+				dpt_ma = max(fDepth, dpt_ma);
+			}
+		}
+		// reduce across the group; the depths are compared through their bit patterns
+		InterlockedMax(ldsZMax, asuint(dpt_ma));
+		InterlockedMin(ldsZMin, asuint(dpt_mi));
+
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+		GroupMemoryBarrierWithGroupSync();
+#endif
+	}
+
+	// tile bounds: xy normalized by width/height, z taken from the depth-buffer min/max
+	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
+	float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
+	vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
+	
+
+	// build coarse list using AABB
+#ifdef USE_TWO_PASS_TILED_LIGHTING
+	int NrBigTilesX = (nrTilesX+3)>>2;
+	const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);		// map the idx to 64x64 tiles
+	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];	// entry 0 of a big tile's list is its light count
+	for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
+	{
+		int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
+#else
+	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
+	{
+#endif
+		const float3 vMi = g_vBoundsBuffer[l];
+		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
+		// keep the light when its AABB overlaps the tile bounds on all three axes
+		if( all(vMa>vTileLL) && all(vMi<vTileUR))
+		{
+			unsigned int uInc = 1;
+			unsigned int uIndex;
+			InterlockedAdd(lightOffs, uInc, uIndex);
+			if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;		// add to light list
+		}
+	}
+
+#ifdef FINE_PRUNING_ENABLED	
+	if(t<2) ldsDoesLightIntersect[t] = 0;
+#endif
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+	// lightOffs counts every accepted light, coarseList holds at most MAX_NR_COARSE_ENTRIES of them
+	int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+	iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
+#endif
+
+#ifndef FINE_PRUNING_ENABLED	
+	{
+		if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
+		if(t==0) ldsNrLightsFinal=iNrCoarseLights;
+	}
+#else
+	{
+		// initializes ldsNrLightsFinal with the number of accepted lights.
+		// all accepted entries delivered in prunedList[].
+		FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
+	}
+#endif
+
+	// reset the per-category counters (and the feature mask) before counting the surviving lights
+	if(t<LIGHTCATEGORY_COUNT) ldsCategoryListCount[t]=0;
+#ifdef USE_FEATURE_FLAGS
+	if(t==0) ldsFeatureFlags=0;
+#endif
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	
+	int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
+	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)	// count lights per category and accumulate their feature flags
+	{
+		InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
+#ifdef USE_FEATURE_FLAGS
+		InterlockedOr(ldsFeatureFlags, _LightVolumeData[prunedList[i]].featureFlags);
+#endif
+	}
+
+	// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
+	//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
+#endif
+	// classify the tile: append its packed coordinate to the list of the variant selected by the feature mask
+#ifdef USE_FEATURE_FLAGS
+	if(t == 0)
+	{
+		uint variant = FeatureFlagsToTileVariant(ldsFeatureFlags);
+		uint offset;
+		InterlockedAdd(g_DispatchIndirectBuffer[variant * 3 + 0], 1, offset);
+		g_TileList[variant*nrTiles + offset] = (tileIDX.y << 16) + tileIDX.x;
+	}
+#endif
+
+	// write lights to global buffers
+	int localOffs=0;
+	int offs = tileIDX.y*nrTilesX + tileIDX.x;
+
+	// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
+	// to make it work correctly
+	int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
+	// one pass per category: prunedList is consumed in consecutive runs of ldsCategoryListCount[category] entries
+	for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
+	{
+		int nrLightsFinal = ldsCategoryListCount[category];
+		int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
+		// the count plus the clamped indices are stored as 16-bit values, two per dword
+		const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
+		for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
+		{
+			// We remap the prunedList index to the original LightData / EnvLightData indices
+			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
+			uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
+			// each tile owns 16 dwords per category; the low half of dword 0 is the light count
+			g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
+		}
+		// advance past all lights of this category (not just the clamped ones) and to the next category's region
+		localOffs += nrLightsFinal;
+		offs += (nrTilesX*nrTilesY);
+	}
+}
+
+
+
+#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
+// Compacts coarseList in place, keeping only the lights whose bounding sphere
+// (g_data[].center / g_data[].radius) passes DoesSphereOverlapTile for this tile.
+//   threadID         - index of the calling thread within the group
+//   iNrCoarseLights  - number of valid entries in coarseList on entry
+//   screenCoordinate - screen position the tile direction is built from
+// Returns the number of entries left in coarseList. prunedList is used as scratch storage.
+int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
+{
+	// a single thread resets the survivor counter
+	if(threadID==0)
+	{
+		lightOffsSph = 0;
+	}
+
+	// back up coarseList in prunedList so coarseList can be rewritten below
+	for(int src=threadID; src<iNrCoarseLights; src+=NR_THREADS)
+	{
+		prunedList[src] = coarseList[src];
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	// view position of the screen coordinate at unit distance (sign follows the camera-space handedness)
+#if USE_LEFTHAND_CAMERASPACE
+	const float unitDepth = 1.0;
+#else
+	const float unitDepth = -1.0;
+#endif
+	const float3 tileDir = GetViewPosFromLinDepth( screenCoordinate, unitDepth );
+
+	// half a tile is 8 pixels; scale the one-pixel diagonal accordingly
+	const float halfTileExtent = 8*GetOnePixDiagWorldDistAtDepthOne();
+
+	for(int cand=threadID; cand<iNrCoarseLights; cand+=NR_THREADS)
+	{
+		// read from the backup copy, coarseList itself is being overwritten
+		const uint lightIdx = prunedList[cand];
+		const SFiniteLightBound bound = g_data[lightIdx];
+
+		if( DoesSphereOverlapTile(tileDir, halfTileExtent, bound.center.xyz, bound.radius) )
+		{
+			unsigned int slot;
+			InterlockedAdd(lightOffsSph, 1, slot);
+			coarseList[slot] = lightIdx;
+		}
+	}
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+
+	return lightOffsSph;
+}
+#endif
+
+
+#ifdef FINE_PRUNING_ENABLED
+// Per-pixel pruning of the coarse light list. Initializes ldsNrLightsFinal with the number of accepted lights;
+// all accepted entries are delivered in prunedList[] (in no particular order, since slots are claimed atomically).
+void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
+{
+	uint t = threadID;
+	uint iWidth = g_viDimensions.x;
+	uint iHeight = g_viDimensions.y;
+	// this thread's result mask: bit l is set when coarse light l covers at least one of its 4 pixels
+	uint uLightsFlags[2] = {0,0};
+	int l=0;
+	// need this outer loop even on xb1 and ps4 since direct lights and
+	// reflection lights are kept in separate regions.
+	while(l<iNrCoarseLights)
+	{
+		// fetch light
+		int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+		uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+		// each inner loop below consumes a run of consecutive lights of one volume type
+		// spot
+		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_CONE)
+		{
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];
+			// TODO: Change by SebL
+			const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
+				
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;	// pixel index inside the 16x16 tile: x = idx&0xf, y = idx>>4
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+	
+				// check pixel
+				float3 fromLight = vVPos-lightData.lightPos.xyz;
+				float distSq = dot(fromLight,fromLight);
+				const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz);		// spotDir = lightData.lightAxisZ.xyz
+
+				float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
+				// accept when the pixel is within the light radius and its projection on the axis exceeds the lateral distance times cotan
+				float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
+				if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
+			}
+			// record the result for light l, then fetch the next light
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+		}
+
+		// sphere
+		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_SPHERE)
+		{
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];
+
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+	
+				// check pixel
+				float3 vLp = lightData.lightPos.xyz;
+				float3 toLight = vLp - vVPos; 
+				float distSq = dot(toLight,toLight);
+				// accept when the pixel lies inside the light's radius
+				if(lightData.radiusSq>distSq) uVal = 1;
+			}
+
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+		}
+
+		// Box
+		while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_BOX)
+		{
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];
+
+			// serially check 4 pixels
+			uint uVal = 0;
+			for(int i=0; i<4; i++)
+			{
+				int idx = t + i*NR_THREADS;
+	
+				uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+
+				// check pixel
+				float3 toLight  = lightData.lightPos.xyz - vVPos;
+				// distance along each box axis, rescaled so that values below 1 are inside the box range
+				float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
+				dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange;		// not as efficient as it could be
+				if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1;						// but allows us to not write out OuterDists
+			}
+
+			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
+			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
+			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+		}
+
+		// in case we have some corrupt data make sure we terminate
+		if(uLightVolume >=LIGHTVOLUMETYPE_COUNT) ++l;
+	}
+	// merge the per-thread masks into the group-wide mask
+	InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
+	InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
+	if(t==0) ldsNrLightsFinal = 0;
+
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
+	GroupMemoryBarrierWithGroupSync();
+#endif
+	// thread t owns coarse light t: append it to prunedList when any pixel of the tile was covered
+	if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
+	{
+		unsigned int uInc = 1;
+		unsigned int uIndex;
+		InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
+		if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t];		// we allow up to 64 pruned lights while stored in LDS.
+	}
+}
+#endif
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/shadeopaque.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/shadeopaque.compute
-#pragma kernel ShadeOpaque_Fptl				            SHADE_OPAQUE_ENTRY=ShadeOpaque_Fptl				        USE_FPTL_LIGHTLIST
-#pragma kernel ShadeOpaque_Fptl_DebugLighting			SHADE_OPAQUE_ENTRY=ShadeOpaque_Fptl_DebugLighting		USE_FPTL_LIGHTLIST        LIGHTING_DEBUG
-#pragma kernel ShadeOpaque_Clustered		            SHADE_OPAQUE_ENTRY=ShadeOpaque_Clustered		        USE_CLUSTERED_LIGHTLIST
-#pragma kernel ShadeOpaque_Clustered_DebugLighting		SHADE_OPAQUE_ENTRY=ShadeOpaque_Clustered_DebugLighting	USE_CLUSTERED_LIGHTLIST   LIGHTING_DEBUG
-
-#pragma #pragma enable_d3d11_debug_symbols
-
-// Split lighting is required for the SSS pass.
-// Not currently possible since we need to access the stencil buffer from the compute shader.
-// #pragma multi_compile _ OUTPUT_SPLIT_LIGHTING
-
-#define LIGHTLOOP_TILE_PASS 1
-#define LIGHTLOOP_TILE_DIRECT 1
-#define LIGHTLOOP_TILE_INDIRECT 1
-#define LIGHTLOOP_TILE_ALL 1
-
-//-------------------------------------------------------------------------------------
-// Include
-//-------------------------------------------------------------------------------------
-
-#include "../../../../ShaderLibrary/Common.hlsl"
-#include "../../../Debug/HDRenderPipelineDebug.cs.hlsl"
-#include "../../../Debug/DebugLighting.hlsl"
-
-// Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
-// deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem), 
-// the deferred shader will require to use multicompile.
-#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
-#include "../../../ShaderConfig.cs.hlsl"
-#include "../../../ShaderVariables.hlsl"
-#include "../../../Lighting/Lighting.hlsl" // This include Material.hlsl
-
-//-------------------------------------------------------------------------------------
-// variable declaration
-//-------------------------------------------------------------------------------------
-
-DECLARE_GBUFFER_TEXTURE(_GBufferTexture);
-
-#ifdef OUTPUT_SPLIT_LIGHTING
-    RWTexture2D<float4> specularLightingUAV;
-    RWTexture2D<float3> diffuseLightingUAV;
-#else
-    RWTexture2D<float4> combinedLightingUAV;
-#endif
-
-[numthreads(16, 16, 1)]
-void SHADE_OPAQUE_ENTRY(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupId : SV_GroupID)
-{
-    uint2 pixelCoord = dispatchThreadId;
-    //PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, uint2(pixelCoord.xy) / GetTileSize());
-    PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, groupId);
-    float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
-    UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
-    float3 V = GetWorldSpaceNormalizeViewDir(posInput.positionWS);
-
-    FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS);
-    BSDFData bsdfData;
-    float3 bakeDiffuseLighting;
-    DECODE_FROM_GBUFFER(gbuffer, bsdfData, bakeDiffuseLighting);
-
-    PreLightData preLightData = GetPreLightData(V, posInput, bsdfData);
-
-    float3 diffuseLighting;
-    float3 specularLighting;
-    LightLoop(V, posInput, preLightData, bsdfData, bakeDiffuseLighting, diffuseLighting, specularLighting);
-
-#ifdef OUTPUT_SPLIT_LIGHTING
-    specularLightingUAV[pixelCoord] = float4(specularLighting, 1.0);
-    diffuseLightingUAV[pixelCoord]  = diffuseLighting;
-#else
-    combinedLightingUAV[pixelCoord] = float4(diffuseLighting + specularLighting, 1.0);
-#endif
-}
-
+#pragma kernel ShadeOpaque_Direct_Fptl                          SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Fptl                              USE_FPTL_LIGHTLIST
+#pragma kernel ShadeOpaque_Direct_Fptl_DebugLighting            SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Fptl_DebugLighting                USE_FPTL_LIGHTLIST        LIGHTING_DEBUG
+#pragma kernel ShadeOpaque_Direct_Clustered                     SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Clustered                         USE_CLUSTERED_LIGHTLIST
+#pragma kernel ShadeOpaque_Direct_Clustered_DebugLighting       SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Clustered_DebugLighting           USE_CLUSTERED_LIGHTLIST   LIGHTING_DEBUG
+
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant0               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant0                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=0
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant1               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant1                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=1
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant2               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant2                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=2
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant3               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant3                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=3
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant4               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant4                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=4
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant5               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant5                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=5
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant6               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant6                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=6
+#pragma kernel ShadeOpaque_Indirect_Fptl_Variant7               SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant7                   USE_FPTL_LIGHTLIST          USE_INDIRECT    VARIANT=7
+
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant0          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant0              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=0
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant1          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant1              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=1
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant2          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant2              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=2
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant3          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant3              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=3
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant4          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant4              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=4
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant5          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant5              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=5
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant6          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant6              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=6
+#pragma kernel ShadeOpaque_Indirect_Clustered_Variant7          SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant7              USE_CLUSTERED_LIGHTLIST     USE_INDIRECT    VARIANT=7
+
+#pragma #pragma enable_d3d11_debug_symbols
+
+// Split lighting is required for the SSS pass.
+// Not currently possible since we need to access the stencil buffer from the compute shader.
+// #pragma multi_compile _ OUTPUT_SPLIT_LIGHTING
+
+#define LIGHTLOOP_TILE_PASS 1
+#define LIGHTLOOP_TILE_DIRECT 1
+#define LIGHTLOOP_TILE_INDIRECT 1
+#define LIGHTLOOP_TILE_ALL 1
+
+//-------------------------------------------------------------------------------------
+// Include
+//-------------------------------------------------------------------------------------
+
+#include "../../../../ShaderLibrary/Common.hlsl"
+#include "../../../Debug/HDRenderPipelineDebug.cs.hlsl"
+#include "../../../Debug/DebugLighting.hlsl"
+
+// Note: As a guideline we support only one deferred material (with control of the GBuffer enabled). This means a user who adds a new
+// deferred material must replace the old one here. If in the future we want to support multiple layouts (which causes a lot of consistency problems),
+// the deferred shader will need to use multi_compile.
+#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
+#include "../../../ShaderConfig.cs.hlsl"
+#include "../../../ShaderVariables.hlsl"
+#include "../../../Lighting/Lighting.hlsl" // This include Material.hlsl
+#include "../../../Lighting/TilePass/FeatureFlags.hlsl"
+//-------------------------------------------------------------------------------------
+// variable declaration
+//-------------------------------------------------------------------------------------
+
+DECLARE_GBUFFER_TEXTURE(_GBufferTexture);	// G-buffer inputs read through FETCH_GBUFFER in the kernel
+// Lighting outputs, written once per pixel at the end of the kernel.
+#ifdef OUTPUT_SPLIT_LIGHTING
+    RWTexture2D<float4> specularLightingUAV;	// receives float4(specularLighting, 1.0)
+    RWTexture2D<float3> diffuseLightingUAV;		// receives diffuseLighting
+#else
+    RWTexture2D<float4> combinedLightingUAV;	// receives float4(diffuseLighting + specularLighting, 1.0)
+#endif
+
+#if USE_INDIRECT
+    // Base offset added to the group index when looking up this kernel's tiles in g_TileList.
+    uint g_TileListOffset;
+    // Packed tile coordinates: x in the low 16 bits, y in the high 16 bits.
+    StructuredBuffer<uint> g_TileList;
+// Indirect: one 16x16 thread group per g_TileList entry; VARIANT is supplied by the #pragma kernel line.
+[numthreads(16, 16, 1)]
+void SHADE_OPAQUE_ENTRY(uint2 groupThreadId : SV_GroupThreadID, uint groupId : SV_GroupID)
+{
+    // Unpack the tile this group is responsible for and derive the pixel from the thread's position inside it.
+    uint tileIndex = g_TileList[g_TileListOffset + groupId];
+    uint2 tileCoord = uint2(tileIndex & 0xFFFF, tileIndex >> 16);
+    uint2 pixelCoord = tileCoord * GetTileSize() + groupThreadId;
+
+    PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, tileCoord);
+    // VARIANT is a tile-variant index, so it must go through the variant -> feature-flags mapping
+    // (FeatureFlagsToTileVariant is the opposite direction and would yield a variant index, not a mask).
+    uint featureFlags = TileVariantToFeatureFlags(VARIANT);
+#else
+// Direct: one thread per pixel, the thread group id doubles as the tile coordinate; all features enabled.
+[numthreads(16, 16, 1)]
+void SHADE_OPAQUE_ENTRY(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupId : SV_GroupID)
+{
+    uint2 pixelCoord = dispatchThreadId;
+    PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, groupId);
+    uint featureFlags = 0xFFFFFFFF;
+#endif
+
+    // Complete the position inputs from the depth buffer and derive the view vector.
+    float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
+    UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
+    float3 V = GetWorldSpaceNormalizeViewDir(posInput.positionWS);
+
+    // Decode the surface data stored in the G-buffer.
+    FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS);
+    BSDFData bsdfData;
+    float3 bakeDiffuseLighting;
+    DECODE_FROM_GBUFFER(gbuffer, bsdfData, bakeDiffuseLighting);
+
+    PreLightData preLightData = GetPreLightData(V, posInput, bsdfData);
+
+    float3 diffuseLighting;
+    float3 specularLighting;
+    LightLoop(V, posInput, preLightData, bsdfData, bakeDiffuseLighting, diffuseLighting, specularLighting);
+
+    // Write the result, either split into specular/diffuse targets or combined into one.
+#ifdef OUTPUT_SPLIT_LIGHTING
+    specularLightingUAV[pixelCoord] = float4(specularLighting, 1.0);
+    diffuseLightingUAV[pixelCoord]  = diffuseLighting;
+#else
+    combinedLightingUAV[pixelCoord] = float4(diffuseLighting + specularLighting, 1.0);
+#endif
+}
+
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs.hlsl
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs.hlsl
 #define HAS_COOKIE_TEXTURE (2)
 #define IS_BOX_PROJECTED (4)
 #define HAS_SHADOW (8)
+#define FEATURE_FLAG_PUNCTUAL_LIGHT (1)
+#define FEATURE_FLAG_AREA_LIGHT (2)
+#define NUM_FEATURE_VARIANTS (8)

 // Generated from UnityEngine.Experimental.Rendering.HDPipeline.TilePass.SFiniteLightBound
 // PackingRules = Exact
 	float3 lightAxisZ;
 	float cotan;
 	float3 boxInnerDist;
-	float unused;
+	uint featureFlags;
 	float3 boxInvRange;
 	float unused2;
 };
 {
 	return value.boxInnerDist;
 }
-float GetUnused(LightVolumeData value)
+uint GetFeatureFlags(LightVolumeData value)
-	return value.unused;
+	return value.featureFlags;
 }
 float3 GetBoxInvRange(LightVolumeData value)
 {
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl
+#ifndef __FEATURE_FLAGS_H__
+#define __FEATURE_FLAGS_H__
+
+#include "TilePass.cs.hlsl"
+
+// Maps a mask of FEATURE_FLAG_* bits to a tile variant index:
+// 1 when FEATURE_FLAG_AREA_LIGHT is set, 0 otherwise.
+uint FeatureFlagsToTileVariant(uint featureFlags)
+{
+	const bool hasAreaLight = (featureFlags & FEATURE_FLAG_AREA_LIGHT) != 0;
+	return hasAreaLight ? 1 : 0;
+}
+
+// Counterpart of FeatureFlagsToTileVariant: returns the mask of FEATURE_FLAG_* bits that a
+// tile variant has to handle.
+//   variant 0     -> every feature except area lights
+//   anything else -> all features (variant 1, plus a conservative fallback for indices
+//                    that have no dedicated mapping)
+uint TileVariantToFeatureFlags(uint variant)
+{
+	if(variant == 0)
+	{
+		return 0xFFFFFFFF & (~FEATURE_FLAG_AREA_LIGHT);
+	}
+
+	// Previously only variants 0 and 1 returned a value; falling through to the full mask
+	// makes every control path return and never disables a feature by accident.
+	return 0xFFFFFFFF;
+}
+
+#endif
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl.meta
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl.meta
+fileFormatVersion: 2
+guid: 0ef495ee49d152b419e9fd62aa24523f
+timeCreated: 1489679489
+licenseType: Pro
+ShaderImporter:
+  defaultTextures: []
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: