Merge in Morten's changes from master

7 years ago · 6ef0b7ab
--- a/Assets/ScriptableRenderPipeline/Core/TextureCache.cs
+++ b/Assets/ScriptableRenderPipeline/Core/TextureCache.cs

            if (!TextureCache.supportsCubemapArrayTextures)
            {
-				if (!m_CubeBlitMaterial) m_CubeBlitMaterial = new Material(Shader.Find("Hidden/CubeToPano")) { hideFlags = HideFlags.HideAndDontSave };
+                if (!m_CubeBlitMaterial) m_CubeBlitMaterial = new Material(Shader.Find("Hidden/CubeToPano")) { hideFlags = HideFlags.HideAndDontSave };

                int panoWidthTop = 4 * width;
                int panoHeightTop = 2 * width;
                m_StagingRTs = new RenderTexture[m_NumPanoMipLevels];
                for (int m = 0; m < m_NumPanoMipLevels; m++)
                {
-					m_StagingRTs[m] = new RenderTexture(Mathf.Max(1, panoWidthTop >> m), Mathf.Max(1, panoHeightTop >> m), 0, RenderTextureFormat.ARGBHalf) { hideFlags = HideFlags.HideAndDontSave };
+                    m_StagingRTs[m] = new RenderTexture(Mathf.Max(1, panoWidthTop >> m), Mathf.Max(1, panoHeightTop >> m), 0, RenderTextureFormat.ARGBHalf) { hideFlags = HideFlags.HideAndDontSave };
                }

                if (m_CubeBlitMaterial)
--- a/Assets/ScriptableRenderPipeline/Fptl/FptlLighting.cs
+++ b/Assets/ScriptableRenderPipeline/Fptl/FptlLighting.cs
            cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
            cmd.DispatchCompute(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, 1, 1, 1);

+            bool isOrthographic = camera.orthographic;
+            cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_isOrthographic", isOrthographic ? 1 : 0);
            cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_iNrVisibLights", numLights);
            cmd.SetComputeMatrixParam(buildPerVoxelLightListShader, "g_mScrProjection", projscr);
            cmd.SetComputeMatrixParam(buildPerVoxelLightListShader, "g_mInvScrProjection", invProjscr);

            var cmd = CommandBufferPool.Get("Build light list" );

+            bool isOrthographic = camera.orthographic;
+
            // generate screen-space AABBs (used for both fptl and clustered).
            if (numLights != 0)
            {
                var projh = temp * proj;
                var invProjh = projh.inverse;

+                cmd.SetComputeIntParam(buildScreenAABBShader, "g_isOrthographic", isOrthographic ? 1 : 0);
                cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", numLights);
                cmd.SetComputeMatrixParam(buildScreenAABBShader, "g_mProjection", projh);
                cmd.SetComputeMatrixParam(buildScreenAABBShader, "g_mInvProjection", invProjh);
            // enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).
            if (enableBigTilePrepass)
            {
+                cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_isOrthographic", isOrthographic ? 1 : 0);
                cmd.SetComputeIntParams(buildPerBigTileLightListShader, "g_viDimensions", new int[2] { w, h });
                cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", numLights);
                cmd.SetComputeMatrixParam(buildPerBigTileLightListShader, "g_mScrProjection", projscr);

            if (usingFptl)        // optimized for opaques only
            {
+                cmd.SetComputeIntParam(buildPerTileLightListShader, "g_isOrthographic", isOrthographic ? 1 : 0);
                cmd.SetComputeIntParams(buildPerTileLightListShader, "g_viDimensions", new int[2] { w, h });
                cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", numLights);
                cmd.SetComputeMatrixParam(buildPerTileLightListShader, "g_mScrProjection", projscr);
        void PushGlobalParams(Camera camera, ScriptableRenderContext loop, Matrix4x4 viewToWorld, Matrix4x4 scrProj, Matrix4x4 incScrProj, int numDirLights)
        {
            var cmd = CommandBufferPool.Get("Push Global Parameters");
-
+                  
+            bool isOrthographic = camera.orthographic;
+            cmd.SetGlobalFloat("g_isOrthographic", (float) (isOrthographic ? 1 : 0));
            cmd.SetGlobalFloat("g_widthRT", (float)camera.pixelWidth);
            cmd.SetGlobalFloat("g_heightRT", (float)camera.pixelHeight);

--- a/Assets/ScriptableRenderPipeline/Fptl/LightingConvexHullUtils.hlsl
+++ b/Assets/ScriptableRenderPipeline/Fptl/LightingConvexHullUtils.hlsl
    return float4(vN, -dot(vN,p0));
 }

-bool DoesSphereOverlapTile(float3 dir, float halfTileSizeAtZDistOne, float3 sphCen, float sphRadiusIn)
+bool DoesSphereOverlapTile(float3 dir, float halfTileSizeAtZDistOne, float3 sphCen_in, float sphRadiusIn, bool isOrthographic)
-    float3 V = dir;     // ray direction down center of tile (does not need to be normalized).
+    float3 V = float3(isOrthographic ? 0.0 : dir.x, isOrthographic ? 0.0 : dir.y, dir.z);     // ray direction down center of tile (does not need to be normalized).
+	float3 sphCen = float3(sphCen_in.x - (isOrthographic ? dir.x : 0.0), sphCen_in.y - (isOrthographic ? dir.y : 0.0), sphCen_in.z); 

 #if 1
    float3 maxZdir = float3(-sphCen.z*sphCen.x, -sphCen.z*sphCen.y, sphCen.x*sphCen.x + sphCen.y*sphCen.y);     // cross(sphCen,cross(Zaxis,sphCen))

    // enlarge sphere so it overlaps the center of the tile assuming it overlaps the tile to begin with.
 #if USE_LEFTHAND_CAMERASPACE
-    float sphRadius = sphRadiusIn + (sphCen.z+offs)*halfTileSizeAtZDistOne;
+	float s = sphCen.z+offs;
-    float sphRadius = sphRadiusIn - (sphCen.z-offs)*halfTileSizeAtZDistOne;
+	float s = -(sphCen.z-offs);
+	float sphRadius = sphRadiusIn + (isOrthographic ? 1.0 : s)*halfTileSizeAtZDistOne;

    float a = dot(V,V);
    float CdotV = dot(sphCen,V);
--- a/Assets/ScriptableRenderPipeline/Fptl/LightingUtils.hlsl
+++ b/Assets/ScriptableRenderPipeline/Fptl/LightingUtils.hlsl
 uniform float4x4 g_mInvScrProjection;


+uniform uint g_isOrthographic;
 uniform uint g_widthRT;
 uniform uint g_heightRT;

-    float fSx = g_mScrProjection[0].x;
-    //float fCx = g_mScrProjection[2].x;
-    float fCx = g_mScrProjection[0].z;
-    float fSy = g_mScrProjection[1].y;
-    //float fCy = g_mScrProjection[2].y;
-    float fCy = g_mScrProjection[1].z;
+	bool isOrthographic = g_isOrthographic!=0;
+	float fSx = g_mScrProjection[0].x;
+	float fSy = g_mScrProjection[1].y;
+	float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
+	float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;
-    return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = true;
-    return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = isOrthographic;
+
+	float s = useLeftHandVersion ? 1 : (-1);
+	float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);
+
+	return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
 }

 float GetLinearZFromSVPosW(float posW)

 float GetLinearDepth(float zDptBufSpace)    // 0 is near 1 is far
 {
-    // todo (simplify): m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
+    // For a perspective projection m22 is zero and m23 is +1/-1 (depending on left/right-handed projection);
+	// however, this function must also work for an orthographic projection, so all four matrix entries are kept.
    float m22 = g_mInvScrProjection[2].z, m23 = g_mInvScrProjection[2].w;
    float m32 = g_mInvScrProjection[3].z, m33 = g_mInvScrProjection[3].w;

--- a/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild-bigtile.compute
+++ b/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild-bigtile.compute
 #define MAX_NR_BIGTILE_LIGHTS				(MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)


+uniform int g_isOrthographic;
 uniform int g_iNrVisibLights;
 uniform uint2 g_viDimensions;
 uniform float4x4 g_mInvScrProjection;

 float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
 {
+	bool isOrthographic = g_isOrthographic!=0;
-	float fCx = g_mScrProjection[0].z;
-	float fCy = g_mScrProjection[1].z;
+	float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
+	float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;
-	return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = true;
-	return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = isOrthographic;
+
+	float s = useLeftHandVersion ? 1 : (-1);
+	float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);
+
+	return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
 }

 float GetOnePixDiagWorldDistAtDepthOne()
 	{
 		SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];

-		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
+		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
 			lightsListLDS[l]=0xffffffff;
 	}


 	int i=iSwizzle + (2*(iSection&0x2));	// offset by 4 at section 2
 	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
-	vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
+
+#if USE_LEFTHAND_CAMERASPACE
+	float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,1.0); 
+#else
+	float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,-1.0); 
+#endif
+
+	vE0 = iSection == 0 ? edgeSectionZero : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
 }

 void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
--- a/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild-clustered.compute
+++ b/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild-clustered.compute
 #define PERFORM_SPHERICAL_INTERSECTION_TESTS
 #define CONV_HULL_TEST_ENABLED

+uniform int g_isOrthographic;
 uniform int g_iNrVisibLights;
 uniform float4x4 g_mInvScrProjection;
 uniform float4x4 g_mScrProjection;

 float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
 {
+	bool isOrthographic = g_isOrthographic!=0;
-	float fCx = g_mScrProjection[0].z;
-	float fCy = g_mScrProjection[1].z;
+	float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
+	float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;
-	return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = true;
-	return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = isOrthographic;
+
+	float s = useLeftHandVersion ? 1 : (-1);
+	float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);
+
+	return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
 }

 float GetOnePixDiagWorldDistAtDepthOne()
 	{
 		SFiniteLightBound lgtDat = g_data[coarseList[l]];

-		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
+		if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius, g_isOrthographic!=0) )
 			coarseList[l]=0xffffffff;
 	}


 	int i=iSwizzle + (2*(iSection&0x2));	// offset by 4 at section 2
 	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
-	vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
+
+#if USE_LEFTHAND_CAMERASPACE
+	float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,1.0); 
+#else
+	float3 edgeSectionZero = g_isOrthographic==0 ? vP0 : float3(0.0,0.0,-1.0); 
+#endif
+
+	vE0 = iSection==0 ? edgeSectionZero : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
 }

 int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
--- a/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild.compute
+++ b/Assets/ScriptableRenderPipeline/Fptl/lightlistbuild.compute
 #include "SortingComputeUtils.hlsl"
 #endif

-#define NARROW_MOBILE_ENABLED
-
-#ifdef NARROW_MOBILE_ENABLED
-	#define EMUL_LOCAL_ATOMICS
-#endif
-
+uniform int g_isOrthographic;
 uniform int g_iNrVisibLights;
 uniform uint2 g_viDimensions;
 uniform float4x4 g_mInvScrProjection;
 StructuredBuffer<uint> g_vBigTileLightList : register( t4 );		// don't support Buffer yet in unity
 #endif

-#ifdef NARROW_MOBILE_ENABLED
-	#define NR_THREADS			32
-#else
-	#define NR_THREADS			64
-#endif
-
-#include "LocalAtomics.hlsl"
+#define NR_THREADS			64

 // output buffer
 RWStructuredBuffer<uint> g_vLightList : register( u0 );				// don't support RWBuffer yet in unity

 float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
 {
+	bool isOrthographic = g_isOrthographic!=0;
-	float fCx = g_mScrProjection[0].z;
-	float fCy = g_mScrProjection[1].z;
+	float fCx = isOrthographic ? g_mScrProjection[0].w : g_mScrProjection[0].z;
+	float fCy = isOrthographic ? g_mScrProjection[1].w : g_mScrProjection[1].z;
-	return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = true;
-	return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
+	bool useLeftHandVersion = isOrthographic;
+
+	float s = useLeftHandVersion ? 1 : (-1);
+	float2 p = float2( (s*v2ScrPos.x-fCx)/fSx, (s*v2ScrPos.y-fCy)/fSy);
+
+	return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
 }

 float GetOnePixDiagWorldDistAtDepthOne()
 #endif

 #ifdef FINE_PRUNING_ENABLED
-
-#ifndef NARROW_MOBILE_ENABLED
-#else
-void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths1, float4 vLinDepths2);
-#endif
-
 #endif


 	uint2 tileIDX = u3GroupID.xy;
 	uint t=threadID;

-#ifndef NARROW_MOBILE_ENABLED
-#else
-	for(int i=(int) t; i<MAX_NR_COARSE_ENTRIES; i+=NR_THREADS)
-		prunedList[i]=0;
-#endif
-	
+
 	uint iWidth = g_viDimensions.x;
 	uint iHeight = g_viDimensions.y;
 	uint nrTilesX = (iWidth+15)/16;


 	float4 vLinDepths;
-#ifdef NARROW_MOBILE_ENABLED
-	float4 vLinDepths2;
-#endif
-#ifndef NARROW_MOBILE_ENABLED
-#else
-		for(int i = 0; i < 8; i++)
-#endif
-			const float linDepth = GetLinearDepth(fDepth);
-#ifndef NARROW_MOBILE_ENABLED
-			vLinDepths[i] = linDepth;
-#else
-			if(i<4) vLinDepths[i] = linDepth;
-			else vLinDepths2[i-4] = linDepth;
-#endif
+			vLinDepths[i] = GetLinearDepth(fDepth);
 			if(fDepth<VIEWPORT_SCALE_Z)		// if not skydome
 			{
 				dpt_mi = min(fDepth, dpt_mi);

-		InterlockedMAX(ldsZMax, asuint(dpt_ma), threadID);
-		InterlockedMIN(ldsZMin, asuint(dpt_mi), threadID);
+		InterlockedMax(ldsZMax, asuint(dpt_ma));
+		InterlockedMin(ldsZMin, asuint(dpt_mi));


 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
 	float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
 	vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
+

 	// build coarse list using AABB
 #ifdef USE_TWO_PASS_TILED_LIGHTING
 	const int bigTileIdx = (tileIDX.y>>log2BigTileToTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToTileRatio);		// map the idx to 64x64 tiles
 	int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
-	int nrLightsIn = nrBigTileLights;
-	int nrLightsIn = (int) g_iNrVisibLights;
-#ifndef EMUL_LOCAL_ATOMICS
+
 		if( all(vMa>vTileLL) && all(vMi<vTileUR))
 		{
 			unsigned int uInc = 1;
 		}
-#else
-		unsigned int uInc = (all(vMa>vTileLL) && all(vMi<vTileUR)) ? 1 : 0;
-		unsigned int uIndex;
-		InterlockedADDAndPrev(lightOffs, uInc, uIndex, t, l, nrLightsIn);
-		if(uIndex<MAX_NR_COARSE_ENTRIES && uInc!=0) coarseList[uIndex] = l;		// add to light list
-#endif
 	}

 #ifdef FINE_PRUNING_ENABLED

 #ifndef FINE_PRUNING_ENABLED
 	{
-#ifndef NARROW_MOBILE_ENABLED
-#else
-		for(int i=(int) t; t<iNrCoarseLights; i+=NR_THREADS) prunedList[i] = coarseList[i];
-#endif
 		if(t==0) ldsNrLightsFinal=iNrCoarseLights;
 	}
 #else
-#ifndef NARROW_MOBILE_ENABLED
-#else
-		FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths, vLinDepths2);
-#endif
 	}
 #endif



 	int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
-	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS) 
+	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
-#ifndef EMUL_LOCAL_ATOMICS
-#else
-		uint model = g_vLightData[ prunedList[i] ].lightModel;
-		for(int m=0; m<NR_LIGHT_MODELS; m++)
-		{
-			uint uInc = model==m ? 1 : 0;
-			InterlockedADD(ldsModelListCount[m], uInc, threadID, i, nrLightsCombinedList);
-		}
-#endif
-#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) && !defined(NARROW_MOBILE_ENABLED)
+#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 	SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
 	//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
 #endif
 	{
 		SFiniteLightBound lightData = g_data[prunedList[l]];

-		bool bHit = DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius);
-#ifndef EMUL_LOCAL_ATOMICS	
-		if( bHit )
+		if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius, g_isOrthographic!=0) )
-#else
-		unsigned int uInc = bHit ? 1 : 0;
-		unsigned int uIndex;
-		InterlockedADDAndPrev(lightOffsSph, uInc, uIndex, threadID, l, iNrCoarseLights);
-		if(bHit) coarseList[uIndex]=prunedList[l];		// read from the original copy of coarseList which is backed up in prunedList
-#endif
 	}

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 #ifdef FINE_PRUNING_ENABLED
 // initializes ldsNrLightsFinal with the number of accepted lights.
 // all accepted entries delivered in prunedList[].
-
-#ifndef NARROW_MOBILE_ENABLED
-#else
-void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths1, float4 vLinDepths2)
-#endif
-#ifndef NARROW_MOBILE_ENABLED
-	const int numPixSerial = 4;
-#else
-	const int numPixSerial = 8;
-#endif
 	uint t = threadID;
 	uint iWidth = g_viDimensions.x;
 	uint iHeight = g_viDimensions.y;

 			// serially check 4 pixels
 			uint uVal = 0;
-			for(int i=0; i<numPixSerial; i++)
+			for(int i=0; i<4; i++)
-#ifdef NARROW_MOBILE_ENABLED
-				float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
-#endif
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
-	
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+
 				// check pixel
 				float3 fromLight = vVPos-lightData.lightPos.xyz;
 				float distSq = dot(fromLight,fromLight);

 			// serially check 4 pixels
 			uint uVal = 0;
-			for(int i=0; i<numPixSerial; i++)
+			for(int i=0; i<4; i++)
-#ifdef NARROW_MOBILE_ENABLED
-				float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
-#endif
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
-	
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
+
 				// check pixel
 				float3 vLp = lightData.lightPos.xyz;
 				float3 toLight = vLp - vVPos;

 			// serially check 4 pixels
 			uint uVal = 0;
-			for(int i=0; i<numPixSerial; i++)
+			for(int i=0; i<4; i++)
-#ifdef NARROW_MOBILE_ENABLED
-				float4 vLinDepths = i<4 ? vLinDepths1 : vLinDepths2;
-#endif
-				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i&0x3]);
+				float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);

 				// check pixel
 				float3 toLight  = lightData.lightPos.xyz - vVPos;
 		if(uLgtType>=MAX_TYPES) ++l;
 	}

-	InterlockedOR(ldsDoesLightIntersect[0], uLightsFlags[0], threadID);
-	InterlockedOR(ldsDoesLightIntersect[1], uLightsFlags[1], threadID);
+	InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
+	InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
 	if(t==0) ldsNrLightsFinal = 0;

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
-#ifndef NARROW_MOBILE_ENABLED
 	if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
 	{
 		unsigned int uInc = 1;
 	}
-#else
-	for(uint i=t; i<MAX_NR_COARSE_ENTRIES; i+=NR_THREADS)
-	{
-		unsigned int uInc = (i<(uint) iNrCoarseLights && (ldsDoesLightIntersect[i<32 ? 0 : 1]&(1<<(i&31)))!=0) ? 1 : 0;
-		unsigned int uIndex;
-		InterlockedADDAndPrev(ldsNrLightsFinal, uInc, uIndex, t, i, MAX_NR_COARSE_ENTRIES);
-		if(uInc!=0 && uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[i];		// we allow up to 64 pruned lights while stored in LDS.
-	}
-#endif
 }
 #endif
--- a/Assets/ScriptableRenderPipeline/Fptl/scrbound.compute
+++ b/Assets/ScriptableRenderPipeline/Fptl/scrbound.compute
 #include "ShaderBase.h"
 #include "LightDefinitions.cs.hlsl"

+uniform int g_isOrthographic;
 uniform int g_iNrVisibLights;
 uniform float4x4 g_mInvProjection;
 uniform float4x4 g_mProjection;
 			}
 			else
 			{
-				//if((center.z+radius)<0.0)
-				if( length(center)>radius)
+				if(g_isOrthographic==0 && length(center)>radius)
 				{
 					float2 vMi, vMa;
 					bool2 bMi, bMa;
 					vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy;
+				}
+				else if(g_isOrthographic!=0)
+				{
+					float2 vMi = mul(g_mProjection, float4(center.xyz-radius,1)).xy;	 // no division needed for ortho
+					float2 vMa = mul(g_mProjection, float4(center.xyz+radius,1)).xy;	 // no division needed for ortho
+					vMin.xy = max(vMin.xy, vMi);
+					vMax.xy = min(vMax.xy, vMa);
 				}

 #if USE_LEFTHAND_CAMERASPACE