Merge pull request #188 from Unity-Technologies/metal-ios

metal-ios
8 年前 · 8488f9ad
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute

 	if(t==0) lightOffs = 0;
 	GroupMemoryBarrierWithGroupSync();
-    int i;
+	int i;
 	for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
 	GroupMemoryBarrierWithGroupSync();
 	iNrCoarseLights = lightOffs;

 	int i=iSwizzle + (2*(iSection&0x2));	// offset by 4 at section 2
 	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
-    vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
+	vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
 }

 void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
 	{
 		const uint idxCoarse = lightsListLDS[l];

-		bool canEnter = idxCoarse<(uint)g_iNrVisibLights;
+		bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
-    [branch]if(canEnter)
+		[branch]if(canEnter)
-			const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+			const float3 boxZ = -lgtDat.boxAxisZ.xyz;	// flip axis (so it points away from the light direction for a spot-light)
 			const float3 center = lgtDat.center.xyz;
 			const float2 scaleXY = lgtDat.scaleXY;

--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
 #include "../ShaderBase.hlsl"
 #include "../TilePass.cs.hlsl"
 #include "../LightingConvexHullUtils.hlsl"
+
-

 //#define EXACT_EDGE_TESTS
 #define PERFORM_SPHERICAL_INTERSECTION_TESTS
 	dpt_ma = asfloat(ldsZMax);
 #endif

-	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
-	float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
-	
+	float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
+	float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

 	// build coarse list using AABB
 #ifdef USE_TWO_PASS_TILED_LIGHTING
 	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
 	{
 #endif
-		const float3 vMi = g_vBoundsBuffer[l];
-		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
+		const float2 vMi = g_vBoundsBuffer[l].xy;
+		const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
-		if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
+		if( all(vMa>vTileLL) && all(vMi<vTileUR))
 		{
 			unsigned int uInc = 1;
 			unsigned int uIndex;
 		InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start);		// alloc list memory
 	}

-    // All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
-    // to make it work correctly
-    int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
+	// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
+	// to make it work correctly
+	int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT

 	int categoryListCount[LIGHTCATEGORY_COUNT]={0,0,0};		// direct light count and reflection lights
 	uint offs = start;
 		{
 			if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
 			{
-                uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
-                ++categoryListCount[lightCategory];
+				uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
+				++categoryListCount[lightCategory];
 				g_vLayeredLightList[offs++] = coarseList[l] - shiftIndex[lightCategory];			// reflection lights will be last since we sorted
 			}
 		}
 		GroupMemoryBarrierWithGroupSync();
 #endif
 		const int idxCoarse = coarseList[l];
-        [branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
+		[branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE)		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
 		{
 			SFiniteLightBound lgtDat = g_data[idxCoarse];
 	
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
 #include "../ShaderBase.hlsl"
 #include "../TilePass.cs.hlsl"
 #include "../LightingConvexHullUtils.hlsl"
+
-

 #define FINE_PRUNING_ENABLED
 #define PERFORM_SPHERICAL_INTERSECTION_TESTS
 uniform float4x4 g_mInvScrProjection;
 uniform float4x4 g_mScrProjection;
 uniform int _EnvLightIndexShift;
+

 Texture2D g_depth_tex : register( t0 );
 StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
 	int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
 	for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS) 
 	{
-        InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
+		InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
 	}


 	int localOffs=0;
 	int offs = tileIDX.y*nrTilesX + tileIDX.x;

-    // All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
-    // to make it work correctly
-    int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
+	// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index
+	// to make it work correctly
+	int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
-		
-            // We remap the prunedList index to the original LightData / EnvLightData indices
-			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs] - shiftIndex[category];
-            uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
+			// We remap the prunedList index to the original LightData / EnvLightData indices
+			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
+			uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];

 			g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
 		}
 	if(threadID==0) lightOffsSph = 0;

 	// make a copy of coarseList in prunedList.
-    int l;
+	int l;
 	for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
 		prunedList[l]=coarseList[l];

 	{
 		// fetch light
 		int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-        uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+		uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-            LightVolumeData lightData = _LightVolumeData[idxCoarse];
-            // TODO: Change by SebL
-            const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];
+			// TODO: Change by SebL
+			const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
 				
 			// serially check 4 pixels
 			uint uVal = 0;

 			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
 			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-            uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-            LightVolumeData lightData = _LightVolumeData[idxCoarse];
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];

 			// serially check 4 pixels
 			uint uVal = 0;

 			uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
 			++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
-            uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
+			uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
-            LightVolumeData lightData = _LightVolumeData[idxCoarse];
+			LightVolumeData lightData = _LightVolumeData[idxCoarse];

 			// serially check 4 pixels
 			uint uVal = 0;
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/scrbound.compute
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/scrbound.compute
 uniform float4x4 g_mInvProjection;
 uniform float4x4 g_mProjection;

-
-

 #define NR_THREADS			64

--- a/Assets/ScriptableRenderPipeline/common/ShaderBase.h
+++ b/Assets/ScriptableRenderPipeline/common/ShaderBase.h

 float FetchDepthMSAA(Texture2DMS<float> depthTexture, uint2 pixCoord, uint sampleIdx)
 {
-	float zdpth = depthTexture.Load(uint3(pixCoord.xy, 0), sampleIdx).x;
+	float zdpth = depthTexture.Load(pixCoord.xy, sampleIdx).x;
 #ifdef REVERSE_ZBUF
 	zdpth = 1.0 - zdpth;
 #endif
--- a/Assets/ScriptableRenderPipeline/fptl/ClusteredUtils.h
+++ b/Assets/ScriptableRenderPipeline/fptl/ClusteredUtils.h
    #define FLT_EPSILON     1.192092896e-07f
 #endif

+// Calling pow directly often results in a shader compiler warning like this:
+// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
+// PositivePow removes this warning by raising max(abs(base), FLT_EPSILON) to 'power'. Use it only when you know the value is positive (a negative base is treated as its absolute value); the clamp is also meant to avoid inf/NaN results.
+float PositivePow(float base, float power)
+{
+    return pow(max(abs(base), float(FLT_EPSILON)), power); // base is made non-negative and clamped to at least FLT_EPSILON before pow
+}
+
+float2 PositivePow(float2 base, float2 power)
+{
+    return pow(max(abs(base), float2(FLT_EPSILON, FLT_EPSILON)), power); // same clamp as the scalar overload, with a float2 of FLT_EPSILON as the lower bound
+}
+
+float3 PositivePow(float3 base, float3 power)
+{
+    return pow(max(abs(base), float3(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power); // same clamp as the scalar overload, with a float3 of FLT_EPSILON as the lower bound
+}
+
+float4 PositivePow(float4 base, float4 power)
+{
+    return pow(max(abs(base), float4(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power); // same clamp as the scalar overload, with a float4 of FLT_EPSILON as the lower bound
+}
+
-    const float geomSeries = (1.0 - pow(base, C)) / (1 - base);     // geometric series: sum_k=0^{C-1} base^k
+    const float geomSeries = (1.0 - PositivePow(base, C)) / (1 - base);     // geometric series: sum_k=0^{C-1} base^k
    return geomSeries / (g_fFarPlane - g_fNearPlane);
 }

    if (logBasePerTile)
        userscale = GetScaleFromBase(suggestedBase);

-    float dist = (pow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
+    float dist = (PositivePow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
    res = dist + g_fNearPlane;

 #if USE_LEFTHAND_CAMERASPACE
--- a/Assets/ScriptableRenderPipeline/fptl/lightlistbuild-bigtile.compute
+++ b/Assets/ScriptableRenderPipeline/fptl/lightlistbuild-bigtile.compute

 	if(t==0) lightOffs = 0;
 	GroupMemoryBarrierWithGroupSync();
-	for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
+	int i;
+	for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
-	for(int i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
+	for(i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
 		g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = i==0 ? iNrCoarseLights : lightsListLDS[i-1];
 }


 	int i=iSwizzle + (2*(iSection&0x2));	// offset by 4 at section 2
 	vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
-	vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
+	vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
 }

 void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
 	for(int l=0; l<iNrCoarseLights; l++)
 	{
 		const uint idxCoarse = lightsListLDS[l];
-    
+
 		bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
 		if(canEnter) canEnter = g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT;		// don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
 		[branch]if(canEnter)
 			const float3 boxX = lgtDat.boxAxisX.xyz;
 			const float3 boxY = lgtDat.boxAxisY.xyz;
-			const float3 boxZ = -lgtDat.boxAxisZ.xyz;           // flip axis (so it points away from the light direction for a spot-light)
+			const float3 boxZ = -lgtDat.boxAxisZ.xyz;	// flip axis (so it points away from the light direction for a spot-light)
 			const float3 center = lgtDat.center.xyz;
 			const float2 scaleXY = lgtDat.scaleXY;

 		GroupMemoryBarrierWithGroupSync();
 #endif
 }
-#endif
+#endif
--- a/Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute
+++ b/Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute
 #pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile		LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile		ENABLE_DEPTH_TEXTURE_BACKPLANE		MSAA_ENABLED		USE_TWO_PASS_TILED_LIGHTING
 #pragma kernel ClearAtomic

-
-
 #include "LightingConvexHullUtils.hlsl"

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 groupshared uint lightOffs;

 #ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
-groupshared int ldsZMax;
+groupshared uint ldsZMax;
 #endif

 #ifdef EXACT_EDGE_TESTS
 	{
 		uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
 #ifdef MSAA_ENABLED
-		for(int i=0; i<iNumSamplesMSAA; i++)
+		for(uint i=0; i<iNumSamplesMSAA; i++)
 		{
 		const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
 #else
 	dpt_ma = asfloat(ldsZMax);
 #endif

-	float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
-	float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
-	
+	float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
+	float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

 	// build coarse list using AABB
 #ifdef USE_TWO_PASS_TILED_LIGHTING
 	for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
 	{
 #endif
-		const float3 vMi = g_vBoundsBuffer[l];
-		const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
+		const float2 vMi = g_vBoundsBuffer[l].xy;
+		const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
-		if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
+		if( all(vMa>vTileLL) && all(vMi<vTileUR))
 		{
 			unsigned int uInc = 1;
 			unsigned int uIndex;
 		{
 			if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
 			{
-				uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
-				++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
+				uint lightModel = g_vLightData[coarseList[l]].lightModel;
+				++modelListCount[lightModel==REFLECTION_LIGHT ? 1 : 0];
 				g_vLayeredLightList[offs++] = coarseList[l];			// reflection lights will be last since we sorted
 			}
 		}

 	uint localOffs=0;
 	offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
-	for(int m=0; m<NR_LIGHT_MODELS; m++)
+	for(int category=0; category<NR_LIGHT_MODELS; category++)
-		int numLights = min(modelListCount[m],31);		// only allow 5 bits
+		int numLights = min(modelListCount[category],31);		// only allow 5 bits
-			localOffs += modelListCount[m];		// use unclamped count for localOffs
+			localOffs += modelListCount[category];		// use unclamped count for localOffs
 		}
 	}

 void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
 {
 	g_LayeredSingleIdxBuffer[0]=0;	// reset element 0 of g_LayeredSingleIdxBuffer to zero; neither threadID nor u3GroupID is used here
-}
+}
--- a/Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute
+++ b/Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute
 #pragma kernel TileLightListGen					LIGHTLISTGEN=TileLightListGen
 #pragma kernel TileLightListGen_SrcBigTile		LIGHTLISTGEN=TileLightListGen_SrcBigTile		USE_TWO_PASS_TILED_LIGHTING

-
-
-

 #define FINE_PRUNING_ENABLED
 #define PERFORM_SPHERICAL_INTERSECTION_TESTS
 	// write lights to global buffers
 	int localOffs=0;
 	int offs = tileIDX.y*nrTilesX + tileIDX.x;
-	for(int m=0; m<NR_LIGHT_MODELS; m++)
+
+	for(int category=0; category<NR_LIGHT_MODELS; category++)
-		int nrLightsFinal = ldsModelListCount[ m ];
+		int nrLightsFinal = ldsModelListCount[category];
-		
-			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
-			uint uHigh = prunedList[2*l+0+localOffs];
+			uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs];
+			uint uHigh = prunedList[2 * l + 0 + localOffs];

 			g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
 		}
 	}
-
 }


 	if(threadID==0) lightOffsSph = 0;

 	// make a copy of coarseList in prunedList.
-	for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
+	int l;
+	for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
 		prunedList[l]=coarseList[l];

 #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
 	float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
 	float halfTileSizeAtZDistOne = 8*onePixDiagDist;		// scale by half a tile
 	
-	for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
+	for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
 	{
 		SFiniteLightBound lightData = g_data[prunedList[l]];
 	
 		if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t];		// we allow up to 64 pruned lights while stored in LDS.
 	}
 }
-#endif
+#endif
--- a/Assets/ScriptableRenderPipeline/fptl/scrbound.compute
+++ b/Assets/ScriptableRenderPipeline/fptl/scrbound.compute
 uniform float4x4 g_mInvProjection;
 uniform float4x4 g_mProjection;

-
-
-

 #define FLT_EPSILON     1.192092896e-07F        // smallest such that 1.0+FLT_EPSILON != 1.0
 #define NR_THREADS			64