浏览代码

Merge pull request #188 from Unity-Technologies/metal-ios

metal-ios
/fptl_cleanup
GitHub 8 年前
当前提交
8488f9ad
共有 10 个文件被更改,包括 90 次插入78 次删除
  1. 10
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
  2. 25
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
  3. 36
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
  4. 2
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/scrbound.compute
  5. 2
      Assets/ScriptableRenderPipeline/common/ShaderBase.h
  6. 27
      Assets/ScriptableRenderPipeline/fptl/ClusteredUtils.h
  7. 13
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild-bigtile.compute
  8. 29
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute
  9. 21
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute
  10. 3
      Assets/ScriptableRenderPipeline/fptl/scrbound.compute

10
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute


if(t==0) lightOffs = 0;
GroupMemoryBarrierWithGroupSync();
int i;
int i;
for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
GroupMemoryBarrierWithGroupSync();
iNrCoarseLights = lightOffs;

int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
}
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)

{
const uint idxCoarse = lightsListLDS[l];
bool canEnter = idxCoarse<(uint)g_iNrVisibLights;
bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
[branch]if(canEnter)
[branch]if(canEnter)
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;

25
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute


#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
//#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS

dpt_ma = asfloat(ldsZMax);
#endif
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;

InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
}
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
int categoryListCount[LIGHTCATEGORY_COUNT]={0,0,0}; // direct light count and reflection lights
uint offs = start;

{
if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
{
uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
++categoryListCount[lightCategory];
uint lightCategory = _LightVolumeData[coarseList[l]].lightCategory;
++categoryListCount[lightCategory];
g_vLayeredLightList[offs++] = coarseList[l] - shiftIndex[lightCategory]; // reflection lights will be last since we sorted
}
}

GroupMemoryBarrierWithGroupSync();
#endif
const int idxCoarse = coarseList[l];
[branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
[branch]if (_LightVolumeData[idxCoarse].lightVolume != LIGHTVOLUMETYPE_SPHERE) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
{
SFiniteLightBound lgtDat = g_data[idxCoarse];

36
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute


#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS

uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform int _EnvLightIndexShift;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );

int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
}

int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}

if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
int l;
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
prunedList[l]=coarseList[l];

{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;

uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;

uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
LightVolumeData lightData = _LightVolumeData[idxCoarse];
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;

2
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/scrbound.compute


uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
#define NR_THREADS 64

2
Assets/ScriptableRenderPipeline/common/ShaderBase.h


float FetchDepthMSAA(Texture2DMS<float> depthTexture, uint2 pixCoord, uint sampleIdx)
{
float zdpth = depthTexture.Load(uint3(pixCoord.xy, 0), sampleIdx).x;
float zdpth = depthTexture.Load(pixCoord.xy, sampleIdx).x;
#ifdef REVERSE_ZBUF
zdpth = 1.0 - zdpth;
#endif

27
Assets/ScriptableRenderPipeline/fptl/ClusteredUtils.h


#define FLT_EPSILON 1.192092896e-07f
#endif
// Using pow often result to a warning like this
// "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
// PositivePow remove this warning when you know the value is positive and avoid inf/NAN.
float PositivePow(float base, float power)
{
return pow(max(abs(base), float(FLT_EPSILON)), power);
}
float2 PositivePow(float2 base, float2 power)
{
return pow(max(abs(base), float2(FLT_EPSILON, FLT_EPSILON)), power);
}
float3 PositivePow(float3 base, float3 power)
{
return pow(max(abs(base), float3(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power);
}
float4 PositivePow(float4 base, float4 power)
{
return pow(max(abs(base), float4(FLT_EPSILON, FLT_EPSILON, FLT_EPSILON, FLT_EPSILON)), power);
}
const float geomSeries = (1.0 - pow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
const float geomSeries = (1.0 - PositivePow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
return geomSeries / (g_fFarPlane - g_fNearPlane);
}

if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
float dist = (pow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
float dist = (PositivePow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
res = dist + g_fNearPlane;
#if USE_LEFTHAND_CAMERASPACE

13
Assets/ScriptableRenderPipeline/fptl/lightlistbuild-bigtile.compute


if(t==0) lightOffs = 0;
GroupMemoryBarrierWithGroupSync();
for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
int i;
for(i=t; i<iNrCoarseLights; i+=NR_THREADS) if((int)lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
for(int i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
for(i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = i==0 ? iNrCoarseLights : lightsListLDS[i-1];
}

int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
vE0 = iSection == 0 ? vP0 : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
}
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)

for(int l=0; l<iNrCoarseLights; l++)
{
const uint idxCoarse = lightsListLDS[l];
bool canEnter = idxCoarse<(uint) g_iNrVisibLights;
if(canEnter) canEnter = g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT; // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
[branch]if(canEnter)

const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;

GroupMemoryBarrierWithGroupSync();
#endif
}
#endif
#endif

29
Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute


#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
#pragma kernel ClearAtomic
#include "LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

groupshared uint lightOffs;
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
groupshared int ldsZMax;
groupshared uint ldsZMax;
#endif
#ifdef EXACT_EDGE_TESTS

{
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
for(int i=0; i<iNumSamplesMSAA; i++)
for(uint i=0; i<iNumSamplesMSAA; i++)
{
const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
#else

dpt_ma = asfloat(ldsZMax);
#endif
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;

{
if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
{
uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
uint lightModel = g_vLightData[coarseList[l]].lightModel;
++modelListCount[lightModel==REFLECTION_LIGHT ? 1 : 0];
g_vLayeredLightList[offs++] = coarseList[l]; // reflection lights will be last since we sorted
}
}

uint localOffs=0;
offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
for(int category=0; category<NR_LIGHT_MODELS; category++)
int numLights = min(modelListCount[m],31); // only allow 5 bits
int numLights = min(modelListCount[category],31); // only allow 5 bits
localOffs += modelListCount[m]; // use unclamped count for localOffs
localOffs += modelListCount[category]; // use unclamped count for localOffs
}
}

void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
g_LayeredSingleIdxBuffer[0]=0;
}
}

21
Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute


#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS

// write lights to global buffers
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
for(int category=0; category<NR_LIGHT_MODELS; category++)
int nrLightsFinal = ldsModelListCount[ m ];
int nrLightsFinal = ldsModelListCount[category];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
uint uHigh = prunedList[2*l+0+localOffs];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs];
uint uHigh = prunedList[2 * l + 0 + localOffs];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}

}
}

if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
prunedList[l]=coarseList[l];
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)

float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lightData = g_data[prunedList[l]];

if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
}
#endif
#endif

3
Assets/ScriptableRenderPipeline/fptl/scrbound.compute


uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
#define FLT_EPSILON 1.192092896e-07F // smallest such that 1.0+FLT_EPSILON != 1.0
#define NR_THREADS 64

正在加载...
取消
保存