Sebastien Lagarde 8 年前
当前提交
b3d6e1ea
共有 3 个文件被更改,包括 143 次插入124 次删除
  1. 2
      Assets/ScriptableRenderLoop/fptl/lightlistbuild-bigtile.compute
  2. 6
      Assets/ScriptableRenderLoop/fptl/lightlistbuild-clustered.compute
  3. 259
      Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute

2
Assets/ScriptableRenderLoop/fptl/lightlistbuild-bigtile.compute


GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = lightOffs<MAX_NR_BIGTILE_LIGHTS ? lightOffs : MAX_NR_BIGTILE_LIGHTS;
int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );

6
Assets/ScriptableRenderLoop/fptl/lightlistbuild-clustered.compute


GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = lightOffs<MAX_NR_COARSE_ENTRIES ? lightOffs : MAX_NR_COARSE_ENTRIES;
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );

iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
#endif
// sort lights
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(XBONE) && !defined(PLAYSTATION4)
SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif

}
iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES); // combined storage for both direct lights and reflection
InterlockedAdd(g_LayeredSingleIdxBuffer[0], iSpaceAvail, start); // alloc list memory
InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
}
int modelListCount[NR_LIGHT_MODELS]={0,0}; // direct light count and reflection lights

259
Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute


int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
#ifdef FINE_PRUNING_ENABLED
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
#endif
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)

GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = lightOffs<MAX_NR_COARSE_ENTRIES ? lightOffs : MAX_NR_COARSE_ENTRIES;
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );

{
int iNrLightsOut = iNrCoarseLights<MAX_NR_PRUNED_ENTRIES ? iNrCoarseLights : MAX_NR_PRUNED_ENTRIES;
if((int)t<iNrLightsOut) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrLightsOut;
if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrCoarseLights;
uint uLightsFlags[2] = {0,0};
int l=0;
// need this outer loop even on xb1 and ps4 since direct lights and
// reflection lights are kept in separate regions.
while(l<iNrCoarseLights)
{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
// spot
while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lightData.lightPos.xyz;
float distSq = dot(fromLight,fromLight);
const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz
float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// sphere
while(l<iNrCoarseLights && uLgtType==SPHERE_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lightData.lightPos.xyz;
float3 toLight = vLp - vVPos;
float distSq = dot(toLight,toLight);
if(lightData.radiusSq>distSq) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// Box
while(l<iNrCoarseLights && uLgtType==BOX_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lightData.lightPos.xyz - vVPos;
float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be
if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// in case we have some corrupt data make sure we terminate
if(uLgtType>=MAX_TYPES) ++l;
}
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
if(t==0) ldsNrLightsFinal = 0;
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
}
#endif

#endif
int nrLightsCombinedList = ldsNrLightsFinal<MAX_NR_COARSE_ENTRIES ? ldsNrLightsFinal : MAX_NR_COARSE_ENTRIES;
int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsModelListCount[ g_vLightData[ prunedList[i] ].lightModel ], 1);

// sort lights
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(XBONE) && !defined(PLAYSTATION4)
SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);

#endif
return lightOffsSph;
}
#endif
#ifdef FINE_PRUNING_ENABLED
// FinePruneLights: per-pixel culling pass over the coarse light list.
//
// Every thread tests four pixels against each light in coarseList[0..iNrCoarseLights-1].
// A light is accepted if at least one pixel tested by any thread falls inside its volume.
// The per-light results are merged across threads through the ldsDoesLightIntersect
// bit mask, after which the accepted lights are compacted into prunedList[].
//
// Parameters:
//   threadID        - index of the calling thread; selects which pixels it tests and,
//                     in the compaction step, which coarse list entry it owns.
//   iNrCoarseLights - number of valid entries in coarseList[].
//   viTilLL         - pixel coordinate the per-thread pixel offsets are added to
//                     (presumably the lower-left corner of the tile -- confirm).
//   vLinDepths      - four depth values, one per pixel tested by this thread, forwarded
//                     to GetViewPosFromLinDepth().
//
// Results:
//   initializes ldsNrLightsFinal with the number of accepted lights.
//   all accepted entries delivered in prunedList[] (in the order the atomic adds resolve,
//   so not necessarily in coarse list order).
//
// NOTE(review): ldsDoesLightIntersect[] is only OR'ed into here, never cleared; this
// assumes the caller zeroed it before the call -- confirm.
// NOTE(review): the two-word bit mask and the one-light-per-thread compaction assume
// iNrCoarseLights <= 64 and iNrCoarseLights <= NR_THREADS -- confirm against the caller's clamp.
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
{
uint t = threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
// this thread's private result mask: bit (l&31) of word (l<32 ? 0 : 1) is set when
// coarse light l touches at least one of this thread's four pixels.
uint uLightsFlags[2] = {0,0};
int l=0;
// need this outer loop even on xb1 and ps4 since direct lights and
// reflection lights are kept in separate regions.
// Each inner loop below consumes a run of consecutive lights of one type.
while(l<iNrCoarseLights)
{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
// spot
while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
// selects a circular (length) or square (max of the two axes) cross-section test.
const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
// pixel i of this thread: the low 4 bits of idx give the x offset, the remaining
// bits the y offset; the location is clamped to the last valid pixel.
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
// view position is reconstructed at a half-pixel offset from the integer location.
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lightData.lightPos.xyz;
float distSq = dot(fromLight,fromLight);
const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz
float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
// inside when both hold: distSq < radiusSq, and the projection onto the spot axis
// exceeds the lateral distance scaled by cotan.
if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
}
// record the result for light l, then advance to the next coarse entry.
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// sphere
while(l<iNrCoarseLights && uLgtType==SPHERE_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
// same pixel addressing as in the spot loop.
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lightData.lightPos.xyz;
float3 toLight = vLp - vVPos;
float distSq = dot(toLight,toLight);
// inside when the squared distance to the light position is below radiusSq.
if(lightData.radiusSq>distSq) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// Box
while(l<iNrCoarseLights && uLgtType==BOX_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
// same pixel addressing as in the spot loop.
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lightData.lightPos.xyz - vVPos;
// per-axis distance in the light's own frame, remapped by boxInnerDist/boxInvRange;
// the pixel is inside when the largest remapped component is below 1.
float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be
if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
}
// in case we have some corrupt data make sure we terminate
// NOTE(review): only types >= MAX_TYPES are skipped here; a type below MAX_TYPES that is
// not spot/sphere/box would never advance l -- confirm no such type exists.
if(uLgtType>=MAX_TYPES) ++l;
}
// merge this thread's per-light results into the group-wide mask.
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
// reset the output counter before the compaction step below.
if(t==0) ldsNrLightsFinal = 0;
// the barrier is compiled out on XBONE and PLAYSTATION4
// (presumably the whole group runs as a single wavefront there -- confirm).
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
// compaction: thread t owns coarse light t and appends it if its bit is set in the merged mask.
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
unsigned int uIndex;
// uIndex receives the counter value before the add, i.e. this light's output slot.
// The counter is incremented even when the write below is skipped.
InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
}
#endif
正在加载...
取消
保存