浏览代码

HLSLcc: fix out of bounds issues on Metal/iOS, HLSL bytecode being the root cause before Metal generated shaders

/fptl_cleanup
Antti Tapaninen 8 年前
当前提交
09c4949e
共有 6 个文件被更改,包括 45 次插入38 次删除
  1. 10
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
  2. 2
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
  3. 38
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl
  4. 21
      Assets/ScriptableRenderPipeline/fptl/TiledLightingUtils.hlsl
  5. 10
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute
  6. 2
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute

10
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute


//////////// cell specific code
{
for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
int pad = iNrCoarseLights & 1;
for(int l=(int) t; l<((iNrCoarseLights+pad)>>1); l += NR_THREADS)
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights)];
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-pad)];
const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));

{
int offs = 0;
for(int l=0; l<iNrCoarseLights; l++)
{ if(coarseList[l]!=0xffffffff) coarseList[offs++] = coarseList[l]; }
{
if(coarseList[l]!=0xffffffff)
coarseList[offs++] = coarseList[l];
}
lightOffsSph = offs;
}

2
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute


for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[max(0,2 * l - 1 + localOffs)] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);

38
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl


start = tileOffset;
}
// Returns the 16-bit light index stored for `lightIndex` in the tile that
// starts at `tileOffset` inside g_vLightListGlobal.
// Each element of the list packs two 16-bit entries (low half, high half).
uint FetchIndexTile(uint tileOffset, uint lightIndex)
{
    // The first slot of a tile is reserved for the number of lights, so the
    // requested light lives one slot further along.
    const uint slot = lightIndex + 1;

    // Two slots per element: slot / 2 selects the element inside the tile.
    const uint element = DWORD_PER_TILE * tileOffset + (slot >> 1);

    // Odd slots sit in the upper half of the element.
    // NOTE(review): the shift amount reuses DWORD_PER_TILE; this selects the
    // upper 16 bits only if DWORD_PER_TILE is 16 -- confirm.
    const uint shift = (slot & 1) * DWORD_PER_TILE;

    const uint packed = g_vLightListGlobal[element];
    return (packed >> shift) & 0xffff;
}
#ifdef USE_FPTL_LIGHTLIST
uint GetTileSize()

// USE_FPTL_LIGHTLIST variant: returns the 16-bit light index stored for
// `lightIndex` in the tile starting at `tileOffset`.
// NOTE(review): as shown here the body contains two return paths back to back,
// so everything after the first return is unreachable. This looks like the
// helper-call version and the inlined version of the same body displayed
// together -- confirm which one is intended before relying on this text.
uint FetchIndex(uint tileOffset, uint lightIndex)
{
return FetchIndexTile(tileOffset, lightIndex);
const uint lightIndexPlusOne = lightIndex + 1; // +1 because the first slot is reserved for the number of lights
// Light indices are stored on 16 bits, two per list element
return (g_vLightListGlobal[DWORD_PER_TILE * tileOffset + (lightIndexPlusOne >> 1)] >> ((lightIndexPlusOne & 1) * DWORD_PER_TILE)) & 0xffff;
}
#elif defined(USE_CLUSTERED_LIGHTLIST)

lightCount = (dataPair >> 27) & 31;
}
// Returns the entry of g_vLightListGlobal located `lightIndex` elements past
// `tileOffset`. The value is returned as stored, with no shifting or masking.
uint FetchIndexCluster(uint tileOffset, uint lightIndex)
{
    const uint element = tileOffset + lightIndex;
    return g_vLightListGlobal[element];
}
void GetCountAndStart(PositionInputs posInput, uint lightCategory, out uint start, out uint lightCount)
{
if (_UseTileLightList)

// USE_CLUSTERED_LIGHTLIST variant: returns the light index for `lightIndex`,
// reading either the packed 16-bit tile layout or the plain cluster layout
// depending on _UseTileLightList.
// NOTE(review): this span mixes two versions of the body (a helper-dispatch
// form and a single-read form); the `else` below has no matching `if` in this
// view. Confirm the intended version before relying on this text.
uint FetchIndex(uint tileOffset, uint lightIndex)
{
// Default to the cluster layout: a direct offset into the list.
uint offset = tileOffset + lightIndex;
const uint lightIndexPlusOne = lightIndex + 1; // +1 because the first slot is reserved for the number of lights
return FetchIndexTile(tileOffset, lightIndex);
else
return FetchIndexCluster(tileOffset, lightIndex);
// Tile layout: two 16-bit entries per element, so halve the slot number.
offset = DWORD_PER_TILE * tileOffset + (lightIndexPlusOne >> 1);
// Read g_vLightListGlobal once, at a single offset, so that the generated
// HLSL bytecode does not always access it with two different offsets.
// This fixes an out-of-bounds issue.
uint value = g_vLightListGlobal[offset];
// Light indices are stored on 16 bits in the tile layout; the cluster layout returns the value unchanged
return (_UseTileLightList ? ((value >> ((lightIndexPlusOne & 1) * DWORD_PER_TILE)) & 0xffff) : value);
}
#endif

#else // LIGHTLOOP_SINGLE_PASS
// Tile size reported in the LIGHTLOOP_SINGLE_PASS branch: always 1.
uint GetTileSize()
{
return 1;
}
// Tile size reported in the LIGHTLOOP_SINGLE_PASS branch: always 1.
// NOTE(review): an identical definition appears immediately before this one in
// this view; presumably the two are the before/after sides of a
// whitespace-only change -- confirm.
uint GetTileSize()
{
return 1;
}
// bakeDiffuseLighting is part of the prototype so a user is able to implement a "base pass" with GI and multipass direct light (aka old unity rendering path)

21
Assets/ScriptableRenderPipeline/fptl/TiledLightingUtils.hlsl


uStart = tileOffs;
}
// Returns the 16-bit light index stored for light `l` in the tile `tileOffs`
// of g_vLightListGlobal. A tile spans 16 elements, and each element packs two
// 16-bit entries (low half, high half).
uint FetchIndexOpaque(const uint tileOffs, const uint l)
{
    // Entries are addressed starting one slot in.
    // NOTE(review): presumably slot 0 of each tile holds the light count -- confirm.
    const uint slot = l + 1;

    const uint packed = g_vLightListGlobal[16 * tileOffs + (slot >> 1)];

    // Odd slots occupy the upper 16 bits of the element.
    const uint shift = (slot & 1) * 16;
    return (packed >> shift) & 0xffff;
}
#ifdef OPAQUES_ONLY
void GetCountAndStart(out uint uStart, out uint uNrLights, uint2 pixCoord, float linDepth, uint model)

// OPAQUES_ONLY variant: returns the 16-bit light index stored for light `l`
// in tile `tileOffs`.
// NOTE(review): as shown here the body contains two return paths back to back,
// so everything after the first return is unreachable. This looks like the
// helper-call version and the inlined version of the same body displayed
// together -- confirm which one is intended before relying on this text.
uint FetchIndex(const uint tileOffs, const uint l)
{
return FetchIndexOpaque(tileOffs, l);
// Slot l+1 of the tile; two 16-bit slots are packed per list element.
const uint l1 = l+1;
return (g_vLightListGlobal[ 16*tileOffs + (l1>>1)]>>((l1&1)*16))&0xffff;
}
#else

// Non-OPAQUES_ONLY variant: returns the light index for light `l`, reading
// either the packed 16-bit tile layout or the plain layout depending on
// g_isOpaquesOnlyEnabled.
// NOTE(review): this span mixes two versions of the body (a branch-per-layout
// form and a single-read form); the `else` below has no matching `if` in this
// view. Confirm the intended version before relying on this text.
uint FetchIndex(const uint tileOffs, const uint l)
{
// Default to the plain layout: a direct offset into the list.
uint offs = tileOffs+l;
// Slot l+1 of the tile, used by the packed layout.
const uint l1 = l+1;
return FetchIndexOpaque(tileOffs, l);
else
return g_vLightListGlobal[ tileOffs+l ];
// Packed layout: 16 elements per tile, two 16-bit slots per element.
offs = 16*tileOffs + (l1>>1);
// Read g_vLightListGlobal once, at a single offset, so that the generated
// HLSL bytecode does not always access it with two different offsets.
// This fixes an out-of-bounds issue.
uint value = g_vLightListGlobal[offs];
// Packed layout extracts the selected 16-bit half; the plain layout returns the value unchanged.
return (g_isOpaquesOnlyEnabled ? ((value >>((l1&1)*16))&0xffff) : value);
}
#endif

10
Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute


//////////// cell specific code
{
for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
int pad = iNrCoarseLights & 1;
for(int l=(int) t; l<((iNrCoarseLights+pad)>>1); l += NR_THREADS)
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights)];
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-pad)];
const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));

{
int offs = 0;
for(int l=0; l<iNrCoarseLights; l++)
{ if(coarseList[l]!=0xffffffff) coarseList[offs++] = coarseList[l]; }
{
if(coarseList[l]!=0xffffffff)
coarseList[offs++] = coarseList[l];
}
lightOffsSph = offs;
}

2
Assets/ScriptableRenderPipeline/fptl/lightlistbuild.compute


const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs];
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[max(0,2 * l - 1 + localOffs)];
uint uHigh = prunedList[2 * l + 0 + localOffs];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);

正在加载...
取消
保存