浏览代码

allow clustered tiled to use 16, 32 or 64 as its tile size

allow clustered tiled to use 16, 32 or 64 as its tile size. Fptl will
remain 16x16 due to its per pixel nature.
/Branch_Batching2
mmikk 8 年前
当前提交
f8fb534f
共有 10 个文件被更改,包括 62 次插入40 次删除
  1. 26
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
  2. 2
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.hlsl
  3. 2
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl
  4. 19
      Assets/ScriptableRenderPipeline/fptl/FptlLighting.cs
  5. 2
      Assets/ScriptableRenderPipeline/fptl/LightDefinitions.cs
  6. 1
      Assets/ScriptableRenderPipeline/fptl/LightDefinitions.cs.hlsl
  7. 5
      Assets/ScriptableRenderPipeline/fptl/TiledLightingTemplate.hlsl
  8. 17
      Assets/ScriptableRenderPipeline/fptl/TiledLightingUtils.hlsl
  9. 5
      Assets/ScriptableRenderPipeline/fptl/TiledReflectionTemplate.hlsl
  10. 23
      Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute

26
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild-clustered.compute


#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
#pragma kernel ClearAtomic
#define TILE_SIZE_CLUSTERED 16
#include "ShaderLibrary/common.hlsl"
#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"

#else
g_depth_tex.GetDimensions(iWidth, iHeight);
#endif
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
const uint log2TileSize = firstbithigh(TILE_SIZE_CLUSTERED);
uint nrTilesX = (iWidth+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;
uint nrTilesY = (iHeight+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;
uint2 viTilLL = 16*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
if(t==0)
{

// establish min and max depth first
dpt_ma=0.0;
for(int idx=t; idx<256; idx+=NR_THREADS)
for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
for(uint i=0; i<iNumSamplesMSAA; i++)
{

// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
const uint log2BigTileToClustTileRatio = firstbithigh(64) - log2TileSize;
int NrBigTilesX = (nrTilesX+((1<<log2BigTileToClustTileRatio)-1))>>log2BigTileToClustTileRatio;
const int bigTileIdx = (tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToClustTileRatio); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{

int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE

#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
float halfTileSizeAtZDistOne = (TILE_SIZE_CLUSTERED/2)*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{

2
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.hlsl


#define TILE_SIZE 16 // This is fixed
#define DWORD_PER_TILE 16 // See dwordsPerTile in TilePass.cs, we have roomm for 31 lights and a number of light value all store on 16 bit (ushort)
#define TILE_SIZE_CLUSTERED 16
// these uniforms are only needed for when OPAQUES_ONLY is NOT defined
// but there's a problem with our front-end compilation of compute shaders with multiple kernels causing it to error
//#ifdef USE_CLUSTERED_LIGHTLIST

2
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePassLoop.hlsl


void GetCountAndStartCluster(PositionInputs posInput, uint lightCategory, out uint start, out uint lightCount)
{
uint2 tileIndex = posInput.unPositionSS / TILE_SIZE;
uint2 tileIndex = posInput.unPositionSS / TILE_SIZE_CLUSTERED;
float logBase = g_fClustBase;
if (g_isLogBaseBufferEnabled)

19
Assets/ScriptableRenderPipeline/fptl/FptlLighting.cs


if (enableClustered)
{
s_PerVoxelOffset = new ComputeBuffer(LightDefinitions.NR_LIGHT_MODELS * (1 << k_Log2NumClusters) * nrTiles, sizeof(uint));
s_PerVoxelLightLists = new ComputeBuffer(NumLightIndicesPerClusteredTile() * nrTiles, sizeof(uint));
var tileSizeClust = LightDefinitions.TILE_SIZE_CLUSTERED;
var nrTilesClustX = (width + (tileSizeClust-1)) / tileSizeClust;
var nrTilesClustY = (height + (tileSizeClust-1)) / tileSizeClust;
var nrTilesClust = nrTilesClustX * nrTilesClustY;
s_PerVoxelOffset = new ComputeBuffer(LightDefinitions.NR_LIGHT_MODELS * (1 << k_Log2NumClusters) * nrTilesClust, sizeof(uint));
s_PerVoxelLightLists = new ComputeBuffer(NumLightIndicesPerClusteredTile() * nrTilesClust, sizeof(uint));
s_PerTileLogBaseTweak = new ComputeBuffer(nrTiles, sizeof(float));
s_PerTileLogBaseTweak = new ComputeBuffer(nrTilesClust, sizeof(float));
}
}

cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_logBaseBuffer", s_PerTileLogBaseTweak);
}
var numTilesX = (camera.pixelWidth + 15) / 16;
var numTilesY = (camera.pixelHeight + 15) / 16;
cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
var tileSizeClust = LightDefinitions.TILE_SIZE_CLUSTERED;
var nrTilesClustX = (camera.pixelWidth + (tileSizeClust-1)) / tileSizeClust;
var nrTilesClustY = (camera.pixelHeight + (tileSizeClust-1)) / tileSizeClust;
cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, nrTilesClustX, nrTilesClustY, 1);
}
void BuildPerTileLightLists(Camera camera, ScriptableRenderContext loop, int numLights, Matrix4x4 projscr, Matrix4x4 invProjscr)

2
Assets/ScriptableRenderPipeline/fptl/LightDefinitions.cs


public static int MAX_NR_BIGTILE_LIGHTS_PLUSONE = 512; // may be overkill but the footprint is 2 bits per pixel using uint16.
public static float VIEWPORT_SCALE_Z = 1.0f;
public static int TILE_SIZE_CLUSTERED = 32;
// enable unity's original left-hand shader camera space (right-hand internally in unity).
public static int USE_LEFTHAND_CAMERASPACE = 0;

1
Assets/ScriptableRenderPipeline/fptl/LightDefinitions.cs.hlsl


//
#define MAX_NR_LIGHTS_PER_CAMERA (1024)
#define MAX_NR_BIGTILE_LIGHTS_PLUSONE (512)
#define TILE_SIZE_CLUSTERED (32)
#define VIEWPORT_SCALE_Z (1)
#define USE_LEFTHAND_CAMERASPACE (0)
#define IS_CIRCULAR_SPOT_SHAPE (1)

5
Assets/ScriptableRenderPipeline/fptl/TiledLightingTemplate.hlsl


float3 ExecuteLightList(out uint numLightsProcessed, uint2 pixCoord, float3 vP, float3 vPw, float3 Vworld)
{
uint nrTilesX = (g_widthRT+15)/16; uint nrTilesY = (g_heightRT+15)/16;
uint2 tileIDX = pixCoord / 16;
GetCountAndStart(start, numLights, tileIDX, nrTilesX, nrTilesY, vP.z, DIRECT_LIGHT);
GetCountAndStart(start, numLights, pixCoord, vP.z, DIRECT_LIGHT);
numLightsProcessed = numLights; // mainly for debugging/heat maps
return ExecuteLightList(start, numLights, vP, vPw, Vworld);

17
Assets/ScriptableRenderPipeline/fptl/TiledLightingUtils.hlsl


StructuredBuffer<uint> g_vLightListGlobal; // don't support Buffer yet in unity
void GetCountAndStartOpaque(out uint uStart, out uint uNrLights, uint2 tileIDX, int nrTilesX, int nrTilesY, float linDepth, uint model)
void GetCountAndStartOpaque(out uint uStart, out uint uNrLights, uint2 pixCoord, float linDepth, uint model)
uint tileSize = 16;
uint nrTilesX = ((uint) (g_widthRT+(tileSize-1)))/tileSize; uint nrTilesY = ((uint) (g_heightRT+(tileSize-1)))/tileSize;
uint2 tileIDX = pixCoord / tileSize;
const int tileOffs = (tileIDX.y+model*nrTilesY)*nrTilesX+tileIDX.x;
uNrLights = g_vLightListGlobal[ 16*tileOffs + 0]&0xffff;

#ifdef OPAQUES_ONLY
void GetCountAndStart(out uint uStart, out uint uNrLights, uint2 tileIDX, int nrTilesX, int nrTilesY, float linDepth, uint model)
void GetCountAndStart(out uint uStart, out uint uNrLights, uint2 pixCoord, float linDepth, uint model)
GetCountAndStartOpaque(uStart, uNrLights, tileIDX, nrTilesX, nrTilesY, linDepth, model);
GetCountAndStartOpaque(uStart, uNrLights, pixCoord, linDepth, model);
}
uint FetchIndex(const uint tileOffs, const uint l)

StructuredBuffer<float> g_logBaseBuffer; // don't support Buffer yet in unity
void GetCountAndStart(out uint uStart, out uint uNrLights, uint2 tileIDX, int nrTilesX, int nrTilesY, float linDepth, uint model)
void GetCountAndStart(out uint uStart, out uint uNrLights, uint2 pixCoord, float linDepth, uint model)
GetCountAndStartOpaque(uStart, uNrLights, tileIDX, nrTilesX, nrTilesY, linDepth, model);
GetCountAndStartOpaque(uStart, uNrLights, pixCoord, linDepth, model);
uint nrTilesX = ((uint) (g_widthRT+(TILE_SIZE_CLUSTERED-1))) / ((uint) TILE_SIZE_CLUSTERED);
uint nrTilesY = ((uint) (g_heightRT+(TILE_SIZE_CLUSTERED-1))) / ((uint) TILE_SIZE_CLUSTERED);
uint2 tileIDX = pixCoord / ((uint) TILE_SIZE_CLUSTERED);
float logBase = g_fClustBase;
if(g_isLogBaseBufferEnabled)
logBase = g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x];

5
Assets/ScriptableRenderPipeline/fptl/TiledReflectionTemplate.hlsl


float3 ExecuteReflectionList(out uint numReflectionProbesProcessed, uint2 pixCoord, float3 vP, float3 vNw, float3 Vworld, float smoothness)
{
uint nrTilesX = (g_widthRT+15)/16; uint nrTilesY = (g_heightRT+15)/16;
uint2 tileIDX = pixCoord / 16;
GetCountAndStart(start, numReflectionProbes, tileIDX, nrTilesX, nrTilesY, vP.z, REFLECTION_LIGHT);
GetCountAndStart(start, numReflectionProbes, pixCoord, vP.z, REFLECTION_LIGHT);
numReflectionProbesProcessed = numReflectionProbes; // mainly for debugging/heat maps
return ExecuteReflectionList(start, numReflectionProbes, vP, vNw, Vworld, smoothness);

23
Assets/ScriptableRenderPipeline/fptl/lightlistbuild-clustered.compute


#else
g_depth_tex.GetDimensions(iWidth, iHeight);
#endif
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
const uint log2TileSize = firstbithigh(TILE_SIZE_CLUSTERED);
uint nrTilesX = (iWidth+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;
uint nrTilesY = (iHeight+(TILE_SIZE_CLUSTERED-1))>>log2TileSize;
uint2 viTilLL = 16*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
uint2 viTilLL = TILE_SIZE_CLUSTERED*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(TILE_SIZE_CLUSTERED,TILE_SIZE_CLUSTERED), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
if(t==0)
{

// establish min and max depth first
dpt_ma=0.0;
for(int idx=t; idx<256; idx+=NR_THREADS)
for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&(TILE_SIZE_CLUSTERED-1)), viTilLL.y+(idx>>log2TileSize)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
for(int i=0; i<iNumSamplesMSAA; i++)
{

// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
const uint log2BigTileToClustTileRatio = firstbithigh(64) - log2TileSize;
int NrBigTilesX = (nrTilesX+((1<<log2BigTileToClustTileRatio)-1))>>log2BigTileToClustTileRatio;
const int bigTileIdx = (tileIDX.y>>log2BigTileToClustTileRatio)*NrBigTilesX + (tileIDX.x>>log2BigTileToClustTileRatio); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{

int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE

#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
float halfTileSizeAtZDistOne = (TILE_SIZE_CLUSTERED/2)*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{

正在加载...
取消
保存