浏览代码

fixed tile debugging dropdown

added tile classification with indirect dispatch (feature variant shaders)
added debugging mode for feature variants
/fptl_cleanup
runes 7 年前
当前提交
12d98082
共有 8 个文件被更改,包括 857 次插入1704 次删除
  1. 406
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Debug/Resources/DebugViewTiles.shader
  2. 6
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TileLightLoopProducer.cs
  3. 916
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute
  4. 183
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/shadeopaque.compute
  5. 1001
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
  6. 9
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs.hlsl
  7. 31
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl
  8. 9
      Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl.meta

406
Assets/ScriptableRenderPipeline/HDRenderPipeline/Debug/Resources/DebugViewTiles.shader


Shader "Hidden/HDRenderPipeline/DebugViewTiles"
{
SubShader
{
Pass
{
ZWrite Off
Blend SrcAlpha OneMinusSrcAlpha
HLSLPROGRAM
#pragma target 4.5
#pragma only_renderers d3d11 ps4 metal // TEMP: until we go further in dev
#pragma vertex Vert
#pragma fragment Frag
#define LIGHTLOOP_TILE_PASS
#define LIGHTLOOP_TILE_ALL
#pragma multi_compile USE_FPTL_LIGHTLIST USE_CLUSTERED_LIGHTLIST
//-------------------------------------------------------------------------------------
// Include
//-------------------------------------------------------------------------------------
#include "../../../ShaderLibrary/Common.hlsl"
// Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
// deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem),
// the deferred shader will require to use multicompile.
#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
#include "../../ShaderConfig.cs.hlsl"
#include "../../ShaderVariables.hlsl"
#include "../../Lighting/Lighting.hlsl" // This include Material.hlsl
//-------------------------------------------------------------------------------------
// variable declaration
//-------------------------------------------------------------------------------------
uint _ViewTilesFlags;
float2 _MousePixelCoord;
float4 Vert(float3 positionOS : POSITION): SV_POSITION
{
return TransformWorldToHClip(TransformObjectToWorld(positionOS));
}
float4 AlphaBlend(float4 c0, float4 c1) // c1 over c0
{
return float4(lerp(c0.rgb, c1.rgb, c1.a), c0.a + c1.a - c0.a * c1.a);
}
float4 OverlayHeatMap(uint2 pixCoord, uint numLights)
{
const float4 kRadarColors[12] =
{
float4(0.0, 0.0, 0.0, 0.0), // black
float4(0.0, 0.0, 0.6, 0.5), // dark blue
float4(0.0, 0.0, 0.9, 0.5), // blue
float4(0.0, 0.6, 0.9, 0.5), // light blue
float4(0.0, 0.9, 0.9, 0.5), // cyan
float4(0.0, 0.9, 0.6, 0.5), // blueish green
float4(0.0, 0.9, 0.0, 0.5), // green
float4(0.6, 0.9, 0.0, 0.5), // yellowish green
float4(0.9, 0.9, 0.0, 0.5), // yellow
float4(0.9, 0.6, 0.0, 0.5), // orange
float4(0.9, 0.0, 0.0, 0.5), // red
float4(1.0, 0.0, 0.0, 0.9) // strong red
};
float maxNrLightsPerTile = 31; // TODO: setup a constant for that
int colorIndex = numLights == 0 ? 0 : (1 + (int)floor(10 * (log2((float)numLights) / log2(maxNrLightsPerTile))));
colorIndex = colorIndex < 0 ? 0 : colorIndex;
float4 col = colorIndex > 11 ? float4(1.0, 1.0, 1.0, 1.0) : kRadarColors[colorIndex];
int2 coord = pixCoord - int2(1, 1);
float4 color = float4(PositivePow(col.xyz, 2.2), 0.3 * col.w);
if (numLights > 0)
{
if (SampleDebugFontNumber(coord, numLights)) // Shadow
color = float4(0, 0, 0, 1);
if (SampleDebugFontNumber(coord + 1, numLights)) // Text
color = float4(1, 1, 1, 1);
}
return color;
}
float4 Frag(float4 positionCS : SV_POSITION) : SV_Target
{
// positionCS is SV_Position
PositionInputs posInput = GetPositionInput(positionCS.xy, _ScreenSize.zw, uint2(positionCS.xy) / GetTileSize());
float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
int2 pixelCoord = posInput.unPositionSS.xy;
int2 tileCoord = (float2)pixelCoord / TILE_SIZE;
int2 mouseTileCoord = _MousePixelCoord / TILE_SIZE;
int2 offsetInTile = pixelCoord - tileCoord * TILE_SIZE;
int n = 0;
for (int category = 0; category < LIGHTCATEGORY_COUNT; category++)
{
uint mask = 1u << category;
if (mask & _ViewTilesFlags)
{
uint start;
uint count;
GetCountAndStart(posInput, category, start, count);
n += count;
}
}
float4 result = float4(0.0, 0.0, 0.0, 0.0);
// Tile overlap counter
if (n > 0)
{
result = OverlayHeatMap(int2(posInput.unPositionSS.xy) & (TILE_SIZE - 1), n);
}
// Highlight selected tile
if (all(mouseTileCoord == tileCoord))
{
bool border = any(offsetInTile == 0 || offsetInTile == TILE_SIZE - 1);
float4 result2 = float4(1.0, 1.0, 1.0, border ? 1.0 : 0.5);
result = AlphaBlend(result, result2);
}
// Print light lists for selected tile at the bottom of the screen
int maxLights = 32;
if (tileCoord.y < LIGHTCATEGORY_COUNT && tileCoord.x < maxLights + 3)
{
PositionInputs mousePosInput = GetPositionInput(_MousePixelCoord, _ScreenSize.zw, uint2(0,0));
float depthMouse = LOAD_TEXTURE2D(_MainDepthTexture, mousePosInput.unPositionSS).x;
UpdatePositionInput(depthMouse, _InvViewProjMatrix, _ViewProjMatrix, mousePosInput);
uint category = (LIGHTCATEGORY_COUNT - 1) - tileCoord.y;
uint start;
uint count;
GetCountAndStart(mousePosInput, category, start, count);
float4 result2 = float4(.1,.1,.1,.9);
int2 fontCoord = int2(pixelCoord.x, offsetInTile.y);
int lightListIndex = tileCoord.x - 2;
int n = -1;
if(tileCoord.x == 0)
{
n = (int)count;
}
else if(lightListIndex >= 0 && lightListIndex < (int)count)
{
n = FetchIndex(start, lightListIndex);
}
if (n >= 0)
{
if (SampleDebugFontNumber(offsetInTile, n))
result2 = float4(0.0, 0.0, 0.0, 1.0);
if (SampleDebugFontNumber(offsetInTile + 1, n))
result2 = float4(1.0, 1.0, 1.0, 1.0);
}
result = AlphaBlend(result, result2);
}
return result;
}
ENDHLSL
}
}
Fallback Off
}
Shader "Hidden/HDRenderPipeline/DebugViewTiles"
{
SubShader
{
Pass
{
ZWrite Off
Blend SrcAlpha OneMinusSrcAlpha
HLSLPROGRAM
#pragma target 4.5
#pragma only_renderers d3d11 ps4 metal // TEMP: until we go further in dev
#pragma vertex Vert
#pragma fragment Frag
#define LIGHTLOOP_TILE_PASS
#define LIGHTLOOP_TILE_ALL
#pragma multi_compile USE_FPTL_LIGHTLIST USE_CLUSTERED_LIGHTLIST
#pragma multi_compile SHOW_LIGHT_CATEGORIES SHOW_FEATURE_VARIANTS
//-------------------------------------------------------------------------------------
// Include
//-------------------------------------------------------------------------------------
#include "../../../ShaderLibrary/Common.hlsl"
// Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
// deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem),
// the deferred shader will require to use multicompile.
#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
#include "../../ShaderConfig.cs.hlsl"
#include "../../ShaderVariables.hlsl"
#include "../../Lighting/Lighting.hlsl" // This include Material.hlsl
//-------------------------------------------------------------------------------------
// variable declaration
//-------------------------------------------------------------------------------------
uint _ViewTilesFlags;
uint _NumTiles;
float2 _MousePixelCoord;
StructuredBuffer<uint> g_TileList;
Buffer<uint> g_DispatchIndirectBuffer;
struct VSOut
{
float4 Pos : SV_POSITION;
int Variant : TEXCOORD0;
};
#if SHOW_FEATURE_VARIANTS
VSOut Vert(uint vertexID : SV_VertexID)
{
uint quadIndex = vertexID / 6;
uint quadVertex = vertexID - quadIndex * 6;
quadVertex = (0x312210 >> (quadVertex<<2)) & 3; //remap [0,5]->[0,3]
uint2 tileSize = GetTileSize();
uint variant = 0;
while (quadIndex >= g_DispatchIndirectBuffer[variant * 3 + 0] && variant < NUM_FEATURE_VARIANTS)
{
quadIndex -= g_DispatchIndirectBuffer[variant * 3 + 0];
variant++;
}
uint tileIndex = g_TileList[variant * _NumTiles + quadIndex];
uint2 tileCoord = uint2(tileIndex & 0xFFFF, tileIndex >> 16);
uint2 pixelCoord = (tileCoord + uint2((quadVertex+1) & 1, (quadVertex >> 1) & 1)) * tileSize;
float2 clipCoord = (pixelCoord / _ScreenParams.xy) * 2.0 - 1.0;
clipCoord.y *= -1;
VSOut Out;
Out.Pos = float4(clipCoord, 0, 1.0);
Out.Variant = variant;
return Out;
}
#else
VSOut Vert(float3 positionOS : POSITION)
{
VSOut Out;
Out.Pos = TransformWorldToHClip(TransformObjectToWorld(positionOS));
Out.Variant = 0;
return Out;
}
#endif
float4 AlphaBlend(float4 c0, float4 c1) // c1 over c0
{
return float4(lerp(c0.rgb, c1.rgb, c1.a), c0.a + c1.a - c0.a * c1.a);
}
float4 OverlayHeatMap(uint2 pixCoord, uint n)
{
const float4 kRadarColors[12] =
{
float4(0.0, 0.0, 0.0, 0.0), // black
float4(0.0, 0.0, 0.6, 0.5), // dark blue
float4(0.0, 0.0, 0.9, 0.5), // blue
float4(0.0, 0.6, 0.9, 0.5), // light blue
float4(0.0, 0.9, 0.9, 0.5), // cyan
float4(0.0, 0.9, 0.6, 0.5), // blueish green
float4(0.0, 0.9, 0.0, 0.5), // green
float4(0.6, 0.9, 0.0, 0.5), // yellowish green
float4(0.9, 0.9, 0.0, 0.5), // yellow
float4(0.9, 0.6, 0.0, 0.5), // orange
float4(0.9, 0.0, 0.0, 0.5), // red
float4(1.0, 0.0, 0.0, 0.9) // strong red
};
float maxNrLightsPerTile = 31; // TODO: setup a constant for that
int colorIndex = n == 0 ? 0 : (1 + (int)floor(10 * (log2((float)n) / log2(maxNrLightsPerTile))));
colorIndex = colorIndex < 0 ? 0 : colorIndex;
float4 col = colorIndex > 11 ? float4(1.0, 1.0, 1.0, 1.0) : kRadarColors[colorIndex];
int2 coord = pixCoord - int2(1, 1);
float4 color = float4(PositivePow(col.xyz, 2.2), 0.3 * col.w);
if (n >= 0)
{
if (SampleDebugFontNumber(coord, n)) // Shadow
color = float4(0, 0, 0, 1);
if (SampleDebugFontNumber(coord + 1, n)) // Text
color = float4(1, 1, 1, 1);
}
return color;
}
float4 Frag(float4 positionCS : SV_POSITION, int Variant : TEXCOORD0) : SV_Target
{
// positionCS is SV_Position
PositionInputs posInput = GetPositionInput(positionCS.xy, _ScreenSize.zw, uint2(positionCS.xy) / GetTileSize());
float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
int2 pixelCoord = posInput.unPositionSS.xy;
int2 tileCoord = (float2)pixelCoord / TILE_SIZE;
int2 mouseTileCoord = _MousePixelCoord / TILE_SIZE;
int2 offsetInTile = pixelCoord - tileCoord * TILE_SIZE;
int n = 0;
#ifdef SHOW_LIGHT_CATEGORIES
for (int category = 0; category < LIGHTCATEGORY_COUNT; category++)
{
uint mask = 1u << category;
if (mask & _ViewTilesFlags)
{
uint start;
uint count;
GetCountAndStart(posInput, category, start, count);
n += count;
}
}
if(n == 0) n = -1;
#else
n = Variant;
#endif
float4 result = float4(0.0, 0.0, 0.0, 0.0);
// Tile overlap counter
if (n >= 0)
{
result = OverlayHeatMap(int2(posInput.unPositionSS.xy) & (TILE_SIZE - 1), n);
}
#ifdef SHOW_LIGHT_CATEGORIES
// Highlight selected tile
if (all(mouseTileCoord == tileCoord))
{
bool border = any(offsetInTile == 0 || offsetInTile == TILE_SIZE - 1);
float4 result2 = float4(1.0, 1.0, 1.0, border ? 1.0 : 0.5);
result = AlphaBlend(result, result2);
}
// Print light lists for selected tile at the bottom of the screen
int maxLights = 32;
if (tileCoord.y < LIGHTCATEGORY_COUNT && tileCoord.x < maxLights + 3)
{
PositionInputs mousePosInput = GetPositionInput(_MousePixelCoord, _ScreenSize.zw, uint2(0,0));
float depthMouse = LOAD_TEXTURE2D(_MainDepthTexture, mousePosInput.unPositionSS).x;
UpdatePositionInput(depthMouse, _InvViewProjMatrix, _ViewProjMatrix, mousePosInput);
uint category = (LIGHTCATEGORY_COUNT - 1) - tileCoord.y;
uint start;
uint count;
GetCountAndStart(mousePosInput, category, start, count);
float4 result2 = float4(.1,.1,.1,.9);
int2 fontCoord = int2(pixelCoord.x, offsetInTile.y);
int lightListIndex = tileCoord.x - 2;
int n = -1;
if(tileCoord.x == 0)
{
n = (int)count;
}
else if(lightListIndex >= 0 && lightListIndex < (int)count)
{
n = FetchIndex(start, lightListIndex);
}
if (n >= 0)
{
if (SampleDebugFontNumber(offsetInTile, n))
result2 = float4(0.0, 0.0, 0.0, 1.0);
if (SampleDebugFontNumber(offsetInTile + 1, n))
result2 = float4(1.0, 1.0, 1.0, 1.0);
}
result = AlphaBlend(result, result2);
}
#endif
return result;
}
ENDHLSL
}
}
Fallback Off
}

6
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TileLightLoopProducer.cs


[Range(0.0f, 1.0f)]
public float specularGlobalDimmer = 1.0f;
public enum TileDebugByCategory : int { None = 0, Punctual = 1, Area = 2, AreaAndPunctual = 3, Environment = 4, EnvironmentAndPunctual = 5, EnvironmentAndArea = 6, EnvironmentAndAreaAndPunctual };
public TileDebugByCategory tileDebugByCategory;
public enum TileDebug : int { None = 0, Punctual = 1, Area = 2, AreaAndPunctual = 3, Environment = 4, EnvironmentAndPunctual = 5, EnvironmentAndArea = 6, EnvironmentAndAreaAndPunctual = 7, FeatureVariants = 8 };
public TileDebug tileDebugByCategory;
public static TileSettings defaultSettings = new TileSettings
{

enableComputeFeatureVariants = false,
tileDebugByCategory = TileDebugByCategory.None,
tileDebugByCategory = TileDebug.None,
enableClustered = true,
enableFptlForOpaqueWhenClustered = true,
enableBigTilePrepass = true,

916
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/lightlistbuild.compute


// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
// https://github.com/wolfgangfengel/GPU-Pro-7
#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#include "../../../../ShaderLibrary/common.hlsl"
#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
#endif
#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform int _EnvLightIndexShift;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#ifdef USE_TWO_PASS_TILED_LIGHTING
StructuredBuffer<uint> g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity
#endif
#define NR_THREADS 64
// output buffer
RWStructuredBuffer<uint> g_vLightList : register( u0 ); // don't support RWBuffer yet in unity
#define MAX_NR_COARSE_ENTRIES 64
#define MAX_NR_PRUNED_ENTRIES 24
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS
groupshared uint ldsZMin;
groupshared uint ldsZMax;
groupshared uint lightOffs;
#ifdef FINE_PRUNING_ENABLED
groupshared uint ldsDoesLightIntersect[2];
#endif
groupshared int ldsNrLightsFinal;
groupshared int ldsCategoryListCount[LIGHTCATEGORY_COUNT]; // since LIGHTCATEGORY_COUNT is 3
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;
#endif
//float GetLinearDepth(float3 vP)
//{
// float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
// return v4Pres.z / v4Pres.w;
//}
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w;
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
#if USE_LEFTHAND_CAMERASPACE
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
#else
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
#endif
}
float GetOnePixDiagWorldDistAtDepthOne()
{
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
#ifdef FINE_PRUNING_ENABLED
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
#endif
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
if(t<MAX_NR_COARSE_ENTRIES)
prunedList[t]=0;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
// build tile scr boundary
const uint uFltMax = 0x7f7fffff; // FLT_MAX as a uint
if(t==0)
{
ldsZMin = uFltMax;
ldsZMax = 0;
lightOffs = 0;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
uint2 viTilLL = 16*tileIDX;
// establish min and max depth first
float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
float4 vLinDepths;
{
// Fetch depths and calculate min/max
[unroll]
for(int i = 0; i < 4; i++)
{
int idx = i * NR_THREADS + t;
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
const float fDepth = FetchDepth(g_depth_tex, uCrd);
vLinDepths[i] = GetLinearDepth(fDepth);
if(fDepth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDepth, dpt_mi);
dpt_ma = max(fDepth, dpt_ma);
}
}
InterlockedMax(ldsZMax, asuint(dpt_ma));
InterlockedMin(ldsZMin, asuint(dpt_mi));
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{
int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
}
}
#ifdef FINE_PRUNING_ENABLED
if(t<2) ldsDoesLightIntersect[t] = 0;
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifndef FINE_PRUNING_ENABLED
{
if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrCoarseLights;
}
#else
{
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
}
#endif
//
if(t<LIGHTCATEGORY_COUNT) ldsCategoryListCount[t]=0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
}
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
#endif
// write lights to global buffers
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
{
int nrLightsFinal = ldsCategoryListCount[category];
int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}
localOffs += nrLightsFinal;
offs += (nrTilesX*nrTilesY);
}
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
prunedList[l]=coarseList[l];
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
#if USE_LEFTHAND_CAMERASPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lightData = g_data[prunedList[l]];
if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius) )
{
unsigned int uIndex;
InterlockedAdd(lightOffsSph, 1, uIndex);
coarseList[uIndex]=prunedList[l]; // read from the original copy of coarseList which is backed up in prunedList
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
return lightOffsSph;
}
#endif
#ifdef FINE_PRUNING_ENABLED
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
{
uint t = threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint uLightsFlags[2] = {0,0};
int l=0;
// need this outer loop even on xb1 and ps4 since direct lights and
// reflection lights are kept in separate regions.
while(l<iNrCoarseLights)
{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
// spot
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_CONE)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lightData.lightPos.xyz;
float distSq = dot(fromLight,fromLight);
const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz
float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// sphere
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_SPHERE)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lightData.lightPos.xyz;
float3 toLight = vLp - vVPos;
float distSq = dot(toLight,toLight);
if(lightData.radiusSq>distSq) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// Box
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_BOX)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lightData.lightPos.xyz - vVPos;
float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be
if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// in case we have some corrupt data make sure we terminate
if(uLightVolume >=LIGHTVOLUMETYPE_COUNT) ++l;
}
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
if(t==0) ldsNrLightsFinal = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
}
#endif
// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
// https://github.com/wolfgangfengel/GPU-Pro-7
#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_FeatureFlags LIGHTLISTGEN=TileLightListGen_FeatureFlags USE_FEATURE_FLAGS
#pragma kernel TileLightListGen_SrcBigTile_FeatureFlags LIGHTLISTGEN=TileLightListGen_SrcBigTile_FeatureFlags USE_TWO_PASS_TILED_LIGHTING USE_FEATURE_FLAGS
#include "../../../../ShaderLibrary/common.hlsl"
#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#include "../FeatureFlags.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
#endif
#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform int _EnvLightIndexShift;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#ifdef USE_TWO_PASS_TILED_LIGHTING
StructuredBuffer<uint> g_vBigTileLightList : register( t4 ); // don't support Buffer yet in unity
#endif
#define NR_THREADS 64
// output buffer
RWStructuredBuffer<uint> g_vLightList : register( u0 ); // don't support RWBuffer yet in unity
#define MAX_NR_COARSE_ENTRIES 64
#define MAX_NR_PRUNED_ENTRIES 24
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS
groupshared uint ldsZMin;
groupshared uint ldsZMax;
groupshared uint lightOffs;
#ifdef FINE_PRUNING_ENABLED
groupshared uint ldsDoesLightIntersect[2];
#endif
groupshared int ldsNrLightsFinal;
groupshared int ldsCategoryListCount[LIGHTCATEGORY_COUNT]; // since LIGHTCATEGORY_COUNT is 3
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;
#endif
#ifdef USE_FEATURE_FLAGS
groupshared uint ldsFeatureFlags;
RWBuffer<uint> g_DispatchIndirectBuffer;
RWStructuredBuffer<uint> g_TileList;
#endif
//float GetLinearDepth(float3 vP)
//{
// float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
// return v4Pres.z / v4Pres.w;
//}
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w;
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
#if USE_LEFTHAND_CAMERASPACE
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
#else
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
#endif
}
float GetOnePixDiagWorldDistAtDepthOne()
{
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
#ifdef FINE_PRUNING_ENABLED
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths);
#endif
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
if(t<MAX_NR_COARSE_ENTRIES)
prunedList[t]=0;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
uint nrTiles = nrTilesX * nrTilesY; // Precompute?
// build tile scr boundary
const uint uFltMax = 0x7f7fffff; // FLT_MAX as a uint
if(t==0)
{
ldsZMin = uFltMax;
ldsZMax = 0;
lightOffs = 0;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
uint2 viTilLL = 16*tileIDX;
// establish min and max depth first
float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
float4 vLinDepths;
{
// Fetch depths and calculate min/max
[unroll]
for(int i = 0; i < 4; i++)
{
int idx = i * NR_THREADS + t;
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
const float fDepth = FetchDepth(g_depth_tex, uCrd);
vLinDepths[i] = GetLinearDepth(fDepth);
if(fDepth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDepth, dpt_mi);
dpt_ma = max(fDepth, dpt_ma);
}
}
InterlockedMax(ldsZMax, asuint(dpt_ma));
InterlockedMin(ldsZMin, asuint(dpt_mi));
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{
int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
}
}
#ifdef FINE_PRUNING_ENABLED
if(t<2) ldsDoesLightIntersect[t] = 0;
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifndef FINE_PRUNING_ENABLED
{
if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrCoarseLights;
}
#else
{
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
}
#endif
//
if(t<LIGHTCATEGORY_COUNT) ldsCategoryListCount[t]=0;
#ifdef USE_FEATURE_FLAGS
if(t==0) ldsFeatureFlags=0;
#endif
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsCategoryListCount[_LightVolumeData[prunedList[i]].lightCategory], 1);
#ifdef USE_FEATURE_FLAGS
InterlockedOr(ldsFeatureFlags, _LightVolumeData[prunedList[i]].featureFlags);
#endif
}
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
//MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
#endif
#ifdef USE_FEATURE_FLAGS
if(t == 0)
{
uint variant = FeatureFlagsToTileVariant(ldsFeatureFlags);
uint offset;
InterlockedAdd(g_DispatchIndirectBuffer[variant * 3 + 0], 1, offset);
g_TileList[variant*nrTiles + offset] = (tileIDX.y << 16) + tileIDX.x;
}
#endif
// write lights to global buffers
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
// All our cull data are in the same list, but at render time envLights are separated so we need to shit the index
// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT] = {0, 0, _EnvLightIndexShift}; // 3 for now, will throw an error if we change LIGHTCATEGORY_COUNT
for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
{
int nrLightsFinal = ldsCategoryListCount[category];
int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
// We remap the prunedList index to the original LightData / EnvLightData indices
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2 * l - 1 + localOffs] - shiftIndex[category];
uint uHigh = prunedList[2 * l + 0 + localOffs] - shiftIndex[category];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}
localOffs += nrLightsFinal;
offs += (nrTilesX*nrTilesY);
}
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
if(threadID==0) lightOffsSph = 0;
// make a copy of coarseList in prunedList.
int l;
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
prunedList[l]=coarseList[l];
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
#if USE_LEFTHAND_CAMERASPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lightData = g_data[prunedList[l]];
if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius) )
{
unsigned int uIndex;
InterlockedAdd(lightOffsSph, 1, uIndex);
coarseList[uIndex]=prunedList[l]; // read from the original copy of coarseList which is backed up in prunedList
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
return lightOffsSph;
}
#endif
#ifdef FINE_PRUNING_ENABLED
// initializes ldsNrLightsFinal with the number of accepted lights.
// all accepted entries delivered in prunedList[].
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
{
uint t = threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint uLightsFlags[2] = {0,0};
int l=0;
// need this outer loop even on xb1 and ps4 since direct lights and
// reflection lights are kept in separate regions.
while(l<iNrCoarseLights)
{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
// spot
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_CONE)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lightData.lightPos.xyz;
float distSq = dot(fromLight,fromLight);
const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz); // spotDir = lightData.lightAxisZ.xyz
float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );
float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// sphere
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_SPHERE)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lightData.lightPos.xyz;
float3 toLight = vLp - vVPos;
float distSq = dot(toLight,toLight);
if(lightData.radiusSq>distSq) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// Box
while(l<iNrCoarseLights && uLightVolume==LIGHTVOLUMETYPE_BOX)
{
LightVolumeData lightData = _LightVolumeData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lightData.lightPos.xyz - vVPos;
float3 dist = float3( dot(toLight, lightData.lightAxisX), dot(toLight, lightData.lightAxisY), dot(toLight, lightData.lightAxisZ) );
dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange; // not as efficient as it could be
if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLightVolume = l<iNrCoarseLights ? _LightVolumeData[idxCoarse].lightVolume : 0;
}
// in case we have some corrupt data make sure we terminate
if(uLightVolume >=LIGHTVOLUMETYPE_COUNT) ++l;
}
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
if(t==0) ldsNrLightsFinal = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
}
#endif

183
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/Resources/shadeopaque.compute


#pragma kernel ShadeOpaque_Fptl SHADE_OPAQUE_ENTRY=ShadeOpaque_Fptl USE_FPTL_LIGHTLIST
#pragma kernel ShadeOpaque_Fptl_DebugLighting SHADE_OPAQUE_ENTRY=ShadeOpaque_Fptl_DebugLighting USE_FPTL_LIGHTLIST LIGHTING_DEBUG
#pragma kernel ShadeOpaque_Clustered SHADE_OPAQUE_ENTRY=ShadeOpaque_Clustered USE_CLUSTERED_LIGHTLIST
#pragma kernel ShadeOpaque_Clustered_DebugLighting SHADE_OPAQUE_ENTRY=ShadeOpaque_Clustered_DebugLighting USE_CLUSTERED_LIGHTLIST LIGHTING_DEBUG
#pragma #pragma enable_d3d11_debug_symbols
// Split lighting is required for the SSS pass.
// Not currently possible since we need to access the stencil buffer from the compute shader.
// #pragma multi_compile _ OUTPUT_SPLIT_LIGHTING
#define LIGHTLOOP_TILE_PASS 1
#define LIGHTLOOP_TILE_DIRECT 1
#define LIGHTLOOP_TILE_INDIRECT 1
#define LIGHTLOOP_TILE_ALL 1
//-------------------------------------------------------------------------------------
// Include
//-------------------------------------------------------------------------------------
#include "../../../../ShaderLibrary/Common.hlsl"
#include "../../../Debug/HDRenderPipelineDebug.cs.hlsl"
#include "../../../Debug/DebugLighting.hlsl"
// Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
// deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem),
// the deferred shader will require to use multicompile.
#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
#include "../../../ShaderConfig.cs.hlsl"
#include "../../../ShaderVariables.hlsl"
#include "../../../Lighting/Lighting.hlsl" // This include Material.hlsl
//-------------------------------------------------------------------------------------
// variable declaration
//-------------------------------------------------------------------------------------
DECLARE_GBUFFER_TEXTURE(_GBufferTexture);
#ifdef OUTPUT_SPLIT_LIGHTING
RWTexture2D<float4> specularLightingUAV;
RWTexture2D<float3> diffuseLightingUAV;
#else
RWTexture2D<float4> combinedLightingUAV;
#endif
[numthreads(16, 16, 1)]
void SHADE_OPAQUE_ENTRY(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupId : SV_GroupID)
{
uint2 pixelCoord = dispatchThreadId;
//PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, uint2(pixelCoord.xy) / GetTileSize());
PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, groupId);
float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
float3 V = GetWorldSpaceNormalizeViewDir(posInput.positionWS);
FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS);
BSDFData bsdfData;
float3 bakeDiffuseLighting;
DECODE_FROM_GBUFFER(gbuffer, bsdfData, bakeDiffuseLighting);
PreLightData preLightData = GetPreLightData(V, posInput, bsdfData);
float3 diffuseLighting;
float3 specularLighting;
LightLoop(V, posInput, preLightData, bsdfData, bakeDiffuseLighting, diffuseLighting, specularLighting);
#ifdef OUTPUT_SPLIT_LIGHTING
specularLightingUAV[pixelCoord] = float4(specularLighting, 1.0);
diffuseLightingUAV[pixelCoord] = diffuseLighting;
#else
combinedLightingUAV[pixelCoord] = float4(diffuseLighting + specularLighting, 1.0);
#endif
}
#pragma kernel ShadeOpaque_Direct_Fptl SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Fptl USE_FPTL_LIGHTLIST
#pragma kernel ShadeOpaque_Direct_Fptl_DebugLighting SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Fptl_DebugLighting USE_FPTL_LIGHTLIST LIGHTING_DEBUG
#pragma kernel ShadeOpaque_Direct_Clustered SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Clustered USE_CLUSTERED_LIGHTLIST
#pragma kernel ShadeOpaque_Direct_Clustered_DebugLighting SHADE_OPAQUE_ENTRY=ShadeOpaque_Direct_Clustered_DebugLighting USE_CLUSTERED_LIGHTLIST LIGHTING_DEBUG
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant0 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant0 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=0
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant1 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant1 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=1
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant2 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant2 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=2
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant3 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant3 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=3
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant4 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant4 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=4
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant5 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant5 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=5
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant6 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant6 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=6
#pragma kernel ShadeOpaque_Indirect_Fptl_Variant7 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Fptl_Variant7 USE_FPTL_LIGHTLIST USE_INDIRECT VARIANT=7
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant0 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant0 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=0
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant1 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant1 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=1
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant2 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant2 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=2
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant3 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant3 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=3
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant4 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant4 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=4
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant5 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant5 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=5
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant6 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant6 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=6
#pragma kernel ShadeOpaque_Indirect_Clustered_Variant7 SHADE_OPAQUE_ENTRY=ShadeOpaque_Indirect_Clustered_Variant7 USE_CLUSTERED_LIGHTLIST USE_INDIRECT VARIANT=7
#pragma #pragma enable_d3d11_debug_symbols
// Split lighting is required for the SSS pass.
// Not currently possible since we need to access the stencil buffer from the compute shader.
// #pragma multi_compile _ OUTPUT_SPLIT_LIGHTING
#define LIGHTLOOP_TILE_PASS 1
#define LIGHTLOOP_TILE_DIRECT 1
#define LIGHTLOOP_TILE_INDIRECT 1
#define LIGHTLOOP_TILE_ALL 1
//-------------------------------------------------------------------------------------
// Include
//-------------------------------------------------------------------------------------
#include "../../../../ShaderLibrary/Common.hlsl"
#include "../../../Debug/HDRenderPipelineDebug.cs.hlsl"
#include "../../../Debug/DebugLighting.hlsl"
// Note: We have fix as guidelines that we have only one deferred material (with control of GBuffer enabled). Mean a users that add a new
// deferred material must replace the old one here. If in the future we want to support multiple layout (cause a lot of consistency problem),
// the deferred shader will require to use multicompile.
#define UNITY_MATERIAL_LIT // Need to be define before including Material.hlsl
#include "../../../ShaderConfig.cs.hlsl"
#include "../../../ShaderVariables.hlsl"
#include "../../../Lighting/Lighting.hlsl" // This include Material.hlsl
#include "../../../Lighting/TilePass/FeatureFlags.hlsl"
//-------------------------------------------------------------------------------------
// variable declaration
//-------------------------------------------------------------------------------------
DECLARE_GBUFFER_TEXTURE(_GBufferTexture);
#ifdef OUTPUT_SPLIT_LIGHTING
RWTexture2D<float4> specularLightingUAV;
RWTexture2D<float3> diffuseLightingUAV;
#else
RWTexture2D<float4> combinedLightingUAV;
#endif
#if USE_INDIRECT
uint g_TileListOffset;
StructuredBuffer<uint> g_TileList;
// Indirect
[numthreads(16, 16, 1)]
void SHADE_OPAQUE_ENTRY(uint2 groupThreadId : SV_GroupThreadID, uint groupId : SV_GroupID)
{
uint tileIndex = g_TileList[g_TileListOffset + groupId];
uint2 tileCoord = uint2(tileIndex & 0xFFFF, tileIndex >> 16);
uint2 pixelCoord = tileCoord * GetTileSize() + groupThreadId;
PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, tileCoord);
uint featureFlags = FeatureFlagsToTileVariant(VARIANT);
#else
// Direct
[numthreads(16, 16, 1)]
void SHADE_OPAQUE_ENTRY(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupId : SV_GroupID)
{
uint2 pixelCoord = dispatchThreadId;
PositionInputs posInput = GetPositionInput(pixelCoord.xy, _ScreenSize.zw, groupId);
uint featureFlags = 0xFFFFFFFF;
#endif
float depth = LOAD_TEXTURE2D(_MainDepthTexture, posInput.unPositionSS).x;
UpdatePositionInput(depth, _InvViewProjMatrix, _ViewProjMatrix, posInput);
float3 V = GetWorldSpaceNormalizeViewDir(posInput.positionWS);
FETCH_GBUFFER(gbuffer, _GBufferTexture, posInput.unPositionSS);
BSDFData bsdfData;
float3 bakeDiffuseLighting;
DECODE_FROM_GBUFFER(gbuffer, bsdfData, bakeDiffuseLighting);
PreLightData preLightData = GetPreLightData(V, posInput, bsdfData);
float3 diffuseLighting;
float3 specularLighting;
LightLoop(V, posInput, preLightData, bsdfData, bakeDiffuseLighting, diffuseLighting, specularLighting);
#ifdef OUTPUT_SPLIT_LIGHTING
specularLightingUAV[pixelCoord] = float4(specularLighting, 1.0);
diffuseLightingUAV[pixelCoord] = diffuseLighting;
#else
combinedLightingUAV[pixelCoord] = float4(diffuseLighting + specularLighting, 1.0);
#endif
}

1001
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs
文件差异内容过多而无法显示
查看文件

9
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/TilePass.cs.hlsl


#define HAS_COOKIE_TEXTURE (2)
#define IS_BOX_PROJECTED (4)
#define HAS_SHADOW (8)
#define FEATURE_FLAG_PUNCTUAL_LIGHT (1)
#define FEATURE_FLAG_AREA_LIGHT (2)
#define NUM_FEATURE_VARIANTS (8)
// Generated from UnityEngine.Experimental.Rendering.HDPipeline.TilePass.SFiniteLightBound
// PackingRules = Exact

float3 lightAxisZ;
float cotan;
float3 boxInnerDist;
float unused;
uint featureFlags;
float3 boxInvRange;
float unused2;
};

{
return value.boxInnerDist;
}
float GetUnused(LightVolumeData value)
uint GetFeatureFlags(LightVolumeData value)
return value.unused;
return value.featureFlags;
}
float3 GetBoxInvRange(LightVolumeData value)
{

31
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl


#ifndef __FEATURE_FLAGS_H__
#define __FEATURE_FLAGS_H__
#include "TilePass.cs.hlsl"
uint FeatureFlagsToTileVariant(uint featureFlags)
{
if(featureFlags & FEATURE_FLAG_AREA_LIGHT)
{
return 1;
}
else
{
return 0;
}
}
uint TileVariantToFeatureFlags(uint variant)
{
if(variant == 0)
{
return 0xFFFFFFFF & (~FEATURE_FLAG_AREA_LIGHT);
}
else if(variant == 1)
{
return 0xFFFFFFFF;
}
}
#endif

9
Assets/ScriptableRenderPipeline/HDRenderPipeline/Lighting/TilePass/FeatureFlags.hlsl.meta


fileFormatVersion: 2
guid: 0ef495ee49d152b419e9fd62aa24523f
timeCreated: 1489679489
licenseType: Pro
ShaderImporter:
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存