sebastienlagarde
8 年前
当前提交
31e4c8ca
共有 13 个文件被更改,包括 1968 次插入 和 23 次删除
-
46Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs
-
6Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/Resources/Deferred.shader
-
166Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs
-
9Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources.meta
-
267Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
-
9Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute.meta
-
555Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
-
9Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute.meta
-
440Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute
-
9Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute.meta
-
466Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute
-
9Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/scrbound.compute.meta
|
|||
fileFormatVersion: 2 |
|||
guid: 10637537837597a41861afbe118b246a |
|||
folderAsset: yes |
|||
timeCreated: 1479306736 |
|||
licenseType: Pro |
|||
DefaultImporter: |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
#pragma kernel BigTileLightListGen |
|||
|
|||
#include "..\common\ShaderBase.h" |
|||
#include "LightDefinitions.cs.hlsl" |
|||
|
|||
#include "LightingConvexHullUtils.hlsl" |
|||
#include "SortingComputeUtils.hlsl" |
|||
|
|||
#define EXACT_EDGE_TESTS |
|||
#define PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
|
|||
#define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1) |
|||
|
|||
|
|||
uniform int g_iNrVisibLights; |
|||
uniform uint2 g_viDimensions; |
|||
uniform float4x4 g_mInvScrProjection; |
|||
uniform float4x4 g_mScrProjection; |
|||
uniform float g_fNearPlane; |
|||
uniform float g_fFarPlane; |
|||
|
|||
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 ); |
|||
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 ); |
|||
StructuredBuffer<SFiniteLightBound> g_data : register( t3 ); |
|||
|
|||
|
|||
#define NR_THREADS 64 |
|||
|
|||
// output buffer |
|||
RWBuffer<uint> g_vLightList : register( u0 ); |
|||
|
|||
|
|||
// 2kB (room for roughly 30 wavefronts) |
// NOTE(review): the 2kB figure depends on MAX_NR_BIGTILE_LIGHTS_PLUSONE, which is defined outside this file - confirm.
// Group-shared list of candidate light indices for the big tile processed by this thread group.
// Entries rejected by the sphere / exact-edge culling passes are overwritten with 0xffffffff.
|||
groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE]; |
|||
// Group-shared counter: first used as the atomic append cursor for lightsListLDS, later reused
// to count the entries that survived culling.
groupshared uint lightOffs; |
|||
|
|||
|
|||
// Converts a depth-buffer-space value into linear depth.
// The point (0, 0, zDptBufSpace, 1) is transformed by g_mInvScrProjection and the
// resulting z is divided by w.
//   zDptBufSpace : depth-buffer value, 0 is near and 1 is far.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
    return unprojected.z / unprojected.w;
}
|||
|
|||
|
|||
// Reconstructs a view-space position from a screen position and a linear depth.
// The scale terms come from g_mScrProjection[0].x / [1].y and the offset terms from
// g_mScrProjection[0].z / [1].z. The sign convention depends on USE_LEFTHAND_CAMERASPACE.
//   v2ScrPos  : screen-space position, in the units g_mScrProjection maps to.
//   fLinDepth : linear depth the reconstructed ray is scaled by.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const float2 scale  = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
    const float2 center = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);

#if USE_LEFTHAND_CAMERASPACE
    const float2 rayXY = (v2ScrPos - center) / scale;
#else
    const float2 rayXY = -((v2ScrPos + center) / scale);
#endif

    return fLinDepth * float3(rayXY, 1.0);
}
|||
|
|||
// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x/y scale terms
// of g_mScrProjection: the diagonal extent of one pixel at a depth of one.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 invScale = float2(1.0 / g_mScrProjection[0].x, 1.0 / g_mScrProjection[1].y);
    return length(invScale);
}
|||
|
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); |
|||
#endif |
|||
|
|||
#ifdef EXACT_EDGE_TESTS |
|||
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR); |
|||
#endif |
|||
|
|||
|
|||
|
|||
|
|||
// Kernel: builds the light list of one 64x64 pixel "big tile" (one thread group per tile).
//
// Steps:
//   1. Coarse pass: every thread tests a strided subset of the visible lights against the
//      tile's screen-space rectangle, using the min bounds at g_vBoundsBuffer[l] and the max
//      bounds at g_vBoundsBuffer[l + g_iNrVisibLights]; hits are appended to lightsListLDS.
//   2. Optional sphere and exact-edge culling passes mark rejected entries with 0xffffffff.
//   3. The list is sorted and the entries that are still valid light indices are counted.
//   4. The result is written to g_vLightList: slot 0 of the tile's
//      MAX_NR_BIGTILE_LIGHTS_PLUSONE-sized segment holds the count, the following slots
//      hold the light indices.
//
//   threadID  : flattened thread index inside the group (0..NR_THREADS-1).
//   u3GroupID : group id; .xy is the big-tile coordinate.
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t = threadID;

    uint iWidth = g_viDimensions.x;
    uint iHeight = g_viDimensions.y;
    uint nrBigTilesX = (iWidth+63)/64;

    if(t==0) lightOffs = 0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    uint2 viTilLL = 64*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );       // not width and height minus 1 since viTilUR represents the end of the tile corner.

    // tile rectangle normalized by the screen dimensions, to match the bounds buffer
    float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
    float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

    // build coarse list using AABB
    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
    {
        const float2 vMi = g_vBoundsBuffer[l].xy;
        const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;

        if( all(vMa>vTileLL) && all(vMi<vTileUR))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;       // add to light list
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // lightOffs may exceed the capacity when more lights overlap than fit; clamp it.
    int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
#endif

#ifdef EXACT_EDGE_TESTS
    CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
#endif

    // sort lights
    SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIGTILE_LIGHTS_PLUSONE, t, NR_THREADS);

    // Count the entries that are still valid light indices (culled ones hold 0xffffffff).
    lightOffs = 0;
    GroupMemoryBarrierWithGroupSync();
    for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
    GroupMemoryBarrierWithGroupSync();
    iNrCoarseLights = lightOffs;

    int offs = tileIDX.y*nrBigTilesX + tileIDX.x;

    // Slot 0 receives the light count, slots 1..count receive the light indices.
    // The selector must be the slot index and not the thread index: thread 0 also writes
    // slots 64, 128, ... and those must receive light indices. The index is clamped because
    // both operands of the conditional may be evaluated.
    for(int iSlot=t; iSlot<(iNrCoarseLights+1); iSlot+=NR_THREADS)
        g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + iSlot] = iSlot==0 ? iNrCoarseLights : lightsListLDS[max(iSlot-1, 0)];
}
|||
|
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
// Rejects coarse-list lights whose bounding sphere does not overlap the tile.
// Each thread handles a strided subset of lightsListLDS; rejected entries are overwritten
// with 0xffffffff. Ends with a group barrier on platforms where it is enabled.
//   threadID         : thread index inside the group.
//   iNrCoarseLights  : number of entries in lightsListLDS to test.
//   screenCoordinate : screen position the tile test ray is built from (the caller passes
//                      the clamped tile center).
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
#if USE_LEFTHAND_CAMERASPACE
    const float unitDepth = 1.0;
#else
    const float unitDepth = -1.0;
#endif
    const float3 tileRay = GetViewPosFromLinDepth(screenCoordinate, unitDepth);

    // scale by half a tile (a big tile is 64 pixels wide)
    const float pixelDiagAtDepthOne = GetOnePixDiagWorldDistAtDepthOne();
    const float halfTileAtDepthOne = 32 * pixelDiagAtDepthOne;

    for (int slot = threadID; slot < iNrCoarseLights; slot += NR_THREADS)
    {
        SFiniteLightBound bound = g_data[lightsListLDS[slot]];

        const bool overlaps = DoesSphereOverlapTile(tileRay, halfTileAtDepthOne, bound.center.xyz, bound.radius);
        if (!overlaps)
        {
            lightsListLDS[slot] = 0xffffffff;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
|||
#endif |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
#ifdef EXACT_EDGE_TESTS |
|||
// Returns one of the 8 view-space corners of the tile volume.
// Bit 0 of i selects x from viTilLL (0) or viTilUR (1), bit 1 does the same for y, and
// bit 2 selects the near plane (g_fNearPlane) or fTileFarPlane for the depth.
// The depth is negated when USE_LEFTHAND_CAMERASPACE is not set.
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
    const bool useLLx  = (i & 1) == 0;
    const bool useLLy  = (i & 2) == 0;
    const bool useNear = (i & 4) == 0;

    const float cornerX = useLLx ? viTilLL.x : viTilUR.x;
    const float cornerY = useLLy ? viTilLL.y : viTilUR.y;
    float depth = useNear ? g_fNearPlane : fTileFarPlane;
#if !USE_LEFTHAND_CAMERASPACE
    depth = -depth;
#endif

    return GetViewPosFromLinDepth(float2(cornerX, cornerY), depth);
}
|||
|
|||
// Returns the start point (vP0) and direction (vE0) of tile-frustum edge e0.
// e0 >> 2 is the section (0: side edges, 1: near edges, 2: far edges) and e0 & 3 selects
// the edge inside the section. Side edges use the start vertex itself as the direction;
// near/far edges use a signed unit X or Y axis.
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    const int section = e0 >> 2;
    const int corner  = e0 & 0x3;

    // offset by 4 at section 2 so that far edges start on far-plane vertices
    const int vertexIdx = corner + (2 * (section & 0x2));
    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), vertexIdx, fTileFarPlane);

    if (section == 0)
    {
        vE0 = vP0;
    }
    else
    {
        const float  axisSign = ((corner & 0x2) == 0) ? 1.0f : (-1.0f);
        const float3 axisDir  = ((corner & 0x1) == (corner >> 1)) ? float3(1,0,0) : float3(0,1,0);
        vE0 = axisSign * axisDir;
    }
}
|||
|
|||
// Separating-axis style culling of the coarse light list against the tile frustum.
// For each non-sphere light, every (hull edge, frustum edge) pair yields a candidate plane
// normal; if all hull vertices lie strictly on one side and all tile vertices on the other,
// the light is removed by writing 0xffffffff into its lightsListLDS slot.
// The edge pairs of one light are distributed over the threads of the group.
//   threadID        : thread index inside the group.
//   iNrCoarseLights : number of entries in lightsListLDS.
//   viTilLL/viTilUR : pixel-space corners of the tile.
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
{
    const bool bOnlyNeedFrustumSideEdges = true;
    // max 8 since we never need to test 4 far edges of frustum since they are identical
    // vectors to near edges and plane is placed at vP0 on light hull.
    const int frustEdgeCount = bOnlyNeedFrustumSideEdges ? 4 : 8;
    const int edgePairCount  = 12 * frustEdgeCount;

    for (int slot = 0; slot < iNrCoarseLights; slot++)
    {
        const int lightIdx = lightsListLDS[slot];

        // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
        [branch] if (lightIdx < (uint) g_iNrVisibLights && g_vLightData[lightIdx].lightType != SPHERE_LIGHT)
        {
            SFiniteLightBound bound = g_data[lightIdx];

            const float3 axisX   = bound.boxAxisX.xyz;
            const float3 axisY   = bound.boxAxisY.xyz;
            const float3 axisZ   = -bound.boxAxisZ.xyz;    // flip axis (so it points away from the light direction for a spot-light)
            const float3 center  = bound.center.xyz;
            const float2 scaleXY = bound.scaleXY;

            for (int pair = threadID; pair < edgePairCount; pair += NR_THREADS)
            {
                const int hullEdge  = (int) (((uint) pair) / ((uint) frustEdgeCount));    // should become a shift right
                const int frustEdge = pair - hullEdge * frustEdgeCount;

                int curVert = 0, twinVert = 0;
                float3 hullP, hullE;
                GetHullEdge(curVert, twinVert, hullP, hullE, hullEdge, axisX, axisY, axisZ, center, scaleXY);

                float3 frustP, frustE;
                GetFrustEdge(frustP, frustE, frustEdge, viTilLL, viTilUR, g_fFarPlane);

                // potential separation plane
                const float3 planeN = cross(hullE, frustE);

                // classify the light hull: only need to test 7 verts (technically just 6).
                bool hullHasPos = false, hullHasNeg = false;
                for (int k = 1; k < 8; k++)
                {
                    const int v = (curVert + k) & 0x7;
                    const float3 hullVert = GetHullVertex(axisX, axisY, axisZ, center, scaleXY, v);
                    const float dist = (twinVert == v) ? 0.0 : dot(planeN, hullVert - hullP);
                    hullHasPos = hullHasPos || (dist > 0);
                    hullHasNeg = hullHasNeg || (dist < 0);
                }
                // +1: all on the positive side, -1: all on the negative side, 0: straddling or degenerate
                const int hullSide = (hullHasPos ? 1 : 0) - (hullHasNeg ? 1 : 0);

                // classify the tile frustum against the same plane
                bool tileHasPos = false, tileHasNeg = false;
                for (int c = 0; c < 8; c++)
                {
                    const float3 tileVert = GetTileVertex(viTilLL, viTilUR, c, g_fFarPlane);
                    const float dist = dot(planeN, tileVert - hullP);
                    tileHasPos = tileHasPos || (dist > 0);
                    tileHasNeg = tileHasNeg || (dist < 0);
                }
                const int tileSide = (tileHasPos ? 1 : 0) - (tileHasNeg ? 1 : 0);

                // strictly opposite sides means the plane separates hull and tile
                if ((hullSide * tileSide) < 0)
                {
                    lightsListLDS[slot] = 0xffffffff;
                }
            }
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
|||
#endif |
|
|||
fileFormatVersion: 2 |
|||
guid: d190937525dcb3949942a0a93295d2fe |
|||
timeCreated: 1479306737 |
|||
licenseType: Pro |
|||
ComputeShaderImporter: |
|||
currentAPIMask: 4 |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT |
|||
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE |
|||
#pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED |
|||
#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING |
|||
#pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING |
|||
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING |
|||
#pragma kernel ClearAtomic |
|||
|
|||
|
|||
#include "..\common\ShaderBase.h" |
|||
#include "LightDefinitions.cs.hlsl" |
|||
|
|||
#include "LightingConvexHullUtils.hlsl" |
|||
|
|||
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) |
|||
#include "SortingComputeUtils.hlsl" |
|||
#endif |
|||
|
|||
//#define EXACT_EDGE_TESTS |
|||
#define PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
#define CONV_HULL_TEST_ENABLED |
|||
|
|||
uniform int g_iNrVisibLights; |
|||
uniform float4x4 g_mInvScrProjection; |
|||
uniform float4x4 g_mScrProjection; |
|||
|
|||
uniform float g_fClustScale; |
|||
uniform float g_fClustBase; |
|||
uniform float g_fNearPlane; |
|||
uniform float g_fFarPlane; |
|||
uniform int g_iLog2NumClusters; // numClusters = (1<<g_iLog2NumClusters) |
|||
|
|||
#include "ClusteredUtils.h" |
|||
|
|||
|
|||
#ifdef MSAA_ENABLED |
|||
Texture2DMS<float> g_depth_tex : register( t0 ); |
|||
#else |
|||
Texture2D g_depth_tex : register( t0 ); |
|||
#endif |
|||
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 ); |
|||
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 ); |
|||
StructuredBuffer<SFiniteLightBound> g_data : register( t3 ); |
|||
|
|||
#ifdef USE_TWO_PASS_TILED_LIGHTING |
|||
Buffer<uint> g_vBigTileLightList : register( t4 ); |
|||
#endif |
|||
|
|||
|
|||
#define NR_THREADS 64 |
|||
|
|||
// output buffer |
|||
RWBuffer<uint> g_vLayeredLightList : register( u0 ); |
|||
RWBuffer<uint> g_LayeredOffset : register( u1 ); |
|||
RWBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 ); |
|||
|
|||
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE |
|||
RWBuffer<float> g_logBaseBuffer : register( u3 ); |
|||
#endif |
|||
|
|||
|
|||
#define MAX_NR_COARSE_ENTRIES 128 |
|||
|
|||
// Group-shared list of light indices that passed the coarse screen-space AABB test for this tile.
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES]; |
|||
// Packed cluster ranges: each uint holds the (min, max) cluster index of two consecutive
// coarseList entries, 8 bits per value (light 2*l in the low 16 bits, light 2*l+1 in the high 16 bits).
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2]; |
|||
// Plane equations of the lights currently being tested: 6 planes for each of 4 lights.
groupshared float4 lightPlanes[4*6]; |
|||
|
|||
// Atomic append cursor used while building coarseList.
groupshared uint lightOffs; |
|||
|
|||
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE |
|||
// Maximum depth found in the tile; the kernel stores the bit pattern of a float here via InterlockedMax.
groupshared int ldsZMax; |
|||
#endif |
|||
|
|||
#ifdef EXACT_EDGE_TESTS |
|||
// Set to non-zero by any thread that finds a separating plane for the light under test.
groupshared uint ldsIsLightInvisible; |
|||
// Write cursor used when compacting coarseList after the exact edge tests.
groupshared uint lightOffs2; |
|||
#endif |
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
// Number of coarseList entries left after the spherical intersection tests.
groupshared uint lightOffsSph; |
|||
#endif |
|||
|
|||
|
|||
// Converts a depth-buffer-space value into linear depth.
// The point (0, 0, zDptBufSpace, 1) is transformed by g_mInvScrProjection and the
// resulting z is divided by w.
//   zDptBufSpace : depth-buffer value, 0 is near and 1 is far.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
    return unprojected.z / unprojected.w;
}
|||
|
|||
|
|||
// Reconstructs a view-space position from a screen position and a linear depth.
// The scale terms come from g_mScrProjection[0].x / [1].y and the offset terms from
// g_mScrProjection[0].z / [1].z. The sign convention depends on USE_LEFTHAND_CAMERASPACE.
//   v2ScrPos  : screen-space position, in the units g_mScrProjection maps to.
//   fLinDepth : linear depth the reconstructed ray is scaled by.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const float2 scale  = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
    const float2 center = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);

#if USE_LEFTHAND_CAMERASPACE
    const float2 rayXY = (v2ScrPos - center) / scale;
#else
    const float2 rayXY = -((v2ScrPos + center) / scale);
#endif

    return fLinDepth * float3(rayXY, 1.0);
}
|||
|
|||
// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x/y scale terms
// of g_mScrProjection: the diagonal extent of one pixel at a depth of one.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 invScale = float2(1.0 / g_mScrProjection[0].x, 1.0 / g_mScrProjection[1].y);
    return length(invScale);
}
|||
|
|||
#ifdef EXACT_EDGE_TESTS |
|||
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane); |
|||
#endif |
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); |
|||
#endif |
|||
|
|||
|
|||
// returns 1 for intersection and 0 for none |
|||
|
|||
float4 FetchPlane(int l, int p); |
|||
|
|||
|
|||
// Tests whether coarse-list light l touches cluster k of the current tile.
// First checks that k lies inside the light's packed [min, max] cluster range from
// clusterIdxs. When CONV_HULL_TEST_ENABLED is defined, the hit is additionally rejected if
// all 8 corners of the cluster cell are on the positive side of any of the light's 6 planes
// (read from lightPlanes, which holds the planes of lights in groups of four).
//   l               : index into the coarse light list.
//   k               : cluster index.
//   viTilLL/viTilUR : pixel-space corners of the tile.
//   suggestedBase   : log base passed to ClusterIdxToZ to get the cell's depth range.
// Returns true for intersection and false for none.
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
{
    // two lights are packed per uint, 16 bits each: low byte = min cluster, high byte = max cluster
    const unsigned int packedRange = (clusterIdxs[l >> 1] >> (16 * (l & 1))) & 0xffff;
    const unsigned int minCluster  = packedRange & 0xff;
    const unsigned int maxCluster  = (packedRange >> 8) & 0xff;

    bool hit = (minCluster <= ((uint) k)) && (((uint) k) <= maxCluster);
    if (hit)
    {
#ifdef CONV_HULL_TEST_ENABLED
        const float cellNearZ = ClusterIdxToZ(k, suggestedBase);
        const float cellFarZ  = ClusterIdxToZ(k + 1, suggestedBase);

        for (int planeIdx = 0; planeIdx < 6; planeIdx++)
        {
            const float4 planeEq = lightPlanes[6 * (l & 3) + planeIdx];

            bool allCornersOutside = true;
            for (int corner = 0; corner < 8; corner++)
            {
                const float cx = ((corner & 1) == 0) ? viTilLL.x : viTilUR.x;
                const float cy = ((corner & 2) == 0) ? viTilLL.y : viTilUR.y;
                const float cz = ((corner & 4) == 0) ? cellNearZ : cellFarZ;
                const float3 cornerPos = GetViewPosFromLinDepth(float2(cx, cy), cz);

                if (!(dot(planeEq, float4(cornerPos, 1.0)) > 0))
                {
                    allCornersOutside = false;
                }
            }

            if (allCornersOutside)
            {
                hit = false;
            }
        }
#endif
    }

    return hit;
}
|||
|
|||
// Range-only version of CheckIntersection: returns true when cluster index k lies inside
// the packed [min, max] cluster range stored in clusterIdxs for coarse-list light l.
bool CheckIntersectionBasic(int l, int k)
{
    // two lights are packed per uint, 16 bits each: low byte = min cluster, high byte = max cluster
    const unsigned int packedRange = (clusterIdxs[l >> 1] >> (16 * (l & 1))) & 0xffff;
    const unsigned int minCluster  = packedRange & 0xff;
    const unsigned int maxCluster  = (packedRange >> 8) & 0xff;

    return (minCluster <= ((uint) k)) && (((uint) k) <= maxCluster);
}
|||
|
|||
|
|||
// Kernel: builds the clustered light lists of one 16x16 pixel tile (one thread group per tile).
// LIGHTLISTGEN is replaced by the kernel name through the #pragma kernel lines.
//
// Steps:
//   1. Optionally (ENABLE_DEPTH_TEXTURE_BACKPLANE) find the maximum non-sky depth of the tile.
//   2. Build the coarse light list with a screen-space AABB test, either over all visible
//      lights or over the big-tile list (USE_TWO_PASS_TILED_LIGHTING).
//   3. Optionally cull with sphere tests and exact edge tests, then sort the list.
//   4. Pack each light's [min, max] cluster range into clusterIdxs.
//   5. Thread i handles cluster i: it reserves space in g_vLayeredLightList through the
//      atomic counter g_LayeredSingleIdxBuffer[0], writes the lights that intersect its
//      cluster, and stores start offset plus count (5 bits) per light model in g_LayeredOffset.
//
//   threadID  : flattened thread index inside the group (0..NR_THREADS-1).
//   u3GroupID : group id; .xy is the tile coordinate.
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    uint iWidth;
    uint iHeight;
#ifdef MSAA_ENABLED
    uint iNumSamplesMSAA;
    g_depth_tex.GetDimensions(iWidth, iHeight, iNumSamplesMSAA);
#else
    g_depth_tex.GetDimensions(iWidth, iHeight);
#endif
    uint nrTilesX = (iWidth+15)/16;
    uint nrTilesY = (iHeight+15)/16;

    uint2 viTilLL = 16*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) );       // not width and height minus 1 since viTilUR represents the end of the tile corner.

    if(t==0)
    {
        lightOffs = 0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
        ldsZMax = 0;
#endif
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    float dpt_ma=1.0;

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    // establish min and max depth first
    dpt_ma=0.0;

    // 256 pixels per tile, each thread visits a strided subset
    for(int idx=t; idx<256; idx+=NR_THREADS)
    {
        uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
        for(int i=0; i<iNumSamplesMSAA; i++)
        {
            const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
#else
            const float fDpth = FetchDepth(g_depth_tex, uPixCrd);
#endif
            if(fDpth<VIEWPORT_SCALE_Z)     // if not skydome
            {
                dpt_ma = max(fDpth, dpt_ma);
            }
#ifdef MSAA_ENABLED
        }
#endif
    }

    // reduce the per-thread maxima through the bit pattern of the float
    InterlockedMax(ldsZMax, asuint(dpt_ma) );

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
    dpt_ma = asfloat(ldsZMax);
#endif

    // tile rectangle normalized by the screen dimensions, to match the bounds buffer
    float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
    float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);

    // build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
    int NrBigTilesX = (nrTilesX+3)>>2;
    const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);        // map the idx to 64x64 tiles
    int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
    for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
    {
        int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
    {
#endif
        const float3 vMi = g_vBoundsBuffer[l];
        const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];

        if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l;       // add to light list
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // lightOffs may exceed the capacity when more lights overlap than fit; clamp it.
    int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE

#if USE_LEFTHAND_CAMERASPACE
    float fTileFarPlane = GetLinearDepth(dpt_ma);
#else
    float fTileFarPlane = -GetLinearDepth(dpt_ma);
#endif
    float suggestedBase = SuggestLogBase50(fTileFarPlane);
#else
    float fTileFarPlane = g_fFarPlane;
    float suggestedBase = g_fClustBase;
#endif

#ifdef EXACT_EDGE_TESTS
    iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
#endif

    // sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif

    //////////// cell specific code
    {
        // Pack the [min, max] cluster range of two lights per uint (8 bits per value).
        for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
        {
            // For an odd light count the last pair has no second light. Clamp to the last valid
            // entry so that no slot beyond the list is read and used to index g_vBoundsBuffer.
            // The packed half written for it is never read.
            const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights-1)];
            const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
            const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
            const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
            const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));

            clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    int nrClusters = (1<<g_iLog2NumClusters);

    //////////////////////////////////////////////////////////

    // Thread i owns cluster i: count the candidate lights and reserve list space for them.
    uint start = 0;
    int i=(int) t;
    int iSpaceAvail = 0;
    int iSum = 0;
    if(i<nrClusters)
    {
        for(int l=0; l<iNrCoarseLights; l++)
        {
            iSum += (CheckIntersectionBasic(l, i) ? 1 : 0);
        }

        iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES);         // combined storage for both direct lights and reflection
        InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start);        // alloc list memory
    }

    int modelListCount[NR_LIGHT_MODELS]={0,0};     // direct light count and reflection lights
    uint offs = start;
    // Lights are processed four at a time: the first 24 threads fetch the 6 planes of each
    // of the 4 lights into lightPlanes, then every thread tests them against its cluster.
    for(int ll=0; ll<iNrCoarseLights; ll+=4)
    {
        int p = i>>2;
        int m = i&3;
        if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif

        for(int l=ll; l<min(iNrCoarseLights,(ll+4)); l++)
        {
            if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
            {
                uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
                ++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
                g_vLayeredLightList[offs++] = coarseList[l];           // reflection lights will be last since we sorted
            }
        }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
    }

    // Store, per light model, the start offset (low 27 bits) and light count (high 5 bits).
    uint localOffs=0;
    offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
    for(int m=0; m<NR_LIGHT_MODELS; m++)
    {
        int numLights = min(modelListCount[m],31);     // only allow 5 bits
        if(i<nrClusters)
        {
            g_LayeredOffset[offs] = (start+localOffs) | (((uint) numLights)<<27);
            offs += (nrClusters*nrTilesX*nrTilesY);
            localOffs += modelListCount[m];    // use unclamped count for localOffs
        }
    }

#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
#endif
}
|||
|
|||
|
|||
// Returns plane p of the bounding hull of coarse-list light l.
// The light's bound is read from g_data and handed to GetPlaneEq together with the plane index.
//   l : index into coarseList.
//   p : plane index (the caller uses 0..5).
float4 FetchPlane(int l, int p)
{
    SFiniteLightBound bound = g_data[coarseList[l]];

    // flip the Z axis (so it points away from the light direction for a spot-light)
    const float3 flippedAxisZ = -bound.boxAxisZ.xyz;

    return GetPlaneEq(bound.boxAxisX.xyz, bound.boxAxisY.xyz, flippedAxisZ, bound.center.xyz, bound.scaleXY, p);
}
|||
|
|||
|
|||
|
|||
|
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
// Rejects coarse-list lights whose bounding sphere does not overlap the tile, then compacts
// coarseList so that the surviving entries are contiguous.
// The rejection pass is spread over the threads; the compaction is done by thread 0 alone.
//   threadID         : thread index inside the group.
//   iNrCoarseLights  : number of entries in coarseList to test.
//   screenCoordinate : screen position the tile test ray is built from (the caller passes
//                      the clamped tile center).
// Returns the number of lights left in coarseList.
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
#if USE_LEFTHAND_CAMERASPACE
    const float unitDepth = 1.0;
#else
    const float unitDepth = -1.0;
#endif
    const float3 tileRay = GetViewPosFromLinDepth(screenCoordinate, unitDepth);

    // scale by half a tile (a tile is 16 pixels wide)
    const float pixelDiagAtDepthOne = GetOnePixDiagWorldDistAtDepthOne();
    const float halfTileAtDepthOne = 8 * pixelDiagAtDepthOne;

    for (int slot = threadID; slot < iNrCoarseLights; slot += NR_THREADS)
    {
        SFiniteLightBound bound = g_data[coarseList[slot]];

        const bool overlaps = DoesSphereOverlapTile(tileRay, halfTileAtDepthOne, bound.center.xyz, bound.radius);
        if (!overlaps)
        {
            coarseList[slot] = 0xffffffff;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // too greedy to double buffer coarseList lds on this so serializing removal of gaps.
    if (threadID == 0)
    {
        int kept = 0;
        for (int src = 0; src < iNrCoarseLights; src++)
        {
            if (coarseList[src] != 0xffffffff)
            {
                coarseList[kept] = coarseList[src];
                kept++;
            }
        }
        lightOffsSph = kept;
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    return lightOffsSph;
}
|||
#endif |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
#ifdef EXACT_EDGE_TESTS |
|||
|
|||
// Returns one of the 8 view-space corners of the tile volume.
// Bit 0 of i selects x from viTilLL (0) or viTilUR (1), bit 1 does the same for y, and
// bit 2 selects the near plane (g_fNearPlane) or fTileFarPlane for the depth.
// The depth is negated when USE_LEFTHAND_CAMERASPACE is not set.
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
    const bool useLLx  = (i & 1) == 0;
    const bool useLLy  = (i & 2) == 0;
    const bool useNear = (i & 4) == 0;

    const float cornerX = useLLx ? viTilLL.x : viTilUR.x;
    const float cornerY = useLLy ? viTilLL.y : viTilUR.y;
    float depth = useNear ? g_fNearPlane : fTileFarPlane;
#if !USE_LEFTHAND_CAMERASPACE
    depth = -depth;
#endif

    return GetViewPosFromLinDepth(float2(cornerX, cornerY), depth);
}
|||
|
|||
// Returns the start point (vP0) and direction (vE0) of tile-frustum edge e0.
// e0 >> 2 is the section (0: side edges, 1: near edges, 2: far edges) and e0 & 3 selects
// the edge inside the section. Side edges use the start vertex itself as the direction;
// near/far edges use a signed unit X or Y axis.
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    const int section = e0 >> 2;
    const int corner  = e0 & 0x3;

    // offset by 4 at section 2 so that far edges start on far-plane vertices
    const int vertexIdx = corner + (2 * (section & 0x2));
    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), vertexIdx, fTileFarPlane);

    if (section == 0)
    {
        vE0 = vP0;
    }
    else
    {
        const float  axisSign = ((corner & 0x2) == 0) ? 1.0f : (-1.0f);
        const float3 axisDir  = ((corner & 0x1) == (corner >> 1)) ? float3(1,0,0) : float3(0,1,0);
        vE0 = axisSign * axisDir;
    }
}
|||
|
|||
// Separating-axis style culling of the coarse light list against the tile frustum, with
// in-place compaction of coarseList.
// Lights are processed one at a time. For a non-sphere light, every (hull edge, frustum edge)
// pair yields a candidate plane normal; the pairs are distributed over the threads. If any
// thread finds a plane with all hull vertices strictly on one side and all tile vertices on
// the other, it raises ldsIsLightInvisible. Thread 0 then copies the light to the front of
// coarseList only when the flag is still zero.
//   threadID        : thread index inside the group.
//   iNrCoarseLights : number of entries in coarseList.
//   viTilLL/viTilUR : pixel-space corners of the tile.
//   fTileFarPlane   : far depth of the tile volume.
// Returns the number of lights left in coarseList.
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    if (threadID == 0)
    {
        lightOffs2 = 0;
    }

    const bool bOnlyNeedFrustumSideEdges = true;
    // max 8 since we never need to test 4 far edges of frustum since they are identical
    // vectors to near edges and plane is placed at vP0 on light hull.
    const int frustEdgeCount = bOnlyNeedFrustumSideEdges ? 4 : 8;
    const int edgePairCount  = 12 * frustEdgeCount;

    for (int slot = 0; slot < iNrCoarseLights; slot++)
    {
        // reset the shared rejection flag for this light
        if (threadID == 0)
        {
            ldsIsLightInvisible = 0;
        }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
        const int lightIdx = coarseList[slot];

        // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
        [branch] if (g_vLightData[lightIdx].lightType != SPHERE_LIGHT)
        {
            SFiniteLightBound bound = g_data[lightIdx];

            const float3 axisX   = bound.boxAxisX.xyz;
            const float3 axisY   = bound.boxAxisY.xyz;
            const float3 axisZ   = -bound.boxAxisZ.xyz;    // flip axis (so it points away from the light direction for a spot-light)
            const float3 center  = bound.center.xyz;
            const float2 scaleXY = bound.scaleXY;

            for (int pair = threadID; pair < edgePairCount; pair += NR_THREADS)
            {
                const int hullEdge  = (int) (((uint) pair) / ((uint) frustEdgeCount));    // should become a shift right
                const int frustEdge = pair - hullEdge * frustEdgeCount;

                int curVert = 0, twinVert = 0;
                float3 hullP, hullE;
                GetHullEdge(curVert, twinVert, hullP, hullE, hullEdge, axisX, axisY, axisZ, center, scaleXY);

                float3 frustP, frustE;
                GetFrustEdge(frustP, frustE, frustEdge, viTilLL, viTilUR, fTileFarPlane);

                // potential separation plane
                const float3 planeN = cross(hullE, frustE);

                // classify the light hull: only need to test 7 verts (technically just 6).
                bool hullHasPos = false, hullHasNeg = false;
                for (int k = 1; k < 8; k++)
                {
                    const int v = (curVert + k) & 0x7;
                    const float3 hullVert = GetHullVertex(axisX, axisY, axisZ, center, scaleXY, v);
                    const float dist = (twinVert == v) ? 0.0 : dot(planeN, hullVert - hullP);
                    hullHasPos = hullHasPos || (dist > 0);
                    hullHasNeg = hullHasNeg || (dist < 0);
                }
                // +1: all on the positive side, -1: all on the negative side, 0: straddling or degenerate
                const int hullSide = (hullHasPos ? 1 : 0) - (hullHasNeg ? 1 : 0);

                // classify the tile frustum against the same plane
                bool tileHasPos = false, tileHasNeg = false;
                for (int c = 0; c < 8; c++)
                {
                    const float3 tileVert = GetTileVertex(viTilLL, viTilUR, c, fTileFarPlane);
                    const float dist = dot(planeN, tileVert - hullP);
                    tileHasPos = tileHasPos || (dist > 0);
                    tileHasNeg = tileHasNeg || (dist < 0);
                }
                const int tileSide = (tileHasPos ? 1 : 0) - (tileHasNeg ? 1 : 0);

                // strictly opposite sides means the plane separates hull and tile
                if ((hullSide * tileSide) < 0)
                {
                    InterlockedOr(ldsIsLightInvisible, 1);
                }
            }
        }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
        // keep the light when no thread found a separating plane
        if (threadID == 0 && ldsIsLightInvisible == 0)
        {
            coarseList[lightOffs2] = coarseList[slot];
            lightOffs2 = lightOffs2 + 1;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
    return lightOffs2;
}
|||
#endif |
|||
|
|||
|
|||
|
|||
// Kernel: resets g_LayeredSingleIdxBuffer[0] to zero. The light-list kernel uses that
// element as the atomic allocation cursor for g_vLayeredLightList.
// Runs with a single thread per group; both parameters are unused.
[numthreads(1, 1, 1)]
void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    g_LayeredSingleIdxBuffer[0] = 0;
}
|
|||
fileFormatVersion: 2 |
|||
guid: a19ed36b92650254397f2a566ed76d36 |
|||
timeCreated: 1479306737 |
|||
licenseType: Pro |
|||
ComputeShaderImporter: |
|||
currentAPIMask: 4 |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7. |
|||
// https://github.com/wolfgangfengel/GPU-Pro-7 |
|||
|
|||
#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen |
|||
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING |
|||
|
|||
|
|||
#include "..\common\ShaderBase.h" |
|||
#include "LightDefinitions.cs.hlsl" |
|||
|
|||
#include "LightingConvexHullUtils.hlsl" |
|||
|
|||
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL) |
|||
#include "SortingComputeUtils.hlsl" |
|||
#endif |
|||
|
|||
|
|||
// Feature switches for the optional culling stages of the kernel.
#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS


// ---- constants supplied by the application ----
uniform int g_iNrVisibLights;           // number of lights in g_vBoundsBuffer / g_data
uniform uint2 g_viDimensions;           // render target size in pixels (x = width, y = height)
uniform float4x4 g_mInvScrProjection;   // used by GetLinearDepth()
uniform float4x4 g_mScrProjection;      // used by GetViewPosFromLinDepth()


// ---- inputs ----
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );          // AABB min at [l], AABB max at [l + g_iNrVisibLights]
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

#ifdef USE_TWO_PASS_TILED_LIGHTING
Buffer<uint> g_vBigTileLightList : register( t4 );                  // light lists produced per big tile
#endif

// Threads per group; every thread owns 4 pixels of the 16x16 tile.
#define NR_THREADS 64

// ---- output ----
RWBuffer<uint> g_vLightList : register( u0 );


#define MAX_NR_COARSE_ENTRIES 64        // capacity of the in-LDS lists
#define MAX_NR_PRUNED_ENTRIES 24        // maximum number of lights written out per light model

// ---- thread group shared state ----
groupshared uint coarseList[MAX_NR_COARSE_ENTRIES];
groupshared uint prunedList[MAX_NR_COARSE_ENTRIES];     // holds up to 64 entries while in LDS; clamped when written out

groupshared uint ldsZMin;               // bit pattern of the smallest non-sky depth in the tile
groupshared uint ldsZMax;               // bit pattern of the largest non-sky depth in the tile
groupshared uint lightOffs;             // number of lights appended to coarseList
#ifdef FINE_PRUNING_ENABLED
groupshared uint ldsDoesLightIntersect[2];  // one bit per coarse list entry (2 x 32 bits)
#endif
groupshared int ldsNrLightsFinal;       // number of entries delivered in prunedList

groupshared int ldsModelListCount[NR_LIGHT_MODELS];     // one counter per light model (original note: NR_LIGHT_MODELS is 2)

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;          // number of lights that survive the sphere test
#endif
|||
|
|||
|
|||
//float GetLinearDepth(float3 vP) |
|||
//{ |
|||
// float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0)); |
|||
// return v4Pres.z / v4Pres.w; |
|||
//} |
|||
|
|||
// Converts a depth buffer value (0 is near, 1 is far) into a linear depth.
// The point (0, 0, zDptBufSpace, 1) is transformed by g_mInvScrProjection and
// the z component of the result is divided by its w component.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 v4Unprojected = mul(g_mInvScrProjection, float4(float3(0.0f, 0.0f, zDptBufSpace), 1.0));
    return v4Unprojected.z / v4Unprojected.w;
}
|||
|
|||
|
|||
// Reconstructs a position from a screen position and a linear depth.
// The x/y scale and offset terms are read from rows 0 and 1 of g_mScrProjection;
// the resulting direction (with z = 1) is scaled by fLinDepth.
// With USE_LEFTHAND_CAMERASPACE the offset is subtracted; otherwise it is added
// and the x/y terms are negated.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const float2 v2Scale  = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
    const float2 v2Offset = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);

#if USE_LEFTHAND_CAMERASPACE
    const float2 v2Dir = (v2ScrPos - v2Offset) / v2Scale;
#else
    const float2 v2Dir = -((v2ScrPos + v2Offset) / v2Scale);
#endif

    return fLinDepth * float3(v2Dir.x, v2Dir.y, 1.0);
}
|||
|
|||
// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x and y
// scale terms of g_mScrProjection, i.e. the diagonal extent of one pixel at depth one.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 v2PixelExtent = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
    return length(v2PixelExtent);
}
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS |
|||
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate); |
|||
#endif |
|||
|
|||
#ifdef FINE_PRUNING_ENABLED |
|||
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths); |
|||
#endif |
|||
|
|||
|
|||
// Kernel: builds the light list of one 16x16 pixel tile (one thread group per tile).
//
// Stages:
//   1. min/max depth of the tile (sky pixels, depth >= VIEWPORT_SCALE_Z, are ignored);
//   2. coarse list: lights whose screen-space AABB (g_vBoundsBuffer) overlaps the tile bounds;
//   3. optional sphere test and optional per-pixel fine pruning;
//   4. count lights per light model, sort, and write the lists to g_vLightList.
//
// Output layout: the list of light model m for tile (x,y) starts at dword
// 16*(m*nrTilesX*nrTilesY + y*nrTilesX + x). The low half-word of the first dword holds the
// number of lights (clamped to MAX_NR_PRUNED_ENTRIES); light indices follow as 16 bit values.
//
// On Xbox One / PS4 the group barriers are compiled out.
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    if(t<MAX_NR_COARSE_ENTRIES)
        prunedList[t]=0;

    uint iWidth = g_viDimensions.x;
    uint iHeight = g_viDimensions.y;
    uint nrTilesX = (iWidth+15)/16;
    uint nrTilesY = (iHeight+15)/16;

    // build tile scr boundary
    const uint uFltMax = 0x7f7fffff;  // FLT_MAX as a uint
    if(t==0)
    {
        ldsZMin = uFltMax;
        ldsZMax = 0;
        lightOffs = 0;
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    uint2 viTilLL = 16*tileIDX;

    // establish min and max depth first
    float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;

    float4 vLinDepths;
    {
        // Fetch depths and calculate min/max (4 pixels per thread)
        [unroll]
        for(int i = 0; i < 4; i++)
        {
            int idx = i * NR_THREADS + t;
            uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
            const float fDepth = FetchDepth(g_depth_tex, uCrd);
            vLinDepths[i] = GetLinearDepth(fDepth);
            if(fDepth<VIEWPORT_SCALE_Z)     // if not skydome
            {
                dpt_mi = min(fDepth, dpt_mi);
                dpt_ma = max(fDepth, dpt_ma);
            }
        }

        // the depths are non-negative floats, so their bit patterns order like unsigned integers
        InterlockedMax(ldsZMax, asuint(dpt_ma));
        InterlockedMin(ldsZMin, asuint(dpt_mi));

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
        GroupMemoryBarrierWithGroupSync();
#endif
    }

    // tile bounds in the same space as g_vBoundsBuffer: xy normalized by the screen size, z = depth range
    float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
    float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
    vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;

    // build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
    // candidates come from the list of the enclosing big tile; entry 0 of that list is its length
    int NrBigTilesX = (nrTilesX+3)>>2;
    const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2);    // map the idx to 64x64 tiles
    const int nrCandidates = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
#else
    const int nrCandidates = (int) g_iNrVisibLights;
#endif
    for(int c=(int) t; c<nrCandidates; c += NR_THREADS)
    {
#ifdef USE_TWO_PASS_TILED_LIGHTING
        const int lgt = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+c+1];
#else
        const int lgt = c;
#endif
        const float3 vMi = g_vBoundsBuffer[lgt];
        const float3 vMa = g_vBoundsBuffer[lgt+g_iNrVisibLights];

        if( all(vMa>vTileLL) && all(vMi<vTileUR))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = lgt;     // add to light list
        }
    }

#ifdef FINE_PRUNING_ENABLED
    if(t<2) ldsDoesLightIntersect[t] = 0;
#endif

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif

#ifndef FINE_PRUNING_ENABLED
    {
        if((int)t<iNrCoarseLights) prunedList[t] = coarseList[t];
        if(t==0) ldsNrLightsFinal=iNrCoarseLights;
    }
#else
    {
        // initializes ldsNrLightsFinal with the number of accepted lights.
        // all accepted entries delivered in prunedList[].
        FinePruneLights(t, iNrCoarseLights, viTilLL, vLinDepths);
    }
#endif

    // count the accepted lights per light model
    if(t<NR_LIGHT_MODELS) ldsModelListCount[t]=0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    int nrLightsCombinedList = min(ldsNrLightsFinal,MAX_NR_COARSE_ENTRIES);
    for(int e=t; e<nrLightsCombinedList; e+=NR_THREADS)
    {
        InterlockedAdd(ldsModelListCount[ g_vLightData[ prunedList[e] ].lightModel ], 1);
    }

    // sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    SORTLIST(prunedList, nrLightsCombinedList, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
    //MERGESORTLIST(prunedList, coarseList, nrLightsCombinedList, t, NR_THREADS);
#endif

    // write lights to global buffers
    int localOffs=0;                                // first entry of the current model inside prunedList
    int offs = tileIDX.y*nrTilesX + tileIDX.x;
    for(int m=0; m<NR_LIGHT_MODELS; m++)
    {
        int nrLightsFinal = ldsModelListCount[ m ];
        int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;

        // one half-word for the count plus one per light, rounded up to whole dwords
        const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
        for(int dw=(int) t; dw<(int) nrDWords; dw += NR_THREADS)
        {
            uint uLow = dw==0 ? nrLightsFinalClamped : prunedList[2*dw-1+localOffs];

            // The high half-word only carries a light when entry 2*dw is inside the clamped range.
            // Otherwise it is padding: write zero instead of fetching an entry that belongs to
            // another model or lies past the end of prunedList (e.g. index 64 when all 64 lights
            // are of the first model).
            uint uHigh = 0;
            if((2*dw)<nrLightsFinalClamped) uHigh = prunedList[2*dw+0+localOffs];

            g_vLightList[16*offs + dw] = (uLow&0xffff) | (uHigh<<16);
        }

        localOffs += nrLightsFinal;
        offs += (nrTilesX*nrTilesY);
    }
}
|||
|
|||
|
|||
|
|||
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
// Removes from coarseList[] every light whose bounding sphere (g_data[].center / .radius) fails
// DoesSphereOverlapTile() for the tile, and compacts the survivors at the front of coarseList[].
//
//   threadID         index of the calling thread inside the group
//   iNrCoarseLights  number of valid entries in coarseList[] on entry
//   screenCoordinate screen position passed to GetViewPosFromLinDepth() to build the tile direction
//
// Returns the number of entries left in coarseList[]. prunedList[] is used as scratch space and
// is overwritten. Must be called by all threads of the group (it contains group barriers, which
// are compiled out on Xbox One / PS4).
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
    lightOffsSph = 0;

    // make a copy of coarseList in prunedList.
    for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
        prunedList[l]=coarseList[l];

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

#if USE_LEFTHAND_CAMERASPACE
    float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
    float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif

    float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
    float halfTileSizeAtZDistOne = 8*onePixDiagDist;        // scale by half a tile

    for(int c=threadID; c<iNrCoarseLights; c+=NR_THREADS)
    {
        // Take the light index from the backup in prunedList: coarseList is being overwritten
        // with the compacted result by other threads inside this very loop, so reading
        // coarseList[c] here can return an already compacted entry whenever the threads of the
        // group do not run in lockstep.
        const uint uLightIdx = prunedList[c];
        SFiniteLightBound lightData = g_data[uLightIdx];

        if( DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lightData.center.xyz, lightData.radius) )
        {
            unsigned int uIndex;
            InterlockedAdd(lightOffsSph, 1, uIndex);
            coarseList[uIndex]=uLightIdx;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    return lightOffsSph;
}
#endif
|||
|
|||
|
|||
#ifdef FINE_PRUNING_ENABLED
// Per-pixel pruning of the coarse light list.
//
// Every thread tests its 4 pixels of the tile against each light in coarseList[] using the
// shape that matches the light type (spot, sphere or box). A light is kept when at least one
// pixel of any thread lies inside it.
//
//   threadID         index of the calling thread inside the group
//   iNrCoarseLights  number of valid entries in coarseList[] (at most 64: one flag bit per entry)
//   viTilLL          pixel coordinate of the tile corner
//   vLinDepths       linear depth of the 4 pixels owned by this thread
//
// Initializes ldsNrLightsFinal with the number of accepted lights.
// All accepted entries are delivered in prunedList[].
// Expects ldsDoesLightIntersect[] to have been cleared by the caller.
void FinePruneLights(uint threadID, int iNrCoarseLights, uint2 viTilLL, float4 vLinDepths)
{
    uint t = threadID;
    uint iWidth = g_viDimensions.x;
    uint iHeight = g_viDimensions.y;

    // View-space position of the 4 pixels owned by this thread. They only depend on the thread
    // and the tile, so they are reconstructed once instead of once per light.
    float3 vPixViewPos[4];
    {
        [unroll]
        for(int i=0; i<4; i++)
        {
            int idx = t + i*NR_THREADS;

            uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
            vPixViewPos[i] = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
        }
    }

    uint uLightsFlags[2] = {0,0};       // bit l is set when light l touches one of this thread's pixels
    int l=0;
    // need this outer loop even on xb1 and ps4 since direct lights and
    // reflection lights are kept in separate regions.
    while(l<iNrCoarseLights)
    {
        // fetch light (l is in range here because of the loop condition)
        int idxCoarse = coarseList[l];
        uint uLgtType = g_vLightData[idxCoarse].lightType;

        // spot
        while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
        {
            SFiniteLightData lightData = g_vLightData[idxCoarse];
            const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;

            // serially check 4 pixels
            uint uVal = 0;
            for(int i=0; i<4; i++)
            {
                // check pixel
                float3 fromLight = vPixViewPos[i]-lightData.lightPos.xyz;
                float distSq = dot(fromLight,fromLight);
                const float fSclProj = dot(fromLight, lightData.lightAxisZ.xyz);       // spotDir = lightData.lightAxisZ.xyz

                float2 V = abs( float2( dot(fromLight, lightData.lightAxisX.xyz), dot(fromLight, lightData.lightAxisY.xyz) ) );

                // circular spots use the radial distance, the others the larger of the two axis distances
                float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
                if( all( float2(lightData.radiusSq, fSclProj) > float2(distSq, fDist2D*lightData.cotan) ) ) uVal = 1;
            }

            uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
            ++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
            uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
        }

        // sphere
        while(l<iNrCoarseLights && uLgtType==SPHERE_LIGHT)
        {
            SFiniteLightData lightData = g_vLightData[idxCoarse];

            // serially check 4 pixels
            uint uVal = 0;
            for(int i=0; i<4; i++)
            {
                // check pixel
                float3 vLp = lightData.lightPos.xyz;
                float3 toLight = vLp - vPixViewPos[i];
                float distSq = dot(toLight,toLight);

                if(lightData.radiusSq>distSq) uVal = 1;
            }

            uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
            ++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
            uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
        }

        // Box
        while(l<iNrCoarseLights && uLgtType==BOX_LIGHT)
        {
            SFiniteLightData lightData = g_vLightData[idxCoarse];

            // serially check 4 pixels
            uint uVal = 0;
            for(int i=0; i<4; i++)
            {
                // check pixel
                float3 toLight = lightData.lightPos.xyz - vPixViewPos[i];

                float3 dist = float3( dot(toLight, lightData.lightAxisX.xyz), dot(toLight, lightData.lightAxisY.xyz), dot(toLight, lightData.lightAxisZ.xyz) );
                dist = (abs(dist) - lightData.boxInnerDist) * lightData.boxInvRange;       // not as efficient as it could be
                if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1;                         // but allows us to not write out OuterDists
            }

            uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
            ++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
            uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].lightType : 0;
        }

        // in case we have some corrupt data make sure we terminate
        if(uLgtType>=MAX_TYPES) ++l;
    }

    // merge the per-thread flags of the whole group
    InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
    InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
    if(t==0) ldsNrLightsFinal = 0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // thread t appends coarse entry t if any pixel of the tile was inside that light
    if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
    {
        unsigned int uInc = 1;
        unsigned int uIndex;
        InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
        if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t];       // we allow up to 64 pruned lights while stored in LDS.
    }
}
#endif
|
|||
fileFormatVersion: 2 |
|||
guid: 65af3444cbf4b3747a4dead7ee00cfee |
|||
timeCreated: 1479306737 |
|||
licenseType: Pro |
|||
ComputeShaderImporter: |
|||
currentAPIMask: 4 |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
|
|||
// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7. |
|||
// https://github.com/wolfgangfengel/GPU-Pro-7 |
|||
|
|||
#pragma kernel ScreenBoundsAABB

// Forward slashes are used in include paths so that they also resolve on
// shader toolchains that do not run on Windows.
#include "../common/ShaderBase.h"
#include "LightDefinitions.cs.hlsl"
|||
|
|||
// ---- constants supplied by the application ----
uniform int g_iNrVisibLights;           // number of entries of g_data to process
uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;


// ---- input: bounding volume (oriented box axes, center, radius, scaleXY) of every light ----
StructuredBuffer<SFiniteLightBound> g_data : register( t0 );


// smallest such that 1.0+FLT_EPSILON != 1.0
#define FLT_EPSILON 1.192092896e-07F
// threads per group: 8 lights with 8 threads each
#define NR_THREADS 64

// ---- output: AABB min at [l], AABB max at [l + g_iNrVisibLights] ----
RWStructuredBuffer<float3> g_vBoundsBuffer : register( u0 );

// Capacity of one clip buffer. Strictly this should be 10 (= 6+4), but 9 allows more wavefronts
// and 10 never seems to be reached in practice. Should such an extreme case ever happen,
// clipping stops early; in the worst case that skips the clip against the back plane, which
// does not cause any errors.
#define MAX_PNTS 9


// LDS (2496 bytes)
// Per light: two clip buffers of MAX_PNTS points each (source and destination), stored as
// separate x/y/z/w arrays, times 8 lights per group.
groupshared float posX[MAX_PNTS*8*2];
groupshared float posY[MAX_PNTS*8*2];
groupshared float posZ[MAX_PNTS*8*2];
groupshared float posW[MAX_PNTS*8*2];
// one packed word per hull face (6 faces x 8 lights): 4 vertices x 6 clip bits
groupshared uint clipFlags[48];
|||
|
|||
|
|||
unsigned int GetClip(const float4 P); |
|||
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p); |
|||
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r); |
|||
|
|||
#include "LightingConvexHullUtils.hlsl" |
|||
|
|||
|
|||
// Kernel: computes a screen-space AABB for the convex hull of every visible light.
//
// A group processes 8 lights with 8 threads each.
//   Phase 1 (threads 0..5 of a light): project one hull quad each, record the clip flags of
//            its 4 vertices and the AABB of the projected quad in LDS.
//   Phase 2 (thread 0 of a light): combine the 6 faces. When no vertex is clipped the face
//            AABBs are merged directly; when the hull straddles the frustum every quad is
//            clipped against the 6 frustum planes and frustum corners that lie inside the hull
//            are added. The result is then tightened with the bounding sphere of the light.
//
// Output: g_vBoundsBuffer[l] receives the AABB minimum and g_vBoundsBuffer[l+g_iNrVisibLights]
// the maximum, with xy remapped by 0.5*v+0.5 and z scaled by VIEWPORT_SCALE_Z. Lights without
// a visible part get an AABB that lies off-screen.
[numthreads(NR_THREADS, 1, 1)]
void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    const int subLigt   = (int) (threadID/8);               // light slot inside the group (0..7)
    const int lgtIndex  = subLigt + (int) u3GroupID.x*8;    // index of the light in g_data
    const int sideIndex = (int) (threadID%8);               // hull face of this thread (only 0..5 do work)

    const int  ldsBase     = subLigt*MAX_PNTS*2;            // first element of this light in posX/Y/Z/W
    const int  flagBase    = subLigt*6;                     // first element of this light in clipFlags
    const bool bValidLight = lgtIndex<(int) g_iNrVisibLights;

    SFiniteLightBound lgtDat = g_data[lgtIndex];

    const float3 boxX = lgtDat.boxAxisX.xyz;
    const float3 boxY = lgtDat.boxAxisY.xyz;
    const float3 boxZ = -lgtDat.boxAxisZ.xyz;               // flip axis (so it points away from the light direction for a spot-light)
    const float3 center = lgtDat.center.xyz;
    const float radius = lgtDat.radius;
    const float2 scaleXY = lgtDat.scaleXY;

    // ---------------- phase 1: one hull face per thread ----------------
    if(sideIndex<6 && bValidLight)                          // mask 2 out of 8 threads
    {
        float3 q0, q1, q2, q3;
        GetQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, sideIndex);

        const float4 vPnts[4] =
        {
            mul(g_mProjection, float4(q0, 1)),
            mul(g_mProjection, float4(q1, 1)),
            mul(g_mProjection, float4(q2, 1)),
            mul(g_mProjection, float4(q3, 1))
        };

        // clip flags of the 4 vertices, 6 bits each, and the
        // screen-space AABB of the quad (assuming no intersection)
        uint uPackedFlags = 0;
        float3 vMin, vMax;
        for(int k=0; k<4; k++)
        {
            uPackedFlags |= GetClip(vPnts[k])<<(6*k);

            // keep the sign of w but push its magnitude away from zero before dividing
            float fW = vPnts[k].w;
            float fS = fW<0 ? -1 : 1;
            float fWabs = fW<0 ? (-fW) : fW;
            fW = fS * (fWabs<FLT_EPSILON ? FLT_EPSILON : fWabs);

            float3 vP = vPnts[k].xyz/fW;
            if(k==0) { vMin=vP; vMax=vP; }

            vMax = max(vMax, vP); vMin = min(vMin, vP);
        }

        clipFlags[flagBase+sideIndex] = uPackedFlags;

        // store in clip buffer (only use these vMin and vMax if light is 100% visible in which case clipping isn't needed)
        posX[ldsBase + sideIndex] = vMin.x;
        posY[ldsBase + sideIndex] = vMin.y;
        posZ[ldsBase + sideIndex] = vMin.z;

        posX[ldsBase + sideIndex + 6] = vMax.x;
        posY[ldsBase + sideIndex + 6] = vMax.y;
        posZ[ldsBase + sideIndex + 6] = vMax.z;
    }

    // The results of phase 1 are consumed by a different thread below. A barrier is needed
    // on every target other than Xbox One / PS4 since the hardware cannot be assumed to
    // execute 64 threads in lockstep. (Original note: this should get a pound define around it.)
    GroupMemoryBarrierWithGroupSync();

    // ---------------- phase 2: one thread per light ----------------
    if(sideIndex==0 && bValidLight)
    {
        int f=0;

        // quick acceptance or rejection
        uint uCollectiveAnd = 0xffffffff;
        uint uCollectiveOr = 0;
        for(f=0; f<6; f++)
        {
            const uint uFaceFlags = clipFlags[flagBase+f];
            uint uFlagAnd = uFaceFlags&0x3f;
            uint uFlagOr = uFlagAnd;
            for(int v=1; v<4; v++)
            {
                const uint uClipBits = (uFaceFlags>>(v*6))&0x3f;
                uFlagAnd &= uClipBits;
                uFlagOr |= uClipBits;
            }

            uCollectiveAnd &= uFlagAnd;
            uCollectiveOr |= uFlagOr;
        }

        bool bSetBoundYet = false;
        float3 vMin=0.0, vMax=0.0;
        if(uCollectiveOr==0)
        {
            // all visible: merge the face AABBs written in phase 1
            for(f=0; f<6; f++)
            {
                const float3 vFaceMi = float3(posX[ldsBase + f + 0], posY[ldsBase + f + 0], posZ[ldsBase + f + 0]);
                const float3 vFaceMa = float3(posX[ldsBase + f + 6], posY[ldsBase + f + 6], posZ[ldsBase + f + 6]);

                if(f==0) { vMin=vFaceMi; vMax=vFaceMi; }

                vMax = max(vMax, vFaceMi); vMin = min(vMin, vFaceMi);
                vMax = max(vMax, vFaceMa); vMin = min(vMin, vFaceMa);
            }
            bSetBoundYet=true;
        }
        else if(uCollectiveAnd==0)
        {
            // some vertices are clipped but not all by the same plane: need true clipping
            // (when uCollectiveAnd!=0 every vertex is outside one common plane and nothing is added)
            for(f=0; f<6; f++)
            {
                float3 q0, q1, q2, q3;
                GetQuad(q0, q1, q2, q3, boxX, boxY, boxZ, center, scaleXY, f);

                // 4 vertices to a quad of the convex hull in post projection space
                const float4 vQuad[4] =
                {
                    mul(g_mProjection, float4(q0, 1)),
                    mul(g_mProjection, float4(q1, 1)),
                    mul(g_mProjection, float4(q2, 1)),
                    mul(g_mProjection, float4(q3, 1))
                };

                // fill up source clip buffer (buffer 0) with the quad
                for(int v=0; v<4; v++)
                {
                    posX[ldsBase+v]=vQuad[v].x;
                    posY[ldsBase+v]=vQuad[v].y;
                    posZ[ldsBase+v]=vQuad[v].z;
                    posW[ldsBase+v]=vQuad[v].w;
                }

                int iSrcIndex = 0;
                int iNrSrcVerts = 4;

                // do true clipping, ping-ponging between the two clip buffers
                for(int p=0; p<6; p++)
                {
                    iNrSrcVerts = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p);
                    iSrcIndex = 1-iSrcIndex;

                    if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break;
                }

                // final clipped convex primitive is in src buffer
                if(iNrSrcVerts>2)
                {
                    const int offs_src = iSrcIndex*MAX_PNTS+ldsBase;
                    for(int k=0; k<iNrSrcVerts; k++)
                    {
                        const float4 vCur = float4(posX[offs_src+k], posY[offs_src+k], posZ[offs_src+k], posW[offs_src+k]);

                        // project and apply toward AABB
                        const float3 vP = vCur.xyz/vCur.w;
                        if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }

                        vMax = max(vMax, vP); vMin = min(vMin, vP);
                    }
                }
            }

            ////////////////////// look for camera frustum verts that need to be included. That is frustum vertices inside the convex hull for the light
            int c=0;
            for(c=0; c<8; c++)      // establish 8 camera frustum vertices
            {
                const float3 vVertPSpace = float3((c&1)!=0 ? 1 : (-1), (c&2)!=0 ? 1 : (-1), (c&4)!=0 ? 1 : 0);

                const float4 v4ViewSpace = mul(g_mInvProjection, float4(vVertPSpace,1));
                const float3 vViewSpace = v4ViewSpace.xyz/v4ViewSpace.w;

                // the clip buffer is free again, reuse it as scratch space
                posX[ldsBase + c] = vViewSpace.x;
                posY[ldsBase + c] = vViewSpace.y;
                posZ[ldsBase + c] = vViewSpace.z;
            }

            // determine which camera frustum vertices are inside the convex hull:
            // a vertex keeps its bit only while it is on the negative side of every hull plane
            uint uVisibFl = 0xff;
            for(f=0; f<6; f++)
            {
                float3 vP0, vN;
                GetPlane(vP0, vN, boxX, boxY, boxZ, center, scaleXY, f);

                for(c=0; c<8; c++)
                {
                    const float3 vViewSpace = float3(posX[ldsBase + c], posY[ldsBase + c], posZ[ldsBase + c]);
                    uVisibFl &= ( dot(vViewSpace-vP0, vN)<0 ? 0xff : (~(1<<c)) );
                }
            }

            // apply camera frustum vertices inside the convex hull to the AABB
            for(c=0; c<8; c++)
            {
                if((uVisibFl&(1<<c))!=0)
                {
                    const float3 vP = float3((c&1)!=0 ? 1 : (-1), (c&2)!=0 ? 1 : (-1), (c&4)!=0 ? 1 : 0);

                    if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }

                    vMax = max(vMax, vP); vMin = min(vMin, vP);
                }
            }
        }

        // determine AABB bound in [-1;1]x[-1;1] screen space using bounding sphere.
        // Use the result to make our already established AABB from the convex hull
        // potentially tighter.
        if(!bSetBoundYet)
        {
            // set the AABB off-screen
            vMin = float3(-3,-3,-3);
            vMax = float3(-2,-2,-2);
        }
        else
        {
            //if((center.z+radius)<0.0)
            if( length(center)>radius)
            {
                float2 vMi, vMa;
                bool2 bMi, bMa;
                CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, center, radius);

                vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy;
                vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy;
            }

            // Depth range from the two sphere points along the view axis. Note that the trailing
            // 'else' after the #endif pairs with the LAST 'if' of whichever branch is compiled:
            // when that sphere point is not in front of the camera the AABB is moved off-screen.
#if USE_LEFTHAND_CAMERASPACE
            if((center.z-radius)>0.0)
            {
                float4 vPosF = mul(g_mProjection, float4(0,0,center.z-radius,1));
                vMin.z = max(vMin.z, vPosF.z/vPosF.w);
            }
            if((center.z+radius)>0.0)
            {
                float4 vPosB = mul(g_mProjection, float4(0,0,center.z+radius,1));
                vMax.z = min(vMax.z, vPosB.z/vPosB.w);
            }
#else
            if((center.z+radius)<0.0)
            {
                float4 vPosF = mul(g_mProjection, float4(0,0,center.z+radius,1));
                vMin.z = max(vMin.z, vPosF.z/vPosF.w);
            }
            if((center.z-radius)<0.0)
            {
                float4 vPosB = mul(g_mProjection, float4(0,0,center.z-radius,1));
                vMax.z = min(vMax.z, vPosB.z/vPosB.w);
            }
#endif
            else
            {
                vMin = float3(-3,-3,-3);
                vMax = float3(-2,-2,-2);
            }
        }

        // we should consider doing a look-up here into a max depth mip chain
        // to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth
        //g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
        //g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z);

        // changed for unity
        g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, 0.5*vMin.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
        g_vBoundsBuffer[lgtIndex+(int) g_iNrVisibLights] = float3(0.5*vMax.x+0.5, 0.5*vMax.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
    }
}
|||
|
|||
|
|||
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p); |
|||
|
|||
// Clips the polygon stored in one LDS clip buffer against a single frustum plane and writes
// the result into the other clip buffer of the same light.
//
//   iSrcIndex    which of the two clip buffers (0 or 1) holds the input polygon
//   iNrSrcVerts  number of vertices of the input polygon (the last vertex is read first,
//                so this is expected to be at least 1)
//   subLigt      light slot inside the thread group; selects the LDS region
//   p            plane index; selects bit (1<<p) of the flags returned by GetClip()
//
// Returns the number of vertices written, which never exceeds MAX_PNTS: vertices that would
// not fit are dropped.
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p)
{
    const int srcBase = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
    const int dstBase = (1-iSrcIndex)*MAX_PNTS+subLigt*MAX_PNTS*2;
    const unsigned int uPlaneBit = (1<<p);

    // start with the edge that runs from the last vertex to the first one
    const int iLast = srcBase+(iNrSrcVerts-1);
    float4 vPrev = float4(posX[iLast], posY[iLast], posZ[iLast], posW[iLast]);
    bool bIsPrevVisib = (GetClip(vPrev)&uPlaneBit)==0;

    int nrVertsDst = 0;
    for(int i=0; i<iNrSrcVerts; i++)
    {
        const float4 vCur = float4(posX[srcBase+i], posY[srcBase+i], posZ[srcBase+i], posW[srcBase+i]);
        const bool bIsCurVisib = (GetClip(vCur)&uPlaneBit)==0;

        // the edge prev->cur crosses the plane: emit the intersection point
        if(bIsCurVisib!=bIsPrevVisib)
        {
            //assert(nrVertsDst<MAX_PNTS);
            if(nrVertsDst<MAX_PNTS)
            {
                const float4 vNew = GenNewVert(bIsCurVisib ? vCur : vPrev, bIsCurVisib ? vPrev : vCur, p);
                const int iDst = dstBase+nrVertsDst;
                posX[iDst]=vNew.x; posY[iDst]=vNew.y; posZ[iDst]=vNew.z; posW[iDst]=vNew.w;
                ++nrVertsDst;
            }
        }

        // vertices on the visible side are kept as they are
        if(bIsCurVisib)
        {
            //assert(nrVertsDst<MAX_PNTS);
            if(nrVertsDst<MAX_PNTS)
            {
                const int iDst = dstBase+nrVertsDst;
                posX[iDst]=vCur.x; posY[iDst]=vCur.y; posZ[iDst]=vCur.z; posW[iDst]=vCur.w;
                ++nrVertsDst;
            }
        }

        vPrev = vCur;
        bIsPrevVisib = bIsCurVisib;
    }

    return nrVertsDst;
}
|||
|
|||
|
|||
|
|||
// Returns the clip flags of a homogeneous point; a set bit means the point is outside that plane.
//   bit 0 (1):  x < -w        bit 1 (2):  x > w
//   bit 2 (4):  y < -w        bit 3 (8):  y > w
//   bit 4 (16): z < 0         bit 5 (32): z > w
// A result of 0 therefore means -w <= x <= w, -w <= y <= w and 0 <= z <= w.
unsigned int GetClip(const float4 P)
{
    unsigned int uFlags = 0;

    if(P.x<-P.w) uFlags |= 1;
    if(P.x>P.w)  uFlags |= 2;
    if(P.y<-P.w) uFlags |= 4;
    if(P.y>P.w)  uFlags |= 8;
    if(P.z<0)    uFlags |= 16;
    if(P.z>P.w)  uFlags |= 32;

    return uFlags;
}
|||
|
|||
// Computes the point where the edge between a visible and an invisible vertex crosses clip plane p.
//
// Plane p tests coordinate p/2 (0 = x, 1 = y, otherwise z) against fS*w, where fS is -1 for
// even p, +1 for odd p and 0 for p == 4 (the z = 0 plane).
// The interpolation factor fT solves
//     fS*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (x1-x0)*t + x0
// with x1 / x0 the tested coordinate of the visible / invisible vertex.
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p)
{
    const int axis = ((uint) p)/2;

    float fS;
    if(p==4) fS = 0;
    else     fS = (p&1)==0 ? -1 : 1;

    float x1, x0;
    if(axis==0)      { x1 = vVisib.x; x0 = vInvisib.x; }
    else if(axis==1) { x1 = vVisib.y; x0 = vInvisib.y; }
    else             { x1 = vVisib.z; x0 = vInvisib.z; }

    const float fT = (fS*vInvisib.w-x0)/((x1-x0) - fS*(vVisib.w-vInvisib.w));
    float4 vNew = vVisib*fT + vInvisib*(1-fT);

    // overwrite the clipped coordinate so that it lies exactly on the plane
    const float fOnPlane = fS*vNew.w;
    if(axis==0)      vNew.x = fOnPlane;
    else if(axis==1) vNew.y = fOnPlane;
    else             vNew.z = fOnPlane;

    return vNew;
}
|||
|
|||
|
|||
// Transforms a plane (given as a float4 of coefficients) by multiplying it, as a row vector,
// with InvProjection.
float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)
{
    const float4 planePostSpace = mul(plane, InvProjection);
    return planePostSpace;
}
|||
|
|||
// Evaluates a pair of 2D line/plane normals for a circle of radius r centered at posXY_in.
// The two results are returned as (res.x, res.y) and (res.z, res.w).
// NOTE(review): these look like the two tangents through the origin to that circle -- confirm.
//
// The input is rotated by 90 degrees when |y| < |x| so that the divisions by posXY.y below do
// not hit a zero; the results are rotated back before returning.
// When dot(posXY, posXY) < r*r the sqrt argument is negative, so the result is not a valid
// number; CalcBound's comparisons on it then report the bounds as not valid.
float4 EvalPlanePair(float2 posXY_in, float r)
{
    // rotate by 90 degrees to avoid potential division by zero
    const bool bMustFlip = abs(posXY_in.y)<abs(posXY_in.x);
    float2 posXY = posXY_in;
    if(bMustFlip) posXY = float2(-posXY_in.y, posXY_in.x);

    const float fLenSQ = dot(posXY, posXY);
    const float D = posXY.y * sqrt(fLenSQ - r*r);

    float4 res;
    res.x = (-r*posXY.x - D) / fLenSQ;
    res.z = (-r*posXY.x + D) / fLenSQ;
    res.y = (-r-res.x*posXY.x) / posXY.y;
    res.w = (-r-res.z*posXY.x) / posXY.y;

    // rotate back by 90 degrees
    if(bMustFlip) res = float4(res.y, -res.x, res.w, -res.z);

    return res;
}
|||
|
|||
// Computes a 2D bound of a sphere from two plane pairs, one in the xz plane and one in the yz plane.
//
//   bIsMinValid / bIsMaxValid  per axis (x, y): whether the matching component of vMin / vMax may be used
//   vMin / vMax                the bound, taken from the planes after transforming them with InvProjection
//   InvProjection              matrix handed to TransformPlaneToPostSpace()
//   pos_view_space             sphere center
//   r                          sphere radius
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r)
{
    float4 planeX = EvalPlanePair(float2(pos_view_space.x, pos_view_space.z), r);
    float4 planeY = EvalPlanePair(float2(pos_view_space.y, pos_view_space.z), r);

#if USE_LEFTHAND_CAMERASPACE
    // need to swap left/right and top/bottom planes when using left hand system
    planeX = planeX.zwxy;
    planeY = planeY.zwxy;
#endif

    bIsMinValid = bool2(planeX.z<0, planeY.z<0);
    bIsMaxValid = bool2((-planeX.x)<0, (-planeY.x)<0);

    // hopefully the compiler takes zeros into account
    // should be the case since the transformation in TransformPlaneToPostSpace()
    // is done using multiply-adds and not dot product instructions.
    const float4 planeX0 = TransformPlaneToPostSpace(InvProjection, float4(planeX.x, 0, planeX.y, 0));
    const float4 planeX1 = TransformPlaneToPostSpace(InvProjection, float4(planeX.z, 0, planeX.w, 0));
    const float4 planeY0 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.x, planeY.y, 0));
    const float4 planeY1 = TransformPlaneToPostSpace(InvProjection, float4(0, planeY.z, planeY.w, 0));

    // convert planes to the forms (1,0,0,D) and (0,1,0,D)
    // 2D bound is given by -D components
    vMax = -float2(planeX0.w / planeX0.x, planeY0.w / planeY0.y);
    vMin = -float2(planeX1.w / planeX1.x, planeY1.w / planeY1.y);
}
|
|||
fileFormatVersion: 2 |
|||
guid: 728dce960f8a9c44bbc3abb3b851d8f6 |
|||
timeCreated: 1479306737 |
|||
licenseType: Pro |
|||
ComputeShaderImporter: |
|||
currentAPIMask: 4 |
|||
userData: |
|||
assetBundleName: |
|||
assetBundleVariant: |
撰写
预览
正在加载...
取消
保存
Reference in new issue