浏览代码

Merge remote-tracking branch 'refs/remotes/origin/master' into ShadowMapSupport

/main
Sebastien Lagarde 8 年前
当前提交
eed1a7a3
共有 8 个文件被更改,包括 542 次插入128 次删除
  1. 58
      Assets/ScriptableRenderLoop/fptl/FptlLighting.cs
  2. 1
      Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs
  3. 5
      Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs.hlsl
  4. 77
      Assets/ScriptableRenderLoop/fptl/lightlistbuild-clustered.compute
  5. 22
      Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute
  6. 60
      Assets/ScriptableRenderLoop/fptl/scrbound.compute
  7. 102
      Assets/ScriptableRenderLoop/fptl/LightingConvexHullUtils.hlsl
  8. 345
      Assets/ScriptableRenderLoop/fptl/lightlistbuild-bigtile.compute

58
Assets/ScriptableRenderLoop/fptl/FptlLighting.cs


public ComputeShader buildScreenAABBShader;
public ComputeShader buildPerTileLightListShader; // FPTL
public ComputeShader buildPerBigTileLightListShader;
public ComputeShader buildPerVoxelLightListShader; // clustered

private static ComputeBuffer s_LightList;
private static ComputeBuffer s_DirLightList;
private static ComputeBuffer s_BigTileLightList; // used for pre-pass coarse culling on 64x64 tiles
private static int s_GenListPerBigTileKernel;
public bool disableFptlWhenClustered = false; // still useful on opaques
public bool enableBigTilePrepass = true;
const bool disableFptlWhenClustered = false; // still useful on opaques
const int k_Log2NumClusters = 6; // accepted range is from 0 to 6. NumClusters is 1<<g_iLog2NumClusters
const float k_ClustLogBase = 1.02f; // each slice 2% bigger than the previous
float m_ClustScale;

m_DeferredReflectionMaterial.hideFlags = HideFlags.HideAndDontSave;
s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB");
s_GenListPerTileKernel = buildPerTileLightListShader.FindKernel("TileLightListGen");
s_GenListPerTileKernel = buildPerTileLightListShader.FindKernel(enableBigTilePrepass ? "TileLightListGen_SrcBigTile" : "TileLightListGen");
s_AABBBoundsBuffer = new ComputeBuffer(2 * MaxNumLights, 3 * sizeof(float));
s_ConvexBoundsBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
s_LightDataBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightData)));

if (enableClustered)
{
s_GenListPerVoxelKernel = buildPerVoxelLightListShader.FindKernel(k_UseDepthBuffer ? "TileLightListGen_DepthRT" : "TileLightListGen_NoDepthRT");
var kernelName = enableBigTilePrepass ? (k_UseDepthBuffer ? "TileLightListGen_DepthRT_SrcBigTile" : "TileLightListGen_NoDepthRT_SrcBigTile") : (k_UseDepthBuffer ? "TileLightListGen_DepthRT" : "TileLightListGen_NoDepthRT");
s_GenListPerVoxelKernel = buildPerVoxelLightListShader.FindKernel(kernelName);
s_ClearVoxelAtomicKernel = buildPerVoxelLightListShader.FindKernel("ClearAtomic");
buildPerVoxelLightListShader.SetBuffer(s_GenListPerVoxelKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
buildPerVoxelLightListShader.SetBuffer(s_GenListPerVoxelKernel, "g_vLightData", s_LightDataBuffer);

}
if(enableBigTilePrepass)
{
s_GenListPerBigTileKernel = buildPerBigTileLightListShader.FindKernel("BigTileLightListGen");
buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_vLightData", s_LightDataBuffer);
buildPerBigTileLightListShader.SetBuffer(s_GenListPerBigTileKernel, "g_data", s_ConvexBoundsBuffer);
}
m_CookieTexArray = new TextureCache2D();

m_DebugLightBoundsMaterial = new Material(debugLightBoundsShader) { hideFlags = HideFlags.HideAndDontSave };
s_LightList = null;
s_BigTileLightList = null;
}
void OnDisable()

var numTilesX = (w + 15) / 16;
var numTilesY = (h + 15) / 16;
var numBigTilesX = (w + 63) / 64;
var numBigTilesY = (h + 63) / 64;
//ComputeBuffer lightList = new ComputeBuffer(nrTilesX * nrTilesY * (32 / 2), sizeof(uint));

cmd.SetComputeBufferParam(buildScreenAABBShader, s_GenAABBKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (numLights + 7) / 8, 1, 1);
// enable coarse 2D pass on 64x64 tiles.
if(enableBigTilePrepass)
{
cmd.SetComputeIntParams(buildPerBigTileLightListShader, "g_viDimensions", new int[2] { w, h });
cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", numLights);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mScrProjection", projscr);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mInvScrProjection", invProjscr);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fNearPlane", camera.nearClipPlane);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fFarPlane", camera.farClipPlane);
cmd.SetComputeBufferParam(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, "g_vLightList", s_BigTileLightList);
cmd.DispatchCompute(buildPerBigTileLightListShader, s_GenListPerBigTileKernel, numBigTilesX, numBigTilesY, 1);
}
if( UsingFptl() )
{
cmd.SetComputeIntParams(buildPerTileLightListShader, "g_viDimensions", new int[2] { w, h });

cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(s_CameraDepthTexture));
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vLightList", s_LightList);
if(enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vBigTileLightList", s_BigTileLightList);
cmd.DispatchCompute(buildPerTileLightListShader, s_GenListPerTileKernel, numTilesX, numTilesY, 1);
}

void ResizeIfNecessary(int curWidth, int curHeight)
{
if (curWidth != s_WidthOnRecord || curHeight != s_HeightOnRecord || s_LightList == null)
if (curWidth != s_WidthOnRecord || curHeight != s_HeightOnRecord || s_LightList == null ||
(s_BigTileLightList==null && enableBigTilePrepass) || (s_PerVoxelLightLists==null && enableClustered) )
{
if (s_WidthOnRecord > 0 && s_HeightOnRecord > 0)
ReleaseResolutionDependentBuffers();

if (k_UseDepthBuffer && s_PerTileLogBaseTweak != null)
s_PerTileLogBaseTweak.Release();
}
if(enableBigTilePrepass)
{
if(s_BigTileLightList!=null) s_BigTileLightList.Release();
}
}
int NumLightIndicesPerClusteredTile()

s_PerTileLogBaseTweak = new ComputeBuffer(nrTiles, sizeof(float));
}
}
if(enableBigTilePrepass)
{
var nrBigTilesX = (width + 63) / 64;
var nrBigTilesY = (height + 63) / 64;
var nrBigTiles = nrBigTilesX * nrBigTilesY;
s_BigTileLightList = new ComputeBuffer(LightDefinitions.MAX_NR_BIGTILE_LIGHTS_PLUSONE * nrBigTiles, sizeof(uint));
}
}
void VoxelLightListGeneration(CommandBuffer cmd, Camera camera, int numLights, Matrix4x4 projscr, Matrix4x4 invProjscr)

cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vLayeredLightList", s_PerVoxelLightLists);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredOffset", s_PerVoxelOffset);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
if(enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vBigTileLightList", s_BigTileLightList);
if (k_UseDepthBuffer)
{

cmd.SetGlobalTexture("_reflRootCubeTexture", topCube);
cmd.SetGlobalFloat("_reflRootHdrDecodeMult", defdecode.x);
cmd.SetGlobalFloat("_reflRootHdrDecodeExp", defdecode.y);
if(enableBigTilePrepass)
cmd.SetGlobalBuffer("g_vBigTileLightList", s_BigTileLightList);
if (enableClustered)
{

1
Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs


public class LightDefinitions
{
public static int MAX_NR_LIGHTS_PER_CAMERA = 1024;
public static int MAX_NR_BIGTILE_LIGHTS_PLUSONE = 512; // may be overkill but the footprint is 2 bits per pixel using uint16.
public static float VIEWPORT_SCALE_Z = 1.0f;
// flags

5
Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs.hlsl


#ifndef LORTIHOVEDET
#define LORTIHOVEDET
//
// This file was automatically generated from Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs. Please don't edit by hand.
//

//
#define MAX_NR_LIGHTS_PER_CAMERA (1024)
#define MAX_NR_BIGTILE_LIGHTS_PLUSONE (512)
#define VIEWPORT_SCALE_Z (1)
#define IS_CIRCULAR_SPOT_SHAPE (1)
#define HAS_COOKIE_TEXTURE (2)

}
#endif

77
Assets/ScriptableRenderLoop/fptl/lightlistbuild-clustered.compute


#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE
#pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED
#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE
#pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED
#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
#pragma kernel ClearAtomic

#include "LightingConvexHullUtils.hlsl"
//#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
#define CONV_HULL_TEST_ENABLED

StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#ifdef USE_TWO_PASS_TILED_LIGHTING
StructuredBuffer<uint> g_vBigTileLightList : register( t4 );
#endif
#define NR_THREADS 64

// returns 1 for intersection and 0 for none
float4 GetPlaneEq(const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 vScaleXZ, const int sideIndex);
float4 FetchPlane(int l, int p);

uint nrTilesY = (iHeight+15)/16;
uint2 viTilLL = 16*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth-1, iHeight-1) );
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
if(t==0)
{

#endif
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, 1.0);
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
// build coarse list using AABB

float4 GetPlaneEq(const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
const int absSide = (sideIndex == 0 || sideIndex == 1) ? 0 : ((sideIndex == 2 || sideIndex == 3) ? 1 : 2);
const float fS = (sideIndex & 1) != 0 ? 1 : (-1);
float3 vA = fS*(absSide == 0 ? boxX : (absSide == 1 ? (-boxY) : boxZ));
float3 vB = fS*(absSide == 0 ? (-boxY) : (absSide == 1 ? (-boxX) : (-boxY)));
float3 vC = absSide == 0 ? boxZ : (absSide == 1 ? boxZ : (-boxX));
bool bIsTopQuad = absSide == 2 && (sideIndex & 1) != 0; // in this case all 4 verts get scaled.
bool bIsSideQuad = (absSide == 0 || absSide == 1); // if side quad only two verts get scaled (impacts q1 and q2)
if (bIsTopQuad) { vB *= scaleXY.y; vC *= scaleXY.x; }
float3 vA2 = vA;
float3 vB2 = vB;
if (bIsSideQuad) { vA2 *= (absSide == 0 ? scaleXY.x : scaleXY.y); vB2 *= (absSide == 0 ? scaleXY.y : scaleXY.x); }
float3 p0 = center + (vA + vB - vC); // center + vA is center of face when scaleXY is 1.0
float3 vN = cross( vB2, 0.5*(vA-vA2) - vC );
#ifdef LEFT_HAND_COORDINATES
vN = -vN;
#endif
return float4(vN, -dot(vN,p0));
}
float4 FetchPlane(int l, int p)
{

#ifdef LEFT_HAND_COORDINATES
fRad = fRad + (center.z+fOffs)*worldDistAtDepthOne;
#else
fRad = fRad + (center.z-fOffs)*worldDistAtDepthOne;
fRad = fRad - (center.z-fOffs)*worldDistAtDepthOne;
#endif
float a = dot(V,V);

#ifdef EXACT_EDGE_TESTS
float3 GetHullVertex(const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int p)
{
const bool bIsTopVertex = (p&4)!=0;
float3 vScales = float3( ((p&1)!=0 ? 1.0f : (-1.0f))*(bIsTopVertex ? scaleXY.x : 1.0), ((p&2)!=0 ? 1.0f : (-1.0f))*(bIsTopVertex ? scaleXY.y : 1.0), (p&4)!=0 ? 1.0f : (-1.0f) );
return (vScales.x*boxX + vScales.y*boxY + vScales.z*boxZ) + center;
}
void GetHullEdge(out int idx0, out int idx_twin, out float3 vP0, out float3 vE0, const int e0, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY)
{
int iAxis = e0>>2;
int iSwizzle = e0&0x3;
bool bIsSwizzleOneOrTwo = ((iSwizzle-1)&0x2)==0;
const int i0 = iAxis==0 ? (2*iSwizzle+0) : ( iAxis==1 ? (iSwizzle+(iSwizzle&2)) : iSwizzle);
const int i1 = i0 + (1<<iAxis);
const bool bSwap = iAxis==0 ? (!bIsSwizzleOneOrTwo) : (iAxis==1 ? false : bIsSwizzleOneOrTwo);
idx0 = bSwap ? i1 : i0;
idx_twin = bSwap ? i0 : i1;
float3 p0 = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, idx0);
float3 p1 = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, idx_twin);
vP0 = p0;
vE0 = p1-p0;
}
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{

22
Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute


// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
// https://github.com/wolfgangfengel/GPU-Pro-7
#pragma kernel TileLightListGen
#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs.hlsl"

StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#ifdef USE_TWO_PASS_TILED_LIGHTING
StructuredBuffer<uint> g_vBigTileLightList : register( t4 );
#endif
#define NR_THREADS 64

[numthreads(NR_THREADS, 1, 1)]
void TileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;

// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{
int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];

const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(l=(int) t; l<(int) nrDWords; l += NR_THREADS)
for(int l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
uint uHigh = prunedList[2*l+0+localOffs];

#ifdef LEFT_HAND_COORDINATES
fRad = fRad + (center.z+fOffs)*worldDistAtDepthOne;
#else
fRad = fRad + (center.z-fOffs)*worldDistAtDepthOne;
fRad = fRad - (center.z-fOffs)*worldDistAtDepthOne;
#endif
float a = dot(V,V);

60
Assets/ScriptableRenderLoop/fptl/scrbound.compute


unsigned int GetClip(const float4 P);
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p);
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r);
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex);
void GetPlane(out float3 p0, out float3 vN, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex);
#include "LightingConvexHullUtils.hlsl"
[numthreads(NR_THREADS, 1, 1)]

return vNew;
}
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
const int iAbsSide = (sideIndex == 0 || sideIndex == 1) ? 0 : ((sideIndex == 2 || sideIndex == 3) ? 1 : 2);
const float fS = (sideIndex & 1) != 0 ? 1 : (-1);
float3 vA = fS*(iAbsSide == 0 ? boxX : (iAbsSide == 1 ? (-boxY) : boxZ));
float3 vB = fS*(iAbsSide == 0 ? (-boxY) : (iAbsSide == 1 ? (-boxX) : (-boxY)));
float3 vC = iAbsSide == 0 ? boxZ : (iAbsSide == 1 ? boxZ : (-boxX));
bool bIsTopQuad = iAbsSide == 2 && (sideIndex & 1) != 0; // in this case all 4 verts get scaled.
bool bIsSideQuad = (iAbsSide == 0 || iAbsSide == 1); // if side quad only two verts get scaled (impacts q1 and q2)
if (bIsTopQuad) { vB *= scaleXY.y; vC *= scaleXY.x; }
float3 vA2 = vA;
float3 vB2 = vB;
if (bIsSideQuad) { vA2 *= (iAbsSide == 0 ? scaleXY.x : scaleXY.y); vB2 *= (iAbsSide == 0 ? scaleXY.y : scaleXY.x); }
// delivered counterclockwise in right hand space and clockwise in left hand space
p0 = center + (vA + vB - vC); // center + vA is center of face when scaleXY is 1.0
p1 = center + (vA - vB - vC);
p2 = center + (vA2 - vB2 + vC);
p3 = center + (vA2 + vB2 + vC);
}
void GetPlane(out float3 p0, out float3 vN, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
const int iAbsSide = (sideIndex == 0 || sideIndex == 1) ? 0 : ((sideIndex == 2 || sideIndex == 3) ? 1 : 2);
const float fS = (sideIndex & 1) != 0 ? 1 : (-1);
float3 vA = fS*(iAbsSide == 0 ? boxX : (iAbsSide == 1 ? (-boxY) : boxZ));
float3 vB = fS*(iAbsSide == 0 ? (-boxY) : (iAbsSide == 1 ? (-boxX) : (-boxY)));
float3 vC = iAbsSide == 0 ? boxZ : (iAbsSide == 1 ? boxZ : (-boxX));
bool bIsTopQuad = iAbsSide == 2 && (sideIndex & 1) != 0; // in this case all 4 verts get scaled.
bool bIsSideQuad = (iAbsSide == 0 || iAbsSide == 1); // if side quad only two verts get scaled (impacts q1 and q2)
if (bIsTopQuad) { vB *= scaleXY.y; vC *= scaleXY.x; }
float3 vA2 = vA;
float3 vB2 = vB;
if (bIsSideQuad) { vA2 *= (iAbsSide == 0 ? scaleXY.x : scaleXY.y); vB2 *= (iAbsSide == 0 ? scaleXY.y : scaleXY.x); }
p0 = center + (vA + vB - vC); // center + vA is center of face when scaleXY is 1.0
float3 vNout = cross( vB2, 0.5*(vA-vA2) - vC );
#ifdef LEFT_HAND_COORDINATES
vNout = -vNout;
#endif
vN = vNout;
}
float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)

102
Assets/ScriptableRenderLoop/fptl/LightingConvexHullUtils.hlsl


#ifndef __LIGHTINGCONVEXHULLUTILS_H__
#define __LIGHTINGCONVEXHULLUTILS_H__
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs.hlsl"
float3 GetHullVertex(const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int p)
{
const bool bIsTopVertex = (p&4)!=0;
float3 vScales = float3( ((p&1)!=0 ? 1.0f : (-1.0f))*(bIsTopVertex ? scaleXY.x : 1.0), ((p&2)!=0 ? 1.0f : (-1.0f))*(bIsTopVertex ? scaleXY.y : 1.0), (p&4)!=0 ? 1.0f : (-1.0f) );
return (vScales.x*boxX + vScales.y*boxY + vScales.z*boxZ) + center;
}
void GetHullEdge(out int idx0, out int idx_twin, out float3 vP0, out float3 vE0, const int e0, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY)
{
int iAxis = e0>>2;
int iSwizzle = e0&0x3;
bool bIsSwizzleOneOrTwo = ((iSwizzle-1)&0x2)==0;
const int i0 = iAxis==0 ? (2*iSwizzle+0) : ( iAxis==1 ? (iSwizzle+(iSwizzle&2)) : iSwizzle);
const int i1 = i0 + (1<<iAxis);
const bool bSwap = iAxis==0 ? (!bIsSwizzleOneOrTwo) : (iAxis==1 ? false : bIsSwizzleOneOrTwo);
idx0 = bSwap ? i1 : i0;
idx_twin = bSwap ? i0 : i1;
float3 p0 = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, idx0);
float3 p1 = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, idx_twin);
vP0 = p0;
vE0 = p1-p0;
}
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
//const int iAbsSide = (sideIndex == 0 || sideIndex == 1) ? 0 : ((sideIndex == 2 || sideIndex == 3) ? 1 : 2);
const int iAbsSide = min(sideIndex>>1, 2);
const float fS = (sideIndex & 1) != 0 ? 1 : (-1);
float3 vA = fS*(iAbsSide == 0 ? boxX : (iAbsSide == 1 ? (-boxY) : boxZ));
float3 vB = fS*(iAbsSide == 0 ? (-boxY) : (iAbsSide == 1 ? (-boxX) : (-boxY)));
float3 vC = iAbsSide == 0 ? boxZ : (iAbsSide == 1 ? boxZ : (-boxX));
bool bIsTopQuad = iAbsSide == 2 && (sideIndex & 1) != 0; // in this case all 4 verts get scaled.
bool bIsSideQuad = (iAbsSide == 0 || iAbsSide == 1); // if side quad only two verts get scaled (impacts q1 and q2)
if (bIsTopQuad) { vB *= scaleXY.y; vC *= scaleXY.x; }
float3 vA2 = vA;
float3 vB2 = vB;
if (bIsSideQuad) { vA2 *= (iAbsSide == 0 ? scaleXY.x : scaleXY.y); vB2 *= (iAbsSide == 0 ? scaleXY.y : scaleXY.x); }
// delivered counterclockwise in right hand space and clockwise in left hand space
p0 = center + (vA + vB - vC); // center + vA is center of face when scaleXY is 1.0
p1 = center + (vA - vB - vC);
p2 = center + (vA2 - vB2 + vC);
p3 = center + (vA2 + vB2 + vC);
}
void GetPlane(out float3 p0, out float3 vN, const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
//const int iAbsSide = (sideIndex == 0 || sideIndex == 1) ? 0 : ((sideIndex == 2 || sideIndex == 3) ? 1 : 2);
const int iAbsSide = min(sideIndex>>1, 2);
const float fS = (sideIndex & 1) != 0 ? 1 : (-1);
float3 vA = fS*(iAbsSide == 0 ? boxX : (iAbsSide == 1 ? (-boxY) : boxZ));
float3 vB = fS*(iAbsSide == 0 ? (-boxY) : (iAbsSide == 1 ? (-boxX) : (-boxY)));
float3 vC = iAbsSide == 0 ? boxZ : (iAbsSide == 1 ? boxZ : (-boxX));
bool bIsTopQuad = iAbsSide == 2 && (sideIndex & 1) != 0; // in this case all 4 verts get scaled.
bool bIsSideQuad = (iAbsSide == 0 || iAbsSide == 1); // if side quad only two verts get scaled (impacts q1 and q2)
if (bIsTopQuad) { vB *= scaleXY.y; vC *= scaleXY.x; }
float3 vA2 = vA;
float3 vB2 = vB;
if (bIsSideQuad) { vA2 *= (iAbsSide == 0 ? scaleXY.x : scaleXY.y); vB2 *= (iAbsSide == 0 ? scaleXY.y : scaleXY.x); }
p0 = center + (vA + vB - vC); // center + vA is center of face when scaleXY is 1.0
float3 vNout = cross( vB2, 0.5*(vA-vA2) - vC );
#ifdef LEFT_HAND_COORDINATES
vNout = -vNout;
#endif
vN = vNout;
}
float4 GetPlaneEq(const float3 boxX, const float3 boxY, const float3 boxZ, const float3 center, const float2 scaleXY, const int sideIndex)
{
float3 p0, vN;
GetPlane(p0, vN, boxX, boxY, boxZ, center, scaleXY, sideIndex);
return float4(vN, -dot(vN,p0));
}
#endif

345
Assets/ScriptableRenderLoop/fptl/lightlistbuild-bigtile.compute


#pragma kernel BigTileLightListGen
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs.hlsl"
#include "LightingConvexHullUtils.hlsl"
#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
#define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform float g_fNearPlane;
uniform float g_fFarPlane;
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#define NR_THREADS 64
// output buffer
RWStructuredBuffer<uint> g_vLightList : register( u0 );
// 2kB (room for roughly 30 wavefronts)
groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];
groupshared uint lightOffs;
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w;
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
#ifdef LEFT_HAND_COORDINATES
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
#else
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
#endif
}
float GetOnePixDiagWorldDistAtDepthOne()
{
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
}
void sortLightList(int localThreadID, int n);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
#ifdef EXACT_EDGE_TESTS
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
#endif
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrBigTilesX = (iWidth+63)/64;
uint nrBigTilesY = (iHeight+63)/64;
if(t==0) lightOffs = 0;
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
uint2 viTilLL = 64*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l; // add to light list
}
}
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = lightOffs<MAX_NR_BIGTILE_LIGHTS ? lightOffs : MAX_NR_BIGTILE_LIGHTS;
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifdef EXACT_EDGE_TESTS
CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
#endif
// sort lights
sortLightList((int) t, iNrCoarseLights); // sorting on GCN as well for now since we need the culled lights weeded out.
lightOffs = 0;
GroupMemoryBarrierWithGroupSync();
for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
GroupMemoryBarrierWithGroupSync();
iNrCoarseLights = lightOffs;
int offs = tileIDX.y*nrBigTilesX + tileIDX.x;
for(int i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = t==0 ? iNrCoarseLights : lightsListLDS[i-1];
}
// NOTE! returns 1 when value_in==0
unsigned int LimitPow2AndClamp(unsigned int value_in, unsigned int maxValue)
{
unsigned int value = 1;
while(value<value_in && (value<<1)<=maxValue)
value<<=1;
return value;
}
void sortLightList(int localThreadID, int length)
{
// closest pow2 integer greater than or equal to length
const int N = (const int) LimitPow2AndClamp((unsigned int) length, MAX_NR_BIGTILE_LIGHTS_PLUSONE); // N is 1 when length is zero but will still not enter first for-loop
// bitonic sort can only handle arrays with a power of two length. Fill remaining entries with greater than possible index.
for(int t=length+localThreadID; t<N; t+=NR_THREADS) { lightsListLDS[t]=0xffffffff; } // impossible index
GroupMemoryBarrierWithGroupSync();
for(int k=2; k<=N; k=2*k)
{
for(int j=k>>1; j>0; j=j>>1)
{
for(int i=localThreadID; i<N; i+=NR_THREADS)
{
int ixj=i^j;
if((ixj)>i)
{
const unsigned int Avalue = lightsListLDS[i];
const unsigned int Bvalue = lightsListLDS[ixj];
const bool mustSwap = ((i&k)!=0^(Avalue>Bvalue)) && Avalue!=Bvalue;
if(mustSwap)
{
lightsListLDS[i]=Bvalue;
lightsListLDS[ixj]=Avalue;
}
}
}
GroupMemoryBarrierWithGroupSync();
}
}
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
#ifdef LEFT_HAND_COORDINATES
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float worldDistAtDepthOne = 32*onePixDiagDist; // scale by half a tile
int iNrVisib = 0;
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];
const float3 center = lgtDat.center.xyz;
float fRad = lgtDat.radius;
#if 1
float3 maxZdir = float3(-center.z*center.x, -center.z*center.y, center.x*center.x + center.y*center.y); // cross(center,cross(Zaxis,center))
float len = length(maxZdir);
float scalarProj = len>0.0001 ? (maxZdir.z/len) : len; // since len>=(maxZdir.z/len) we can use len as an approximate value when len<=epsilon
float fOffs = scalarProj*fRad;
#else
float fOffs = fRad; // more false positives due to larger radius but works too
#endif
#ifdef LEFT_HAND_COORDINATES
fRad = fRad + (center.z+fOffs)*worldDistAtDepthOne;
#else
fRad = fRad - (center.z-fOffs)*worldDistAtDepthOne;
#endif
float a = dot(V,V);
float CdotV = dot(center,V);
float c = dot(center,center) - fRad*fRad;
float fDescDivFour = CdotV*CdotV - a*c;
if(!(c<0 || (fDescDivFour>0 && CdotV>0))) // if ray misses bounding sphere
lightsListLDS[l]=0xffffffff;
}
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
}
#endif
#ifdef EXACT_EDGE_TESTS
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
#ifndef LEFT_HAND_COORDINATES
z = -z;
#endif
return GetViewPosFromLinDepth( float2(x, y), z);
}
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
int iSwizzle = e0&0x3;
int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
}
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
{
const bool bOnlyNeedFrustumSideEdges = true;
const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
const int totNrEdgePairs = 12*nrFrustEdges;
for(int l=0; l<iNrCoarseLights; l++)
{
const int idxCoarse = lightsListLDS[l];
[branch]if(idxCoarse<(uint) g_iNrVisibLights && g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
{
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip an axis to make it right handed since Determinant(worldToView)<0
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;
for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
{
int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
int e1 = i - e0*nrFrustEdges;
int idx_cur=0, idx_twin=0;
float3 vP0, vE0;
GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
float3 vP1, vE1;
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, g_fFarPlane);
// potential separation plane
float3 vN = cross(vE0, vE1);
int positive=0, negative=0;
for(int k=1; k<8; k++) // only need to test 7 verts (technically just 6).
{
int j = (idx_cur+k)&0x7;
float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
positive=0; negative=0;
for(int j=0; j<8; j++)
{
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) lightsListLDS[l]=0xffffffff;
}
}
}
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
}
#endif
正在加载...
取消
保存