浏览代码

Added support for oblique matrices in light loop culling

/main
Frédéric Vauchelles 7 年前
当前提交
c2317504
共有 6 个文件被更改,包括 189 次插入93 次删除
  1. 17
      com.unity.render-pipelines.core/CoreRP/Utilities/GeometryUtils.cs
  2. 76
      com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/LightLoop.cs
  3. 20
      com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild-bigtile.compute
  4. 68
      com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute
  5. 34
      com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild.compute
  6. 67
      com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/scrbound.compute

17
com.unity.render-pipelines.core/CoreRP/Utilities/GeometryUtils.cs


1.0f,
1.0f);
var q = inversion * cps;
var c = clipPlane * (2.0f / Vector4.Dot(clipPlane, q));
Vector4 M4 = new Vector4(projection[3], projection[7], projection[11], projection[15]);
projection[2] = c.x - projection[3];
projection[6] = c.y - projection[7];
projection[10] = c.z - projection[11];
projection[14] = c.w - projection[15];
var c = clipPlane * ((2.0f*Vector4.Dot(M4, q)) / Vector4.Dot(clipPlane, q));
projection[2] = c.x - M4.x;
projection[6] = c.y - M4.y;
projection[10] = c.z - M4.z;
projection[14] = c.w - M4.w;
return projection;
}

/// <summary>
/// Returns the camera's projection matrix post-multiplied by <c>FlipMatrixLHSRHS</c>.
/// </summary>
/// <param name="camera">Camera whose <c>projectionMatrix</c> is read.</param>
/// <returns>
/// <c>camera.projectionMatrix * FlipMatrixLHSRHS</c>.
/// NOTE(review): going by the names, this presumably converts the projection to a
/// left-handed view-space convention; confirm against the definition of FlipMatrixLHSRHS.
/// </returns>
public static Matrix4x4 GetProjectionMatrixLHS(this Camera camera)
{
    Matrix4x4 cameraProjection = camera.projectionMatrix;
    return cameraProjection * FlipMatrixLHSRHS;
}
/// <summary>
/// Tells whether a projection matrix is treated as oblique by checking two of its elements.
/// </summary>
/// <param name="projectionMatrix">Projection matrix to inspect.</param>
/// <returns>
/// <c>true</c> when the element at flat index 2 or the element at flat index 6 is non-zero;
/// <c>false</c> when both are exactly zero. These are two of the entries that the
/// oblique-matrix construction in this file overwrites (indices 2, 6, 10 and 14).
/// </returns>
public static bool IsProjectionMatrixOblique(Matrix4x4 projectionMatrix)
{
    // The comparison against exact zero is deliberate: any non-zero value counts.
    if (projectionMatrix[2] != 0)
    {
        return true;
    }

    return projectionMatrix[6] != 0;
}
public static Matrix4x4 CalculateProjectionMatrix(Camera camera)

76
com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/LightLoop.cs


static int s_GenAABBKernel;
static int s_GenAABBKernel_Oblique;
static int s_GenListPerTileKernel_Oblique;
static int s_GenListPerVoxelKernelOblique;
static int s_ClearVoxelAtomicKernel;
static int s_ClearDispatchIndirectKernel;
static int s_BuildDispatchIndirectKernel;

{
{ "TileLightListGen_NoDepthRT", "TileLightListGen_DepthRT", "TileLightListGen_DepthRT_MSAA" },
{ "TileLightListGen_NoDepthRT_SrcBigTile", "TileLightListGen_DepthRT_SrcBigTile", "TileLightListGen_DepthRT_MSAA_SrcBigTile" }
};
static string[,] s_ClusterObliqueKernelNames = new string[(int)ClusterPrepassSource.Count, (int)ClusterDepthSource.Count]
{
{ "No Oblique Support for: TileLightListGen_NoDepthRT ", "TileLightListGen_DepthRT_Oblique", "TileLightListGen_DepthRT_MSAA_Oblique" },
{ "No Oblique Support for: TileLightListGen_NoDepthRT_SrcBigTile", "TileLightListGen_DepthRT_SrcBigTile_Oblique", "TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique" }
};
// clustered light list specific buffers and data end

m_ReflectionPlanarProbeCache = new PlanarReflectionProbeCache(hdAsset, iblFilterGGX, gLightLoopSettings.planarReflectionProbeCacheSize, (int)gLightLoopSettings.planarReflectionTextureSize, planarProbeCacheFormat, true);
s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB");
s_GenAABBKernel_Oblique = buildScreenAABBShader.FindKernel("ScreenBoundsAABB_Oblique");
// The bounds and light volumes are view-dependent, and AABB is additionally projection dependent.
// The view and proj matrices are per eye in stereo. This means we have to double the size of these buffers.

s_AABBBoundsBuffer = new ComputeBuffer(k_MaxStereoEyes * 2 * k_MaxLightsOnScreen, 3 * sizeof(float));
s_AABBBoundsBuffer = new ComputeBuffer(k_MaxStereoEyes * 2 * k_MaxLightsOnScreen, 4 * sizeof(float));
s_ConvexBoundsBuffer = new ComputeBuffer(k_MaxStereoEyes * k_MaxLightsOnScreen, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
s_LightVolumeDataBuffer = new ComputeBuffer(k_MaxStereoEyes * k_MaxLightsOnScreen, System.Runtime.InteropServices.Marshal.SizeOf(typeof(LightVolumeData)));
s_DispatchIndirectBuffer = new ComputeBuffer(LightDefinitions.s_NumFeatureVariants * 3, sizeof(uint), ComputeBufferType.IndirectArguments);

clustDepthSourceIdx = ClusterDepthSource.Depth;
}
var kernelName = s_ClusterKernelNames[(int)clustPrepassSourceIdx, (int)clustDepthSourceIdx];
var kernelObliqueName = s_ClusterObliqueKernelNames[(int)clustPrepassSourceIdx, (int)clustDepthSourceIdx];
s_GenListPerVoxelKernelOblique = !kernelObliqueName.Contains("No Oblique Support for")
? buildPerVoxelLightListShader.FindKernel(kernelObliqueName)
: -1;
s_GenListPerTileKernel_Oblique = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile_FeatureFlags_Oblique" : "TileLightListGen_FeatureFlags_Oblique");
s_GenListPerTileKernel_Oblique = buildPerTileLightListShader.FindKernel(m_FrameSettings.lightLoopSettings.enableBigTilePrepass ? "TileLightListGen_SrcBigTile_Oblique" : "TileLightListGen_Oblique");
}
m_CookieTexArray.NewFrame();

void VoxelLightListGeneration(CommandBuffer cmd, HDCamera hdCamera, Matrix4x4[] projscrArr, Matrix4x4[] invProjscrArr, RenderTargetIdentifier cameraDepthBufferRT)
{
Camera camera = hdCamera.camera;
var isProjectionOblique = GeometryUtils.IsProjectionMatrixOblique(camera.projectionMatrix);
// clear atomic offset index
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, HDShaderIDs.g_LayeredSingleIdxBuffer, s_GlobalLightListAtomic);
cmd.DispatchCompute(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, 1, 1, 1);

cmd.SetComputeFloatParam(buildPerVoxelLightListShader, HDShaderIDs.g_fClustScale, m_ClustScale);
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, HDShaderIDs.g_fClustBase, k_ClustLogBase);
cmd.SetComputeTextureParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_depth_tex, cameraDepthBufferRT);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_vLayeredLightList, s_PerVoxelLightLists);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_LayeredOffset, s_PerVoxelOffset);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_LayeredSingleIdxBuffer, s_GlobalLightListAtomic);
var genListPerVoxelKernel = isProjectionOblique ? s_GenListPerVoxelKernelOblique : s_GenListPerVoxelKernel;
cmd.SetComputeTextureParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_depth_tex, cameraDepthBufferRT);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_vLayeredLightList, s_PerVoxelLightLists);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_LayeredOffset, s_PerVoxelOffset);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_LayeredSingleIdxBuffer, s_GlobalLightListAtomic);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_vBigTileLightList, s_BigTileLightList);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_vBigTileLightList, s_BigTileLightList);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_logBaseBuffer, s_PerTileLogBaseTweak);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_logBaseBuffer, s_PerTileLogBaseTweak);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, genListPerVoxelKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
//cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, numEyes);
//cmd.DispatchCompute(buildPerVoxelLightListShader, genListPerVoxelKernel, numTilesX, numTilesY, 1);
cmd.DispatchCompute(buildPerVoxelLightListShader, genListPerVoxelKernel, numTilesX, numTilesY, numEyes);
}
public void BuildGPULightListsCommon(HDCamera hdCamera, CommandBuffer cmd, RenderTargetIdentifier cameraDepthBufferRT, RenderTargetIdentifier stencilTextureRT, bool skyEnabled)

projArr[eyeIndex] = CameraProjectionStereoLHS(hdCamera.camera, (Camera.StereoscopicEye)eyeIndex);
projscrArr[eyeIndex] = temp * projArr[eyeIndex];
invProjscrArr[eyeIndex] = projscrArr[eyeIndex].inverse;
projArr[0] = CameraProjectionNonObliqueLHS(hdCamera);
projArr[0] = GeometryUtils.GetProjectionMatrixLHS(hdCamera.camera);
var isProjectionOblique = GeometryUtils.IsProjectionMatrixOblique(projArr[0]);
// generate screen-space AABBs (used for both fptl and clustered).
if (m_lightCount != 0)

invProjhArr[0] = projhArr[0].inverse;
}
var genAABBKernel = isProjectionOblique ? s_GenAABBKernel_Oblique : s_GenAABBKernel;
cmd.SetComputeBufferParam(buildScreenAABBShader, s_GenAABBKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeBufferParam(buildScreenAABBShader, genAABBKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeBufferParam(buildScreenAABBShader, s_GenAABBKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildScreenAABBShader, genAABBKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (m_lightCount + 7) / 8, tgY, 1);
cmd.DispatchCompute(buildScreenAABBShader, genAABBKernel, (m_lightCount + 7) / 8, tgY, 1);
}
// enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).

// optimized for opaques only
if (m_FrameSettings.lightLoopSettings.isFptlEnabled)
{
var genListPerTileKernel = isProjectionOblique ? s_GenListPerTileKernel_Oblique : s_GenListPerTileKernel;
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs.g_isOrthographic, isOrthographic ? 1 : 0);
cmd.SetComputeIntParams(buildPerTileLightListShader, HDShaderIDs.g_viDimensions, s_TempScreenDimArray);
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs._EnvLightIndexShift, m_lightList.lights.Count);

cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vBoundsBuffer, s_AABBBoundsBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs._LightVolumeData, s_LightVolumeDataBuffer);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_data, s_ConvexBoundsBuffer);
cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_depth_tex, cameraDepthBufferRT);
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_vLightList, s_LightList);
cmd.SetComputeTextureParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_depth_tex, cameraDepthBufferRT);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vLightList, s_LightList);
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_vBigTileLightList, s_BigTileLightList);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_vBigTileLightList, s_BigTileLightList);
if (enableFeatureVariants)
{

baseFeatureFlags |= LightDefinitions.s_MaterialFeatureMaskFlags;
}
cmd.SetComputeIntParam(buildPerTileLightListShader, HDShaderIDs.g_BaseFeatureFlags, (int)baseFeatureFlags);
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, HDShaderIDs.g_TileFeatureFlags, s_TileFeatureFlags);
cmd.SetComputeBufferParam(buildPerTileLightListShader, genListPerTileKernel, HDShaderIDs.g_TileFeatureFlags, s_TileFeatureFlags);
cmd.DispatchCompute(buildPerTileLightListShader, s_GenListPerTileKernel, numTilesX, numTilesY, 1);
cmd.DispatchCompute(buildPerTileLightListShader, genListPerTileKernel, numTilesX, numTilesY, 1);
}
// Cluster

20
com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild-bigtile.compute


#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
// is not actually used for anything in this kernel
#define USE_OBLIQUE_MODE
#define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIG_TILE_LIGHTS_PLUS_ONE-1)

uniform float g_fFarPlane;
uniform uint g_isOrthographic;
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<float4> g_vBoundsBuffer : register( t1 );
StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

// TODO: Remove this function and g_mInvScrProjectionArr from constants.
// Only usage of that constant.
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
float GetLinearDepth(float2 pixXY, float zDptBufSpace) // 0 is near 1 is far
float4x4 g_mInvScrProjection = g_mInvScrProjectionArr[0];
float4x4 g_mInvScrProjection = g_mInvScrProjectionArr[0];
// for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
#ifdef USE_OBLIQUE_MODE
float2 res2 = mul(g_mInvScrProjection, float4(pixXY, zDptBufSpace, 1.0)).zw;
return res2.x / res2.y;
#else
// for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
//float3 vP = float3(0.0f,0.0f,zDptBufSpace);
//float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
//return v4Pres.z / v4Pres.w;
#endif
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)

68
com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute


#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_DepthRT_MSAA_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_DepthRT_SrcBigTile_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile_Oblique ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE
#include "CoreRP/ShaderLibrary/Common.hlsl"
#include "ShaderBase.hlsl"

#else
Texture2D g_depth_tex : register( t0 );
#endif
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<float4> g_vBoundsBuffer : register( t1 );
StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

groupshared uint lightOffsSph;
#endif
float GetLinearDepth(float zDptBufSpace, uint eyeIndex) // 0 is near 1 is far
float GetLinearDepth(float2 pixXY, float zDptBufSpace, uint eyeIndex) // 0 is near 1 is far
#ifdef USE_OBLIQUE_MODE
float2 res2 = mul(g_mInvScrProjection, float4(pixXY, zDptBufSpace, 1.0)).zw;
return res2.x / res2.y;
#else
// for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
// however this function must also work for orthographic projection so we keep it like this.
float m22 = g_mInvScrProjection[2].z, m23 = g_mInvScrProjection[2].w;

//float3 vP = float3(0.0f,0.0f,zDptBufSpace);
//float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
//return v4Pres.z / v4Pres.w;
#endif
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)

bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase, uint eyeIndex)
{
// If this light's screen space depth bounds intersect this cluster...simple cluster test
// TODO: Unify this code with the code in CheckIntersectionBasic...
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
if(bIsHit)

GroupMemoryBarrierWithGroupSync();
#endif
float dpt_ma=1.0;
//float linMaDist=g_fFarPlane;
// establish min and max depth first
dpt_ma=0.0;
// establish max depth first
float linMaDist=0.0;
for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
{

for(int i=0; i<g_iNumSamplesMSAA; i++)
{
const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
const float2 fracSampleCoord = g_depth_tex.GetSamplePosition(i).xy; // this is optimized away when USE_OBLIQUE_MODE is NOT set.
const float2 fracSampleCoord = float2(0.5,0.5);
dpt_ma = max(fDpth, dpt_ma);
// unclear here if stereoDWOffset is taken into account in g_mInvScrProjectionArr[eyeIndex] used in GetLinearDepth()
// otherwise it should not be included in uPixCrd when querying GetLinearDepth().
float linZ = GetLinearDepth(uPixCrd+fracSampleCoord, fDpth, eyeIndex);
#if USE_LEFT_HAND_CAMERA_SPACE
float linDistZ = linZ;
#else
float linDistZ = -linZ;
#endif
linMaDist = max(linDistZ, linMaDist);
}
#ifdef MSAA_ENABLED
}

// Why is this a uint? Doesn't InterlockedMax support shared mem floats?
InterlockedMax(ldsZMax, asuint(dpt_ma) );
linMaDist = max(linMaDist, 0.0);
InterlockedMax(ldsZMax, asuint(linMaDist) );
dpt_ma = asfloat(ldsZMax);
if(dpt_ma<=0.0) dpt_ma = VIEWPORT_SCALE_Z; // assume sky pixel
linMaDist = asfloat(ldsZMax);
if(linMaDist<=0.0) linMaDist = g_fFarPlane; // assume sky pixel
#endif
// 'Normalized' coordinates of tile, for use with AABB bounds in g_vBoundsBuffer

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
// TODO: Seems kinda funny that we repeat this exact code here, bigtile, and FPTL...
const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex);
const float2 vMi = g_vBoundsBuffer[boundsIndices.min].xy;
const float2 vMa = g_vBoundsBuffer[boundsIndices.max].xy;

#endif
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
#if USE_LEFT_HAND_CAMERA_SPACE
float fTileFarPlane = GetLinearDepth(dpt_ma, eyeIndex);
#else // USE_LEFT_HAND_CAMERA_SPACE
float fTileFarPlane = -GetLinearDepth(dpt_ma, eyeIndex);
#endif
float fTileFarPlane = linMaDist;
float suggestedBase = SuggestLogBase50(fTileFarPlane);
#else // ENABLE_DEPTH_TEXTURE_BACKPLANE
float fTileFarPlane = g_fFarPlane;

#endif
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
// NOTE: Why not sort on console?
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif

const ScreenSpaceBoundsIndices l0Bounds = GenerateScreenSpaceBoundsIndices(l0, g_iNrVisibLights, eyeIndex);
const ScreenSpaceBoundsIndices l1Bounds = GenerateScreenSpaceBoundsIndices(l1, g_iNrVisibLights, eyeIndex);
const unsigned int clustIdxMi0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.min].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.max].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.min].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMa1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.max].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMi0 = (const unsigned int)min(255, SnapToClusterIdx(g_vBoundsBuffer[l0Bounds.min].w, suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int)min(255, SnapToClusterIdx(g_vBoundsBuffer[l0Bounds.max].w, suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int)min(255, SnapToClusterIdx(g_vBoundsBuffer[l1Bounds.min].w, suggestedBase));
const unsigned int clustIdxMa1 = (const unsigned int)min(255, SnapToClusterIdx(g_vBoundsBuffer[l1Bounds.max].w, suggestedBase));
clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
}
}

// to make it work correctly
int shiftIndex[LIGHTCATEGORY_COUNT];
ZERO_INITIALIZE_ARRAY(int, shiftIndex, LIGHTCATEGORY_COUNT);
// NOTE: Why is this indexed like this?
shiftIndex[LIGHTCATEGORY_COUNT - 3] = _EnvLightIndexShift;
shiftIndex[LIGHTCATEGORY_COUNT - 2] = _DecalIndexShift;
shiftIndex[LIGHTCATEGORY_COUNT - 1] = _DensityVolumeIndexShift;

34
com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/lightlistbuild.compute


// https://github.com/wolfgangfengel/GPU-Pro-7
#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_FeatureFlags LIGHTLISTGEN=TileLightListGen_FeatureFlags USE_FEATURE_FLAGS
#pragma kernel TileLightListGen_SrcBigTile_FeatureFlags LIGHTLISTGEN=TileLightListGen_SrcBigTile_FeatureFlags USE_TWO_PASS_TILED_LIGHTING USE_FEATURE_FLAGS
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_FeatureFlags LIGHTLISTGEN=TileLightListGen_FeatureFlags USE_FEATURE_FLAGS
#pragma kernel TileLightListGen_SrcBigTile_FeatureFlags LIGHTLISTGEN=TileLightListGen_SrcBigTile_FeatureFlags USE_TWO_PASS_TILED_LIGHTING USE_FEATURE_FLAGS
#pragma kernel TileLightListGen_Oblique LIGHTLISTGEN=TileLightListGen_Oblique USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_SrcBigTile_Oblique LIGHTLISTGEN=TileLightListGen_SrcBigTile_Oblique USE_TWO_PASS_TILED_LIGHTING USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_FeatureFlags_Oblique LIGHTLISTGEN=TileLightListGen_FeatureFlags_Oblique USE_FEATURE_FLAGS USE_OBLIQUE_MODE
#pragma kernel TileLightListGen_SrcBigTile_FeatureFlags_Oblique LIGHTLISTGEN=TileLightListGen_SrcBigTile_FeatureFlags_Oblique USE_TWO_PASS_TILED_LIGHTING USE_FEATURE_FLAGS USE_OBLIQUE_MODE
//#pragma enable_d3d11_debug_symbols

#define FINE_PRUNING_ENABLED
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;

uniform uint g_BaseFeatureFlags;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<float4> g_vBoundsBuffer : register( t1 );
StructuredBuffer<LightVolumeData> _LightVolumeData : register(t2);
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

RWStructuredBuffer<uint> g_TileFeatureFlags;
#endif
//float GetLinearDepth(float3 vP)
//{
// float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
// return v4Pres.z / v4Pres.w;
//}
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
float GetLinearDepth(float2 pixXY, float zDptBufSpace) // 0 is near 1 is far
// for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
#ifdef USE_OBLIQUE_MODE
float2 res2 = mul(g_mInvScrProjection, float4(pixXY, zDptBufSpace, 1.0)).zw;
return res2.x / res2.y;
#else
// for perspective projection m22 is zero and m23 is +1/-1 (depends on left/right hand proj)
//float3 vP = float3(0.0f,0.0f,zDptBufSpace);
//float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
//return v4Pres.z / v4Pres.w;
#endif
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)

int idx = i * NR_THREADS + t;
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
const float fDepth = FetchDepth(g_depth_tex, uCrd);
vLinDepths[i] = GetLinearDepth(fDepth);
vLinDepths[i] = GetLinearDepth(uCrd+float2(0.5,0.5), fDepth);
if(fDepth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDepth, dpt_mi);

67
com.unity.render-pipelines.high-definition/HDRP/Lighting/LightLoop/scrbound.compute


// The implementation is based on the demo on "fine pruned tiled lighting" published in GPU Pro 7.
// https://github.com/wolfgangfengel/GPU-Pro-7
#pragma kernel ScreenBoundsAABB
#pragma kernel ScreenBoundsAABB SCRAABBGEN=ScreenBoundsAABB
#pragma kernel ScreenBoundsAABB_Oblique SCRAABBGEN=ScreenBoundsAABB_Oblique USE_OBLIQUE_MODE
#include "CoreRP/ShaderLibrary/Common.hlsl"
#include "LightLoop.cs.hlsl"

#define NR_THREADS 64
// output buffer
RWStructuredBuffer<float3> g_vBoundsBuffer : register( u0 );
RWStructuredBuffer<float4> g_vBoundsBuffer : register( u0 );
#define MAX_PNTS 9 // strictly this should be 10=6+4 but we get more wavefronts and 10 seems to never hit (fingers crossed)
// However, worst case the plane that would be skipped if such an extreme case ever happened would be backplane

[numthreads(NR_THREADS, 1, 1)]
void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
void SCRAABBGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint groupID = u3GroupID.x;
uint eyeIndex = u3GroupID.y; // currently, can only be 0 or 1

}
////////////////////// look for camera frustum verts that need to be included. That is frustum vertices inside the convex hull for the light
#ifdef USE_OBLIQUE_MODE
bool bIsObliqueClipPlane = true;
#else
bool bIsObliqueClipPlane = false;
#endif
const int nrFrustVertsToTest = bIsObliqueClipPlane ? 4 : 8;
for(i=0; i<8; i++) // establish 8 camera frustum vertices
for(i=0; i<nrFrustVertsToTest; i++) // establish 8 camera frustum vertices
{
float3 vVertPSpace = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);

float3 vP0, vN;
GetHullPlane(vP0, vN, boxX, boxY, boxZ, center, scaleXY, f);
for(i=0; i<8; i++)
for(i=0; i<nrFrustVertsToTest; i++)
{
float3 vViewSpace = float3(posX[subLigt*MAX_PNTS*2 + i], posY[subLigt*MAX_PNTS*2 + i], posZ[subLigt*MAX_PNTS*2 + i]);
uVisibFl &= ( dot(vViewSpace-vP0, vN)<0 ? 0xff : (~(1<<i)) );

// apply camera frustum vertices inside the convex hull to the AABB
for(i=0; i<8; i++)
for(i=0; i<nrFrustVertsToTest; i++)
{
if((uVisibFl&(1<<i))!=0)
{

vMin.xy = max(vMin.xy, vMi);
vMax.xy = min(vMax.xy, vMa);
}
#ifndef USE_OBLIQUE_MODE
#if USE_LEFT_HAND_CAMERA_SPACE
if((center.z-radius)>0.0)
{

vMin = float3(-3,-3,-3);
vMax = float3(-2,-2,-2);
}
#endif
}

// a set of maxs, and each set is equal to g_iNrVisibLights.
const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(lgtIndex, g_iNrVisibLights, eyeIndex);
g_vBoundsBuffer[boundsIndices.min] = float3(0.5*vMin.x + 0.5, 0.5*vMin.y + 0.5, vMin.z*VIEWPORT_SCALE_Z);
g_vBoundsBuffer[boundsIndices.max] = float3(0.5*vMax.x + 0.5, 0.5*vMax.y + 0.5, vMax.z*VIEWPORT_SCALE_Z);
// build a linear (in camera space) min/max Z for the aabb. This is needed for clustered when oblique is active
float linMiZ, linMaZ;
#ifndef USE_OBLIQUE_MODE
float2 vMiZW = mul(g_mInvProjection, float4(vMin,1)).zw;
float2 vMaZW = mul(g_mInvProjection, float4(vMax,1)).zw;
linMiZ = vMiZW.x/vMiZW.y; linMaZ = vMaZW.x/vMaZW.y;
#else
for(int i=0; i<8; i++) // establish 8 aabb points in camera space.
{
float3 vP = float3((i&1)!=0 ? vMax.x : vMin.x, (i&2)!=0 ? vMax.y : vMin.y, (i&4)!=0 ? vMax.z : vMin.z);
float2 v2Pc = mul(g_mInvProjection, float4(vP,1)).zw;
float linZ = v2Pc.x/v2Pc.y;
if(i==0) { linMiZ=linZ; linMaZ=linZ; }
#if USE_LEFT_HAND_CAMERA_SPACE
linMiZ = min(linMiZ, linZ); linMaZ = max(linMaZ, linZ);
#else
linMiZ = max(linMiZ, linZ); linMaZ = min(linMaZ, linZ);
#endif
}
float z0 = center.z-radius, z1 = center.z+radius;
#if USE_LEFT_HAND_CAMERA_SPACE
linMiZ = max(linMiZ, z0); linMaZ = min(linMaZ, z1);
#else
linMiZ = min(linMiZ, z1); linMaZ = max(linMaZ, z0);
#endif
#endif
g_vBoundsBuffer[boundsIndices.min] = float4(0.5*vMin.x + 0.5, 0.5*vMin.y + 0.5, vMin.z*VIEWPORT_SCALE_Z, linMiZ);
g_vBoundsBuffer[boundsIndices.max] = float4(0.5*vMax.x + 0.5, 0.5*vMax.y + 0.5, vMax.z*VIEWPORT_SCALE_Z, linMaZ);
}
}
}

// Computes a clip outcode for the homogeneous clip-space point P.
// One bit is set per violated clip test:
//   1  : P.x < -P.w
//   2  : P.x >  P.w
//   4  : P.y < -P.w
//   8  : P.y >  P.w
//   16 : P.z <  0
//   32 : P.z >  P.w
// A result of 0 means P passes every test that is active in this build.
// When USE_OBLIQUE_MODE is defined the result is masked with 0x1f, so the
// P.z > P.w test (bit 32) is never reported.
// NOTE(review): presumably because that plane is not usable once the projection
// has been made oblique; confirm with the kernel's authors.
unsigned int GetClip(const float4 P)
{
#ifdef USE_OBLIQUE_MODE
    bool bIsObliqueClipPlane = true;
#else
    bool bIsObliqueClipPlane = false;
#endif
    // Only the masked form is returned. The earlier unmasked return made the
    // oblique mask unreachable and left bIsObliqueClipPlane unused.
    const unsigned int uOutcode = ((P.x<-P.w)?1:0) | ((P.x>P.w)?2:0) | ((P.y<-P.w)?4:0) | ((P.y>P.w)?8:0) | ((P.z<0)?16:0) | ((P.z>P.w)?32:0);
    return uOutcode & (bIsObliqueClipPlane ? 0x1f : 0x3f);
}
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p)

正在加载...
取消
保存