浏览代码

Cleanup structured comments

I use comments to track where to fix places, time to go!
/main
Robert Srinivasiah 7 年前
当前提交
2b42cb90
共有 2 个文件被更改,包括 6 次插入103 次删除
  1. 5
      ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoopDef.hlsl
  2. 104
      ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute

5
ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/LightLoopDef.hlsl


float logBase = g_fClustBase;
if (g_isLogBaseBufferEnabled)
{
// XRTODO - DONE: Stereo-ize access to g_logBaseBuffer
const uint logBaseIndex = GenerateLogBaseBufferIndex(tileIndex, _NumTileClusteredX, _NumTileClusteredY, unity_StereoEyeIndex);
logBase = g_logBaseBuffer[logBaseIndex];
}

{
int nrClusters = (1 << g_iLog2NumClusters);
// XRTODO - DONE: Add the eye offset. Each eye is split into category, cluster, x, y
const int idx = GenerateLayeredOffsetBufferIndex(lightCategory, tileIndex, clusterIndex, _NumTileClusteredX, _NumTileClusteredY, nrClusters, unity_StereoEyeIndex);
uint dataPair = g_vLayeredOffsetsBuffer[idx];

void GetCountAndStartCluster(PositionInputs posInput, uint lightCategory, out uint start, out uint lightCount)
{
// XRTODO - DONE: Will have to fix up GetLightClusterIndex and GetCountAndStartCluster
// basically the log base and offset buffers are stereo-ized. depth tex anywhere as well?
// Note: XR depends on unity_StereoEyeIndex already being defined,
// which means ShaderVariables.hlsl needs to be included ahead of this!

104
ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/LightLoop/lightlistbuild-clustered.compute


CBUFFER_START(UnityLightListClustered)
int g_iNrVisibLights;
// XRTODO - DONE: Stereo-ize these
float4x4 g_mInvScrProjectionArr[2];
float4x4 g_mScrProjectionArr[2];

// g_fClustBase, g_fNearPlane, g_fFarPlane, g_iLog2NumClusters
#include "ClusteredUtils.hlsl"
// XRTODO - DONE: Reading from these textures and buffers must be stereo-ized
#ifdef MSAA_ENABLED
Texture2DMS<float> g_depth_tex : register( t0 );
#else

#define NR_THREADS 64
// XRTODO - DONE: Stereo-ize writes to g_LayeredOffset
// g_vLayeredLightList is a dynamically allocated cluster list, and g_LayeredSingleIdxBuffer tracks allocations
// so these do not need to be stereoized.
// XRTODO - DONE: Stereo-ize writes to g_logBaseBuffer
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
RWStructuredBuffer<float> g_logBaseBuffer : register( u3 ); // don't support RWBuffer yet in unity
#endif

groupshared uint lightOffsSph;
#endif
// XRTODO - DONE: Stereo-ize access to g_mInvScrProjection, pass in eyeIndex
float GetLinearDepth(float zDptBufSpace, uint eyeIndex) // 0 is near 1 is far
{
float4x4 g_mInvScrProjection = g_mInvScrProjectionArr[eyeIndex];

//return v4Pres.z / v4Pres.w;
}
// XRTODO - DONE: Stereo-ize access to g_mScrProjection, pass in eyeIndex
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth, uint eyeIndex)
{
float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];

return float3(isOrthographic ? p.xy : (fLinDepth*p.xy), fLinDepth);
}
// XRTODO - DONE: Stereo-ize access to g_mScrProjection, pass in eyeIndex
float GetOnePixDiagWorldDistAtDepthOne(uint eyeIndex)
{
float4x4 g_mScrProjection = g_mScrProjectionArr[eyeIndex];

// in lightlistbuild-bigtile.compute. But would need more re-factoring than needed
// right now.
// XRTODO - DONE: Stereo-ize these functions with eyeIndex
#ifdef EXACT_EDGE_TESTS
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex);
#endif

// returns 1 for intersection and 0 for none
// XRTODO - DONE: Pass in eyeIndex
// XRTODO - DONE: Stereo-ize due to access to GetViewPosFromLinDepth
// We need eyeIndex
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase, uint eyeIndex)
{
// If this light's screen space depth bounds intersect this cluster...simple cluster test

float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
// XRTODO - DONE: Stereo-ize this by passing in eyeIndex
float3 vP = GetViewPosFromLinDepth( float2(x, y), z, eyeIndex);
// Test each corner of the cluster against the light bounding box planes

[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
// XRTODO - DONE: Generate eyeIndex from u3GroupID.z
uint eyeIndex = u3GroupID.z;
uint2 tileIDX = u3GroupID.xy;

for(int idx=t; idx<(TILE_SIZE_CLUSTERED*TILE_SIZE_CLUSTERED); idx+=NR_THREADS)
{
// XRTODO - DONE: We need to stereo-ize access to g_depth_tex. This is the only time we use viTilLL
// to generate a screen-space texture coordinate, so we can localize our stereo texture access here.
// For double-wide, we need to sample the correct half.
// For instancing/multi-view, the right texture layer. When we add support for that, we need to pass in
// an extra uint for layer index.
// XRTODO: We need to stereo-ize access to g_depth_tex for texture arrays.
// TODO: For stereo double-wide, I need a proper way to insert the second eye width offset. Right now, I can just
// use g_screenSize.x, but that's kinda cheating.
// Additionally, we're going to have a method to select between a doublewide texture or texture array. Doubling

#endif
}
// Max across TG
// Why is this a uint? Can't we save floats in shared mem?
// Why is this a uint? Doesn't InterlockedMax support shared mem floats?
InterlockedMax(ldsZMax, asuint(dpt_ma) );

#ifdef USE_TWO_PASS_TILED_LIGHTING
const uint log2BigTileToClustTileRatio = firstbithigh(64) - log2TileSize;
// XRTODO - DONE: Properly stereo-ize access to g_vBigTileLightList
// All of this code is localized here, so I don't really have to worry about side-effects further on down
// I need to generate NrBigTilesY, so I can generate a per-eye offset/base into g_vBigTileLightList
// Since bigTileIdx is used twice (once for count, once for the light list), I should probably fix that one.
// Would be worth function-alizing, because this code is shared with FPTL/lightlistbuild.compute
int NrBigTilesX = (nrTilesX + ((1<<log2BigTileToClustTileRatio)-1)) >> log2BigTileToClustTileRatio;
int NrBigTilesY = (nrTilesY + ((1<<log2BigTileToClustTileRatio)-1)) >> log2BigTileToClustTileRatio;
const int bigTileBase = eyeIndex * NrBigTilesX * NrBigTilesY;

for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
// XRTODO - DONE: Once we have our light index (l), we need to make sure it indexes the
// correct portion of g_vBoundsBuffer. I have that code in GenerateScreenSpaceBoundsIndices
// TODO: Seems kinda funny that we repeat this exact code here, bigtile, and FPTL...
const ScreenSpaceBoundsIndices boundsIndices = GenerateScreenSpaceBoundsIndices(l, g_iNrVisibLights, eyeIndex);

int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
// XRTODO - DONE: Stereo-ize by passing in eyeIndex
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(TILE_SIZE_CLUSTERED/2,TILE_SIZE_CLUSTERED/2), uint2(g_screenSize.x-1, g_screenSize.y-1))), eyeIndex );
#endif

// XRTODO - DONE: Stereo-ize access to GetLinearDepth with eyeIndex
#if USE_LEFT_HAND_CAMERA_SPACE
float fTileFarPlane = GetLinearDepth(dpt_ma, eyeIndex);
#else // USE_LEFT_HAND_CAMERA_SPACE

#endif
// XRTODO - DONE: Stereo-ize by passing in eyeIndex
// TODO: Why not sort on console?
// NOTE: Why not sort on console?
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif

// This code is a little tricky. For each light in our coarse list (and the list is associated with the screen tile),
// we find a cluster associated with the lights AABB Z value. For each light, we have a min and max cluster index.
// And because the cluster index is a max of 255, we are able to encode 4 cluster indices per 32-bit DWORD.
// Therefore, we can encode 2 lights per uint (light 0 min idx, light 0 max idx, light 1...)
// Each iteration of the loop goes over two neighboring lights in the coarseList, unfortunate name choice of 'l'
// XRTODO - DONE: Stereo-ize access to g_vBoundsBuffer, run l0 and l1 through GenerateScreenSpaceBoundsIndices
// The logic here is a bit confusing. We seem to process pairs of lights, and
// It's for the cluster in the tile (the depth layer slice whatever)
// XRTODO - DONE: Stereo-ize GetLinearDepth with eyeIndex
const unsigned int clustIdxMi0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.min].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0Bounds.max].z, eyeIndex), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int)min(255, SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1Bounds.min].z, eyeIndex), suggestedBase));

int i=(int) t;
int iSpaceAvail = 0;
int iSum = 0;
// Each thread in the TG represents a cluster index, and tests all coarse lights against that cluster.
// It should be noted that nrClusters can never be greater than the TG size, otherwise, this code doesn't work!
if(i<nrClusters)
{
// Each thread checks its respective cluster against all coarse lights for intersection.

// want to allocate out of g_LayeredSingleIdxBuffer.
iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES); // combined storage for both direct lights and reflection
InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
// All the light lists live in g_vLayeredLightList. They aren't sorted in any manner, and it's tightly packed.
// The allocation can handle the max lights per tile, but it likely won't use all the memory.
// g_LayeredSingleIdxBuffer is recording the allocations out of the buffer.
// 'start' indicates the offset into g_vLayeredLightList for the _cluster_ being
// processed by this thread. And the TG is processing the tile.
// 'iSpaceAvail' is how many total lights are in this cluster.
// This allocation might be roughly over, because CheckIntersectionBasic is a very basic check.
// XRTODO - DONE: For stereo, we don't have to adjust anything into g_LayeredSingleIdxBuffer. Each thread is processing its own
// cluster, so we just need to make sure there is enough memory allocated in g_vLayeredLightList for two eyes worth of
// lists. The offset we get from start is good enough. We do have to store the offset into the
// stereo-corrected half of g_LayeredOffset.
}
// All our cull data are in the same list, but at render time envLights are separated so we need to shift the index

// TODO: Why is this indexed like this?
// NOTE: Why is this indexed like this?
shiftIndex[LIGHTCATEGORY_COUNT - 2] = _EnvLightIndexShift;
shiftIndex[LIGHTCATEGORY_COUNT - 1] = _DecalIndexShift;

uint offs = start;
for(int ll=0; ll<iNrCoarseLights; ll+=4)
{
// Process in chunks of 4 lights from the coarse tile list
// XRTODO - DONE: Pass in eyeIndex to FetchPlane, as it looks into g_data (SFiniteLightBound)
// The first 24 threads in the TG each generate 1 plane equation. There are
// 6 planes per light, and we process 4 lights at a time, hence, 24 threads.
// We could do more, but this might be all the LDS that can be spared...
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();

{
// XRTODO - DONE: Stereo-ize CheckIntersection by passing in eyeIndex to GetViewPosFromLinDepth
// XRTODO - DONE: Stereo-ize index into _LightVolumeData, run coarseList[l] thru GenerateLightCullDataIndex
// TODO: I think there _might_ be a potential bug here. The way this code seems to work is that
// NOTE: I think there _might_ be a potential bug here. The way this code seems to work is that
// as each light comes in, they are added to the allocated list in g_vLayeredLightList.
// As each light is added, the respective category count is incremented, and the raw light index is
// altered by subtracting the shift associated with the category. So these light indices are category

#endif
}
// We now fill information about each light type into g_LayeredOffset
// The offset into g_vLayeredLightList, encoded with the number of lights in the category
// The encoded offsets are assuming the lights are sorted by category in the cluster list
// XRTODO - DONE: Stereo-ize this initial 'offs' to jump into the correct half of g_LayeredOffs.
// The offsets are organized Category/Cluster/Row/Column.
// For stereo, we just add eyeIndex*LIGHTCATEGORY_COUNT*nrClusters*nrTilesX*nrTilesY
offs = GenerateLayeredOffsetBufferIndex(0, tileIDX, i, nrTilesX, nrTilesY, nrClusters, eyeIndex);
for(int category=0; category<LIGHTCATEGORY_COUNT; category++)
{

}
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
// XRTODO - DONE: Stereo-ize this, as this is per-eye set of tiles
// Add eyeIndex*nrTilesY*nrTilesX to this index
const uint logBaseIndex = GenerateLogBaseBufferIndex(tileIDX, nrTilesX, nrTilesY, eyeIndex);
if(threadID==0) g_logBaseBuffer[logBaseIndex] = suggestedBase;
#endif

// XRTODO - DONE: Must be stereo-ized because it fetches from g_data
// Pass in eyeIndex
// XRTODO - DONE: Use eyeIndex and coarseList[l] to index into g_data
// use GenerateLightCullDataIndex
const int lightBoundIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
SFiniteLightBound lgtDat = g_data[lightBoundIndex];

// XRTODO - DONE: Stereo-ize for a bunch of use cases! Pass in eyeIndex!
// Needed for GetViewPosFromLinDepth, GetOnePixDiagWorldDistAtDepthOne, and g_data
// XRTODO - DONE: Stereo-ize access with eyeIndex
#if USE_LEFT_HAND_CAMERA_SPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0, eyeIndex);
#else

// XRTODO - DONE: Stereo-ize access with eyeIndex
// XRTODO - DONE: Stereo-ize access with eyeIndex and GenerateLightCullDataIndex
// Use it on coarseList[l], use result to index g_data
const int lightBoundIndex = GenerateLightCullDataIndex(coarseList[l], g_iNrVisibLights, eyeIndex);
SFiniteLightBound lgtDat = g_data[lightBoundIndex];

#ifdef EXACT_EDGE_TESTS
// XRTODO - DONE: Stereo-ize for access to GetViewPosFromLinDepth, use eyeIndex
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane, uint eyeIndex)
{
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;

z = -z;
#endif
// XRTODO - DONE: Pass in eyeIndex
// XRTODO - DONE: Stereo-ize for access to GetTileVertex, use eyeIndex
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex)
{
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges

// XRTODO - DONE: Pass in eyeIndex
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane, eyeIndex);
#if USE_LEFT_HAND_CAMERA_SPACE

vE0 = iSection == 0 ? edgeSectionZero : (((iSwizzle & 0x2) == 0 ? 1.0f : (-1.0f)) * ((int)(iSwizzle & 0x1) == (iSwizzle >> 1) ? float3(1, 0, 0) : float3(0, 1, 0)));
}
// XRTODO - DONE: Stereo-ize with eyeIndex, used for indexing _LightVolumeData and g_data,
// and with GetFrustEdge and GetTileVertex functions
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane, uint eyeIndex)
{
if(threadID==0) lightOffs2 = 0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// XRTODO - DONE: stereo-ize index used to access _LightVolumeData (and g_data), use GenerateLightCullDataIndex and eyeIndex
// XRTODO - DONE: stereo-ize index used to access g_data, use the same index generated above from idxCoarse
SFiniteLightBound lgtDat = g_data[lightCullIndex];
const float3 boxX = lgtDat.boxAxisX.xyz;

float3 vP1, vE1;
// XRTODO - DONE: Stereo-ize to use GetFrustEdge, use eyeIndex
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane, eyeIndex);
// potential separation plane

positive=0; negative=0;
for(int j=0; j<8; j++)
{
// XRTODO - DONE: Stereo-ize to use GetTileVertex, use eyeIndex
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane, eyeIndex);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;

正在加载...
取消
保存