浏览代码

HDRenderLoop: Merge FTPL Build Light List part (tile, bigtile, cluster..)

- Only build light list, not light application for now
- untested, crash when big tile are enabled
/main
Sebastien Lagarde 8 年前
当前提交
882368f7
共有 16 个文件被更改,包括 1278 次插入277 次删除
  1. 2
      Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.asset.meta
  2. 120
      Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs
  3. 2
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/LightDefinition.cs
  4. 2
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/SinglePass/SinglePass.cs
  5. 10
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute
  6. 413
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs
  7. 21
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs.hlsl
  8. 90
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ClusteredUtils.hlsl
  9. 9
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ClusteredUtils.hlsl.meta
  10. 267
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute
  11. 9
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute.meta
  12. 553
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute
  13. 9
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute.meta
  14. 30
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ShaderBase.hlsl
  15. 9
      Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ShaderBase.hlsl.meta
  16. 9
      Assets/ScriptableRenderLoop/HDRenderLoop/Shaders.meta

2
Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.asset.meta


fileFormatVersion: 2
guid: 2400b74f5ce370c4481e5dc417d03703
timeCreated: 1479395301
timeCreated: 1479691644
licenseType: Pro
NativeFormatImporter:
userData:

120
Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs


Material m_DebugViewMaterialGBuffer;
// Various buffer
int s_CameraColorBuffer;
int s_CameraDepthBuffer;
int s_VelocityBuffer;
int s_DistortionBuffer;
int m_CameraColorBuffer;
int m_CameraDepthBuffer;
int m_VelocityBuffer;
int m_DistortionBuffer;
public class LightList
{

public List<EnvLightData> envLights;
public Vector4[] directionalShadowSplitSphereSqr;
// Index mapping list to go from GPU lights (above) to CPU light (in cullResult)
public List<int> directionalCullIndices;
public List<int> punctualCullIndices;
public List<int> areaCullIndices;
public List<int> envCullIndices;
public void Clear()
{
directionalLights.Clear();

areaLights.Clear();
envLights.Clear();
directionalCullIndices.Clear();
punctualCullIndices.Clear();
areaCullIndices.Clear();
envCullIndices.Clear();
}
public void Allocate()
{
directionalLights = new List<DirectionalLightData>();
punctualLights = new List<LightData>();
areaLights = new List<LightData>();
envLights = new List<EnvLightData>();
punctualShadows = new List<PunctualShadowData>();
directionalShadows = new List<DirectionalShadowData>();
directionalShadowSplitSphereSqr = new Vector4[k_MaxCascadeCount];
directionalCullIndices = new List<int>();
punctualCullIndices = new List<int>();
areaCullIndices = new List<int>();
envCullIndices = new List<int>();
}
}

// TODO: Find a way to automatically create/iterate through lightloop
SinglePass.LightLoop m_SinglePassLightLoop;
// TilePass.LightLoop m_TilePassLightLoop;
TilePass.LightLoop m_TilePassLightLoop;
// TODO: Find a way to automatically create/iterate through deferred material
Lit.RenderLoop m_LitRenderLoop;

public override void Rebuild()
{
s_CameraColorBuffer = Shader.PropertyToID("_CameraColorTexture");
s_CameraDepthBuffer = Shader.PropertyToID("_CameraDepthTexture");
m_CameraColorBuffer = Shader.PropertyToID("_CameraColorTexture");
m_CameraDepthBuffer = Shader.PropertyToID("_CameraDepthTexture");
// TODO: We need to have an API to send our sky information to Enlighten. For now use a workaround through skybox/cubemap material...
m_SkyboxMaterial = CreateEngineMaterial("Skybox/Cubemap");

}
#pragma warning disable 162 // warning CS0162: Unreachable code detected
s_VelocityBuffer = Shader.PropertyToID("_VelocityTexture");
m_VelocityBuffer = Shader.PropertyToID("_VelocityTexture");
if (ShaderConfig.VelocityInGbuffer == 1)
{
// If velocity is in GBuffer then it is in the last RT. Assign a different name to it.

#pragma warning restore 162
s_DistortionBuffer = Shader.PropertyToID("_DistortionTexture");
m_DistortionBuffer = Shader.PropertyToID("_DistortionTexture");
m_LitRenderLoop.Rebuild();

// Init various light loop
m_SinglePassLightLoop = new SinglePass.LightLoop();
m_SinglePassLightLoop.Rebuild();
// m_TilePassLightLoop = new TilePass.LightLoop();
// m_TilePassLightLoop.Rebuild();
m_TilePassLightLoop = new TilePass.LightLoop();
m_TilePassLightLoop.Rebuild();
m_lightList.directionalLights = new List<DirectionalLightData>();
m_lightList.punctualLights = new List<LightData>();
m_lightList.areaLights = new List<LightData>();
m_lightList.envLights = new List<EnvLightData>();
m_lightList.punctualShadows = new List<PunctualShadowData>();
m_lightList.directionalShadows = new List<DirectionalShadowData>();
m_lightList.directionalShadowSplitSphereSqr = new Vector4[k_MaxCascadeCount];
m_lightList.Allocate();
}
void OnDisable()

//m_TilePassLightLoop.OnDisable();
m_TilePassLightLoop.OnDisable();
if (m_SkyboxMaterial) DestroyImmediate(m_SkyboxMaterial);
if (m_SkyHDRIMaterial) DestroyImmediate(m_SkyHDRIMaterial);

int w = camera.pixelWidth;
int h = camera.pixelHeight;
cmd.GetTemporaryRT(s_CameraColorBuffer, w, h, 0, FilterMode.Point, RenderTextureFormat.ARGBHalf, RenderTextureReadWrite.Linear);
cmd.GetTemporaryRT(s_CameraDepthBuffer, w, h, 24, FilterMode.Point, RenderTextureFormat.Depth);
cmd.GetTemporaryRT(m_CameraColorBuffer, w, h, 0, FilterMode.Point, RenderTextureFormat.ARGBHalf, RenderTextureReadWrite.Linear);
cmd.GetTemporaryRT(m_CameraDepthBuffer, w, h, 24, FilterMode.Point, RenderTextureFormat.Depth);
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(true, false, new Color(0, 0, 0, 0));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

{
var cmd = new CommandBuffer();
cmd.name = "Clear HDR target";
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(false, true, new Color(0, 0, 0, 0));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

var cmd = new CommandBuffer();
cmd.name = "Clear GBuffer";
// Write into the Camera Depth buffer
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(m_CameraDepthBuffer));
// Clear everything
// TODO: Clear is not required for color as we rewrite everything, will save performance.
cmd.ClearRenderTarget(false, true, new Color(0, 0, 0, 0));

// TODO: Must do opaque then alpha masked for performance!
// TODO: front to back for opaque and by materal for opaque tested when we split in two
var cmd = new CommandBuffer { name = "Depth Prepass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

// setup GBuffer for rendering
var cmd = new CommandBuffer { name = "GBuffer Pass" };
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

// TODO: Use the render queue index to only send the forward opaque!
var cmd = new CommandBuffer { name = "Depth Prepass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

// Render Opaque forward
{
var cmd = new CommandBuffer { name = "DebugView Material Mode Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(true, true, new Color(0, 0, 0, 0));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

// m_gbufferManager.BindBuffers(m_DeferredMaterial);
// TODO: Bind depth textures
var cmd = new CommandBuffer { name = "GBuffer Debug Pass" };
cmd.Blit(null, new RenderTargetIdentifier(s_CameraColorBuffer), m_DebugViewMaterialGBuffer, 0);
cmd.Blit(null, new RenderTargetIdentifier(m_CameraColorBuffer), m_DebugViewMaterialGBuffer, 0);
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();
}

// Last blit
{
var cmd = new CommandBuffer { name = "Blit DebugView Material Debug" };
cmd.Blit(new RenderTargetIdentifier(s_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget);
cmd.Blit(new RenderTargetIdentifier(m_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget);
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();
}

// m_gbufferManager.BindBuffers(m_DeferredMaterial);
// TODO: Bind depth textures
var cmd = new CommandBuffer { name = "Deferred Ligthing Pass" };
cmd.Blit(null, new RenderTargetIdentifier(s_CameraColorBuffer), m_DeferredMaterial, 0);
cmd.Blit(null, new RenderTargetIdentifier(m_CameraColorBuffer), m_DeferredMaterial, 0);
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();
}

m_LitRenderLoop.Bind();
var cmd = new CommandBuffer { name = "Forward Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

m_LitRenderLoop.Bind();
var cmd = new CommandBuffer { name = "Forward Unlit Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

int h = camera.pixelHeight;
var cmd = new CommandBuffer { name = "Velocity Pass" };
cmd.GetTemporaryRT(s_VelocityBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetVelocityBufferFormat(), Builtin.RenderLoop.GetVelocityBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(s_VelocityBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.GetTemporaryRT(m_VelocityBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetVelocityBufferFormat(), Builtin.RenderLoop.GetVelocityBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(m_VelocityBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

int h = camera.pixelHeight;
var cmd = new CommandBuffer { name = "Distortion Pass" };
cmd.GetTemporaryRT(s_DistortionBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetDistortionBufferFormat(), Builtin.RenderLoop.GetDistortionBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(s_DistortionBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.GetTemporaryRT(m_DistortionBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetDistortionBufferFormat(), Builtin.RenderLoop.GetDistortionBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(m_DistortionBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
renderLoop.ExecuteCommandBuffer(cmd);
cmd.Dispose();

var cmd = new CommandBuffer { name = "FinalPass" };
// Resolve our HDR texture to CameraTarget.
cmd.Blit(new RenderTargetIdentifier(s_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget, m_FinalPassMaterial, 0);
cmd.Blit(new RenderTargetIdentifier(m_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget, m_FinalPassMaterial, 0);
void ConvertLightForGPU(CullResults cullResults, ref ShadowOutput shadowOutput, ref LightList lightList)
void PrepareLightsForGPU(CullResults cullResults, Camera camera, ref ShadowOutput shadowOutput, ref LightList lightList)
{
lightList.Clear();

}
lightList.directionalLights.Add(directionalLightData);
lightList.directionalCullIndices.Add(lightIndex);
continue;
}

if (additionalData.archetype == LightArchetype.Punctual)
{
lightList.punctualLights.Add(lightData);
lightList.punctualCullIndices.Add(lightIndex);
lightList.areaCullIndices.Add(lightIndex);
}
}

envLightData.offsetLS = probe.center; // center is misnamed, it is the offset (in local space) from center of the bounding box to the cubemap capture point
envLightData.blendDistance = blendDistance;
lightList.envLights.Add(envLightData);
lightList.envCullIndices.Add(probeIndex);
// build per tile light lists
m_SinglePassLightLoop.PrepareLightsForGPU(cullResults, camera, m_lightList);
m_TilePassLightLoop.PrepareLightsForGPU(cullResults, camera, m_lightList);
{ /*
{
if (camera.pixelWidth != m_WidthOnRecord || camera.pixelHeight != m_HeightOnRecord || m_TilePassLightLoop.NeedResize())
{
if (m_WidthOnRecord > 0 && m_HeightOnRecord > 0)

m_WidthOnRecord = camera.pixelWidth;
m_HeightOnRecord = camera.pixelHeight;
}
*/
}
public void PushGlobalParams(Camera camera, RenderLoop renderLoop, HDRenderLoop.LightList lightList)

Shader.SetGlobalTexture("_EnvTextures", m_CubeReflTexArray.GetTexCache());
m_SinglePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
// m_TilePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
m_TilePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
}
public override void Render(Camera[] cameras, RenderLoop renderLoop)

renderLoop.SetupCameraProperties(camera); // Need to recall SetupCameraProperties after m_ShadowPass.Render
ConvertLightForGPU(cullResults, ref shadows, ref m_lightList);
PrepareLightsForGPU(cullResults, camera, ref shadows, ref m_lightList);
m_TilePassLightLoop.BuildGPULightLists(camera, renderLoop, m_lightList, m_CameraDepthBuffer);
// build per tile light lists
//var numLights = 0; // GenerateSourceLightBuffers(camera, cullResults);
//m_tilePassLightLoop.BuildPerTileLightLists(camera, loop, numLights, projscr, invProjscr);
RenderDeferredLighting(camera, renderLoop);

2
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/LightDefinition.cs


public int IESIndex;
public int cookieIndex;
public GPULightType lightType;
public GPULightType lightType;
// Area Light specific
public Vector2 size;
public bool twoSided;

2
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/SinglePass/SinglePass.cs


s_PunctualShadowList = null;
}
public void PrepareLightsForGPU(CullResults cullResults, Camera camera, HDRenderLoop.LightList lightList) {}
public void PushGlobalParams(Camera camera, RenderLoop loop, HDRenderLoop.LightList lightList)
{
s_DirectionalLights.SetData(lightList.directionalLights.ToArray());

10
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild.compute


#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
float FetchDepth(Texture2D depthTexture, uint2 pixCoord)
{
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0)).x;
}
#include "../ShaderBase.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
#endif

while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
{
SFiniteLightData lightData = g_vLightData[idxCoarse];
const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;

413
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs


using UnityEngine;
using UnityEngine.Experimental.Rendering;
using UnityEngine.Rendering;
using System;
namespace UnityEngine.Experimental.ScriptableRenderLoop

public struct SFiniteLightData
{
public Vector3 lightPos;
public int flags;
public Vector3 lightAxisX;
public uint lightType;

public Vector3 lightAxisZ; // spot +Z axis
public float cotan;
public Vector3 color;
public uint lightModel; // DIRECT_LIGHT=0, REFLECTION_LIGHT=1
public float unusued;
public uint lightModel; // DIRECT_LIGHT=0, REFLECTION_LIGHT=1
public Vector3 boxInvRange;
public float unused2;

return "LIGHTLOOP_SINGLE_PASS";
}
/*
public const int MaxNumLights = HDRenderLoop.k_MaxPunctualLightsOnSCreen + HDRenderLoop.k_MaxAreaLightsOnSCreen + HDRenderLoop.k_MaxEnvLightsOnSCreen;
public const int MaxNumLights = 1024;
ComputeShader buildScreenAABBShader;
ComputeShader buildPerTileLightListShader; // FPTL
ComputeShader buildPerBigTileLightListShader;
ComputeShader buildPerVoxelLightListShader; // clustered
static ComputeShader buildScreenAABBShader;
static ComputeShader buildPerTileLightListShader; // FPTL
static ComputeShader buildPerBigTileLightListShader;
static ComputeShader buildPerVoxelLightListShader; // clustered
private static int s_GenAABBKernel;
private static int s_GenListPerTileKernel;

private static ComputeBuffer s_ConvexBoundsBuffer;
private static ComputeBuffer s_AABBBoundsBuffer;
private static ComputeBuffer s_LightList;
private static ComputeBuffer s_DirLightList;
private static ComputeBuffer s_BigTileLightList; // used for pre-pass coarse culling on 64x64 tiles
private static int s_GenListPerBigTileKernel;

public bool disableFptlWhenClustered = false; // still useful on opaques
public bool enableBigTilePrepass = true;
public bool enableBigTilePrepass = false; // SebL - TODO: I get a crash when enabling this
public bool enableDrawLightBoundsDebug = false;
public bool enableDrawTileDebug = false;
public bool enableComputeLightEvaluation = false;

// clustered light list specific buffers and data end
const int k_TileSize = 16;
*/
SFiniteLightBound[] m_boundData;
SFiniteLightData[] m_lightData;
int m_lightCount;
/*
*/
return true;
}
}

/*
ReleaseResolutionDependentBuffers();
if (s_AABBBoundsBuffer != null)

if (s_LightDataBuffer != null)
s_LightDataBuffer.Release();
if (s_DirLightList != null)
s_DirLightList.Release();
*/
/*
ClearComputeBuffers();
buildScreenAABBShader = Resources.Load<ComputeShader>("scrbound");

s_AABBBoundsBuffer = new ComputeBuffer(2 * MaxNumLights, 3 * sizeof(float));
s_ConvexBoundsBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
s_LightDataBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightData)));
s_DirLightList = new ComputeBuffer(MaxNumDirLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(DirectionalLight)));
buildScreenAABBShader.SetBuffer(s_GenAABBKernel, "g_data", s_ConvexBoundsBuffer);
buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vLightData", s_LightDataBuffer);

s_GlobalLightListAtomic = new ComputeBuffer(1, sizeof(uint));
}
if (enableBigTilePrepass)
{
s_GenListPerBigTileKernel = buildPerBigTileLightListShader.FindKernel("BigTileLightListGen");

}
*/
m_boundData = new SFiniteLightBound[MaxNumLights];
m_lightData = new SFiniteLightData[MaxNumLights];
m_lightCount = 0;
{/*
{
// TODO: do something for Resources.Load<ComputeShader> ?
s_AABBBoundsBuffer.Release();

s_DirLightList.Release();
*/
/*
public bool NeedResize()
{
return s_LightList == null || (s_BigTileLightList == null && enableBigTilePrepass) || (s_PerVoxelLightLists == null && enableClustered);

s_BigTileLightList = new ComputeBuffer(LightDefinitions.MAX_NR_BIGTILE_LIGHTS_PLUSONE * nrBigTiles, sizeof(uint));
}
}
*/
// TEMP: These functions should be implemented C++ side, for now do it in C#
private static void SetMatrixCS(CommandBuffer cmd, ComputeShader shadercs, string name, Matrix4x4 mat)
{
var data = new float[16];
for (int c = 0; c < 4; c++)
for (int r = 0; r < 4; r++)
data[4 * c + r] = mat[r, c];
cmd.SetComputeFloatParams(shadercs, name, data);
}
private static void SetMatrixArrayCS(CommandBuffer cmd, ComputeShader shadercs, string name, Matrix4x4[] matArray)
{
int numMatrices = matArray.Length;
var data = new float[numMatrices * 16];
for (int n = 0; n < numMatrices; n++)
for (int c = 0; c < 4; c++)
for (int r = 0; r < 4; r++)
data[16 * n + 4 * c + r] = matArray[n][r, c];
cmd.SetComputeFloatParams(shadercs, name, data);
}
/*
int GenerateSourceLightBuffers(Camera camera, CullResults inputs)
private static void SetVectorArrayCS(CommandBuffer cmd, ComputeShader shadercs, string name, Vector4[] vecArray)
{
int numVectors = vecArray.Length;
var data = new float[numVectors * 4];
for (int n = 0; n < numVectors; n++)
for (int i = 0; i < 4; i++)
data[4 * n + i] = vecArray[n][i];
cmd.SetComputeFloatParams(shadercs, name, data);
}
static Matrix4x4 GetFlipMatrix()
{
Matrix4x4 flip = Matrix4x4.identity;
bool isLeftHand = ((int)LightDefinitions.USE_LEFTHAND_CAMERASPACE) != 0;
if (isLeftHand) flip.SetColumn(2, new Vector4(0.0f, 0.0f, -1.0f, 0.0f));
return flip;
}
static Matrix4x4 WorldToCamera(Camera camera)
{
return GetFlipMatrix() * camera.worldToCameraMatrix;
}
static Matrix4x4 CameraProjection(Camera camera)
var probes = inputs.visibleReflectionProbes;
//ReflectionProbe[] probes = Object.FindObjectsOfType<ReflectionProbe>();
return camera.projectionMatrix * GetFlipMatrix();
}
public void PrepareLightsForGPU(CullResults cullResults, Camera camera, HDRenderLoop.LightList lightList)
{
// Use for first space screen AABB
// Use for the second pass (fine pruning)
// first pass. Figure out how much we have of each and establish offsets
foreach (var cl in inputs.visibleLights)
// TODO manage area lights
foreach (var punctualLight in lightList.punctualLights)
var volType = cl.lightType == LightType.Spot ? LightDefinitions.SPOT_LIGHT : (cl.lightType == LightType.Point ? LightDefinitions.SPHERE_LIGHT : -1);
if (volType >= 0) ++numEntries[LightDefinitions.DIRECT_LIGHT, volType];
var volType = punctualLight.lightType == GPULightType.Spot ? LightDefinitions.SPOT_LIGHT : (punctualLight.lightType == GPULightType.Point ? LightDefinitions.SPHERE_LIGHT : -1);
if (volType >= 0)
++numEntries[LightDefinitions.DIRECT_LIGHT, volType];
foreach (var rl in probes)
// TODO: manage sphere_light
foreach (var envLight in lightList.envLights)
if (rl.texture != null) ++numEntries[LightDefinitions.REFLECTION_LIGHT, volType];
++numEntries[LightDefinitions.REFLECTION_LIGHT, volType];
}
// add decals here too similar to the above

{
offsets[m, 0] = m == 0 ? 0 : (numEntries[m - 1, numVolTypes - 1] + offsets[m - 1, numVolTypes - 1]);
for (var v = 1; v < numVolTypes; v++) offsets[m, v] = numEntries[m, v - 1] + offsets[m, v - 1];
for (var v = 1; v < numVolTypes; v++)
{
offsets[m, v] = numEntries[m, v - 1] + offsets[m, v - 1];
}
var numLights = inputs.visibleLights.Length;
var numProbes = probes.Length;
var numVolumes = numLights + numProbes;
var lightData = new SFiniteLightData[numVolumes];
var boundData = new SFiniteLightBound[numVolumes];
bool isNegDeterminant = Vector3.Dot(worldToView.GetColumn(0), Vector3.Cross(worldToView.GetColumn(1), worldToView.GetColumn(2))) < 0.0f; // 3x3 Determinant.
uint shadowLightIndex = 0;
foreach (var cl in inputs.visibleLights)
for (int lightIndex = 0; lightIndex < lightList.punctualLights.Count; lightIndex++)
var range = cl.range;
LightData punctualLightData = lightList.punctualLights[lightIndex];
VisibleLight light = cullResults.visibleLights[lightList.punctualCullIndices[lightIndex]];
var lightToWorld = cl.localToWorld;
var range = light.range;
var lightToWorld = light.localToWorld;
// Fill bounds
var light = new SFiniteLightData();
var lightData = new SFiniteLightData();
int index = -1;
bound.boxAxisX.Set(1, 0, 0);
bound.boxAxisY.Set(0, 1, 0);
bound.boxAxisZ.Set(0, 0, 1);
bound.scaleXY.Set(1.0f, 1.0f);
bound.radius = range;
light.flags = 0;
light.recipRange = 1.0f / range;
light.color.Set(cl.finalColor.r, cl.finalColor.g, cl.finalColor.b);
light.sliceIndex = 0;
light.lightModel = (uint)LightDefinitions.DIRECT_LIGHT;
light.shadowLightIndex = shadowLightIndex;
shadowLightIndex++;
lightData.lightModel = (uint)LightDefinitions.DIRECT_LIGHT;
var bHasCookie = cl.light.cookie != null;
var bHasShadow = cl.light.shadows != LightShadows.None;
var idxOut = 0;
if (cl.lightType == LightType.Spot)
if (punctualLightData.lightType == GPULightType.Spot || punctualLightData.lightType == GPULightType.ProjectorPyramid)
var isCircularSpot = !bHasCookie;
if (!isCircularSpot) // square spots always have cookie
{
light.sliceIndex = m_CookieTexArray.FetchSlice(cl.light.cookie);
}
Vector3 lightDir = lightToWorld.GetColumn(2); // Z axis in world space
// represents a left hand coordinate system in world space

const float degToRad = (float)(pi / 180.0);
var sa = cl.light.spotAngle;
var sa = light.light.spotAngle;
var cs = Mathf.Cos(0.5f * sa * degToRad);
var si = Mathf.Sin(0.5f * sa * degToRad);

var squeeze = true;//sa < 0.7f * 90.0f; // arb heuristic
var fS = squeeze ? ta : si;
bound.center = worldToView.MultiplyPoint(lightPos + ((0.5f * range) * lightDir)); // use mid point of the spot as the center of the bounding volume for building screen-space AABB for tiled lighting.
light.lightAxisX = vx;
light.lightAxisY = vy;
light.lightAxisZ = vz;
// scale axis to match box or base of pyramid
bound.boxAxisX = (fS * range) * vx;

fAltDx *= range; fAltDy *= range;
var altDist = Mathf.Sqrt(fAltDy * fAltDy + (isCircularSpot ? 1.0f : 2.0f) * fAltDx * fAltDx);
var altDist = Mathf.Sqrt(fAltDy * fAltDy + (punctualLightData.lightType == GPULightType.Spot ? 1.0f : 2.0f) * fAltDx * fAltDx);
// fill up ldata
light.lightType = (uint)LightDefinitions.SPOT_LIGHT;
light.lightPos = worldToView.MultiplyPoint(lightPos);
light.radiusSq = range * range;
light.penumbra = cs;
light.cotan = cota;
light.flags |= (isCircularSpot ? LightDefinitions.IS_CIRCULAR_SPOT_SHAPE : 0);
light.flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
light.flags |= (bHasShadow ? LightDefinitions.HAS_SHADOW : 0);
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lightData.lightType = (uint)LightDefinitions.SPOT_LIGHT;
lightData.lightPos = worldToView.MultiplyPoint(lightPos);
lightData.radiusSq = range * range;
lightData.cotan = cota;
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
else if (cl.lightType == LightType.Point)
else // if (punctualLightData.lightType == GPULightType.Point)
if (bHasCookie)
{
light.sliceIndex = m_CubeCookieTexArray.FetchSlice(cl.light.cookie);
}
bool isNegDeterminant = Vector3.Dot(worldToView.GetColumn(0), Vector3.Cross(worldToView.GetColumn(1), worldToView.GetColumn(2))) < 0.0f; // 3x3 Determinant.
bound.center = worldToView.MultiplyPoint(lightPos);
bound.boxAxisX.Set(range, 0, 0);

Vector3 vz = lightToView.GetColumn(2);
// fill up ldata
light.lightType = (uint)LightDefinitions.SPHERE_LIGHT;
light.lightPos = bound.center;
light.radiusSq = range * range;
light.lightAxisX = vx;
light.lightAxisY = vy;
light.lightAxisZ = vz;
light.flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
light.flags |= (bHasShadow ? LightDefinitions.HAS_SHADOW : 0);
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lightData.lightType = (uint)LightDefinitions.SPHERE_LIGHT;
lightData.lightPos = bound.center;
lightData.radiusSq = range * range;
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
}
else
{
//Assert(false);
index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
// next light
if (cl.lightType == LightType.Spot || cl.lightType == LightType.Point)
{
boundData[idxOut] = bound;
lightData[idxOut] = light;
}
m_boundData[index] = bound;
m_lightData[index] = lightData;
var numLightsOut = offsets[LightDefinitions.DIRECT_LIGHT, numVolTypes - 1] + numEntries[LightDefinitions.DIRECT_LIGHT, numVolTypes - 1];
// probe.m_BlendDistance
// Vector3f extents = 0.5*Abs(probe.m_BoxSize);
// C center of rendered refl box <-- GetComponent (Transform).GetPosition() + m_BoxOffset;
// cube map capture point: GetComponent (Transform).GetPosition()
// shader parameter min and max are C+/-(extents+blendDistance)
foreach (var rl in probes)
for (int envIndex = 0; envIndex < lightList.envLights.Count; envIndex++)
var cubemap = rl.texture;
EnvLightData envLightData = lightList.envLights[envIndex];
VisibleReflectionProbe probe = cullResults.visibleReflectionProbes[lightList.envCullIndices[envIndex]];
// always a box for now
if (cubemap == null)
continue;
var bound = new SFiniteLightBound();
var lightData = new SFiniteLightData();
var bndData = new SFiniteLightBound();
var lgtData = new SFiniteLightData();
var idxOut = 0;
lgtData.flags = 0;
var bnds = rl.bounds;
var boxOffset = rl.center; // reflection volume offset relative to cube map capture point
var blendDistance = rl.blendDistance;
float imp = rl.importance;
var bnds = probe.bounds;
var boxOffset = probe.center; // reflection volume offset relative to cube map capture point
var blendDistance = probe.blendDistance;
var mat = rl.localToWorld;
//Matrix4x4 mat = rl.transform.localToWorldMatrix;
Vector3 cubeCapturePos = mat.GetColumn(3); // cube map capture position in world space
// implicit in CalculateHDRDecodeValues() --> float ints = rl.intensity;
var boxProj = (rl.boxProjection != 0);
var decodeVals = rl.hdr;
//Vector4 decodeVals = rl.CalculateHDRDecodeValues();
var mat = probe.localToWorld;
// C is reflection volume center in world space (NOT same as cube map capture point)
var e = bnds.extents; // 0.5f * Vector3.Max(-boxSizes[p], boxSizes[p]);

//Vector3 posForShaderParam = bnds.center - boxOffset; // gives same as rl.GetComponent<Transform>().position;
var posForShaderParam = cubeCapturePos; // same as commented out line above when rot is identity
var combinedExtent = e + new Vector3(blendDistance, blendDistance, blendDistance);
var combinedExtent = e + new Vector3(blendDistance, blendDistance, blendDistance);
Vector3 vx = mat.GetColumn(0);
Vector3 vy = mat.GetColumn(1);

var Cw = worldToView.MultiplyPoint(C);
if (boxProj) lgtData.flags |= LightDefinitions.IS_BOX_PROJECTED;
bound.center = Cw;
bound.boxAxisX = combinedExtent.x * vx;
bound.boxAxisY = combinedExtent.y * vy;
bound.boxAxisZ = combinedExtent.z * vz;
bound.scaleXY.Set(1.0f, 1.0f);
bound.radius = combinedExtent.magnitude;
lgtData.lightPos = Cw;
lgtData.lightAxisX = vx;
lgtData.lightAxisY = vy;
lgtData.lightAxisZ = vz;
lgtData.localCubeCapturePoint = -boxOffset;
lgtData.probeBlendDistance = blendDistance;
lgtData.lightIntensity = decodeVals.x;
lgtData.decodeExp = decodeVals.y;
lgtData.sliceIndex = m_CubeReflTexArray.FetchSlice(cubemap);
lightData.lightPos = Cw;
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lgtData.boxInnerDist = e;
lgtData.boxInvRange.Set(1.0f / delta.x, 1.0f / delta.y, 1.0f / delta.z);
bndData.center = Cw;
bndData.boxAxisX = combinedExtent.x * vx;
bndData.boxAxisY = combinedExtent.y * vy;
bndData.boxAxisZ = combinedExtent.z * vz;
bndData.scaleXY.Set(1.0f, 1.0f);
bndData.radius = combinedExtent.magnitude;
// fill up ldata
lgtData.lightType = (uint)LightDefinitions.BOX_LIGHT;
lgtData.lightModel = (uint)LightDefinitions.REFLECTION_LIGHT;
lightData.boxInnerDist = e;
lightData.boxInvRange.Set(1.0f / delta.x, 1.0f / delta.y, 1.0f / delta.z);
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
boundData[idxOut] = bndData;
lightData[idxOut] = lgtData;
int index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
m_boundData[index] = bound;
var numProbesOut = offsets[LightDefinitions.REFLECTION_LIGHT, numVolTypes - 1] + numEntries[LightDefinitions.REFLECTION_LIGHT, numVolTypes - 1];
// Sanity check
{
}
s_ConvexBoundsBuffer.SetData(boundData);
s_LightDataBuffer.SetData(lightData);
m_lightCount = lightList.punctualLights.Count + lightList.envLights.Count;
s_ConvexBoundsBuffer.SetData(m_boundData); // TODO: check with Vlad what is happening here, do we copy 1024 element always ? Could we setup the size we want to copy ?
}
void VoxelLightListGeneration(CommandBuffer cmd, Camera camera, Matrix4x4 projscr, Matrix4x4 invProjscr, int cameraDepthBuffer)
{
// clear atomic offset index
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
cmd.DispatchCompute(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, 1, 1, 1);
cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_iNrVisibLights", m_lightCount);
SetMatrixCS(cmd, buildPerVoxelLightListShader, "g_mScrProjection", projscr);
SetMatrixCS(cmd, buildPerVoxelLightListShader, "g_mInvScrProjection", invProjscr);
cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_iLog2NumClusters", k_Log2NumClusters);
//Vector4 v2_near = invProjscr * new Vector4(0.0f, 0.0f, 0.0f, 1.0f);
//Vector4 v2_far = invProjscr * new Vector4(0.0f, 0.0f, 1.0f, 1.0f);
//float nearPlane2 = -(v2_near.z/v2_near.w);
//float farPlane2 = -(v2_far.z/v2_far.w);
var nearPlane = camera.nearClipPlane;
var farPlane = camera.farClipPlane;
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fNearPlane", nearPlane);
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fFarPlane", farPlane);
const float C = (float)(1 << k_Log2NumClusters);
var geomSeries = (1.0 - Mathf.Pow(k_ClustLogBase, C)) / (1 - k_ClustLogBase); // geometric series: sum_k=0^{C-1} base^k
m_ClustScale = (float)(geomSeries / (farPlane - nearPlane));
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fClustScale", m_ClustScale);
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fClustBase", k_ClustLogBase);
return numLightsOut + numProbesOut;
}
cmd.SetComputeTextureParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_depth_tex", new RenderTargetIdentifier(cameraDepthBuffer));
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vLayeredLightList", s_PerVoxelLightLists);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredOffset", s_PerVoxelOffset);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
if (enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vBigTileLightList", s_BigTileLightList);
if (k_UseDepthBuffer)
{
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_logBaseBuffer", s_PerTileLogBaseTweak);
}
void BuildPerTileLightLists(Camera camera, RenderLoop loop, int numLights, Matrix4x4 projscr, Matrix4x4 invProjscr)
var numTilesX = (camera.pixelWidth + 15) / 16;
var numTilesY = (camera.pixelHeight + 15) / 16;
cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
}
public void BuildGPULightLists(Camera camera, RenderLoop loop, HDRenderLoop.LightList lightList, int cameraDepthBuffer)
{
var w = camera.pixelWidth;
var h = camera.pixelHeight;

var numBigTilesY = (h + 63) / 64;
// camera to screen matrix (and it's inverse)
var proj = CameraProjection(camera);
var temp = new Matrix4x4();
temp.SetRow(0, new Vector4(0.5f * w, 0.0f, 0.0f, 0.5f * w));
temp.SetRow(1, new Vector4(0.0f, 0.5f * h, 0.0f, 0.5f * h));
temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
temp.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));
var projscr = temp * proj;
var invProjscr = projscr.inverse;
var proj = CameraProjection(camera);
var temp = new Matrix4x4();
temp.SetRow(0, new Vector4(1.0f, 0.0f, 0.0f, 0.0f));
temp.SetRow(1, new Vector4(0.0f, 1.0f, 0.0f, 0.0f));
temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));

cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", m_lightCount);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (numLights + 7) / 8, 1, 1);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (m_lightCount + 7) / 8, 1, 1);
}
// enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).

cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", m_lightCount);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mScrProjection", projscr);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mInvScrProjection", invProjscr);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fNearPlane", camera.nearClipPlane);

if (usingFptl) // optimized for opaques only
{
cmd.SetComputeIntParams(buildPerTileLightListShader, "g_viDimensions", new int[2] { w, h });
cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", m_lightCount);
cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(s_CameraDepthTexture));
cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(cameraDepthBuffer));
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vLightList", s_LightList);
if (enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vBigTileLightList", s_BigTileLightList);
cmd.DispatchCompute(buildPerTileLightListShader, s_GenListPerTileKernel, numTilesX, numTilesY, 1);

{
VoxelLightListGeneration(cmd, camera, numLights, projscr, invProjscr);
VoxelLightListGeneration(cmd, camera, projscr, invProjscr, cameraDepthBuffer);
*/
}
}
}

21
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/TilePass.cs.hlsl


struct SFiniteLightData
{
float3 lightPos;
int flags;
float3 lightAxisX;
uint lightType;
float3 lightAxisY;

float3 color;
uint lightModel;
float unusued;
uint lightModel;
float3 boxInvRange;
float unused2;
};

{
return value.lightPos;
}
int GetFlags(SFiniteLightData value)
{
return value.flags;
}
float3 GetLightAxisX(SFiniteLightData value)
{
return value.lightAxisX;

{
return value.cotan;
}
float3 GetColor(SFiniteLightData value)
float3 GetBoxInnerDist(SFiniteLightData value)
return value.color;
return value.boxInnerDist;
}
float3 GetBoxInnerDist(SFiniteLightData value)
{
return value.boxInnerDist;
}
float GetUnusued(SFiniteLightData value)
{
return value.unusued;
}
float3 GetBoxInvRange(SFiniteLightData value)
{

90
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ClusteredUtils.hlsl


#ifndef __CLUSTEREDUTILS_H__
#define __CLUSTEREDUTILS_H__
#ifndef FLT_EPSILON
#define FLT_EPSILON 1.192092896e-07f
#endif
float GetScaleFromBase(float base)
{
const float C = (float)(1 << g_iLog2NumClusters);
const float geomSeries = (1.0 - pow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
return geomSeries / (g_fFarPlane - g_fNearPlane);
}
int SnapToClusterIdxFlex(float z_in, float suggestedBase, bool logBasePerTile)
{
#if USE_LEFTHAND_CAMERASPACE
float z = z_in;
#else
float z = -z_in;
#endif
float userscale = g_fClustScale;
if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
// using the inverse of the geometric series
const float dist = max(0, z - g_fNearPlane);
return (int)clamp(log2(dist * userscale * (suggestedBase - 1.0f) + 1) / log2(suggestedBase), 0.0, (float)((1 << g_iLog2NumClusters) - 1));
}
int SnapToClusterIdx(float z_in, float suggestedBase)
{
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
bool logBasePerTile = true; // resolved compile time
#else
bool logBasePerTile = false;
#endif
return SnapToClusterIdxFlex(z_in, suggestedBase, logBasePerTile);
}
float ClusterIdxToZFlex(int k, float suggestedBase, bool logBasePerTile)
{
float res;
float userscale = g_fClustScale;
if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
float dist = (pow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
res = dist + g_fNearPlane;
#if USE_LEFTHAND_CAMERASPACE
return res;
#else
return -res;
#endif
}
float ClusterIdxToZ(int k, float suggestedBase)
{
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
bool logBasePerTile = true; // resolved compile time
#else
bool logBasePerTile = false;
#endif
return ClusterIdxToZFlex(k, suggestedBase, logBasePerTile);
}
// generate a log-base value such that half of the clusters are consumed from near plane to max. opaque depth of tile.
float SuggestLogBase50(float tileFarPlane)
{
const float C = (float)(1 << g_iLog2NumClusters);
float normDist = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);
float suggested_base = pow((1.0 + sqrt(max(0.0, 1.0 - 4.0 * normDist * (1.0 - normDist)))) / (2.0 * normDist), 2.0 / C); //
return max(g_fClustBase, suggested_base);
}
// generate a log-base value such that (approximately) a quarter of the clusters are consumed from near plane to max. opaque depth of tile.
float SuggestLogBase25(float tileFarPlane)
{
const float C = (float)(1 << g_iLog2NumClusters);
float normDist = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);
float suggested_base = pow((1 / 2.3) * max(0.0, (0.8 / normDist) - 1), 4.0 / (C * 2)); // approximate inverse of d*x^4 + (-x) + (1-d) = 0 - d is normalized distance
return max(g_fClustBase, suggested_base);
}
#endif

9
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ClusteredUtils.hlsl.meta


fileFormatVersion: 2
guid: 54f8006db9236c148af831b7fcaadc0c
timeCreated: 1479691314
licenseType: Pro
ShaderImporter:
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:

267
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute


#pragma kernel BigTileLightListGen
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
#endif
#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
#define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform float g_fNearPlane;
uniform float g_fFarPlane;
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#define NR_THREADS 64
// output buffer
RWBuffer<uint> g_vLightList : register( u0 );
// 2kB (room for roughly 30 wavefronts)
groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];
groupshared uint lightOffs;
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w;
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
#if USE_LEFTHAND_CAMERASPACE
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
#else
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
#endif
}
float GetOnePixDiagWorldDistAtDepthOne()
{
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
#ifdef EXACT_EDGE_TESTS
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
#endif
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrBigTilesX = (iWidth+63)/64;
uint nrBigTilesY = (iHeight+63)/64;
if(t==0) lightOffs = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
uint2 viTilLL = 64*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l; // add to light list
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifdef EXACT_EDGE_TESTS
CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
#endif
// sort lights
SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIGTILE_LIGHTS_PLUSONE, t, NR_THREADS);
lightOffs = 0;
GroupMemoryBarrierWithGroupSync();
for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
GroupMemoryBarrierWithGroupSync();
iNrCoarseLights = lightOffs;
int offs = tileIDX.y*nrBigTilesX + tileIDX.x;
for(int i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = t==0 ? iNrCoarseLights : lightsListLDS[i-1];
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
#if USE_LEFTHAND_CAMERASPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 32*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
lightsListLDS[l]=0xffffffff;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
#endif
#ifdef EXACT_EDGE_TESTS
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
#if !USE_LEFTHAND_CAMERASPACE
z = -z;
#endif
return GetViewPosFromLinDepth( float2(x, y), z);
}
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
int iSwizzle = e0&0x3;
int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
}
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
{
const bool bOnlyNeedFrustumSideEdges = true;
const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
const int totNrEdgePairs = 12*nrFrustEdges;
for(int l=0; l<iNrCoarseLights; l++)
{
const int idxCoarse = lightsListLDS[l];
[branch]if(idxCoarse<(uint) g_iNrVisibLights && g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
{
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;
for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
{
int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
int e1 = i - e0*nrFrustEdges;
int idx_cur=0, idx_twin=0;
float3 vP0, vE0;
GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
float3 vP1, vE1;
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, g_fFarPlane);
// potential separation plane
float3 vN = cross(vE0, vE1);
int positive=0, negative=0;
for(int k=1; k<8; k++) // only need to test 7 verts (technically just 6).
{
int j = (idx_cur+k)&0x7;
float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
positive=0; negative=0;
for(int j=0; j<8; j++)
{
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) lightsListLDS[l]=0xffffffff;
}
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
#endif

9
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-bigtile.compute.meta


fileFormatVersion: 2
guid: 5ee1f9d6e09abe045b2f5e0b784b9072
timeCreated: 1479689024
licenseType: Pro
ComputeShaderImporter:
currentAPIMask: 4
userData:
assetBundleName:
assetBundleVariant:

553
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute


#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE
#pragma kernel TileLightListGen_DepthRT_MSAA LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED
#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE USE_TWO_PASS_TILED_LIGHTING
#pragma kernel TileLightListGen_DepthRT_MSAA_SrcBigTile LIGHTLISTGEN=TileLightListGen_DepthRT_MSAA_SrcBigTile ENABLE_DEPTH_TEXTURE_BACKPLANE MSAA_ENABLED USE_TWO_PASS_TILED_LIGHTING
#pragma kernel ClearAtomic
#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
#endif
//#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS
#define CONV_HULL_TEST_ENABLED
uniform int g_iNrVisibLights;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform float g_fClustScale;
uniform float g_fClustBase;
uniform float g_fNearPlane;
uniform float g_fFarPlane;
uniform int g_iLog2NumClusters; // numClusters = (1<<g_iLog2NumClusters)
#include "../ClusteredUtils.hlsl"
#ifdef MSAA_ENABLED
Texture2DMS<float> g_depth_tex : register( t0 );
#else
Texture2D g_depth_tex : register( t0 );
#endif
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#ifdef USE_TWO_PASS_TILED_LIGHTING
Buffer<uint> g_vBigTileLightList : register( t4 );
#endif
#define NR_THREADS 64
// output buffer
RWBuffer<uint> g_vLayeredLightList : register( u0 );
RWBuffer<uint> g_LayeredOffset : register( u1 );
RWBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 );
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
RWBuffer<float> g_logBaseBuffer : register( u3 );
#endif
#define MAX_NR_COARSE_ENTRIES 128
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
groupshared float4 lightPlanes[4*6];
groupshared uint lightOffs;
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
groupshared int ldsZMax;
#endif
#ifdef EXACT_EDGE_TESTS
groupshared uint ldsIsLightInvisible;
groupshared uint lightOffs2;
#endif
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
groupshared uint lightOffsSph;
#endif
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
{
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w;
}
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
#if USE_LEFTHAND_CAMERASPACE
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
#else
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
#endif
}
float GetOnePixDiagWorldDistAtDepthOne()
{
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
}
#ifdef EXACT_EDGE_TESTS
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
#endif
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif
// returns 1 for intersection and 0 for none
float4 FetchPlane(int l, int p);
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
{
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
if(bIsHit)
{
#ifdef CONV_HULL_TEST_ENABLED
float depthAtNearZ = ClusterIdxToZ(k, suggestedBase);
float depthAtFarZ = ClusterIdxToZ(k+1, suggestedBase);
for(int p=0; p<6; p++)
{
float4 plane = lightPlanes[6*(l&3)+p];
bool bAllInvisib = true;
for(int i=0; i<8; i++)
{
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
float3 vP = GetViewPosFromLinDepth( float2(x, y), z);
bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
}
if(bAllInvisib) bIsHit = false;
}
#endif
}
return bIsHit;
}
bool CheckIntersectionBasic(int l, int k)
{
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
return ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
}
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
uint iWidth;
uint iHeight;
#ifdef MSAA_ENABLED
uint iNumSamplesMSAA;
g_depth_tex.GetDimensions(iWidth, iHeight, iNumSamplesMSAA);
#else
g_depth_tex.GetDimensions(iWidth, iHeight);
#endif
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
uint2 viTilLL = 16*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
if(t==0)
{
lightOffs = 0;
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
ldsZMax = 0;
#endif
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
float dpt_ma=1.0;
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
// establish min and max depth first
dpt_ma=0.0;
for(int idx=t; idx<256; idx+=NR_THREADS)
{
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
#ifdef MSAA_ENABLED
for(int i=0; i<iNumSamplesMSAA; i++)
{
const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
#else
const float fDpth = FetchDepth(g_depth_tex, uPixCrd);
#endif
if(fDpth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_ma = max(fDpth, dpt_ma);
}
#ifdef MSAA_ENABLED
}
#endif
}
InterlockedMax(ldsZMax, asuint(dpt_ma) );
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
dpt_ma = asfloat(ldsZMax);
#endif
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
// build coarse list using AABB
#ifdef USE_TWO_PASS_TILED_LIGHTING
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
{
int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
#else
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
#endif
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
#endif
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
#if USE_LEFTHAND_CAMERASPACE
float fTileFarPlane = GetLinearDepth(dpt_ma);
#else
float fTileFarPlane = -GetLinearDepth(dpt_ma);
#endif
float suggestedBase = SuggestLogBase50(fTileFarPlane);
#else
float fTileFarPlane = g_fFarPlane;
float suggestedBase = g_fClustBase;
#endif
#ifdef EXACT_EDGE_TESTS
iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
#endif
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
SORTLIST(coarseList, iNrCoarseLights, MAX_NR_COARSE_ENTRIES, t, NR_THREADS);
#endif
//////////// cell specific code
{
for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
{
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights)];
const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));
clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
int nrClusters = (1<<g_iLog2NumClusters);
//////////////////////////////////////////////////////////
uint start = 0;
int i=(int) t;
int iSpaceAvail = 0;
int iSum = 0;
if(i<nrClusters)
{
for(int l=0; l<iNrCoarseLights; l++)
{
iSum += (CheckIntersectionBasic(l, i) ? 1 : 0);
}
iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES); // combined storage for both direct lights and reflection
InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
}
int modelListCount[NR_LIGHT_MODELS]={0,0}; // direct light count and reflection lights
uint offs = start;
for(int ll=0; ll<iNrCoarseLights; ll+=4)
{
int p = i>>2;
int m = i&3;
if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
for(int l=ll; l<min(iNrCoarseLights,(ll+4)); l++)
{
if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
{
uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
g_vLayeredLightList[offs++] = coarseList[l]; // reflection lights will be last since we sorted
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
}
uint localOffs=0;
offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
{
int numLights = min(modelListCount[m],31); // only allow 5 bits
if(i<nrClusters)
{
g_LayeredOffset[offs] = (start+localOffs) | (((uint) numLights)<<27);
offs += (nrClusters*nrTilesX*nrTilesY);
localOffs += modelListCount[m]; // use unclamped count for localOffs
}
}
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
#endif
}
float4 FetchPlane(int l, int p)
{
SFiniteLightBound lgtDat = g_data[coarseList[l]];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float radius = lgtDat.radius;
const float2 scaleXY = lgtDat.scaleXY;
return GetPlaneEq(boxX, boxY, boxZ, center, scaleXY, p);
}
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
#if USE_LEFTHAND_CAMERASPACE
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
#else
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
#endif
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
{
SFiniteLightBound lgtDat = g_data[coarseList[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
coarseList[l]=0xffffffff;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
// to greedy to double buffer coarseList lds on this so serializing removal of gaps.
if(threadID==0)
{
int offs = 0;
for(int l=0; l<iNrCoarseLights; l++)
{ if(coarseList[l]!=0xffffffff) coarseList[offs++] = coarseList[l]; }
lightOffsSph = offs;
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
return lightOffsSph;
}
#endif
#ifdef EXACT_EDGE_TESTS
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
#if !USE_LEFTHAND_CAMERASPACE
z = -z;
#endif
return GetViewPosFromLinDepth( float2(x, y), z);
}
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
int iSwizzle = e0&0x3;
int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
}
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
if(threadID==0) lightOffs2 = 0;
const bool bOnlyNeedFrustumSideEdges = true;
const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
const int totNrEdgePairs = 12*nrFrustEdges;
for(int l=0; l<iNrCoarseLights; l++)
{
if(threadID==0) ldsIsLightInvisible=0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
const int idxCoarse = coarseList[l];
[branch]if(g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
{
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;
for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
{
int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
int e1 = i - e0*nrFrustEdges;
int idx_cur=0, idx_twin=0;
float3 vP0, vE0;
GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
float3 vP1, vE1;
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);
// potential separation plane
float3 vN = cross(vE0, vE1);
int positive=0, negative=0;
for(int k=1; k<8; k++) // only need to test 7 verts (technically just 6).
{
int j = (idx_cur+k)&0x7;
float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
positive=0; negative=0;
for(int j=0; j<8; j++)
{
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
}
int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) InterlockedOr(ldsIsLightInvisible, 1);
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
if(threadID==0 && ldsIsLightInvisible==0)
{
coarseList[lightOffs2++] = coarseList[l];
}
}
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
GroupMemoryBarrierWithGroupSync();
#endif
return lightOffs2;
}
#endif
[numthreads(1, 1, 1)]
void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
g_LayeredSingleIdxBuffer[0]=0;
}

9
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/Resources/lightlistbuild-clustered.compute.meta


fileFormatVersion: 2
guid: 0bb1b7e0ddcd5c44baf3ddc7456eb196
timeCreated: 1479689584
licenseType: Pro
ComputeShaderImporter:
currentAPIMask: 4
userData:
assetBundleName:
assetBundleVariant:

30
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ShaderBase.hlsl


#ifndef __SHADERBASE_H__
#define __SHADERBASE_H__
#ifdef SHADER_API_PSSL
#ifndef Texture2DMS
#define Texture2DMS MS_Texture2D
#endif
#ifndef SampleCmpLevelZero
#define SampleCmpLevelZero SampleCmpLOD0
#endif
#ifndef firstbithigh
#define firstbithigh FirstSetBit_Hi
#endif
#endif
float FetchDepth(Texture2D depthTexture, uint2 pixCoord)
{
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0)).x;
}
float FetchDepthMSAA(Texture2DMS<float> depthTexture, uint2 pixCoord, uint sampleIdx)
{
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0), sampleIdx).x;
}
#endif

9
Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ShaderBase.hlsl.meta


fileFormatVersion: 2
guid: 3c90176b54c922b4e9cf65c9ec9cb750
timeCreated: 1479691479
licenseType: Pro
ShaderImporter:
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:

9
Assets/ScriptableRenderLoop/HDRenderLoop/Shaders.meta


fileFormatVersion: 2
guid: fafbb144d7f66074785b7727293d89c5
folderAsset: yes
timeCreated: 1474297943
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存