
HDRenderLoop: Merge FPTL Build Light List part (tile, big tile, cluster, ...)

- Only builds the light list for now; light application is not included yet
- Untested; crashes when big tiles are enabled
Sebastien Lagarde 8 年前
共有 16 个文件被更改,包括 1278 次插入277 次删除
  1. 2
  2. 120
  3. 2
  4. 2
  5. 10
  6. 413
  7. 21
  8. 90
  9. 9
  10. 267
  11. 9
  12. 553
  13. 9
  14. 30
  15. 9
  16. 9


fileFormatVersion: 2
guid: 2400b74f5ce370c4481e5dc417d03703
timeCreated: 1479395301
timeCreated: 1479691644
licenseType: Pro


Material m_DebugViewMaterialGBuffer;
// Various buffer
int s_CameraColorBuffer;
int s_CameraDepthBuffer;
int s_VelocityBuffer;
int s_DistortionBuffer;
int m_CameraColorBuffer;
int m_CameraDepthBuffer;
int m_VelocityBuffer;
int m_DistortionBuffer;
public class LightList

public List<EnvLightData> envLights;
public Vector4[] directionalShadowSplitSphereSqr;
// Index mapping list to go from GPU lights (above) to CPU light (in cullResult)
public List<int> directionalCullIndices;
public List<int> punctualCullIndices;
public List<int> areaCullIndices;
public List<int> envCullIndices;
public void Clear()

public void Allocate()
directionalLights = new List<DirectionalLightData>();
punctualLights = new List<LightData>();
areaLights = new List<LightData>();
envLights = new List<EnvLightData>();
punctualShadows = new List<PunctualShadowData>();
directionalShadows = new List<DirectionalShadowData>();
directionalShadowSplitSphereSqr = new Vector4[k_MaxCascadeCount];
directionalCullIndices = new List<int>();
punctualCullIndices = new List<int>();
areaCullIndices = new List<int>();
envCullIndices = new List<int>();

// TODO: Find a way to automatically create/iterate through lightloop
SinglePass.LightLoop m_SinglePassLightLoop;
// TilePass.LightLoop m_TilePassLightLoop;
TilePass.LightLoop m_TilePassLightLoop;
// TODO: Find a way to automatically create/iterate through deferred material
Lit.RenderLoop m_LitRenderLoop;

public override void Rebuild()
s_CameraColorBuffer = Shader.PropertyToID("_CameraColorTexture");
s_CameraDepthBuffer = Shader.PropertyToID("_CameraDepthTexture");
m_CameraColorBuffer = Shader.PropertyToID("_CameraColorTexture");
m_CameraDepthBuffer = Shader.PropertyToID("_CameraDepthTexture");
// TODO: We need to have an API to send our sky information to Enlighten. For now use a workaround through skybox/cubemap material...
m_SkyboxMaterial = CreateEngineMaterial("Skybox/Cubemap");

#pragma warning disable 162 // warning CS0162: Unreachable code detected
s_VelocityBuffer = Shader.PropertyToID("_VelocityTexture");
m_VelocityBuffer = Shader.PropertyToID("_VelocityTexture");
if (ShaderConfig.VelocityInGbuffer == 1)
// If velocity is in GBuffer then it is in the last RT. Assign a different name to it.

#pragma warning restore 162
s_DistortionBuffer = Shader.PropertyToID("_DistortionTexture");
m_DistortionBuffer = Shader.PropertyToID("_DistortionTexture");

// Init various light loop
m_SinglePassLightLoop = new SinglePass.LightLoop();
// m_TilePassLightLoop = new TilePass.LightLoop();
// m_TilePassLightLoop.Rebuild();
m_TilePassLightLoop = new TilePass.LightLoop();
m_lightList.directionalLights = new List<DirectionalLightData>();
m_lightList.punctualLights = new List<LightData>();
m_lightList.areaLights = new List<LightData>();
m_lightList.envLights = new List<EnvLightData>();
m_lightList.punctualShadows = new List<PunctualShadowData>();
m_lightList.directionalShadows = new List<DirectionalShadowData>();
m_lightList.directionalShadowSplitSphereSqr = new Vector4[k_MaxCascadeCount];
void OnDisable()

if (m_SkyboxMaterial) DestroyImmediate(m_SkyboxMaterial);
if (m_SkyHDRIMaterial) DestroyImmediate(m_SkyHDRIMaterial);

int w = camera.pixelWidth;
int h = camera.pixelHeight;
cmd.GetTemporaryRT(s_CameraColorBuffer, w, h, 0, FilterMode.Point, RenderTextureFormat.ARGBHalf, RenderTextureReadWrite.Linear);
cmd.GetTemporaryRT(s_CameraDepthBuffer, w, h, 24, FilterMode.Point, RenderTextureFormat.Depth);
cmd.GetTemporaryRT(m_CameraColorBuffer, w, h, 0, FilterMode.Point, RenderTextureFormat.ARGBHalf, RenderTextureReadWrite.Linear);
cmd.GetTemporaryRT(m_CameraDepthBuffer, w, h, 24, FilterMode.Point, RenderTextureFormat.Depth);
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(true, false, new Color(0, 0, 0, 0));

var cmd = new CommandBuffer();
cmd.name = "Clear HDR target";
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(false, true, new Color(0, 0, 0, 0));

var cmd = new CommandBuffer();
cmd.name = "Clear GBuffer";
// Write into the Camera Depth buffer
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(m_CameraDepthBuffer));
// Clear everything
// TODO: Clear is not required for color as we rewrite everything, will save performance.
cmd.ClearRenderTarget(false, true, new Color(0, 0, 0, 0));

// TODO: Must do opaque then alpha masked for performance!
// TODO: front to back for opaque and by material for opaque tested when we split in two
var cmd = new CommandBuffer { name = "Depth Prepass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraDepthBuffer));

// setup GBuffer for rendering
var cmd = new CommandBuffer { name = "GBuffer Pass" };
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(m_gbufferManager.GetGBuffers(cmd), new RenderTargetIdentifier(m_CameraDepthBuffer));

// TODO: Use the render queue index to only send the forward opaque!
var cmd = new CommandBuffer { name = "Depth Prepass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraDepthBuffer));

// Render Opaque forward
var cmd = new CommandBuffer { name = "DebugView Material Mode Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));
cmd.ClearRenderTarget(true, true, new Color(0, 0, 0, 0));

// m_gbufferManager.BindBuffers(m_DeferredMaterial);
// TODO: Bind depth textures
var cmd = new CommandBuffer { name = "GBuffer Debug Pass" };
cmd.Blit(null, new RenderTargetIdentifier(s_CameraColorBuffer), m_DebugViewMaterialGBuffer, 0);
cmd.Blit(null, new RenderTargetIdentifier(m_CameraColorBuffer), m_DebugViewMaterialGBuffer, 0);

// Last blit
var cmd = new CommandBuffer { name = "Blit DebugView Material Debug" };
cmd.Blit(new RenderTargetIdentifier(s_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget);
cmd.Blit(new RenderTargetIdentifier(m_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget);

// m_gbufferManager.BindBuffers(m_DeferredMaterial);
// TODO: Bind depth textures
var cmd = new CommandBuffer { name = "Deferred Ligthing Pass" };
cmd.Blit(null, new RenderTargetIdentifier(s_CameraColorBuffer), m_DeferredMaterial, 0);
cmd.Blit(null, new RenderTargetIdentifier(m_CameraColorBuffer), m_DeferredMaterial, 0);

var cmd = new CommandBuffer { name = "Forward Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));

var cmd = new CommandBuffer { name = "Forward Unlit Pass" };
cmd.SetRenderTarget(new RenderTargetIdentifier(s_CameraColorBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.SetRenderTarget(new RenderTargetIdentifier(m_CameraColorBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));

int h = camera.pixelHeight;
var cmd = new CommandBuffer { name = "Velocity Pass" };
cmd.GetTemporaryRT(s_VelocityBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetVelocityBufferFormat(), Builtin.RenderLoop.GetVelocityBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(s_VelocityBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.GetTemporaryRT(m_VelocityBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetVelocityBufferFormat(), Builtin.RenderLoop.GetVelocityBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(m_VelocityBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));

int h = camera.pixelHeight;
var cmd = new CommandBuffer { name = "Distortion Pass" };
cmd.GetTemporaryRT(s_DistortionBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetDistortionBufferFormat(), Builtin.RenderLoop.GetDistortionBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(s_DistortionBuffer), new RenderTargetIdentifier(s_CameraDepthBuffer));
cmd.GetTemporaryRT(m_DistortionBuffer, w, h, 0, FilterMode.Point, Builtin.RenderLoop.GetDistortionBufferFormat(), Builtin.RenderLoop.GetDistortionBufferReadWrite());
cmd.SetRenderTarget(new RenderTargetIdentifier(m_DistortionBuffer), new RenderTargetIdentifier(m_CameraDepthBuffer));

var cmd = new CommandBuffer { name = "FinalPass" };
// Resolve our HDR texture to CameraTarget.
cmd.Blit(new RenderTargetIdentifier(s_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget, m_FinalPassMaterial, 0);
cmd.Blit(new RenderTargetIdentifier(m_CameraColorBuffer), BuiltinRenderTextureType.CameraTarget, m_FinalPassMaterial, 0);
void ConvertLightForGPU(CullResults cullResults, ref ShadowOutput shadowOutput, ref LightList lightList)
void PrepareLightsForGPU(CullResults cullResults, Camera camera, ref ShadowOutput shadowOutput, ref LightList lightList)


if (additionalData.archetype == LightArchetype.Punctual)

envLightData.offsetLS = probe.center; // center is misnamed, it is the offset (in local space) from center of the bounding box to the cubemap capture point
envLightData.blendDistance = blendDistance;
// build per tile light lists
m_SinglePassLightLoop.PrepareLightsForGPU(cullResults, camera, m_lightList);
m_TilePassLightLoop.PrepareLightsForGPU(cullResults, camera, m_lightList);
{ /*
if (camera.pixelWidth != m_WidthOnRecord || camera.pixelHeight != m_HeightOnRecord || m_TilePassLightLoop.NeedResize())
if (m_WidthOnRecord > 0 && m_HeightOnRecord > 0)

m_WidthOnRecord = camera.pixelWidth;
m_HeightOnRecord = camera.pixelHeight;
public void PushGlobalParams(Camera camera, RenderLoop renderLoop, HDRenderLoop.LightList lightList)

Shader.SetGlobalTexture("_EnvTextures", m_CubeReflTexArray.GetTexCache());
m_SinglePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
// m_TilePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
m_TilePassLightLoop.PushGlobalParams(camera, renderLoop, lightList);
public override void Render(Camera[] cameras, RenderLoop renderLoop)

renderLoop.SetupCameraProperties(camera); // Need to recall SetupCameraProperties after m_ShadowPass.Render
ConvertLightForGPU(cullResults, ref shadows, ref m_lightList);
PrepareLightsForGPU(cullResults, camera, ref shadows, ref m_lightList);
m_TilePassLightLoop.BuildGPULightLists(camera, renderLoop, m_lightList, m_CameraDepthBuffer);
// build per tile light lists
//var numLights = 0; // GenerateSourceLightBuffers(camera, cullResults);
//m_tilePassLightLoop.BuildPerTileLightLists(camera, loop, numLights, projscr, invProjscr);
RenderDeferredLighting(camera, renderLoop);


public int IESIndex;
public int cookieIndex;
public GPULightType lightType;
public GPULightType lightType;
// Area Light specific
public Vector2 size;
public bool twoSided;


s_PunctualShadowList = null;
public void PrepareLightsForGPU(CullResults cullResults, Camera camera, HDRenderLoop.LightList lightList) {}
public void PushGlobalParams(Camera camera, RenderLoop loop, HDRenderLoop.LightList lightList)


#pragma kernel TileLightListGen LIGHTLISTGEN=TileLightListGen
#pragma kernel TileLightListGen_SrcBigTile LIGHTLISTGEN=TileLightListGen_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
float FetchDepth(Texture2D depthTexture, uint2 pixCoord)
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0)).x;
#include "../ShaderBase.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"

while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
SFiniteLightData lightData = g_vLightData[idxCoarse];
const bool bIsSpotDisc = (lightData.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
// TODO: Change by SebL
const bool bIsSpotDisc = true; // (lightData.flags&IS_CIRCULAR_SPOT_SHAPE) != 0;
// serially check 4 pixels
uint uVal = 0;


using UnityEngine;
using UnityEngine.Experimental.Rendering;
using UnityEngine.Rendering;
using System;
namespace UnityEngine.Experimental.ScriptableRenderLoop

public struct SFiniteLightData
public Vector3 lightPos;
public int flags;
public Vector3 lightAxisX;
public uint lightType;

public Vector3 lightAxisZ; // spot +Z axis
public float cotan;
public Vector3 color;
public uint lightModel; // DIRECT_LIGHT=0, REFLECTION_LIGHT=1
public float unusued;
public uint lightModel; // DIRECT_LIGHT=0, REFLECTION_LIGHT=1
public Vector3 boxInvRange;
public float unused2;

public const int MaxNumLights = HDRenderLoop.k_MaxPunctualLightsOnSCreen + HDRenderLoop.k_MaxAreaLightsOnSCreen + HDRenderLoop.k_MaxEnvLightsOnSCreen;
public const int MaxNumLights = 1024;
ComputeShader buildScreenAABBShader;
ComputeShader buildPerTileLightListShader; // FPTL
ComputeShader buildPerBigTileLightListShader;
ComputeShader buildPerVoxelLightListShader; // clustered
static ComputeShader buildScreenAABBShader;
static ComputeShader buildPerTileLightListShader; // FPTL
static ComputeShader buildPerBigTileLightListShader;
static ComputeShader buildPerVoxelLightListShader; // clustered
private static int s_GenAABBKernel;
private static int s_GenListPerTileKernel;

private static ComputeBuffer s_ConvexBoundsBuffer;
private static ComputeBuffer s_AABBBoundsBuffer;
private static ComputeBuffer s_LightList;
private static ComputeBuffer s_DirLightList;
private static ComputeBuffer s_BigTileLightList; // used for pre-pass coarse culling on 64x64 tiles
private static int s_GenListPerBigTileKernel;

public bool disableFptlWhenClustered = false; // still useful on opaques
public bool enableBigTilePrepass = true;
public bool enableBigTilePrepass = false; // SebL - TODO: I get a crash when enabling this
public bool enableDrawLightBoundsDebug = false;
public bool enableDrawTileDebug = false;
public bool enableComputeLightEvaluation = false;

// clustered light list specific buffers and data end
const int k_TileSize = 16;
SFiniteLightBound[] m_boundData;
SFiniteLightData[] m_lightData;
int m_lightCount;
return true;

if (s_AABBBoundsBuffer != null)

if (s_LightDataBuffer != null)
if (s_DirLightList != null)
buildScreenAABBShader = Resources.Load<ComputeShader>("scrbound");

s_AABBBoundsBuffer = new ComputeBuffer(2 * MaxNumLights, 3 * sizeof(float));
s_ConvexBoundsBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
s_LightDataBuffer = new ComputeBuffer(MaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightData)));
s_DirLightList = new ComputeBuffer(MaxNumDirLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(DirectionalLight)));
buildScreenAABBShader.SetBuffer(s_GenAABBKernel, "g_data", s_ConvexBoundsBuffer);
buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vBoundsBuffer", s_AABBBoundsBuffer);
buildPerTileLightListShader.SetBuffer(s_GenListPerTileKernel, "g_vLightData", s_LightDataBuffer);

s_GlobalLightListAtomic = new ComputeBuffer(1, sizeof(uint));
if (enableBigTilePrepass)
s_GenListPerBigTileKernel = buildPerBigTileLightListShader.FindKernel("BigTileLightListGen");

m_boundData = new SFiniteLightBound[MaxNumLights];
m_lightData = new SFiniteLightData[MaxNumLights];
m_lightCount = 0;
// TODO: do something for Resources.Load<ComputeShader> ?

public bool NeedResize()
return s_LightList == null || (s_BigTileLightList == null && enableBigTilePrepass) || (s_PerVoxelLightLists == null && enableClustered);

s_BigTileLightList = new ComputeBuffer(LightDefinitions.MAX_NR_BIGTILE_LIGHTS_PLUSONE * nrBigTiles, sizeof(uint));
// TEMP: These functions should be implemented C++ side, for now do it in C#
// Records a command on `cmd` that uploads a single 4x4 matrix to the compute shader
// parameter `name` as 16 consecutive floats.
//   cmd      - command buffer the SetComputeFloatParams call is recorded into
//   shadercs - compute shader that owns the parameter
//   name     - name of the shader parameter to set
//   mat      - matrix to upload
private static void SetMatrixCS(CommandBuffer cmd, ComputeShader shadercs, string name, Matrix4x4 mat)
// Scratch array for the flattened matrix; a new one is allocated on every call.
var data = new float[16];
// Outer loop walks columns, inner loop walks rows, so the element at (row r, column c)
// lands at index 4 * c + r, i.e. each column's four values are stored contiguously.
for (int c = 0; c < 4; c++)
for (int r = 0; r < 4; r++)
data[4 * c + r] = mat[r, c];
cmd.SetComputeFloatParams(shadercs, name, data);
// Records a command on `cmd` that uploads an array of 4x4 matrices to the compute shader
// parameter `name` as one flat float array (16 floats per matrix).
//   cmd      - command buffer the SetComputeFloatParams call is recorded into
//   shadercs - compute shader that owns the parameter
//   name     - name of the shader parameter to set
//   matArray - matrices to upload; its Length is read, so it must not be null
private static void SetMatrixArrayCS(CommandBuffer cmd, ComputeShader shadercs, string name, Matrix4x4[] matArray)
int numMatrices = matArray.Length;
// Scratch array for all flattened matrices; a new one is allocated on every call.
var data = new float[numMatrices * 16];
// Matrix n starts at offset 16 * n; inside a matrix the element at (row r, column c)
// lands at 4 * c + r, the same layout SetMatrixCS uses for a single matrix.
for (int n = 0; n < numMatrices; n++)
for (int c = 0; c < 4; c++)
for (int r = 0; r < 4; r++)
data[16 * n + 4 * c + r] = matArray[n][r, c];
cmd.SetComputeFloatParams(shadercs, name, data);
int GenerateSourceLightBuffers(Camera camera, CullResults inputs)
// Records a command on `cmd` that uploads an array of Vector4 values to the compute shader
// parameter `name` as one flat float array (4 floats per vector).
//   cmd      - command buffer the SetComputeFloatParams call is recorded into
//   shadercs - compute shader that owns the parameter
//   name     - name of the shader parameter to set
//   vecArray - vectors to upload; its Length is read, so it must not be null
private static void SetVectorArrayCS(CommandBuffer cmd, ComputeShader shadercs, string name, Vector4[] vecArray)
int numVectors = vecArray.Length;
// Scratch array for the flattened vectors; a new one is allocated on every call.
var data = new float[numVectors * 4];
// Component i of vector n is written to index 4 * n + i.
for (int n = 0; n < numVectors; n++)
for (int i = 0; i < 4; i++)
data[4 * n + i] = vecArray[n][i];
cmd.SetComputeFloatParams(shadercs, name, data);
// Returns the matrix used to switch camera-space handedness: the identity matrix, or the
// identity with column 2 replaced by (0, 0, -1, 0) when
// LightDefinitions.USE_LEFTHAND_CAMERASPACE is non-zero.
static Matrix4x4 GetFlipMatrix()
Matrix4x4 flip = Matrix4x4.identity;
// The flag is compared as an integer; any non-zero value enables the flip.
bool isLeftHand = ((int)LightDefinitions.USE_LEFTHAND_CAMERASPACE) != 0;
// With column 2 set to (0, 0, -1, 0), multiplying a vector by this matrix negates its Z component.
if (isLeftHand) flip.SetColumn(2, new Vector4(0.0f, 0.0f, -1.0f, 0.0f));
return flip;
// Returns the camera's world-to-camera matrix pre-multiplied by GetFlipMatrix(), so the
// flip (if any) is applied to the camera-space result of camera.worldToCameraMatrix.
static Matrix4x4 WorldToCamera(Camera camera)
return GetFlipMatrix() * camera.worldToCameraMatrix;
static Matrix4x4 CameraProjection(Camera camera)
var probes = inputs.visibleReflectionProbes;
//ReflectionProbe[] probes = Object.FindObjectsOfType<ReflectionProbe>();
return camera.projectionMatrix * GetFlipMatrix();
public void PrepareLightsForGPU(CullResults cullResults, Camera camera, HDRenderLoop.LightList lightList)
// Use for first space screen AABB
// Use for the second pass (fine pruning)
// first pass. Figure out how much we have of each and establish offsets
foreach (var cl in inputs.visibleLights)
// TODO manage area lights
foreach (var punctualLight in lightList.punctualLights)
var volType = cl.lightType == LightType.Spot ? LightDefinitions.SPOT_LIGHT : (cl.lightType == LightType.Point ? LightDefinitions.SPHERE_LIGHT : -1);
if (volType >= 0) ++numEntries[LightDefinitions.DIRECT_LIGHT, volType];
var volType = punctualLight.lightType == GPULightType.Spot ? LightDefinitions.SPOT_LIGHT : (punctualLight.lightType == GPULightType.Point ? LightDefinitions.SPHERE_LIGHT : -1);
if (volType >= 0)
++numEntries[LightDefinitions.DIRECT_LIGHT, volType];
foreach (var rl in probes)
// TODO: manage sphere_light
foreach (var envLight in lightList.envLights)
if (rl.texture != null) ++numEntries[LightDefinitions.REFLECTION_LIGHT, volType];
++numEntries[LightDefinitions.REFLECTION_LIGHT, volType];
// add decals here too similar to the above

offsets[m, 0] = m == 0 ? 0 : (numEntries[m - 1, numVolTypes - 1] + offsets[m - 1, numVolTypes - 1]);
for (var v = 1; v < numVolTypes; v++) offsets[m, v] = numEntries[m, v - 1] + offsets[m, v - 1];
for (var v = 1; v < numVolTypes; v++)
offsets[m, v] = numEntries[m, v - 1] + offsets[m, v - 1];
var numLights = inputs.visibleLights.Length;
var numProbes = probes.Length;
var numVolumes = numLights + numProbes;
var lightData = new SFiniteLightData[numVolumes];
var boundData = new SFiniteLightBound[numVolumes];
bool isNegDeterminant = Vector3.Dot(worldToView.GetColumn(0), Vector3.Cross(worldToView.GetColumn(1), worldToView.GetColumn(2))) < 0.0f; // 3x3 Determinant.
uint shadowLightIndex = 0;
foreach (var cl in inputs.visibleLights)
for (int lightIndex = 0; lightIndex < lightList.punctualLights.Count; lightIndex++)
var range = cl.range;
LightData punctualLightData = lightList.punctualLights[lightIndex];
VisibleLight light = cullResults.visibleLights[lightList.punctualCullIndices[lightIndex]];
var lightToWorld = cl.localToWorld;
var range = light.range;
var lightToWorld = light.localToWorld;
// Fill bounds
var light = new SFiniteLightData();
var lightData = new SFiniteLightData();
int index = -1;
bound.boxAxisX.Set(1, 0, 0);
bound.boxAxisY.Set(0, 1, 0);
bound.boxAxisZ.Set(0, 0, 1);
bound.scaleXY.Set(1.0f, 1.0f);
bound.radius = range;
light.flags = 0;
light.recipRange = 1.0f / range;
light.color.Set(cl.finalColor.r, cl.finalColor.g, cl.finalColor.b);
light.sliceIndex = 0;
light.lightModel = (uint)LightDefinitions.DIRECT_LIGHT;
light.shadowLightIndex = shadowLightIndex;
lightData.lightModel = (uint)LightDefinitions.DIRECT_LIGHT;
var bHasCookie = cl.light.cookie != null;
var bHasShadow = cl.light.shadows != LightShadows.None;
var idxOut = 0;
if (cl.lightType == LightType.Spot)
if (punctualLightData.lightType == GPULightType.Spot || punctualLightData.lightType == GPULightType.ProjectorPyramid)
var isCircularSpot = !bHasCookie;
if (!isCircularSpot) // square spots always have cookie
light.sliceIndex = m_CookieTexArray.FetchSlice(cl.light.cookie);
Vector3 lightDir = lightToWorld.GetColumn(2); // Z axis in world space
// represents a left hand coordinate system in world space

const float degToRad = (float)(pi / 180.0);
var sa = cl.light.spotAngle;
var sa = light.light.spotAngle;
var cs = Mathf.Cos(0.5f * sa * degToRad);
var si = Mathf.Sin(0.5f * sa * degToRad);

var squeeze = true;//sa < 0.7f * 90.0f; // arb heuristic
var fS = squeeze ? ta : si;
bound.center = worldToView.MultiplyPoint(lightPos + ((0.5f * range) * lightDir)); // use mid point of the spot as the center of the bounding volume for building screen-space AABB for tiled lighting.
light.lightAxisX = vx;
light.lightAxisY = vy;
light.lightAxisZ = vz;
// scale axis to match box or base of pyramid
bound.boxAxisX = (fS * range) * vx;

fAltDx *= range; fAltDy *= range;
var altDist = Mathf.Sqrt(fAltDy * fAltDy + (isCircularSpot ? 1.0f : 2.0f) * fAltDx * fAltDx);
var altDist = Mathf.Sqrt(fAltDy * fAltDy + (punctualLightData.lightType == GPULightType.Spot ? 1.0f : 2.0f) * fAltDx * fAltDx);
// fill up ldata
light.lightType = (uint)LightDefinitions.SPOT_LIGHT;
light.lightPos = worldToView.MultiplyPoint(lightPos);
light.radiusSq = range * range;
light.penumbra = cs;
light.cotan = cota;
light.flags |= (isCircularSpot ? LightDefinitions.IS_CIRCULAR_SPOT_SHAPE : 0);
light.flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
light.flags |= (bHasShadow ? LightDefinitions.HAS_SHADOW : 0);
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lightData.lightType = (uint)LightDefinitions.SPOT_LIGHT;
lightData.lightPos = worldToView.MultiplyPoint(lightPos);
lightData.radiusSq = range * range;
lightData.cotan = cota;
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
else if (cl.lightType == LightType.Point)
else // if (punctualLightData.lightType == GPULightType.Point)
if (bHasCookie)
light.sliceIndex = m_CubeCookieTexArray.FetchSlice(cl.light.cookie);
bool isNegDeterminant = Vector3.Dot(worldToView.GetColumn(0), Vector3.Cross(worldToView.GetColumn(1), worldToView.GetColumn(2))) < 0.0f; // 3x3 Determinant.
bound.center = worldToView.MultiplyPoint(lightPos);
bound.boxAxisX.Set(range, 0, 0);

Vector3 vz = lightToView.GetColumn(2);
// fill up ldata
light.lightType = (uint)LightDefinitions.SPHERE_LIGHT;
light.lightPos = bound.center;
light.radiusSq = range * range;
light.lightAxisX = vx;
light.lightAxisY = vy;
light.lightAxisZ = vz;
light.flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
light.flags |= (bHasShadow ? LightDefinitions.HAS_SHADOW : 0);
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lightData.lightType = (uint)LightDefinitions.SPHERE_LIGHT;
lightData.lightPos = bound.center;
lightData.radiusSq = range * range;
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
// next light
if (cl.lightType == LightType.Spot || cl.lightType == LightType.Point)
boundData[idxOut] = bound;
lightData[idxOut] = light;
m_boundData[index] = bound;
m_lightData[index] = lightData;
var numLightsOut = offsets[LightDefinitions.DIRECT_LIGHT, numVolTypes - 1] + numEntries[LightDefinitions.DIRECT_LIGHT, numVolTypes - 1];
// probe.m_BlendDistance
// Vector3f extents = 0.5*Abs(probe.m_BoxSize);
// C center of rendered refl box <-- GetComponent (Transform).GetPosition() + m_BoxOffset;
// cube map capture point: GetComponent (Transform).GetPosition()
// shader parameter min and max are C+/-(extents+blendDistance)
foreach (var rl in probes)
for (int envIndex = 0; envIndex < lightList.envLights.Count; envIndex++)
var cubemap = rl.texture;
EnvLightData envLightData = lightList.envLights[envIndex];
VisibleReflectionProbe probe = cullResults.visibleReflectionProbes[lightList.envCullIndices[envIndex]];
// always a box for now
if (cubemap == null)
var bound = new SFiniteLightBound();
var lightData = new SFiniteLightData();
var bndData = new SFiniteLightBound();
var lgtData = new SFiniteLightData();
var idxOut = 0;
lgtData.flags = 0;
var bnds = rl.bounds;
var boxOffset = rl.center; // reflection volume offset relative to cube map capture point
var blendDistance = rl.blendDistance;
float imp = rl.importance;
var bnds = probe.bounds;
var boxOffset = probe.center; // reflection volume offset relative to cube map capture point
var blendDistance = probe.blendDistance;
var mat = rl.localToWorld;
//Matrix4x4 mat = rl.transform.localToWorldMatrix;
Vector3 cubeCapturePos = mat.GetColumn(3); // cube map capture position in world space
// implicit in CalculateHDRDecodeValues() --> float ints = rl.intensity;
var boxProj = (rl.boxProjection != 0);
var decodeVals = rl.hdr;
//Vector4 decodeVals = rl.CalculateHDRDecodeValues();
var mat = probe.localToWorld;
// C is reflection volume center in world space (NOT same as cube map capture point)
var e = bnds.extents; // 0.5f * Vector3.Max(-boxSizes[p], boxSizes[p]);

//Vector3 posForShaderParam = bnds.center - boxOffset; // gives same as rl.GetComponent<Transform>().position;
var posForShaderParam = cubeCapturePos; // same as commented out line above when rot is identity
var combinedExtent = e + new Vector3(blendDistance, blendDistance, blendDistance);
var combinedExtent = e + new Vector3(blendDistance, blendDistance, blendDistance);
Vector3 vx = mat.GetColumn(0);
Vector3 vy = mat.GetColumn(1);

var Cw = worldToView.MultiplyPoint(C);
if (boxProj) lgtData.flags |= LightDefinitions.IS_BOX_PROJECTED;
bound.center = Cw;
bound.boxAxisX = combinedExtent.x * vx;
bound.boxAxisY = combinedExtent.y * vy;
bound.boxAxisZ = combinedExtent.z * vz;
bound.scaleXY.Set(1.0f, 1.0f);
bound.radius = combinedExtent.magnitude;
lgtData.lightPos = Cw;
lgtData.lightAxisX = vx;
lgtData.lightAxisY = vy;
lgtData.lightAxisZ = vz;
lgtData.localCubeCapturePoint = -boxOffset;
lgtData.probeBlendDistance = blendDistance;
lgtData.lightIntensity = decodeVals.x;
lgtData.decodeExp = decodeVals.y;
lgtData.sliceIndex = m_CubeReflTexArray.FetchSlice(cubemap);
lightData.lightPos = Cw;
lightData.lightAxisX = vx;
lightData.lightAxisY = vy;
lightData.lightAxisZ = vz;
lgtData.boxInnerDist = e;
lgtData.boxInvRange.Set(1.0f / delta.x, 1.0f / delta.y, 1.0f / delta.z);
bndData.center = Cw;
bndData.boxAxisX = combinedExtent.x * vx;
bndData.boxAxisY = combinedExtent.y * vy;
bndData.boxAxisZ = combinedExtent.z * vz;
bndData.scaleXY.Set(1.0f, 1.0f);
bndData.radius = combinedExtent.magnitude;
// fill up ldata
lgtData.lightType = (uint)LightDefinitions.BOX_LIGHT;
lgtData.lightModel = (uint)LightDefinitions.REFLECTION_LIGHT;
lightData.boxInnerDist = e;
lightData.boxInvRange.Set(1.0f / delta.x, 1.0f / delta.y, 1.0f / delta.z);
idxOut = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
boundData[idxOut] = bndData;
lightData[idxOut] = lgtData;
int index = numEntries2nd[i, j] + offsets[i, j]; ++numEntries2nd[i, j];
m_boundData[index] = bound;
var numProbesOut = offsets[LightDefinitions.REFLECTION_LIGHT, numVolTypes - 1] + numEntries[LightDefinitions.REFLECTION_LIGHT, numVolTypes - 1];
// Sanity check
m_lightCount = lightList.punctualLights.Count + lightList.envLights.Count;
s_ConvexBoundsBuffer.SetData(m_boundData); // TODO: check with Vlad what is happening here, do we copy 1024 element always ? Could we setup the size we want to copy ?
void VoxelLightListGeneration(CommandBuffer cmd, Camera camera, Matrix4x4 projscr, Matrix4x4 invProjscr, int cameraDepthBuffer)
// clear atomic offset index
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
cmd.DispatchCompute(buildPerVoxelLightListShader, s_ClearVoxelAtomicKernel, 1, 1, 1);
cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_iNrVisibLights", m_lightCount);
SetMatrixCS(cmd, buildPerVoxelLightListShader, "g_mScrProjection", projscr);
SetMatrixCS(cmd, buildPerVoxelLightListShader, "g_mInvScrProjection", invProjscr);
cmd.SetComputeIntParam(buildPerVoxelLightListShader, "g_iLog2NumClusters", k_Log2NumClusters);
//Vector4 v2_near = invProjscr * new Vector4(0.0f, 0.0f, 0.0f, 1.0f);
//Vector4 v2_far = invProjscr * new Vector4(0.0f, 0.0f, 1.0f, 1.0f);
//float nearPlane2 = -(v2_near.z/v2_near.w);
//float farPlane2 = -(v2_far.z/v2_far.w);
var nearPlane = camera.nearClipPlane;
var farPlane = camera.farClipPlane;
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fNearPlane", nearPlane);
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fFarPlane", farPlane);
const float C = (float)(1 << k_Log2NumClusters);
var geomSeries = (1.0 - Mathf.Pow(k_ClustLogBase, C)) / (1 - k_ClustLogBase); // geometric series: sum_k=0^{C-1} base^k
m_ClustScale = (float)(geomSeries / (farPlane - nearPlane));
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fClustScale", m_ClustScale);
cmd.SetComputeFloatParam(buildPerVoxelLightListShader, "g_fClustBase", k_ClustLogBase);
return numLightsOut + numProbesOut;
cmd.SetComputeTextureParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_depth_tex", new RenderTargetIdentifier(cameraDepthBuffer));
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vLayeredLightList", s_PerVoxelLightLists);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredOffset", s_PerVoxelOffset);
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_LayeredSingleIdxBuffer", s_GlobalLightListAtomic);
if (enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_vBigTileLightList", s_BigTileLightList);
if (k_UseDepthBuffer)
cmd.SetComputeBufferParam(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, "g_logBaseBuffer", s_PerTileLogBaseTweak);
void BuildPerTileLightLists(Camera camera, RenderLoop loop, int numLights, Matrix4x4 projscr, Matrix4x4 invProjscr)
var numTilesX = (camera.pixelWidth + 15) / 16;
var numTilesY = (camera.pixelHeight + 15) / 16;
cmd.DispatchCompute(buildPerVoxelLightListShader, s_GenListPerVoxelKernel, numTilesX, numTilesY, 1);
public void BuildGPULightLists(Camera camera, RenderLoop loop, HDRenderLoop.LightList lightList, int cameraDepthBuffer)
var w = camera.pixelWidth;
var h = camera.pixelHeight;

var numBigTilesY = (h + 63) / 64;
// camera to screen matrix (and its inverse)
var proj = CameraProjection(camera);
var temp = new Matrix4x4();
temp.SetRow(0, new Vector4(0.5f * w, 0.0f, 0.0f, 0.5f * w));
temp.SetRow(1, new Vector4(0.0f, 0.5f * h, 0.0f, 0.5f * h));
temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
temp.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));
var projscr = temp * proj;
var invProjscr = projscr.inverse;
var proj = CameraProjection(camera);
var temp = new Matrix4x4();
temp.SetRow(0, new Vector4(1.0f, 0.0f, 0.0f, 0.0f));
temp.SetRow(1, new Vector4(0.0f, 1.0f, 0.0f, 0.0f));
temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));

cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildScreenAABBShader, "g_iNrVisibLights", m_lightCount);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (numLights + 7) / 8, 1, 1);
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (m_lightCount + 7) / 8, 1, 1);
// enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).

cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildPerBigTileLightListShader, "g_iNrVisibLights", m_lightCount);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mScrProjection", projscr);
SetMatrixCS(cmd, buildPerBigTileLightListShader, "g_mInvScrProjection", invProjscr);
cmd.SetComputeFloatParam(buildPerBigTileLightListShader, "g_fNearPlane", camera.nearClipPlane);

if (usingFptl) // optimized for opaques only
cmd.SetComputeIntParams(buildPerTileLightListShader, "g_viDimensions", new int[2] { w, h });
cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", numLights);
cmd.SetComputeIntParam(buildPerTileLightListShader, "g_iNrVisibLights", m_lightCount);
cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(s_CameraDepthTexture));
cmd.SetComputeTextureParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(cameraDepthBuffer));
cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vLightList", s_LightList);
if (enableBigTilePrepass) cmd.SetComputeBufferParam(buildPerTileLightListShader, s_GenListPerTileKernel, "g_vBigTileLightList", s_BigTileLightList);
cmd.DispatchCompute(buildPerTileLightListShader, s_GenListPerTileKernel, numTilesX, numTilesY, 1);

VoxelLightListGeneration(cmd, camera, numLights, projscr, invProjscr);
VoxelLightListGeneration(cmd, camera, projscr, invProjscr, cameraDepthBuffer);


struct SFiniteLightData
float3 lightPos;
int flags;
float3 lightAxisX;
uint lightType;
float3 lightAxisY;

float3 color;
uint lightModel;
float unusued;
uint lightModel;
float3 boxInvRange;
float unused2;

return value.lightPos;
int GetFlags(SFiniteLightData value)
return value.flags;
float3 GetLightAxisX(SFiniteLightData value)
return value.lightAxisX;

return value.cotan;
float3 GetColor(SFiniteLightData value)
float3 GetBoxInnerDist(SFiniteLightData value)
return value.color;
return value.boxInnerDist;
float3 GetBoxInnerDist(SFiniteLightData value)
return value.boxInnerDist;
float GetUnusued(SFiniteLightData value)
return value.unusued;
float3 GetBoxInvRange(SFiniteLightData value)


#define FLT_EPSILON 1.192092896e-07f
// Returns the cluster scale for a given logarithm base: the geometric series
// base^0 + ... + base^(C-1), with C = 1 << g_iLog2NumClusters, divided by the
// depth range (g_fFarPlane - g_fNearPlane).
// NOTE(review): (1 - base) is used as a divisor, so base == 1 is presumably never passed in — confirm.
float GetScaleFromBase(float base)
const float C = (float)(1 << g_iLog2NumClusters);
const float geomSeries = (1.0 - pow(base, C)) / (1 - base); // geometric series: sum_k=0^{C-1} base^k
return geomSeries / (g_fFarPlane - g_fNearPlane);
// Converts a depth value into a cluster index, clamped to [0, (1 << g_iLog2NumClusters) - 1].
//   z_in           depth to classify
//   suggestedBase  logarithm base of the cluster distribution
//   logBasePerTile when true the scale is rebuilt from suggestedBase via GetScaleFromBase,
//                  otherwise the global g_fClustScale is used
int SnapToClusterIdxFlex(float z_in, float suggestedBase, bool logBasePerTile)
// NOTE(review): z is declared twice below (z_in and -z_in); presumably these are alternative
// branches of a preprocessor conditional that is not visible here — confirm.
float z = z_in;
float z = -z_in;
float userscale = g_fClustScale;
if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
// using the inverse of the geometric series
const float dist = max(0, z - g_fNearPlane); // distance beyond the near plane, never negative
return (int)clamp(log2(dist * userscale * (suggestedBase - 1.0f) + 1) / log2(suggestedBase), 0.0, (float)((1 << g_iLog2NumClusters) - 1));
// Convenience wrapper around SnapToClusterIdxFlex that supplies the logBasePerTile flag.
// NOTE(review): logBasePerTile is declared twice (true and false); presumably a preprocessor
// conditional not visible here picks one of the two — confirm.
int SnapToClusterIdx(float z_in, float suggestedBase)
bool logBasePerTile = true; // resolved compile time
bool logBasePerTile = false;
return SnapToClusterIdxFlex(z_in, suggestedBase, logBasePerTile);
// Converts a cluster index k back into a depth value; the counterpart of SnapToClusterIdxFlex.
//   k              cluster index
//   suggestedBase  logarithm base of the cluster distribution
//   logBasePerTile when true the scale is rebuilt from suggestedBase via GetScaleFromBase,
//                  otherwise the global g_fClustScale is used
float ClusterIdxToZFlex(int k, float suggestedBase, bool logBasePerTile)
float res;
float userscale = g_fClustScale;
if (logBasePerTile)
userscale = GetScaleFromBase(suggestedBase);
// partial geometric sum up to index k, rescaled into a distance from the near plane
float dist = (pow(suggestedBase, (float)k) - 1.0) / (userscale * (suggestedBase - 1.0f));
res = dist + g_fNearPlane;
// NOTE(review): two return statements follow (res and -res); presumably alternative branches
// of a preprocessor conditional that is not visible here — confirm.
return res;
return -res;
// Convenience wrapper around ClusterIdxToZFlex that supplies the logBasePerTile flag.
// NOTE(review): logBasePerTile is declared twice (true and false); presumably a preprocessor
// conditional not visible here picks one of the two — confirm.
float ClusterIdxToZ(int k, float suggestedBase)
bool logBasePerTile = true; // resolved compile time
bool logBasePerTile = false;
return ClusterIdxToZFlex(k, suggestedBase, logBasePerTile);
// Generates a log-base value such that half of the clusters are consumed from the near plane
// to the max. opaque depth of the tile.
//   tileFarPlane  far depth of the tile, in the same units as g_fNearPlane / g_fFarPlane
// The result is never smaller than g_fClustBase.
float SuggestLogBase50(float tileFarPlane)
const float C = (float)(1 << g_iLog2NumClusters);
// tile depth as a fraction of the full range; clamped away from zero because it is used as a divisor below
float normDist = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);
float suggested_base = pow((1.0 + sqrt(max(0.0, 1.0 - 4.0 * normDist * (1.0 - normDist)))) / (2.0 * normDist), 2.0 / C); // max() keeps the sqrt argument non-negative
return max(g_fClustBase, suggested_base);
// Generates a log-base value such that (approximately) a quarter of the clusters are consumed
// from the near plane to the max. opaque depth of the tile.
//   tileFarPlane  far depth of the tile, in the same units as g_fNearPlane / g_fFarPlane
// The result is never smaller than g_fClustBase.
float SuggestLogBase25(float tileFarPlane)
const float C = (float)(1 << g_iLog2NumClusters);
// tile depth as a fraction of the full range; clamped away from zero because it is used as a divisor below
float normDist = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);
float suggested_base = pow((1 / 2.3) * max(0.0, (0.8 / normDist) - 1), 4.0 / (C * 2)); // approximate inverse of d*x^4 + (-x) + (1-d) = 0 - d is normalized distance
return max(g_fClustBase, suggested_base);


fileFormatVersion: 2
guid: 54f8006db9236c148af831b7fcaadc0c
timeCreated: 1479691314
licenseType: Pro
defaultTextures: []


#pragma kernel BigTileLightListGen
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
uniform int g_iNrVisibLights;
uniform uint2 g_viDimensions;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform float g_fNearPlane;
uniform float g_fFarPlane;
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
#define NR_THREADS 64
// output buffer
RWBuffer<uint> g_vLightList : register( u0 );
// 2kB (room for roughly 30 wavefronts)
groupshared unsigned int lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];
groupshared uint lightOffs;
// Converts a depth-buffer value into a linear depth by transforming the point
// (0, 0, zDptBufSpace, 1) with g_mInvScrProjection and applying the perspective divide.
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w; // only the z component is needed, so x and y are left at zero above
// Reconstructs a position from a screen coordinate and a linear depth, using the x/y scale
// (fSx, fSy) and offset (fCx, fCy) terms read from g_mScrProjection.
//   v2ScrPos   screen-space coordinate
//   fLinDepth  linear depth; the result is scaled by it, so its z component equals fLinDepth
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
// NOTE(review): two return statements follow with different sign conventions; presumably
// alternative branches of a preprocessor conditional that is not visible here — confirm.
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
// Returns the length of the vector (1/fSx, 1/fSy), where fSx and fSy are the x and y scale
// terms of g_mScrProjection; per the function name this is the diagonal extent of one pixel
// at a depth of one.
float GetOnePixDiagWorldDistAtDepthOne()
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrBigTilesX = (iWidth+63)/64;
uint nrBigTilesY = (iHeight+63)/64;
if(t==0) lightOffs = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
uint2 viTilLL = 64*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);
// build coarse list using AABB
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
const float2 vMi = g_vBoundsBuffer[l].xy;
const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;
if( all(vMa>vTileLL) && all(vMi<vTileUR))
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l; // add to light list
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);
SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
// sort lights
lightOffs = 0;
for(int i=t; i<iNrCoarseLights; i+=NR_THREADS) if(lightsListLDS[i]<g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
iNrCoarseLights = lightOffs;
int offs = tileIDX.y*nrBigTilesX + tileIDX.x;
for(int i=t; i<(iNrCoarseLights+1); i+=NR_THREADS)
g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = t==0 ? iNrCoarseLights : lightsListLDS[i-1];
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 32*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
SFiniteLightBound lgtDat = g_data[lightsListLDS[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
// Returns corner i (0..7) of the tile volume as a position computed by GetViewPosFromLinDepth.
// Bit 0 of i selects the x edge (viTilLL.x / viTilUR.x), bit 1 the y edge, and bit 2 the depth
// (g_fNearPlane / fTileFarPlane).
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
z = -z; // depth is negated before it is handed to GetViewPosFromLinDepth
return GetViewPosFromLinDepth( float2(x, y), z);
// Produces a point (vP0) and a direction (vE0) for edge e0 of the tile volume.
//   e0             edge index; e0>>2 is the section, e0&3 the edge within the section
//   viTilLL/UR     tile corners in pixels
//   fTileFarPlane  far depth used for vertices on the far side
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
int iSwizzle = e0&0x3;
int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
// the y components of the two corners are swapped in this call
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
// side edges use the vertex position itself as direction; otherwise the direction is a signed
// unit axis: sign is + when bit 1 of iSwizzle is clear, axis is X for iSwizzle 0 or 3 and Y for 1 or 2
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
const bool bOnlyNeedFrustumSideEdges = true;
const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
const int totNrEdgePairs = 12*nrFrustEdges;
for(int l=0; l<iNrCoarseLights; l++)
const int idxCoarse = lightsListLDS[l];
[branch]if(idxCoarse<(uint) g_iNrVisibLights && g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;
for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
int e1 = i - e0*nrFrustEdges;
int idx_cur=0, idx_twin=0;
float3 vP0, vE0;
GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
float3 vP1, vE1;
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, g_fFarPlane);
// potential separation plane
float3 vN = cross(vE0, vE1);
int positive=0, negative=0;
for(int k=1; k<8; k++) // only need to test 7 verts (technically just 6).
int j = (idx_cur+k)&0x7;
float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
positive=0; negative=0;
for(int j=0; j<8; j++)
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, g_fFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) lightsListLDS[l]=0xffffffff;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)


fileFormatVersion: 2
guid: 5ee1f9d6e09abe045b2f5e0b784b9072
timeCreated: 1479689024
licenseType: Pro
currentAPIMask: 4


#pragma kernel TileLightListGen_NoDepthRT LIGHTLISTGEN=TileLightListGen_NoDepthRT
#pragma kernel TileLightListGen_DepthRT LIGHTLISTGEN=TileLightListGen_DepthRT ENABLE_DEPTH_TEXTURE_BACKPLANE
#pragma kernel TileLightListGen_NoDepthRT_SrcBigTile LIGHTLISTGEN=TileLightListGen_NoDepthRT_SrcBigTile USE_TWO_PASS_TILED_LIGHTING
#pragma kernel ClearAtomic
#include "../ShaderBase.hlsl"
#include "../TilePass.cs.hlsl"
#include "../LightingConvexHullUtils.hlsl"
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
#include "../SortingComputeUtils.hlsl"
uniform int g_iNrVisibLights;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
uniform float g_fClustScale;
uniform float g_fClustBase;
uniform float g_fNearPlane;
uniform float g_fFarPlane;
uniform int g_iLog2NumClusters; // numClusters = (1<<g_iLog2NumClusters)
#include "../ClusteredUtils.hlsl"
Texture2DMS<float> g_depth_tex : register( t0 );
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );
Buffer<uint> g_vBigTileLightList : register( t4 );
#define NR_THREADS 64
// output buffer
RWBuffer<uint> g_vLayeredLightList : register( u0 );
RWBuffer<uint> g_LayeredOffset : register( u1 );
RWBuffer<uint> g_LayeredSingleIdxBuffer : register( u2 );
RWBuffer<float> g_logBaseBuffer : register( u3 );
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int clusterIdxs[MAX_NR_COARSE_ENTRIES/2];
groupshared float4 lightPlanes[4*6];
groupshared uint lightOffs;
groupshared int ldsZMax;
groupshared uint ldsIsLightInvisible;
groupshared uint lightOffs2;
groupshared uint lightOffsSph;
// Converts a depth-buffer value into a linear depth by transforming the point
// (0, 0, zDptBufSpace, 1) with g_mInvScrProjection and applying the perspective divide.
float GetLinearDepth(float zDptBufSpace) // 0 is near 1 is far
float3 vP = float3(0.0f,0.0f,zDptBufSpace);
float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
return v4Pres.z / v4Pres.w; // only the z component is needed, so x and y are left at zero above
// Reconstructs a position from a screen coordinate and a linear depth, using the x/y scale
// (fSx, fSy) and offset (fCx, fCy) terms read from g_mScrProjection.
//   v2ScrPos   screen-space coordinate
//   fLinDepth  linear depth; the result is scaled by it, so its z component equals fLinDepth
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
float fSx = g_mScrProjection[0].x;
float fCx = g_mScrProjection[0].z;
float fSy = g_mScrProjection[1].y;
float fCy = g_mScrProjection[1].z;
// NOTE(review): two return statements follow with different sign conventions; presumably
// alternative branches of a preprocessor conditional that is not visible here — confirm.
return fLinDepth*float3( ((v2ScrPos.x-fCx)/fSx), ((v2ScrPos.y-fCy)/fSy), 1.0 );
return fLinDepth*float3( -((v2ScrPos.x+fCx)/fSx), -((v2ScrPos.y+fCy)/fSy), 1.0 );
// Returns the length of the vector (1/fSx, 1/fSy), where fSx and fSy are the x and y scale
// terms of g_mScrProjection; per the function name this is the diagonal extent of one pixel
// at a depth of one.
float GetOnePixDiagWorldDistAtDepthOne()
float fSx = g_mScrProjection[0].x;
float fSy = g_mScrProjection[1].y;
return length( float2(1.0/fSx,1.0/fSy) );
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane);
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
// returns 1 for intersection and 0 for none
float4 FetchPlane(int l, int p);
// Tests whether coarse-list light l touches cluster k of the tile spanned by viTilLL/viTilUR.
// First the light's packed cluster range is checked, then the eight corners of the cluster
// cell are tested against the six planes cached for the light in lightPlanes.
//   l              index into the coarse light list
//   k              cluster index
//   suggestedBase  log base forwarded to ClusterIdxToZ
// NOTE(review): block braces and any conditional directives are not visible in this view, so
// the exact nesting of the statements below cannot be confirmed from here.
bool CheckIntersection(int l, int k, uint2 viTilLL, uint2 viTilUR, float suggestedBase)
// clusterIdxs packs two lights per uint: 16 bits each, low byte = min cluster, high byte = max cluster
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
bool bIsHit = ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
float depthAtNearZ = ClusterIdxToZ(k, suggestedBase);
float depthAtFarZ = ClusterIdxToZ(k+1, suggestedBase);
for(int p=0; p<6; p++)
float4 plane = lightPlanes[6*(l&3)+p]; // lightPlanes holds 6 planes for each of 4 lights, hence l&3
bool bAllInvisib = true;
for(int i=0; i<8; i++)
// bits 0, 1 and 2 of i pick the x, y and depth side of the cell corner
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? depthAtNearZ : depthAtFarZ;
float3 vP = GetViewPosFromLinDepth( float2(x, y), z);
bAllInvisib = bAllInvisib && dot(plane, float4(vP,1.0))>0;
if(bAllInvisib) bIsHit = false; // every corner is on the positive side of this plane: reject
return bIsHit;
// Returns true when cluster index k lies inside the [min, max] cluster range stored for
// coarse-list light l; no plane tests are performed.
bool CheckIntersectionBasic(int l, int k)
// clusterIdxs packs two lights per uint: 16 bits each, low byte = min cluster, high byte = max cluster
unsigned int val = (clusterIdxs[l>>1]>>(16*(l&1)))&0xffff;
return ((val>>0)&0xff)<=((uint) k) && ((uint) k)<=((val>>8)&0xff);
[numthreads(NR_THREADS, 1, 1)]
void LIGHTLISTGEN(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
uint iWidth;
uint iHeight;
uint iNumSamplesMSAA;
g_depth_tex.GetDimensions(iWidth, iHeight, iNumSamplesMSAA);
g_depth_tex.GetDimensions(iWidth, iHeight);
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
uint2 viTilLL = 16*tileIDX;
uint2 viTilUR = min( viTilLL+uint2(16,16), uint2(iWidth, iHeight) ); // not width and height minus 1 since viTilUR represents the end of the tile corner.
lightOffs = 0;
ldsZMax = 0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
float dpt_ma=1.0;
// establish min and max depth first
for(int idx=t; idx<256; idx+=NR_THREADS)
uint2 uPixCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
for(int i=0; i<iNumSamplesMSAA; i++)
const float fDpth = FetchDepthMSAA(g_depth_tex, uPixCrd, i);
const float fDpth = FetchDepth(g_depth_tex, uPixCrd);
if(fDpth<VIEWPORT_SCALE_Z) // if not skydome
dpt_ma = max(fDpth, dpt_ma);
InterlockedMax(ldsZMax, asuint(dpt_ma) );
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
dpt_ma = asfloat(ldsZMax);
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, 0.0);
float3 vTileUR = float3(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight, 1.0);
// build coarse list using AABB
int NrBigTilesX = (nrTilesX+3)>>2;
const int bigTileIdx = (tileIDX.y>>2)*NrBigTilesX + (tileIDX.x>>2); // map the idx to 64x64 tiles
int nrBigTileLights = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+0];
for(int l0=(int) t; l0<(int) nrBigTileLights; l0 += NR_THREADS)
int l = g_vBigTileLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*bigTileIdx+l0+1];
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
if( all(vMa.xy>vTileLL.xy) && all(vMi.xy<vTileUR.xy))
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
int iNrCoarseLights = min(lightOffs,MAX_NR_COARSE_ENTRIES);
iNrCoarseLights = SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(16/2,16/2), uint2(iWidth-1, iHeight-1))) );
float fTileFarPlane = GetLinearDepth(dpt_ma);
float fTileFarPlane = -GetLinearDepth(dpt_ma);
float suggestedBase = SuggestLogBase50(fTileFarPlane);
float fTileFarPlane = g_fFarPlane;
float suggestedBase = g_fClustBase;
iNrCoarseLights = CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy, fTileFarPlane);
// sort lights (gives a more efficient execution in both deferred and tiled forward lighting).
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
//////////// cell specific code
for(int l=(int) t; l<((iNrCoarseLights+1)>>1); l += NR_THREADS)
const int l0 = coarseList[2*l+0], l1 = coarseList[min(2*l+1,iNrCoarseLights)];
const unsigned int clustIdxMi0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0].z), suggestedBase));
const unsigned int clustIdxMa0 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l0+g_iNrVisibLights].z), suggestedBase));
const unsigned int clustIdxMi1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1].z), suggestedBase));
const unsigned int clustIdxMa1 = (const unsigned int) min(255,SnapToClusterIdx(GetLinearDepth(g_vBoundsBuffer[l1+g_iNrVisibLights].z), suggestedBase));
clusterIdxs[l] = (clustIdxMa1<<24) | (clustIdxMi1<<16) | (clustIdxMa0<<8) | (clustIdxMi0<<0);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
int nrClusters = (1<<g_iLog2NumClusters);
uint start = 0;
int i=(int) t;
int iSpaceAvail = 0;
int iSum = 0;
for(int l=0; l<iNrCoarseLights; l++)
iSum += (CheckIntersectionBasic(l, i) ? 1 : 0);
iSpaceAvail = min(iSum,MAX_NR_COARSE_ENTRIES); // combined storage for both direct lights and reflection
InterlockedAdd(g_LayeredSingleIdxBuffer[0], (uint) iSpaceAvail, start); // alloc list memory
int modelListCount[NR_LIGHT_MODELS]={0,0}; // direct light count and reflection lights
uint offs = start;
for(int ll=0; ll<iNrCoarseLights; ll+=4)
int p = i>>2;
int m = i&3;
if(i<24) lightPlanes[6*m+p] = FetchPlane(min(iNrCoarseLights-1,ll+m), p);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
for(int l=ll; l<min(iNrCoarseLights,(ll+4)); l++)
if(offs<(start+iSpaceAvail) && i<nrClusters && CheckIntersection(l, i, viTilLL.xy, viTilUR.xy, suggestedBase) )
uint lightModel = g_vLightData[ coarseList[l] ].lightModel;
++modelListCount[ lightModel==REFLECTION_LIGHT ? 1 : 0];
g_vLayeredLightList[offs++] = coarseList[l]; // reflection lights will be last since we sorted
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
uint localOffs=0;
offs = i*nrTilesX*nrTilesY + tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
int numLights = min(modelListCount[m],31); // only allow 5 bits
g_LayeredOffset[offs] = (start+localOffs) | (((uint) numLights)<<27);
offs += (nrClusters*nrTilesX*nrTilesY);
localOffs += modelListCount[m]; // use unclamped count for localOffs
g_logBaseBuffer[tileIDX.y*nrTilesX + tileIDX.x] = suggestedBase;
// Returns plane p of the bounding hull of coarse-list light l, as computed by GetPlaneEq
// from the light's SFiniteLightBound record in g_data.
//   l  index into coarseList
//   p  plane index forwarded to GetPlaneEq
float4 FetchPlane(int l, int p)
SFiniteLightBound lgtDat = g_data[coarseList[l]];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float radius = lgtDat.radius; // read here but not used in the visible lines of this function
const float2 scaleXY = lgtDat.scaleXY;
return GetPlaneEq(boxX, boxY, boxZ, center, scaleXY, p);
int SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
float3 V = GetViewPosFromLinDepth( screenCoordinate, 1.0);
float3 V = GetViewPosFromLinDepth( screenCoordinate, -1.0);
float onePixDiagDist = GetOnePixDiagWorldDistAtDepthOne();
float halfTileSizeAtZDistOne = 8*onePixDiagDist; // scale by half a tile
for(int l=threadID; l<iNrCoarseLights; l+=NR_THREADS)
SFiniteLightBound lgtDat = g_data[coarseList[l]];
if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, lgtDat.center.xyz, lgtDat.radius) )
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
// too greedy to double-buffer coarseList in LDS for this, so the removal of gaps is serialized.
int offs = 0;
for(int l=0; l<iNrCoarseLights; l++)
{ if(coarseList[l]!=0xffffffff) coarseList[offs++] = coarseList[l]; }
lightOffsSph = offs;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
return lightOffsSph;
// Returns corner i (0..7) of the tile volume as a position computed by GetViewPosFromLinDepth.
// Bit 0 of i selects the x edge (viTilLL.x / viTilUR.x), bit 1 the y edge, and bit 2 the depth
// (g_fNearPlane / fTileFarPlane).
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
float x = (i&1)==0 ? viTilLL.x : viTilUR.x;
float y = (i&2)==0 ? viTilLL.y : viTilUR.y;
float z = (i&4)==0 ? g_fNearPlane : fTileFarPlane;
z = -z; // depth is negated before it is handed to GetViewPosFromLinDepth
return GetViewPosFromLinDepth( float2(x, y), z);
// Produces a point (vP0) and a direction (vE0) for edge e0 of the tile volume.
//   e0             edge index; e0>>2 is the section, e0&3 the edge within the section
//   viTilLL/UR     tile corners in pixels
//   fTileFarPlane  far depth used for vertices on the far side
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
int iSection = e0>>2; // section 0 is side edges, section 1 is near edges and section 2 is far edges
int iSwizzle = e0&0x3;
int i=iSwizzle + (2*(iSection&0x2)); // offset by 4 at section 2
// the y components of the two corners are swapped in this call
vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), i, fTileFarPlane);
// side edges use the vertex position itself as direction; otherwise the direction is a signed
// unit axis: sign is + when bit 1 of iSwizzle is clear, axis is X for iSwizzle 0 or 3 and Y for 1 or 2
vE0 = iSection==0 ? vP0 : (((iSwizzle&0x2)==0 ? 1.0f : (-1.0f))*((iSwizzle&0x1)==(iSwizzle>>1) ? float3(1,0,0) : float3(0,1,0)));
int CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
if(threadID==0) lightOffs2 = 0;
const bool bOnlyNeedFrustumSideEdges = true;
const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8; // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.
const int totNrEdgePairs = 12*nrFrustEdges;
for(int l=0; l<iNrCoarseLights; l++)
if(threadID==0) ldsIsLightInvisible=0;
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
const int idxCoarse = coarseList[l];
[branch]if(g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT) // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
SFiniteLightBound lgtDat = g_data[idxCoarse];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;
const float3 boxZ = -lgtDat.boxAxisZ.xyz; // flip axis (so it points away from the light direction for a spot-light)
const float3 center = lgtDat.center.xyz;
const float2 scaleXY = lgtDat.scaleXY;
for(int i=threadID; i<totNrEdgePairs; i+=NR_THREADS)
int e0 = (int) (((uint)i)/((uint) nrFrustEdges)); // should become a shift right
int e1 = i - e0*nrFrustEdges;
int idx_cur=0, idx_twin=0;
float3 vP0, vE0;
GetHullEdge(idx_cur, idx_twin, vP0, vE0, e0, boxX, boxY, boxZ, center, scaleXY);
float3 vP1, vE1;
GetFrustEdge(vP1, vE1, e1, viTilLL, viTilUR, fTileFarPlane);
// potential separation plane
float3 vN = cross(vE0, vE1);
int positive=0, negative=0;
for(int k=1; k<8; k++) // only need to test 7 verts (technically just 6).
int j = (idx_cur+k)&0x7;
float3 vPh = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, j);
float fSignDist = idx_twin==j ? 0.0 : dot(vN, vPh-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
int resh = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
positive=0; negative=0;
for(int j=0; j<8; j++)
float3 vPf = GetTileVertex(viTilLL, viTilUR, j, fTileFarPlane);
float fSignDist = dot(vN, vPf-vP0);
if(fSignDist>0) ++positive; else if(fSignDist<0) ++negative;
int resf = (positive>0 && negative>0) ? 0 : (positive>0 ? 1 : (negative>0 ? (-1) : 0));
bool bFoundSepPlane = (resh*resf)<0;
if(bFoundSepPlane) InterlockedOr(ldsIsLightInvisible, 1);
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
if(threadID==0 && ldsIsLightInvisible==0)
coarseList[lightOffs2++] = coarseList[l];
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
return lightOffs2;
[numthreads(1, 1, 1)]
void ClearAtomic(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)


fileFormatVersion: 2
guid: 0bb1b7e0ddcd5c44baf3ddc7456eb196
timeCreated: 1479689584
licenseType: Pro
currentAPIMask: 4


#ifndef __SHADERBASE_H__
#define __SHADERBASE_H__
#ifndef Texture2DMS
#define Texture2DMS MS_Texture2D
#ifndef SampleCmpLevelZero
#define SampleCmpLevelZero SampleCmpLOD0
#ifndef firstbithigh
#define firstbithigh FirstSetBit_Hi
// Loads the texel at pixCoord (mip level 0) from depthTexture and returns one minus its
// x component, i.e. the stored depth value inverted.
float FetchDepth(Texture2D depthTexture, uint2 pixCoord)
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0)).x;
// Multisampled variant of FetchDepth: loads sample sampleIdx of the texel at pixCoord from
// depthTexture and returns one minus its x component.
float FetchDepthMSAA(Texture2DMS<float> depthTexture, uint2 pixCoord, uint sampleIdx)
return 1 - depthTexture.Load(uint3(pixCoord.xy, 0), sampleIdx).x;


fileFormatVersion: 2
guid: 3c90176b54c922b4e9cf65c9ec9cb750
timeCreated: 1479691479
licenseType: Pro
defaultTextures: []


fileFormatVersion: 2
guid: fafbb144d7f66074785b7727293d89c5
folderAsset: yes
timeCreated: 1474297943
licenseType: Pro