浏览代码

throwing in fptl project

it's a render pipeline in the making
/main
mmikk 8 年前
当前提交
27ed7249
共有 26 个文件被更改,包括 2741 次插入和 1 次删除
  1. 3
      .gitignore
  2. 2
      Assets/TestScenes/ForwardRenderLoop/ForwardRenderLoop.unity
  3. 9
      Assets/ScriptableRenderLoop/common.meta
  4. 9
      Assets/ScriptableRenderLoop/fptl.meta
  5. 42
      Assets/ScriptableRenderLoop/common/ShaderBase.cs
  6. 12
      Assets/ScriptableRenderLoop/common/ShaderBase.cs.meta
  7. 22
      Assets/ScriptableRenderLoop/common/ShaderBase.h
  8. 20
      Assets/ScriptableRenderLoop/common/ShaderBase.h.meta
  9. 282
      Assets/ScriptableRenderLoop/common/TextureCache.cs
  10. 12
      Assets/ScriptableRenderLoop/common/TextureCache.cs.meta
  11. 554
      Assets/ScriptableRenderLoop/fptl/FptlLighting.cs
  12. 12
      Assets/ScriptableRenderLoop/fptl/FptlLighting.cs.meta
  13. 338
      Assets/ScriptableRenderLoop/fptl/Internal-DeferredReflections.shader
  14. 9
      Assets/ScriptableRenderLoop/fptl/Internal-DeferredReflections.shader.meta
  15. 309
      Assets/ScriptableRenderLoop/fptl/Internal-DeferredShading.shader
  16. 9
      Assets/ScriptableRenderLoop/fptl/Internal-DeferredShading.shader.meta
  17. 86
      Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs
  18. 12
      Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs.meta
  19. 457
      Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute
  20. 9
      Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute.meta
  21. 20
      Assets/ScriptableRenderLoop/fptl/renderloopfptl.asset
  22. 8
      Assets/ScriptableRenderLoop/fptl/renderloopfptl.asset.meta
  23. 497
      Assets/ScriptableRenderLoop/fptl/scrbound.compute
  24. 9
      Assets/ScriptableRenderLoop/fptl/scrbound.compute.meta

3
.gitignore


Library/*
Temp/*
*.csproj
*.sln
*.suo

2
Assets/TestScenes/ForwardRenderLoop/ForwardRenderLoop.unity


m_Script: {fileID: 11500000, guid: 92bb16b4ee20841929b24d6bd771738d, type: 3}
m_Name:
m_EditorClassIdentifier:
m_RenderLoop: {fileID: 11400000, guid: 873753be6b0da44d58b061c5ec3d12bc, type: 2}
m_RenderLoop: {fileID: 11400000, guid: 4dc14083116279a438e1b2de3830999f, type: 2}
--- !u!1 &1758207072
GameObject:
m_ObjectHideFlags: 0

9
Assets/ScriptableRenderLoop/common.meta


fileFormatVersion: 2
guid: 88c7531cefb4c7043bcfda092215f20c
folderAsset: yes
timeCreated: 1467917164
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

9
Assets/ScriptableRenderLoop/fptl.meta


fileFormatVersion: 2
guid: 4ae727bb0a95bdf4aada784867082de5
folderAsset: yes
timeCreated: 1467916766
licenseType: Pro
DefaultImporter:
userData:
assetBundleName:
assetBundleVariant:

42
Assets/ScriptableRenderLoop/common/ShaderBase.cs


#if !__HLSL
using UnityEngine;
// Two-component float vector (x, y), convertible from UnityEngine.Vector2.
// Field order is x, y.
public struct Vec2
{
    public float x, y;

    // Builds a vector from its two components.
    public Vec2(float fX, float fY)
    {
        x = fX;
        y = fY;
    }

    // Copy constructor.
    public Vec2(Vec2 v) : this(v.x, v.y)
    {
    }

    // Implicit conversion from Unity's Vector2.
    public static implicit operator Vec2(Vector2 v)
    {
        return new Vec2(v.x, v.y);
    }
}
// Three-component float vector (x, y, z), convertible from UnityEngine.Vector3
// and from UnityEngine.Vector4 (the w component is dropped).
// Field order is x, y, z.
public struct Vec3
{
    public float x, y, z;

    // Builds a vector from its three components.
    public Vec3(float fX, float fY, float fZ)
    {
        x = fX;
        y = fY;
        z = fZ;
    }

    // Copy constructor.
    public Vec3(Vec3 v) : this(v.x, v.y, v.z)
    {
    }

    // Implicit conversion from Unity's Vector3.
    public static implicit operator Vec3(Vector3 v)
    {
        return new Vec3(v.x, v.y, v.z);
    }

    // Implicit conversion from Unity's Vector4; only x, y and z are kept.
    public static implicit operator Vec3(Vector4 v)
    {
        return new Vec3(v.x, v.y, v.z);
    }
}
// Four-component float vector (x, y, z, w), convertible from UnityEngine.Vector4
// and from UnityEngine.Vector3 (w is set to 1).
// Field order is x, y, z, w.
public struct Vec4
{
    public float x, y, z, w;

    // Builds a vector from its four components.
    public Vec4(float fX, float fY, float fZ, float fW)
    {
        x = fX;
        y = fY;
        z = fZ;
        w = fW;
    }

    // Copy constructor.
    public Vec4(Vec4 v) : this(v.x, v.y, v.z, v.w)
    {
    }

    // Implicit conversion from Unity's Vector4.
    public static implicit operator Vec4(Vector4 v)
    {
        return new Vec4(v.x, v.y, v.z, v.w);
    }

    // Implicit conversion from Unity's Vector3; w becomes 1.
    public static implicit operator Vec4(Vector3 v)
    {
        return new Vec4(v.x, v.y, v.z, 1.0f);
    }
}
// 4x4 float matrix stored as four column vectors c0..c3, built from a Unity Matrix4x4.
public struct Mat44
{
    public Vec4 c0, c1, c2, c3;

    // Copies the four columns of m; each Vector4 column goes through the
    // implicit Vector4 -> Vec4 conversion.
    public Mat44(Matrix4x4 m)
    {
        c0 = m.GetColumn(0);
        c1 = m.GetColumn(1);
        c2 = m.GetColumn(2);
        c3 = m.GetColumn(3);
    }
}
#endif

12
Assets/ScriptableRenderLoop/common/ShaderBase.cs.meta


fileFormatVersion: 2
guid: c1ebf357fa7277c4697f21c295e8c4c2
timeCreated: 1467917164
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

22
Assets/ScriptableRenderLoop/common/ShaderBase.h


// Macro layer for headers that are compiled both as C# and as HLSL.
// Everything here applies to the HLSL side only.
#ifndef __SHADERBASE_H__
#define __SHADERBASE_H__
// Marks the translation unit as HLSL; ShaderBase.cs wraps its C# definitions in "#if !__HLSL".
#define __HLSL 1
// C# access modifier expands to nothing in HLSL.
#define public
// C#-side vector/matrix struct names map onto the native HLSL types.
#define Vec2 float2
#define Vec3 float3
#define Vec4 float4
#define Mat44 float4x4
#define unistruct cbuffer
#define hbool bool
#define _CB_REGSLOT(x) : register(x)
// NOTE(review): the argument x is ignored - this always expands to packoffset(c0) followed by ';'. Confirm this is intended.
#define _QALIGN(x) : packoffset(c0);
#endif

20
Assets/ScriptableRenderLoop/common/ShaderBase.h.meta


fileFormatVersion: 2
guid: 29c17a8a0e3839341bef6e5f440c7dac
timeCreated: 1467917168
licenseType: Pro
PluginImporter:
serializedVersion: 1
iconMap: {}
executionOrder: {}
isPreloaded: 0
platformData:
Any:
enabled: 1
settings: {}
Editor:
enabled: 0
settings:
DefaultValueInitialized: true
userData:
assetBundleName:
assetBundleVariant:

282
Assets/ScriptableRenderLoop/common/TextureCache.cs


using UnityEngine;
//using System;
using System.Collections.Generic;
// LRU cache of 2D textures stored as slices of a single Texture2DArray.
public class TextureCache2D : TextureCache
{
    // GPU array that holds one cached texture per slice.
    private Texture2DArray cache;

    // Copies every cached mip level of `texture` into slice `sliceIndex` of the array.
    public override void TransferToSlice(int sliceIndex, Texture texture)
    {
        for (int m = 0; m < m_numMipLevels; m++)
            Graphics.CopyTexture(texture, 0, m, cache, sliceIndex, m);
    }

    // Returns the backing texture array (null until AllocTextureArray has been called).
    public override Texture GetTexCache()
    {
        return cache;
    }

    // Allocates the slice bookkeeping and the backing Texture2DArray.
    // Returns false when numTextures is not positive (same result as the base AllocTextureArray).
    public bool AllocTextureArray(int numTextures, int width, int height, TextureFormat format, bool isMipMapped)
    {
        bool res = AllocTextureArray(numTextures);

        // Without a mip chain the array only has level 0; copying further levels
        // in TransferToSlice would address mips that do not exist.
        m_numMipLevels = isMipMapped ? GetNumMips(width, height) : 1;

        // Do not leak a previously allocated array when re-allocating.
        if (cache != null)
            Texture.DestroyImmediate(cache);

        cache = new Texture2DArray(width, height, numTextures, format, isMipMapped);
        cache.wrapMode = TextureWrapMode.Clamp;
        return res;
    }

    // Destroys the backing texture array. Safe to call more than once.
    public void Release()
    {
        if (cache != null)
        {
            Texture.DestroyImmediate(cache);
            cache = null;
        }
    }
}
// LRU cache of cubemaps stored as elements of a single CubemapArray.
public class TextureCacheCubemap : TextureCache
{
    // GPU array that holds one cached cubemap per slice (six faces each).
    private CubemapArray cache;

    // Copies all six faces and every cached mip level of `texture` into cubemap
    // `sliceIndex` of the array. Face f of cubemap s lives at element 6*s + f.
    public override void TransferToSlice(int sliceIndex, Texture texture)
    {
        for (int f = 0; f < 6; f++)
            for (int m = 0; m < m_numMipLevels; m++)
                Graphics.CopyTexture(texture, f, m, cache, 6 * sliceIndex + f, m);
    }

    // Returns the backing cubemap array (null until AllocTextureArray has been called).
    public override Texture GetTexCache()
    {
        return cache;
    }

    // Allocates the slice bookkeeping and the backing CubemapArray.
    // Returns false when numCubeMaps is not positive.
    public bool AllocTextureArray(int numCubeMaps, int width, int height, TextureFormat format, bool isMipMapped)
    {
        // One bookkeeping slice per cubemap. FetchSlice hands the slice index out as a
        // cubemap index and TransferToSlice multiplies it by 6 itself, so allocating
        // 6 * numCubeMaps slices would yield indices past the end of the array.
        bool res = AllocTextureArray(numCubeMaps);

        // Without a mip chain the array only has level 0.
        m_numMipLevels = isMipMapped ? GetNumMips(width, height) : 1;

        // Do not leak a previously allocated array when re-allocating.
        if (cache != null)
            Texture.DestroyImmediate(cache);

        cache = new CubemapArray(width, height, numCubeMaps, format, isMipMapped);
        cache.wrapMode = TextureWrapMode.Clamp;
        return res;
    }

    // Destroys the backing cubemap array. Safe to call more than once.
    public void Release()
    {
        if (cache != null)
        {
            Texture.DestroyImmediate(cache);
            cache = null;
        }
    }
}
abstract public class TextureCache : Object
{
// Number of mip levels the subclasses copy per slice in TransferToSlice; assigned in their AllocTextureArray.
protected int m_numMipLevels;
// Bookkeeping record for one slice of the cache.
private struct SSliceEntry
{
// ID of the texture currently held by the slice, or g_InvalidTexID when the slice is empty.
public uint TexID;
// Age counter: FetchSlice resets it to 0 when the slice is used, NewFrame increments it up to g_MaxFrameCount.
public uint CountLRU;
};
// Number of slices tracked (0 until AllocTextureArray succeeds).
private int m_numTextures;
// Permutation of slice indices; NewFrame moves slices with a non-zero CountLRU in front of those with CountLRU == 0.
private int [] m_SortedIdxArray;
// Per-slice records, indexed by slice index.
private SSliceEntry [] m_SliceArray;
// Maps a texture ID to the slice index that currently holds it.
Dictionary<uint, int> m_locatorInSliceArray;
// Largest uint value; marks a slice as "never used" and caps CountLRU.
private static uint g_MaxFrameCount = unchecked( (uint) (-1) );
// Texture ID treated as "no texture".
private static uint g_InvalidTexID = (uint) 0;
// Returns the slice index holding `texture`, uploading it into a recycled slice on a miss.
// Returns 0 when the texture's instance ID equals g_InvalidTexID, and -1 when the texture
// is not cached and every slice has already been claimed during the current frame.
public int FetchSlice(Texture texture)
{
    uint texId = (uint)texture.GetInstanceID();
    //assert(texId!=g_InvalidTexID);
    if (texId == g_InvalidTexID)
        return 0;

    bool isMiss = false;
    int slot;

    // Hit: the texture already lives in a slice.
    if (!m_locatorInSliceArray.TryGetValue(texId, out slot))
    {
        slot = -1;

        // Miss: take the first entry with a non-zero age. It will be the least recently
        // used entry since the array was pre-sorted (in linear time) in NewFrame().
        // Entries whose age is 0 were already snagged by a texture in this frame.
        for (int k = 0; k < m_numTextures; k++)
        {
            int candidate = m_SortedIdxArray[k];
            if (m_SliceArray[candidate].CountLRU == 0)
                continue;

            // When replacing an existing entry, forget its old texture first.
            uint evictedId = m_SliceArray[candidate].TexID;
            if (evictedId != g_InvalidTexID)
                m_locatorInSliceArray.Remove(evictedId);

            m_locatorInSliceArray.Add(texId, candidate);
            m_SliceArray[candidate].TexID = texId;
            slot = candidate;
            isMiss = true;
            break;
        }
    }

    if (slot >= 0)
    {
        // Mark the slice as in use this frame.
        m_SliceArray[slot].CountLRU = 0;

        // On a miss, transfer the source texture into the recycled slice.
        if (isMiss)
            TransferToSlice(slot, texture);
    }

    return slot;
}
// Per-frame maintenance: stably partitions m_SortedIdxArray so that slices not used
// since the last call (CountLRU != 0) come before the ones that were used (CountLRU == 0),
// then ages every slice by one frame, saturating at g_MaxFrameCount.
public void NewFrame()
{
    // Snapshot the current order and count the slices that were not touched.
    int[] previousOrder = new int[m_numTextures];
    int untouchedCount = 0;
    for (int i = 0; i < m_numTextures; i++)
    {
        int slice = m_SortedIdxArray[i];
        previousOrder[i] = slice;
        if (m_SliceArray[slice].CountLRU != 0)
            untouchedCount++;
    }

    // Untouched slices fill the front, touched slices follow; relative order is kept.
    int frontCursor = 0;
    int backCursor = untouchedCount;
    for (int i = 0; i < m_numTextures; i++)
    {
        int slice = previousOrder[i];
        if (m_SliceArray[slice].CountLRU == 0)
        {
            m_SortedIdxArray[backCursor] = slice;
            backCursor++;
        }
        else
        {
            m_SortedIdxArray[frontCursor] = slice;
            frontCursor++;
        }
    }

    // Advance every slice to the next frame.
    for (int i = 0; i < m_numTextures; i++)
    {
        if (m_SliceArray[i].CountLRU < g_MaxFrameCount)
            m_SliceArray[i].CountLRU++;
    }

    //for(int q=1; q<m_numTextures; q++)
    // assert(m_SliceArray[m_SortedIdxArray[q-1]].CountLRU>=m_SliceArray[m_SortedIdxArray[q]].CountLRU);
}
// Creates an empty cache: no slices and no mip levels until AllocTextureArray is called.
public TextureCache()
{
m_numTextures=0;
m_numMipLevels=0;
}
// Hook for subclasses: copy `texture` into slice `sliceIndex` of the backing array.
// FetchSlice calls this on a cache miss. The base implementation does nothing.
public virtual void TransferToSlice(int sliceIndex, Texture texture)
{
}
// Hook for subclasses: return the backing texture array. The base implementation returns null.
public virtual Texture GetTexCache()
{
return null;
}
// Allocates the bookkeeping for `numTextures` slices and marks all of them as
// empty and never used. Returns false, leaving the state untouched, when numTextures <= 0.
protected bool AllocTextureArray(int numTextures)
{
    if (numTextures <= 0)
        return false;

    m_SliceArray = new SSliceEntry[numTextures];
    m_SortedIdxArray = new int[numTextures];
    m_locatorInSliceArray = new Dictionary<uint, int>();
    m_numTextures = numTextures;

    for (int slice = 0; slice < m_numTextures; slice++)
    {
        m_SortedIdxArray[slice] = slice;                    // identity order to start with
        m_SliceArray[slice].TexID = g_InvalidTexID;         // holds no texture
        m_SliceArray[slice].CountLRU = g_MaxFrameCount;     // never used before
    }

    return true;
}
// Evicts `texture` from the cache, if it is cached, and moves its slice to the front of
// the LRU order so that it is the first one to be recycled.
// Should not really be used in general. Assuming lights are culled properly entries will
// automatically be replaced efficiently.
public void RemoveEntryFromSlice(Texture texture)
{
    uint TexID = (uint)texture.GetInstanceID();
    //assert(TexID!=g_InvalidTexID);
    if (TexID == g_InvalidTexID) return;

    // search for existing copy
    int sliceIndex;
    if (!m_locatorInSliceArray.TryGetValue(TexID, out sliceIndex)) return;
    //assert(m_SliceArray[sliceIndex].TexID==TexID);

    // locate the entry in m_SortedIdxArray
    int i = 0;
    while (i < m_numTextures && m_SortedIdxArray[i] != sliceIndex) ++i;
    if (i >= m_numTextures) return;

    // Relocate sliceIndex to the front of m_SortedIdxArray since CountLRU will be set to
    // maximum. Shift entries [0, i) one step towards the back, walking from the back:
    // a forward walk would copy element 0 over every position it visits and corrupt
    // the permutation.
    for (int j = i; j > 0; j--) { m_SortedIdxArray[j] = m_SortedIdxArray[j - 1]; }
    m_SortedIdxArray[0] = sliceIndex;

    // delete from m_locatorInSliceArray and m_SliceArray.
    m_locatorInSliceArray.Remove(TexID);
    m_SliceArray[sliceIndex].CountLRU = g_MaxFrameCount; // never used before
    m_SliceArray[sliceIndex].TexID = g_InvalidTexID;
}
// Number of mip levels for a width x height texture, based on the larger dimension.
protected int GetNumMips(int width, int height)
{
    int largest = height;
    if (width > height)
        largest = width;
    return GetNumMips(largest);
}

// Number of mip levels for a texture whose largest dimension is `dim`:
// the count of halvings until the (unsigned) dimension reaches zero.
protected int GetNumMips(int dim)
{
    int levels = 0;
    for (uint remaining = (uint)dim; remaining > 0; remaining >>= 1)
        levels++;
    return levels;
}
}

12
Assets/ScriptableRenderLoop/common/TextureCache.cs.meta


fileFormatVersion: 2
guid: f7748b8cc626e65419cc66c679de56e5
timeCreated: 1467917164
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

554
Assets/ScriptableRenderLoop/fptl/FptlLighting.cs


using UnityEngine;
using UnityEngine.Rendering;
using System.Collections;
using UnityEditor;
namespace UnityEngine.ScriptableRenderLoop
{
//[ExecuteInEditMode]
public class FptlLighting : ScriptableRenderLoop
{
// Editor menu entry: creates a new FptlLighting asset under Assets/.
[MenuItem("Renderloop/CreateRenderLoopFPTL")]
static void CreateRenderLoopFPTL()
{
    FptlLighting renderLoopAsset = ScriptableObject.CreateInstance<FptlLighting>();

    // Alternative target path kept for reference:
    //AssetDatabase.CreateAsset(instance, "Assets/ScriptableRenderLoop/fptl/renderloopfptl.asset");
    AssetDatabase.CreateAsset(renderLoopAsset, "Assets/renderloopfptl.asset");
}
// Shaders assigned on the asset; Rebuild() creates the materials and looks up the compute kernels from them.
public Shader m_DeferredShader;
public Shader m_DeferredReflectionShader;
public ComputeShader m_BuildScreenAABBShader;
public ComputeShader m_BuildPerTileLightListShader;
// Materials created in Rebuild() from the two shaders above and destroyed in OnDisable().
private Material m_DeferredMaterial;
private Material m_DeferredReflectionMaterial;
// Shader property IDs, resolved in Rebuild() via Shader.PropertyToID.
static private int kGBufferAlbedo;
static private int kGBufferSpecRough;
static private int kGBufferNormal;
static private int kGBufferEmission;
static private int kGBufferZ;
static private int kCameraDepthTexture;
// Compute kernel indices, resolved in Rebuild() via FindKernel.
static private int kGenAABBKernel;
static private int kGenListPerTileKernel;
// GPU buffers allocated in Rebuild() and released in OnDisable().
// NOTE(review): these are static but allocated/released per instance - confirm only one instance is ever live.
static private ComputeBuffer m_lightDataBuffer;
static private ComputeBuffer m_convexBoundsBuffer;
static private ComputeBuffer m_aabbBoundsBuffer;
static private ComputeBuffer lightList;
// Element count used when allocating the light data and bounds buffers in Rebuild().
public const int gMaxNumLights = 1024;
// Stand-in used by GenerateSourceLightBuffers for tan/cotan when the divisor is not positive.
public const float gFltMax = 3.402823466e+38F;
// Texture caches for spot cookies, point (cube) cookies and reflection probe cubemaps.
private TextureCache2D m_cookieTexArray;
private TextureCacheCubemap m_cubeCookieTexArray;
private TextureCacheCubemap m_cubeReflTexArray;
// Both callbacks simply (re)build all GPU resources via Rebuild().
void OnEnable()
{
Rebuild();
}
void OnValidate()
{
Rebuild();
}
// (Re)creates everything the render loop needs: shader property IDs, the two deferred
// materials, compute kernels, GPU buffers and the cookie/reflection texture caches, and
// wires the buffers and texture arrays into the materials and compute shaders.
// Called from OnEnable and OnValidate, so it may run many times on the same instance.
void Rebuild()
{
    kGBufferAlbedo = Shader.PropertyToID("_CameraGBufferTexture0");
    kGBufferSpecRough = Shader.PropertyToID("_CameraGBufferTexture1");
    kGBufferNormal = Shader.PropertyToID("_CameraGBufferTexture2");
    kGBufferEmission = Shader.PropertyToID("_CameraGBufferTexture3");
    kGBufferZ = Shader.PropertyToID("_CameraGBufferZ"); // used while rendering into G-buffer+
    kCameraDepthTexture = Shader.PropertyToID("_CameraDepthTexture"); // copy of that for later sampling in shaders

    // Drop whatever a previous Rebuild() allocated, otherwise every OnValidate leaks
    // the materials, compute buffers and texture arrays of the previous run.
    ReleaseRebuildResources();

    // A freshly created asset has no shaders assigned yet; nothing can be built until
    // they are, and OnValidate will call Rebuild() again once they have been set.
    if (m_DeferredShader == null || m_DeferredReflectionShader == null ||
        m_BuildScreenAABBShader == null || m_BuildPerTileLightListShader == null)
        return;

    // RenderLoop.renderLoopDelegate += ExecuteRenderLoop;
    //var deferredShader = GraphicsSettings.GetCustomShader (BuiltinShaderType.DeferredShading);
    var deferredShader = m_DeferredShader;
    var deferredReflectionShader = m_DeferredReflectionShader;
    m_DeferredMaterial = new Material(deferredShader);
    m_DeferredReflectionMaterial = new Material(deferredReflectionShader);

    kGenAABBKernel = m_BuildScreenAABBShader.FindKernel("ScreenBoundsAABB");
    kGenListPerTileKernel = m_BuildPerTileLightListShader.FindKernel("TileLightListGen");

    m_aabbBoundsBuffer = new ComputeBuffer(2 * gMaxNumLights, 3 * sizeof(float));
    m_convexBoundsBuffer = new ComputeBuffer(gMaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
    m_lightDataBuffer = new ComputeBuffer(gMaxNumLights, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightData)));
    lightList = new ComputeBuffer(LightDefinitions.NR_LIGHT_MODELS * 1024 * 1024, sizeof(uint)); // enough list memory for a 4k x 4k display

    m_BuildScreenAABBShader.SetBuffer(kGenAABBKernel, "g_data", m_convexBoundsBuffer);
    //m_BuildScreenAABBShader.SetBuffer(kGenAABBKernel, "g_vBoundsBuffer", m_aabbBoundsBuffer);
    m_DeferredMaterial.SetBuffer("g_vLightData", m_lightDataBuffer);
    m_DeferredReflectionMaterial.SetBuffer("g_vLightData", m_lightDataBuffer);
    m_BuildPerTileLightListShader.SetBuffer(kGenListPerTileKernel, "g_vBoundsBuffer", m_aabbBoundsBuffer);
    m_BuildPerTileLightListShader.SetBuffer(kGenListPerTileKernel, "g_vLightData", m_lightDataBuffer);

    m_cookieTexArray = new TextureCache2D();
    m_cubeCookieTexArray = new TextureCacheCubemap();
    m_cubeReflTexArray = new TextureCacheCubemap();
    m_cookieTexArray.AllocTextureArray(8, 128, 128, TextureFormat.Alpha8, true);
    m_cubeCookieTexArray.AllocTextureArray(4, 512, 512, TextureFormat.Alpha8, true);
    m_cubeReflTexArray.AllocTextureArray(64, 128, 128, TextureFormat.DXT5, true);

    m_DeferredMaterial.SetTexture("_spotCookieTextures", m_cookieTexArray.GetTexCache());
    m_DeferredMaterial.SetTexture("_pointCookieTextures", m_cubeCookieTexArray.GetTexCache());
    m_DeferredReflectionMaterial.SetTexture("_reflCubeTextures", m_cubeReflTexArray.GetTexCache());
}

// Releases everything a previous Rebuild() allocated and clears the references.
void ReleaseRebuildResources()
{
    if (m_DeferredMaterial != null) DestroyImmediate(m_DeferredMaterial);
    if (m_DeferredReflectionMaterial != null) DestroyImmediate(m_DeferredReflectionMaterial);
    m_DeferredMaterial = null;
    m_DeferredReflectionMaterial = null;

    // The texture caches derive from UnityEngine.Object but are created with `new`;
    // compare as plain object references so Unity's overloaded == is not involved.
    if ((object)m_cookieTexArray != null) m_cookieTexArray.Release();
    if ((object)m_cubeCookieTexArray != null) m_cubeCookieTexArray.Release();
    if ((object)m_cubeReflTexArray != null) m_cubeReflTexArray.Release();
    m_cookieTexArray = null;
    m_cubeCookieTexArray = null;
    m_cubeReflTexArray = null;

    if (m_aabbBoundsBuffer != null) m_aabbBoundsBuffer.Release();
    if (m_convexBoundsBuffer != null) m_convexBoundsBuffer.Release();
    if (m_lightDataBuffer != null) m_lightDataBuffer.Release();
    if (lightList != null) lightList.Release();
    m_aabbBoundsBuffer = null;
    m_convexBoundsBuffer = null;
    m_lightDataBuffer = null;
    lightList = null;
}
// Releases the materials, texture caches and compute buffers created by Rebuild().
// Every resource is checked first: Rebuild() may not have completed (for example when
// the shaders are not assigned yet), in which case some of these are still null.
void OnDisable()
{
    // RenderLoop.renderLoopDelegate -= ExecuteRenderLoop;
    if (m_DeferredMaterial != null) DestroyImmediate(m_DeferredMaterial);
    if (m_DeferredReflectionMaterial != null) DestroyImmediate(m_DeferredReflectionMaterial);
    m_DeferredMaterial = null;
    m_DeferredReflectionMaterial = null;

    // The texture caches derive from UnityEngine.Object but are created with `new`;
    // compare as plain object references so Unity's overloaded == is not involved.
    if ((object)m_cookieTexArray != null) m_cookieTexArray.Release();
    if ((object)m_cubeCookieTexArray != null) m_cubeCookieTexArray.Release();
    if ((object)m_cubeReflTexArray != null) m_cubeReflTexArray.Release();
    m_cookieTexArray = null;
    m_cubeCookieTexArray = null;
    m_cubeReflTexArray = null;

    // Clear the references after releasing so that a later release is not attempted twice.
    if (m_aabbBoundsBuffer != null) m_aabbBoundsBuffer.Release();
    if (m_convexBoundsBuffer != null) m_convexBoundsBuffer.Release();
    if (m_lightDataBuffer != null) m_lightDataBuffer.Release();
    if (lightList != null) lightList.Release();
    m_aabbBoundsBuffer = null;
    m_convexBoundsBuffer = null;
    m_lightDataBuffer = null;
    lightList = null;
}
// Records into `cmd` the allocation of the four G-buffer colour targets and the two depth
// targets, binds the colour targets plus kGBufferZ as the render target and clears them.
static void SetupGBuffer(CommandBuffer cmd)
{
    // Use the 10:10:10:2 format for normals/emission when available, otherwise ARGB32.
    RenderTextureFormat packedFormat =
        SystemInfo.SupportsRenderTextureFormat(RenderTextureFormat.ARGB2101010)
            ? RenderTextureFormat.ARGB2101010
            : RenderTextureFormat.ARGB32;

    //@TODO: GetGraphicsCaps().buggyMRTSRGBWriteFlag
    cmd.GetTemporaryRT(kGBufferAlbedo, -1, -1, 0, FilterMode.Point, RenderTextureFormat.ARGB32, RenderTextureReadWrite.Default);
    cmd.GetTemporaryRT(kGBufferSpecRough, -1, -1, 0, FilterMode.Point, RenderTextureFormat.ARGB32, RenderTextureReadWrite.Default);
    cmd.GetTemporaryRT(kGBufferNormal, -1, -1, 0, FilterMode.Point, packedFormat, RenderTextureReadWrite.Linear);
    cmd.GetTemporaryRT(kGBufferEmission, -1, -1, 0, FilterMode.Point, packedFormat, RenderTextureReadWrite.Linear); //@TODO: HDR

    // Depth used while rendering the G-buffer, and the copy sampled later by shaders.
    cmd.GetTemporaryRT(kGBufferZ, -1, -1, 24, FilterMode.Point, RenderTextureFormat.Depth);
    cmd.GetTemporaryRT(kCameraDepthTexture, -1, -1, 24, FilterMode.Point, RenderTextureFormat.Depth);

    RenderTargetIdentifier[] colorTargets =
    {
        kGBufferAlbedo,
        kGBufferSpecRough,
        kGBufferNormal,
        kGBufferEmission
    };
    RenderTargetIdentifier depthTarget = new RenderTargetIdentifier(kGBufferZ);

    cmd.SetRenderTarget(colorTargets, depthTarget);
    cmd.ClearRenderTarget(true, true, new Color(0, 0, 0, 0));
    //@TODO: render VR occlusion mesh
}
// Creates and binds the G-buffer, then draws the opaque renderers with the "Deferred" pass.
static void RenderGBuffer(CullResults cull, Camera camera, RenderLoop loop)
{
    // Set up the G-buffer targets.
    var gbufferSetup = new CommandBuffer { name = "Create G-Buffer" };
    SetupGBuffer(gbufferSetup);
    loop.ExecuteCommandBuffer(gbufferSetup);
    gbufferSetup.Dispose();

    // Render opaque objects using the Deferred pass.
    var drawSettings = new DrawRendererSettings(cull, camera, new ShaderPassName("Deferred"));
    drawSettings.sorting.sortOptions = SortOptions.SortByMaterialThenMesh;
    drawSettings.inputCullingOptions.SetQueuesOpaque();
    loop.DrawRenderers(ref drawSettings);
}
// Copies the G-buffer depth target (kGBufferZ) into kCameraDepthTexture.
static void CopyDepthAfterGBuffer(RenderLoop loop)
{
    var source = new RenderTargetIdentifier(kGBufferZ);
    var destination = new RenderTargetIdentifier(kCameraDepthTexture);

    var depthCopy = new CommandBuffer();
    depthCopy.CopyTexture(source, destination);
    loop.ExecuteCommandBuffer(depthCopy);
    depthCopy.Dispose();
}
// Binds the per-tile light list to both deferred materials, publishes the view and
// screen-projection matrices as globals, and blits kGBufferEmission to the current
// target, first with the deferred material and then with the reflection material.
void DoTiledDeferredLighting(RenderLoop loop, Matrix4x4 viewToWorld, Matrix4x4 scrProj, Matrix4x4 incScrProj, ComputeBuffer lightList)
{
    m_DeferredMaterial.SetBuffer("g_vLightList", lightList);
    m_DeferredReflectionMaterial.SetBuffer("g_vLightList", lightList);

    Matrix4x4 worldToView = viewToWorld.inverse;

    var lightingCmd = new CommandBuffer();
    //cmd.SetRenderTarget(new RenderTargetIdentifier(kGBufferEmission), new RenderTargetIdentifier(kGBufferZ));
    lightingCmd.SetGlobalMatrix("g_mViewToWorld", viewToWorld);
    lightingCmd.SetGlobalMatrix("g_mWorldToView", worldToView);
    lightingCmd.SetGlobalMatrix("g_mScrProjection", scrProj);
    lightingCmd.SetGlobalMatrix("g_mInvScrProjection", incScrProj);

    //cmd.Blit (kGBufferNormal, (RenderTexture)null); // debug: display normals
    lightingCmd.Blit(kGBufferEmission, (RenderTexture)null, m_DeferredMaterial, 0);
    lightingCmd.Blit(kGBufferEmission, (RenderTexture)null, m_DeferredReflectionMaterial, 0);

    loop.ExecuteCommandBuffer(lightingCmd);
    lightingCmd.Dispose();
}
// Records a command that uploads `mat` to the compute shader parameter `name` as 16
// floats, column after column (element [row, col] goes to index 4 * col + row).
void SetMatrixCS(CommandBuffer cmd, ComputeShader shadercs, string name, Matrix4x4 mat)
{
    float[] columnMajor = new float[16];
    for (int k = 0; k < 16; k++)
    {
        int row = k & 3;
        int col = k >> 2;
        columnMajor[k] = mat[row, col];
    }
    cmd.SetComputeFloatParams(shadercs, name, columnMajor);
}
// Fills one SFiniteLightData / SFiniteLightBound entry per culled light and per reflection
// probe that has a cubemap, all expressed in camera (view) space, uploads both arrays to
// m_convexBoundsBuffer and m_lightDataBuffer, and returns the number of lights plus the
// number of probes that were written.
// NOTE(review): numVolumes is not clamped to gMaxNumLights, which is the element count the
// two buffers are allocated with in Rebuild() - confirm what SetData does when it is exceeded.
int GenerateSourceLightBuffers(Camera camera, CullResults inputs)
{
ReflectionProbe[] probes = Object.FindObjectsOfType<ReflectionProbe>();
int numLights = inputs.culledLights.Length;
int numProbes = probes.Length;
int numVolumes = numLights + numProbes;
SFiniteLightData[] lightData = new SFiniteLightData[numVolumes];
SFiniteLightBound[] boundData = new SFiniteLightBound[numVolumes];
Matrix4x4 worldToView = camera.worldToCameraMatrix;
int i = 0;
foreach (var cl in inputs.culledLights)
{
float range = cl.range;
Matrix4x4 lightToWorld = cl.localToWorld;
//Matrix4x4 worldToLight = l.worldToLocal;
Vector3 lightPos = lightToWorld.GetColumn(3);
// defaults; the spot and point branches below overwrite the bound fields
boundData[i].vBoxAxisX = new Vec3(1, 0, 0);
boundData[i].vBoxAxisY = new Vec3(0, 1, 0);
boundData[i].vBoxAxisZ = new Vec3(0, 0, 1);
boundData[i].vScaleXY = new Vec2(1.0f, 1.0f);
boundData[i].fRadius = range;
lightData[i].flags = 0;
lightData[i].fRecipRange = 1.0f / range;
lightData[i].vCol = new Vec3(cl.finalColor.r, cl.finalColor.g, cl.finalColor.b);
lightData[i].iSliceIndex = 0;
lightData[i].uLightModel = (uint)LightDefinitions.DIRECT_LIGHT;
bool bHasCookie = cl.light.cookie != null;
if (cl.lightType == LightType.Spot)
{
bool bIsCircularSpot = !bHasCookie;
if (!bIsCircularSpot) // square spots always have cookie
{
lightData[i].iSliceIndex = m_cookieTexArray.FetchSlice(cl.light.cookie);
}
Vector3 lightDir = lightToWorld.GetColumn(2); // Z axis in world space
// represents a left hand coordinate system in world space
Vector3 vx = lightToWorld.GetColumn(0); // X axis in world space
Vector3 vy = lightToWorld.GetColumn(1); // Y axis in world space
Vector3 vz = lightDir; // Z axis in world space
// transform to camera space (becomes a left hand coordinate frame in Unity since Determinant(worldToView)<0)
vx = worldToView.MultiplyVector(vx);
vy = worldToView.MultiplyVector(vy);
vz = worldToView.MultiplyVector(vz);
const float pi = 3.1415926535897932384626433832795f;
const float degToRad = (float)(pi / 180.0);
const float radToDeg = (float)(180.0 / pi);
//float sa = cl.GetSpotAngle(); // total field of view from left to right side
float sa = radToDeg * (2 * Mathf.Acos(1.0f / cl.invCosHalfSpotAngle)); // spot angle doesn't exist in the structure so reversing it for now.
// cosine, sine, tangent and cotangent of the half spot angle
float cs = Mathf.Cos(0.5f * sa * degToRad);
float si = Mathf.Sin(0.5f * sa * degToRad);
float ta = cs > 0.0f ? (si / cs) : gFltMax;
float cota = si > 0.0f ? (cs / si) : gFltMax;
//const float cotasa = l.GetCotanHalfSpotAngle();
// apply nonuniform scale to OBB of spot light
bool bSqueeze = sa < 0.7f * 90.0f; // arb heuristic
float fS = bSqueeze ? ta : si;
boundData[i].vCen = worldToView.MultiplyPoint(lightPos + ((0.5f * range) * lightDir)); // use mid point of the spot as the center of the bounding volume for building screen-space AABB for tiled lighting.
lightData[i].vLaxisX = vx;
lightData[i].vLaxisY = vy;
lightData[i].vLaxisZ = vz;
// scale axis to match box or base of pyramid
boundData[i].vBoxAxisX = (fS * range) * vx;
boundData[i].vBoxAxisY = (fS * range) * vy;
boundData[i].vBoxAxisZ = (0.5f * range) * vz;
// generate bounding sphere radius
float fAltDx = si;
float fAltDy = cs;
fAltDy = fAltDy - 0.5f;
//if(fAltDy<0) fAltDy=-fAltDy;
fAltDx *= range; fAltDy *= range;
float fAltDist = Mathf.Sqrt(fAltDy * fAltDy + (bIsCircularSpot ? 1.0f : 2.0f) * fAltDx * fAltDx);
boundData[i].fRadius = fAltDist > (0.5f * range) ? fAltDist : (0.5f * range); // will always pick fAltDist
boundData[i].vScaleXY = bSqueeze ? new Vec2(0.01f, 0.01f) : new Vec2(1.0f, 1.0f);
// fill up ldata
lightData[i].uLightType = (uint)LightDefinitions.SPOT_LIGHT;
lightData[i].vLpos = worldToView.MultiplyPoint(lightPos);
lightData[i].fSphRadiusSq = range * range;
lightData[i].fPenumbra = cs;
lightData[i].cotan = cota;
lightData[i].flags |= (bIsCircularSpot ? LightDefinitions.IS_CIRCULAR_SPOT_SHAPE : 0);
lightData[i].flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
}
else if (cl.lightType == LightType.Point)
{
if (bHasCookie)
{
lightData[i].iSliceIndex = m_cubeCookieTexArray.FetchSlice(cl.light.cookie);
}
boundData[i].vCen = worldToView.MultiplyPoint(lightPos);
boundData[i].vBoxAxisX = new Vec3(range, 0, 0);
boundData[i].vBoxAxisY = new Vec3(0, range, 0);
boundData[i].vBoxAxisZ = new Vec3(0, 0, -range); // transform to camera space (becomes a left hand coordinate frame in Unity since Determinant(worldToView)<0)
boundData[i].vScaleXY = new Vec2(1.0f, 1.0f);
boundData[i].fRadius = range;
// fill up ldata
lightData[i].uLightType = (uint)LightDefinitions.SPHERE_LIGHT;
lightData[i].vLpos = boundData[i].vCen;
lightData[i].fSphRadiusSq = range * range;
lightData[i].flags |= (bHasCookie ? LightDefinitions.HAS_COOKIE_TEXTURE : 0);
}
else
{
// NOTE(review): any other light type still occupies entry i (see ++i below) with only the
// defaults set above; vCen and uLightType keep their zero-initialised values - confirm intended.
//Assert(false);
}
++i;
}
// probe.m_BlendDistance
// Vector3f extents = 0.5*Abs(probe.m_BoxSize);
// C center of rendered refl box <-- GetComponent (Transform).GetPosition() + m_BoxOffset;
// P parameter position to shader: GetComponent (Transform).GetPosition()
// shader parameter min and max are C+/-(extents+blendDistance)
//Vector3[] Ps = new Vector3[3] { new Vector3(6.28f, -1.18f, -5.67f), new Vector3(14.23f, -1.18f, 0.21f), new Vector3(6.28f, -1.18f, 1.91f) };
//Vector3[] boxSizes = new Vector3[3] { new Vector3(20.0f, 10.0f, 10.0f), new Vector3(10.0f, 10.0f, 10.0f), new Vector3(10.0f, 10.0f, 10.0f) };
//Vector3 boxOffset = new Vector3(0.0f, 0.0f, 0.0f);
//float[] bd = new float[3] { 4.0f, 4.0f, 4.0f };
// Probes are appended after the lights; probes without a cubemap are skipped and take no entry.
int numProbesOut = 0;
foreach (var rl in probes)
{
Texture cubemap = rl.mode == ReflectionProbeMode.Custom ? rl.customBakedTexture : rl.bakedTexture;
if (cubemap != null) // always a box for now
{
i = numProbesOut + numLights;
lightData[i].flags = 0;
Bounds bnds = rl.bounds;
Vector3 boxOffset = rl.center;
float blendDistance = rl.blendDistance;
float imp = rl.importance;
// implicit in CalculateHDRDecodeValues() --> float ints = rl.intensity;
bool boxProj = rl.boxProjection;
Vector4 decodeVals = rl.CalculateHDRDecodeValues();
Vector3 e = bnds.extents; // 0.5f * Vector3.Max(-boxSizes[p], boxSizes[p]);
Vector3 C = bnds.center; // P + boxOffset;
Vector3 posForShaderParam = bnds.center - boxOffset; // gives same as rl.GetComponent<Transform>().position;
Vector3 combinedExtent = e + new Vector3(blendDistance, blendDistance, blendDistance);
Vector3 vx = new Vector3(1, 0, 0); // always axis aligned in world space for now
Vector3 vy = new Vector3(0, 1, 0);
Vector3 vz = new Vector3(0, 0, 1);
// transform to camera space (becomes a left hand coordinate frame in Unity since Determinant(worldToView)<0)
vx = worldToView.MultiplyVector(vx);
vy = worldToView.MultiplyVector(vy);
vz = worldToView.MultiplyVector(vz);
Vector3 Cw = worldToView.MultiplyPoint(C);
if (boxProj) lightData[i].flags |= LightDefinitions.IS_BOX_PROJECTED;
lightData[i].vLpos = Cw;
lightData[i].vLaxisX = vx;
lightData[i].vLaxisY = vy;
lightData[i].vLaxisZ = vz;
lightData[i].vProbeBoxOffset = boxOffset;
lightData[i].fProbeBlendDistance = blendDistance;
lightData[i].fLightIntensity = decodeVals.x;
lightData[i].fDecodeExp = decodeVals.y;
lightData[i].iSliceIndex = m_cubeReflTexArray.FetchSlice(cubemap);
// delta equals (blendDistance, blendDistance, blendDistance), so vBoxInvRange is 1 / blendDistance per axis.
// NOTE(review): a blendDistance of 0 makes this a division by zero - confirm the shader copes with the result.
Vector3 delta = combinedExtent - e;
lightData[i].vBoxInnerDist = e;
lightData[i].vBoxInvRange = new Vec3(1.0f / delta.x, 1.0f / delta.y, 1.0f / delta.z);
boundData[i].vCen = Cw;
boundData[i].vBoxAxisX = combinedExtent.x * vx;
boundData[i].vBoxAxisY = combinedExtent.y * vy;
boundData[i].vBoxAxisZ = combinedExtent.z * vz;
boundData[i].vScaleXY = new Vec2(1.0f, 1.0f);
boundData[i].fRadius = combinedExtent.magnitude;
// fill up ldata
lightData[i].uLightType = (uint)LightDefinitions.BOX_LIGHT;
lightData[i].uLightModel = (uint)LightDefinitions.REFLECTION_LIGHT;
++numProbesOut;
}
}
// Upload the full arrays; only the first numLights + numProbesOut entries were filled in.
m_convexBoundsBuffer.SetData(boundData);
m_lightDataBuffer.SetData(lightData);
return numLights + numProbesOut;
}
/* public override void Render(Camera[] cameras, RenderLoop renderLoop)
{
foreach (var camera in cameras)
{
CullResults cullResults;
CullingParameters cullingParams;
if (!CullResults.GetCullingParameters(camera, out cullingParams))
continue;
m_ShadowPass.UpdateCullingParameters(ref cullingParams);
cullResults = CullResults.Cull(ref cullingParams, renderLoop);
ShadowOutput shadows;
m_ShadowPass.Render(renderLoop, cullResults, out shadows);
renderLoop.SetupCameraProperties(camera);
UpdateLightConstants(cullResults.culledLights, ref shadows);
DrawRendererSettings settings = new DrawRendererSettings(cullResults, camera, new ShaderPassName("ForwardBase"));
settings.rendererConfiguration = RendererConfiguration.ConfigureOneLightProbePerRenderer | RendererConfiguration.ConfigureReflectionProbesProbePerRenderer;
settings.sorting.sortOptions = SortOptions.SortByMaterialThenMesh;
renderLoop.DrawRenderers(ref settings);
renderLoop.Submit();
}
// Post effects
}*/
// Entry point of the render loop: for each camera that yields culling parameters and a
// successful cull, runs ExecuteRenderLoop. Cameras failing either step are skipped.
public override void Render(Camera[] cameras, RenderLoop renderLoop)
{
    foreach (Camera camera in cameras)
    {
        CullingParameters cullingParams;
        if (!CullResults.GetCullingParameters(camera, out cullingParams))
        {
            continue;
        }

        CullResults cullResults;
        if (!CullResults.Cull(camera, renderLoop, out cullResults))
        {
            continue;
        }

        ExecuteRenderLoop(camera, cullResults, renderLoop);
    }
}
// Renders one camera: G-buffer pass, depth copy, light/probe upload, the two compute
// passes (screen-space AABBs, then per-tile light lists over 16x16 pixel tiles), the
// tiled deferred lighting blits, and finally submits the loop.
void ExecuteRenderLoop(Camera camera, CullResults cullResults, RenderLoop loop)
{
    // do anything we need to do upon a new frame.
    NewFrame();

    // HDR uses One/One; otherwise DstColor/Zero.
    int srcBlend = camera.hdr ? (int)BlendMode.One : (int)BlendMode.DstColor;
    int dstBlend = camera.hdr ? (int)BlendMode.One : (int)BlendMode.Zero;
    m_DeferredMaterial.SetInt("_SrcBlend", srcBlend);
    m_DeferredMaterial.SetInt("_DstBlend", dstBlend);
    m_DeferredReflectionMaterial.SetInt("_SrcBlend", srcBlend);
    m_DeferredReflectionMaterial.SetInt("_DstBlend", dstBlend);

    loop.SetupCameraProperties(camera);
    RenderGBuffer(cullResults, camera, loop);
    //@TODO: render forward-only objects into depth buffer
    CopyDepthAfterGBuffer(loop);
    //@TODO: render reflection probes
    //RenderLighting(camera, inputs, loop);
    //

    Matrix4x4 cameraProj = camera.projectionMatrix;

    // Projection with z scaled and biased by 0.5 (x and y untouched).
    Matrix4x4 depthRemap = new Matrix4x4();
    depthRemap.SetRow(0, new Vector4(1.0f, 0.0f, 0.0f, 0.0f));
    depthRemap.SetRow(1, new Vector4(0.0f, 1.0f, 0.0f, 0.0f));
    depthRemap.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
    depthRemap.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));
    Matrix4x4 projh = depthRemap * cameraProj;
    Matrix4x4 invProjh = projh.inverse;

    // Projection with x and y additionally scaled and biased by half the pixel size.
    int pixelW = camera.pixelWidth;
    int pixelH = camera.pixelHeight;
    Matrix4x4 screenRemap = new Matrix4x4();
    screenRemap.SetRow(0, new Vector4(0.5f * pixelW, 0.0f, 0.0f, 0.5f * pixelW));
    screenRemap.SetRow(1, new Vector4(0.0f, 0.5f * pixelH, 0.0f, 0.5f * pixelH));
    screenRemap.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
    screenRemap.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));
    Matrix4x4 projscr = screenRemap * cameraProj;
    Matrix4x4 invProjscr = projscr.inverse;

    int visibleCount = GenerateSourceLightBuffers(camera, cullResults);

    // One thread group per 16x16 pixel tile, rounded up.
    int tilesX = (pixelW + 15) / 16;
    int tilesY = (pixelH + 15) / 16;
    //ComputeBuffer lightList = new ComputeBuffer(nrTilesX * nrTilesY * (32 / 2), sizeof(uint));

    var computeCmd = new CommandBuffer();

    // Pass 1: screen-space AABB per light, 8 lights per thread group.
    computeCmd.SetComputeIntParam(m_BuildScreenAABBShader, "g_iNrVisibLights", visibleCount);
    SetMatrixCS(computeCmd, m_BuildScreenAABBShader, "g_mProjection", projh);
    SetMatrixCS(computeCmd, m_BuildScreenAABBShader, "g_mInvProjection", invProjh);
    computeCmd.SetComputeBufferParam(m_BuildScreenAABBShader, kGenAABBKernel, "g_vBoundsBuffer", m_aabbBoundsBuffer);
    computeCmd.ComputeDispatch(m_BuildScreenAABBShader, kGenAABBKernel, (visibleCount + 7) / 8, 1, 1);

    // Pass 2: per-tile light list.
    computeCmd.SetComputeIntParam(m_BuildPerTileLightListShader, "g_iNrVisibLights", visibleCount);
    SetMatrixCS(computeCmd, m_BuildPerTileLightListShader, "g_mScrProjection", projscr);
    SetMatrixCS(computeCmd, m_BuildPerTileLightListShader, "g_mInvScrProjection", invProjscr);
    computeCmd.SetComputeTextureParam(m_BuildPerTileLightListShader, kGenListPerTileKernel, "g_depth_tex", new RenderTargetIdentifier(kCameraDepthTexture));
    computeCmd.SetComputeBufferParam(m_BuildPerTileLightListShader, kGenListPerTileKernel, "g_vLightList", lightList);
    computeCmd.ComputeDispatch(m_BuildPerTileLightListShader, kGenListPerTileKernel, tilesX, tilesY, 1);

    loop.ExecuteCommandBuffer(computeCmd);
    computeCmd.Dispose();

    DoTiledDeferredLighting(loop, camera.cameraToWorldMatrix, projscr, invProjscr, lightList);
    //lightList.Release();

    loop.Submit();
}
// Per-frame housekeeping, called at the start of ExecuteRenderLoop: advances the LRU
// state of the three texture caches.
void NewFrame()
{
// update texture caches
m_cookieTexArray.NewFrame();
m_cubeCookieTexArray.NewFrame();
m_cubeReflTexArray.NewFrame();
}
}
}

12
Assets/ScriptableRenderLoop/fptl/FptlLighting.cs.meta


fileFormatVersion: 2
guid: ad5bf4f8e45bdd1429eadc3445df2c89
timeCreated: 1467917164
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

338
Assets/ScriptableRenderLoop/fptl/Internal-DeferredReflections.shader


Shader "Hidden/Internal-TiledReflections" {
Properties {
_LightTexture0 ("", any) = "" {}
_ShadowMapTexture ("", any) = "" {}
_SrcBlend ("", Float) = 1
_DstBlend ("", Float) = 1
}
SubShader {
Pass
{
ZWrite Off
ZTest Always
Cull Off
//Blend Off
Blend [_SrcBlend] [_DstBlend]
CGPROGRAM
#pragma target 5.0
#pragma vertex vert
#pragma fragment frag
#include "UnityCG.cginc"
#include "UnityStandardBRDF.cginc"
#include "UnityStandardUtils.cginc"
#include "UnityPBSLighting.cginc"
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs"
uniform float4x4 g_mViewToWorld;
uniform float4x4 g_mWorldToView;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
Texture2D _CameraDepthTexture;
Texture2D _CameraGBufferTexture0;
Texture2D _CameraGBufferTexture1;
Texture2D _CameraGBufferTexture2;
UNITY_DECLARE_TEXCUBEARRAY(_reflCubeTextures);
StructuredBuffer<uint> g_vLightList;
StructuredBuffer<SFiniteLightData> g_vLightData;
// Un-projects a screen-space point (pixel x, pixel y, depth-buffer value) with the
// inverse screen projection and returns z/w of the result, i.e. the linear depth
// that GetViewPosFromLinDepth() expects.
float GetLinearDepth(float3 vP)
{
    // matrix on the left: vP is treated as a column vector
    // (the row-vector form would be mul(float4(vP,1.0), g_mInvScrProjection))
    float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
    return v4Pres.z / v4Pres.w;
}
// Rebuilds a view-space position from a pixel position and a linear depth by
// inverting the scale and offset terms of the screen projection matrix.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    // scale terms sit on the diagonal, offset terms in the third element of rows 0 and 1
    const float scaleX  = g_mScrProjection[0].x;
    const float centerX = g_mScrProjection[0].z;
    const float scaleY  = g_mScrProjection[1].y;
    const float centerY = g_mScrProjection[1].z;

#ifdef LEFT_HAND_COORDINATES
    const float rayX = ((v2ScrPos.x-centerX)/scaleX);
    const float rayY = ((v2ScrPos.y-centerY)/scaleY);
#else
    const float rayX = -((v2ScrPos.x+centerX)/scaleX);
    const float rayY = -((v2ScrPos.y+centerY)/scaleY);
#endif

    // ray through the pixel at unit depth, scaled out to the requested depth
    return fLinDepth*float3( rayX, rayY, 1.0 );
}
// Returns the number of entries in a tile's list: the low 16 bits of the first
// of the 16 dwords that g_vLightList reserves per tile.
uint FetchLightCount(const uint tileOffs)
{
    const uint header = g_vLightList[ 16*tileOffs + 0];
    return header & 0xffff;
}
// Returns the l'th 16-bit entry of a tile's list. Entries are packed two per
// dword and the first 16-bit slot holds the count, so entry l lives in slot l+1.
uint FetchIndex(const uint tileOffs, const uint l)
{
    const uint slot       = l+1;
    const uint packedPair = g_vLightList[ 16*tileOffs + (slot>>1)];
    const uint bitShift   = (slot&1)*16;   // odd slots occupy the high half
    return (packedPair>>bitShift)&0xffff;
}
float3 ExecuteReflectionProbes(uint2 pixCoord, const uint offs);
float3 OverlayHeatMap(uint uNumLights, float3 c);
// Data handed from vert() to frag().
struct v2f {
float4 vertex : SV_POSITION; // written as a clip-space position in vert(); frag() reads its xy as the pixel coordinate
float2 texcoord : TEXCOORD0; // texture coordinate copied from the vertex input
};
// Vertex stage: transforms the incoming vertex with UnityObjectToClipPosMono()
// and forwards the texture coordinate unchanged.
v2f vert (float4 vertex : POSITION, float2 texcoord : TEXCOORD0)
{
    v2f result;
    result.texcoord = texcoord.xy;
    result.vertex = UnityObjectToClipPosMono(vertex);
    return result;
}
// Fragment stage: finds the 16x16 tile that contains this pixel, selects the
// tile's reflection-probe list and returns the accumulated probe contribution.
half4 frag (v2f i) : SV_Target
{
    uint iWidth;
    uint iHeight;
    _CameraDepthTexture.GetDimensions(iWidth, iHeight);

    // number of 16x16 tiles needed to cover the depth texture (rounded up)
    const uint nrTilesX = (iWidth+15)/16;
    const uint nrTilesY = (iHeight+15)/16;

    // pixel coordinate with the vertical axis flipped
    uint2 pixCoord = ((uint2) i.vertex.xy);
    pixCoord.y = (iHeight-1) - pixCoord.y;

    const uint2 tileIDX = pixCoord / 16;

    // the reflection probe lists are stored one full set of tiles after the first set
    const int offs = tileIDX.y*nrTilesX+tileIDX.x + nrTilesX*nrTilesY;

    const float3 c = ExecuteReflectionProbes(pixCoord, offs);
    //c = OverlayHeatMap(FetchLightCount(offs), c);
    return float4(c,1.0);
}
// Surface parameters unpacked from the G-buffer by UnityStandardDataFromGbuffer().
struct UnityStandardData
{
float3 specularColor; // gbuffer1.xyz
float3 diffuseColor; // gbuffer0.xyz
float3 normalWorld; // normalized, decoded from gbuffer2.xyz (stored as n*0.5+0.5)
float smoothness; // gbuffer1.a
float occlusion; // gbuffer0.a
};
// Unpacks the three G-buffer samples of a pixel into a UnityStandardData.
UnityStandardData UnityStandardDataFromGbuffer(float4 gbuffer0, float4 gbuffer1, float4 gbuffer2)
{
    UnityStandardData unpacked;

    // gbuffer0: diffuse colour + occlusion
    unpacked.diffuseColor = gbuffer0.xyz;
    unpacked.occlusion = gbuffer0.a;

    // gbuffer1: specular colour + smoothness
    unpacked.specularColor = gbuffer1.xyz;
    unpacked.smoothness = gbuffer1.a;

    // gbuffer2: normal stored in [0;1], expanded back to [-1;1]
    unpacked.normalWorld = normalize(2*gbuffer2.xyz-1);

    return unpacked;
}
// Per-axis distance from p to the box [aabbMin; aabbMax]; an axis is zero
// when p lies within the box extent along that axis.
half3 distanceFromAABB(half3 p, half3 aabbMin, half3 aabbMax)
{
    const half3 pastMax   = p - aabbMax;   // positive where p is beyond the max face
    const half3 beforeMin = aabbMin - p;   // positive where p is before the min face
    const half3 outside   = max(pastMax, beforeMin);
    return max(outside, half3(0.0, 0.0, 0.0));
}
half3 Unity_GlossyEnvironment (UNITY_ARGS_TEXCUBEARRAY(tex), int sliceIndex, half4 hdr, Unity_GlossyEnvironmentData glossIn);
// Evaluates every reflection probe (BOX_LIGHT entry) in the tile list at offset
// offs for the pixel pixCoord and blends them in list order.
//   pixCoord : pixel coordinate used to load depth and the G-buffer
//   offs     : tile offset into g_vLightList (see FetchLightCount / FetchIndex)
// Returns the blended specular reflection colour.
float3 ExecuteReflectionProbes(uint2 pixCoord, const uint offs)
{
    // reconstruct the view-space position of the pixel
    float3 v3ScrPos = float3(pixCoord.x+0.5, pixCoord.y+0.5, FetchDepth(_CameraDepthTexture, pixCoord.xy).x);
    float linDepth = GetLinearDepth(v3ScrPos);
    float3 vP = GetViewPosFromLinDepth(v3ScrPos.xy, linDepth);
    float3 worldPos = mul(g_mViewToWorld, float4(vP.xyz,1.0)).xyz;     //unity_CameraToWorld

    // direction from the surface towards the camera, in world space
    float3 vWSpaceVDir = normalize(mul((float3x3) g_mViewToWorld, -vP).xyz);     //unity_CameraToWorld

    float4 gbuffer0 = _CameraGBufferTexture0.Load( uint3(pixCoord.xy, 0) );
    float4 gbuffer1 = _CameraGBufferTexture1.Load( uint3(pixCoord.xy, 0) );
    float4 gbuffer2 = _CameraGBufferTexture2.Load( uint3(pixCoord.xy, 0) );

    UnityStandardData data = UnityStandardDataFromGbuffer(gbuffer0, gbuffer1, gbuffer2);

    float oneMinusReflectivity = 1.0 - SpecularStrength(data.specularColor.rgb);
    float3 worldNormalRefl = reflect(-vWSpaceVDir, data.normalWorld);
    float3 vspaceRefl = mul((float3x3) g_mWorldToView, worldNormalRefl).xyz;

    // no direct light here: only the indirect specular term feeds the BRDF
    UnityLight light;
    light.color = 0;
    light.dir = 0;
    light.ndotl = 0;

    float3 ints = 0;

    const uint uNrLights = FetchLightCount(offs);

    uint l=0;

    // we need this outer loop for when we cannot assume a wavefront is 64 wide
    // since in this case we cannot assume the lights will remain sorted by type
    // during processing in lightlist_cs.hlsl
#if !defined(XBONE) && !defined(PLAYSTATION4)
    while(l<uNrLights)
#endif
    {
        uint uIndex = l<uNrLights ? FetchIndex(offs, l) : 0;
        uint uLgtType = l<uNrLights ? g_vLightData[uIndex].uLightType : 0;

        // specialized loop for box lights (reflection probes)
        while(l<uNrLights && uLgtType==(uint) BOX_LIGHT)
        {
            SFiniteLightData lgtDat = g_vLightData[uIndex];
            float3 vLp = lgtDat.vLpos.xyz;
            float3 fromProbeToSurfPoint = vP - vLp;     // in camera space
            float3 posInProbeSpace = float3( dot(fromProbeToSurfPoint, lgtDat.vLaxisX), dot(fromProbeToSurfPoint, lgtDat.vLaxisY), dot(fromProbeToSurfPoint, lgtDat.vLaxisZ) );

            float blendDistance = lgtDat.fProbeBlendDistance;//unity_SpecCube1_ProbePosition.w; // will be set to blend distance for this probe

            float3 worldNormal0;
            if((lgtDat.flags&IS_BOX_PROJECTED)!=0)
            {
                // For box projection, use expanded bounds as they are rendered; otherwise
                // box projection artifacts when outside of the box.
                //float4 boxMin = unity_SpecCube0_BoxMin - float4(blendDistance,blendDistance,blendDistance,0);
                //float4 boxMax = unity_SpecCube0_BoxMax + float4(blendDistance,blendDistance,blendDistance,0);
                //worldNormal0 = BoxProjectedCubemapDirection (worldNormalRefl, worldPos, unity_SpecCube0_ProbePosition, boxMin, boxMax);

                float4 vBoxOuterDistance = float4( lgtDat.vBoxInnerDist + float3(blendDistance, blendDistance, blendDistance), 0.0 );
#if 0
                worldNormal0 = BoxProjectedCubemapDirection(worldNormalRefl, posInProbeSpace, float4(-lgtDat.vProbeBoxOffset, 1.0), -vBoxOuterDistance, vBoxOuterDistance);
#else
                float3 probeSpaceRefl = float3( dot(vspaceRefl, lgtDat.vLaxisX), dot(vspaceRefl, lgtDat.vLaxisY), dot(vspaceRefl, lgtDat.vLaxisZ) );
                float3 vPR = BoxProjectedCubemapDirection(probeSpaceRefl, posInProbeSpace, float4(-lgtDat.vProbeBoxOffset, 1.0), -vBoxOuterDistance, vBoxOuterDistance);     // probe space corrected reflection vector
                worldNormal0 = mul( (float3x3) g_mViewToWorld, vPR.x*lgtDat.vLaxisX + vPR.y*lgtDat.vLaxisY + vPR.z*lgtDat.vLaxisZ );
#endif
            }
            else
                worldNormal0 = worldNormalRefl;

            Unity_GlossyEnvironmentData g;
            g.roughness = 1 - data.smoothness;
            g.reflUVW = worldNormal0;

            half3 env0 = Unity_GlossyEnvironment(UNITY_PASS_TEXCUBEARRAY(_reflCubeTextures), lgtDat.iSliceIndex, float4(lgtDat.fLightIntensity, lgtDat.fDecodeExp, 0.0, 0.0), g);

            UnityIndirect ind;
            ind.diffuse = 0;
            ind.specular = env0 * data.occlusion;

            half3 rgb = UNITY_BRDF_PBS(0, data.specularColor, oneMinusReflectivity, data.smoothness, data.normalWorld, vWSpaceVDir, light, ind).rgb;

            // Calculate falloff value, so reflections on the edges of the probe would gradually blend to previous reflection.
            // Also this ensures that pixels not located in the reflection probe AABB won't
            // accidentally pick up reflections from this probe.
            //half3 distance = distanceFromAABB(worldPos, unity_SpecCube0_BoxMin.xyz, unity_SpecCube0_BoxMax.xyz);
            half3 distance = distanceFromAABB(posInProbeSpace, -lgtDat.vBoxInnerDist, lgtDat.vBoxInnerDist);

            // Guard the division: with a blend distance of 0 a pixel inside the box would
            // evaluate 0/0 = NaN, which saturate() turns into 0 and the probe would vanish.
            // With the clamp such a probe gets a hard edge instead (1 inside, 0 outside).
            half falloff = saturate(1.0 - length(distance)/max(blendDistance, 1e-4));

            ints = lerp(ints, rgb, falloff);

            // next probe
            ++l; uIndex = l<uNrLights ? FetchIndex(offs, l) : 0;
            uLgtType = l<uNrLights ? g_vLightData[uIndex].uLightType : 0;
        }

#if !defined(XBONE) && !defined(PLAYSTATION4)
        // skip any entry that is not a box light so the outer loop always advances
        if(uLgtType!=BOX_LIGHT) ++l;
#endif
    }

    return ints;
}
// Blends a debug colour over c according to how many lights a tile holds:
// the count is mapped logarithmically onto a 12 entry colour ramp, anything
// beyond the ramp becomes white. A count of 0 leaves c unchanged.
float3 OverlayHeatMap(uint uNumLights, float3 c)
{
    // rgb = ramp colour, w = blend strength
    const float4 kRadarColors[12] =
    {
        float4(0.0,0.0,0.0,0.0),   // black
        float4(0.0,0.0,0.6,0.5),   // dark blue
        float4(0.0,0.0,0.9,0.5),   // blue
        float4(0.0,0.6,0.9,0.5),   // light blue
        float4(0.0,0.9,0.9,0.5),   // cyan
        float4(0.0,0.9,0.6,0.5),   // blueish green
        float4(0.0,0.9,0.0,0.5),   // green
        float4(0.6,0.9,0.0,0.5),   // yellowish green
        float4(0.9,0.9,0.0,0.5),   // yellow
        float4(0.9,0.6,0.0,0.5),   // orange
        float4(0.9,0.0,0.0,0.5),   // red
        float4(1.0,0.0,0.0,0.9)    // strong red
    };

    float fMaxNrLightsPerTile = 24;

    // logarithmic bucket: 0 for an empty tile, otherwise 1 + floor(10*log(n)/log(max))
    int rampIndex = 0;
    if(uNumLights!=0)
    {
        rampIndex = 1 + (int) floor(10 * (log2((float)uNumLights) / log2(fMaxNrLightsPerTile)));
    }
    if(rampIndex<0)
    {
        rampIndex = 0;
    }

    const float4 overlay = rampIndex>11 ? float4(1.0,1.0,1.0,1.0) : kRadarColors[rampIndex];

    return lerp(c, pow(overlay.xyz, 2.2), 0.3*overlay.w);
}
// Samples one slice of a reflection cube map array at a mip level derived from
// the surface roughness and decodes the HDR result.
//   tex        : cube map array (texture + sampler)
//   sliceIndex : array slice to sample
//   hdr        : decode parameters forwarded to DecodeHDR_NoLinearSupportInSM2
//   glossIn    : roughness and lookup direction
half3 Unity_GlossyEnvironment (UNITY_ARGS_TEXCUBEARRAY(tex), int sliceIndex, half4 hdr, Unity_GlossyEnvironmentData glossIn)
{
#if UNITY_GLOSS_MATCHES_MARMOSET_TOOLBAG2 && (SHADER_TARGET >= 30)
    // TODO: remove pow, store cubemap mips differently
    half remapped = pow(glossIn.roughness, 3.0/4.0);
#else
    half remapped = glossIn.roughness;     // MM: switched to this
#endif
    //remapped = sqrt(sqrt(2/(64.0+2)));     // spec power to the square root of real roughness

#if 0
    float m = remapped*remapped;     // m is the real roughness parameter
    const float fEps = 1.192092896e-07F;     // smallest such that 1.0+FLT_EPSILON != 1.0 (+1e-4h is NOT good here. is visibly very wrong)
    float n = (2.0/max(fEps, m*m))-2.0;     // remap to spec power. See eq. 21 in --> https://dl.dropboxusercontent.com/u/55891920/papers/mm_brdf.pdf

    n /= 4;     // remap from n_dot_h formulation to n_dot_r. See section "Pre-convolved Cube Maps vs Path Tracers" --> https://s3.amazonaws.com/docs.knaldtech.com/knald/1.0.0/lys_power_drops.html

    remapped = pow( 2/(n+2), 0.25);     // remap back to square root of real roughness
#else
    // MM: came up with a surprisingly close approximation to what the #if 0'ed out code above does.
    remapped = remapped*(1.7 - 0.7*remapped);
#endif

    // rougher surfaces read from smaller (blurrier) mips
    const half mipLevel = remapped * UNITY_SPECCUBE_LOD_STEPS;
    const half4 encoded = UNITY_SAMPLE_TEXCUBEARRAY_LOD(tex, float4(glossIn.reflUVW.xyz, sliceIndex), mipLevel);

    //return encoded.xyz;
    return DecodeHDR_NoLinearSupportInSM2 (encoded, hdr);
}
ENDCG
}
}
Fallback Off
}

9
Assets/ScriptableRenderLoop/fptl/Internal-DeferredReflections.shader.meta


fileFormatVersion: 2
guid: 3899e06d641c2cb4cbff794df0da536b
timeCreated: 1467917168
licenseType: Pro
ShaderImporter:
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:

309
Assets/ScriptableRenderLoop/fptl/Internal-DeferredShading.shader


Shader "Hidden/Internal-Obscurity" {
Properties {
_LightTexture0 ("", any) = "" {}
_ShadowMapTexture ("", any) = "" {}
_SrcBlend ("", Float) = 1
_DstBlend ("", Float) = 1
}
SubShader {
Pass
{
ZWrite Off
ZTest Always
Cull Off
Blend Off
//Blend [_SrcBlend] [_DstBlend]
CGPROGRAM
#pragma target 5.0
#pragma vertex vert
#pragma fragment frag
//#include "UnityCG.cginc"
//#include "UnityPBSLighting.cginc"
//#include "UnityDeferredLibrary.cginc"
#include "UnityCG.cginc"
#include "UnityStandardBRDF.cginc"
#include "UnityStandardUtils.cginc"
#include "UnityPBSLighting.cginc"
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs"
uniform float4x4 g_mViewToWorld;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
Texture2D _CameraDepthTexture;
Texture2D _CameraGBufferTexture0;
Texture2D _CameraGBufferTexture1;
Texture2D _CameraGBufferTexture2;
//UNITY_DECLARE_TEX2D(_LightTextureB0);
sampler2D _LightTextureB0;
UNITY_DECLARE_TEX2DARRAY(_spotCookieTextures);
UNITY_DECLARE_TEXCUBEARRAY(_pointCookieTextures);
StructuredBuffer<uint> g_vLightList;
StructuredBuffer<SFiniteLightData> g_vLightData;
// Un-projects a screen-space point (pixel x, pixel y, depth-buffer value) with the
// inverse screen projection and returns z/w of the result, i.e. the linear depth
// that GetViewPosFromLinDepth() expects.
float GetLinearDepth(float3 vP)
{
    // matrix on the left: vP is treated as a column vector
    // (the row-vector form would be mul(float4(vP,1.0), g_mInvScrProjection))
    float4 v4Pres = mul(g_mInvScrProjection, float4(vP,1.0));
    return v4Pres.z / v4Pres.w;
}
// Rebuilds a view-space position from a pixel position and a linear depth by
// inverting the scale and offset terms of the screen projection matrix.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    // scale terms sit on the diagonal, offset terms in the third element of rows 0 and 1
    const float scaleX  = g_mScrProjection[0].x;
    const float centerX = g_mScrProjection[0].z;
    const float scaleY  = g_mScrProjection[1].y;
    const float centerY = g_mScrProjection[1].z;

#ifdef LEFT_HAND_COORDINATES
    const float rayX = ((v2ScrPos.x-centerX)/scaleX);
    const float rayY = ((v2ScrPos.y-centerY)/scaleY);
#else
    const float rayX = -((v2ScrPos.x+centerX)/scaleX);
    const float rayY = -((v2ScrPos.y+centerY)/scaleY);
#endif

    // ray through the pixel at unit depth, scaled out to the requested depth
    return fLinDepth*float3( rayX, rayY, 1.0 );
}
// Returns the number of entries in a tile's list: the low 16 bits of the first
// of the 16 dwords that g_vLightList reserves per tile.
uint FetchLightCount(const uint tileOffs)
{
    const uint header = g_vLightList[ 16*tileOffs + 0];
    return header & 0xffff;
}
// Returns the l'th 16-bit entry of a tile's list. Entries are packed two per
// dword and the first 16-bit slot holds the count, so entry l lives in slot l+1.
uint FetchIndex(const uint tileOffs, const uint l)
{
    const uint slot       = l+1;
    const uint packedPair = g_vLightList[ 16*tileOffs + (slot>>1)];
    const uint bitShift   = (slot&1)*16;   // odd slots occupy the high half
    return (packedPair>>bitShift)&0xffff;
}
float3 ExecuteLightList(uint2 pixCoord, const uint offs);
float3 OverlayHeatMap(uint uNumLights, float3 c);
// Data handed from vert() to frag().
struct v2f {
float4 vertex : SV_POSITION; // written as a clip-space position in vert(); frag() reads its xy as the pixel coordinate
float2 texcoord : TEXCOORD0; // texture coordinate copied from the vertex input
};
// Vertex stage: transforms the incoming vertex with UnityObjectToClipPosMono()
// and forwards the texture coordinate unchanged.
v2f vert (float4 vertex : POSITION, float2 texcoord : TEXCOORD0)
{
    v2f result;
    result.texcoord = texcoord.xy;
    result.vertex = UnityObjectToClipPosMono(vertex);
    return result;
}
// Fragment stage: finds the 16x16 tile that contains this pixel, shades the
// pixel with the tile's light list, overlays the light-count heat map and
// returns the result raised to the power 1/2.2.
half4 frag (v2f i) : SV_Target
{
    uint iWidth;
    uint iHeight;
    _CameraDepthTexture.GetDimensions(iWidth, iHeight);

    // number of 16x16 tiles needed to cover the depth texture (rounded up)
    const uint nrTilesX = (iWidth+15)/16;
    const uint nrTilesY = (iHeight+15)/16;

    // pixel coordinate with the vertical axis flipped
    uint2 pixCoord = ((uint2) i.vertex.xy);
    pixCoord.y = (iHeight-1) - pixCoord.y;

    const uint2 tileIDX = pixCoord / 16;
    const int offs = tileIDX.y*nrTilesX+tileIDX.x;

    float3 shaded = ExecuteLightList(pixCoord, offs);
    shaded = OverlayHeatMap(FetchLightCount(offs), shaded);

    //return float4(shaded,1.0);
    return float4(pow(shaded,1/2.2),1.0);
}
// Surface parameters unpacked from the G-buffer by UnityStandardDataFromGbuffer().
struct UnityStandardData
{
float3 specularColor; // gbuffer1.xyz
float3 diffuseColor; // gbuffer0.xyz
float3 normalWorld; // normalized, decoded from gbuffer2.xyz (stored as n*0.5+0.5)
float smoothness; // gbuffer1.a
};
// Unpacks the three G-buffer samples of a pixel into a UnityStandardData.
// gbuffer0.a is not consumed here: this struct has no occlusion member.
UnityStandardData UnityStandardDataFromGbuffer(float4 gbuffer0, float4 gbuffer1, float4 gbuffer2)
{
    UnityStandardData data;

    // normal is stored in [0;1], expand back to [-1;1]
    data.normalWorld = normalize(2*gbuffer2.xyz-1);
    data.smoothness = gbuffer1.a;
    data.diffuseColor = gbuffer0.xyz;
    data.specularColor = gbuffer1.xyz;

    return data;
}
// Shades the pixel pixCoord with every spot and sphere light in the tile list
// at offset offs and returns the summed result.
//   pixCoord : pixel coordinate used to load depth and the G-buffer
//   offs     : tile offset into g_vLightList (see FetchLightCount / FetchIndex)
float3 ExecuteLightList(uint2 pixCoord, const uint offs)
{
    // reconstruct the view-space position of the pixel
    float3 v3ScrPos = float3(pixCoord.x+0.5, pixCoord.y+0.5, FetchDepth(_CameraDepthTexture, pixCoord.xy).x);
    float linDepth = GetLinearDepth(v3ScrPos);
    float3 vP = GetViewPosFromLinDepth(v3ScrPos.xy, linDepth);

    // direction from the surface towards the camera, in world space
    float3 vWSpaceVDir = normalize(mul((float3x3) g_mViewToWorld, -vP).xyz);     //unity_CameraToWorld

    float4 gbuffer0 = _CameraGBufferTexture0.Load( uint3(pixCoord.xy, 0) );
    float4 gbuffer1 = _CameraGBufferTexture1.Load( uint3(pixCoord.xy, 0) );
    float4 gbuffer2 = _CameraGBufferTexture2.Load( uint3(pixCoord.xy, 0) );

    UnityStandardData data = UnityStandardDataFromGbuffer(gbuffer0, gbuffer1, gbuffer2);

    float oneMinusReflectivity = 1.0 - SpecularStrength(data.specularColor.rgb);

    // direct lighting only: no indirect contribution
    UnityIndirect ind;
    UNITY_INITIALIZE_OUTPUT(UnityIndirect, ind);
    ind.diffuse = 0;
    ind.specular = 0;

    float3 ints = 0;

    const uint uNrLights = FetchLightCount(offs);

    uint l=0;

    // we need this outer loop for when we cannot assume a wavefront is 64 wide
    // since in this case we cannot assume the lights will remain sorted by type
    // during processing in lightlist_cs.hlsl
#if !defined(XBONE) && !defined(PLAYSTATION4)
    while(l<uNrLights)
#endif
    {
        uint uIndex = l<uNrLights ? FetchIndex(offs, l) : 0;
        uint uLgtType = l<uNrLights ? g_vLightData[uIndex].uLightType : 0;

        // specialized loop for spot lights
        while(l<uNrLights && uLgtType==SPOT_LIGHT)
        {
            SFiniteLightData lgtDat = g_vLightData[uIndex];
            float3 vLp = lgtDat.vLpos.xyz;

            float3 toLight = vLp - vP;
            float dist = length(toLight);
            float3 vL = toLight / dist;

            // distance attenuation: lookup indexed by (dist/range)^2
            float attLookUp = dist*lgtDat.fRecipRange; attLookUp *= attLookUp;
            float atten = tex2Dlod(_LightTextureB0, float4(attLookUp.rr, 0.0, 0.0)).UNITY_ATTEN_CHANNEL;

            // spot attenuation
            const float fProjVec = -dot(vL, lgtDat.vLaxisZ.xyz);     // spotDir = lgtDat.vLaxisZ.xyz
            float2 cookCoord = (-lgtDat.cotan)*float2( dot(vL, lgtDat.vLaxisX.xyz), dot(vL, lgtDat.vLaxisY.xyz) ) / fProjVec;

            const bool bHasCookie = (lgtDat.flags&IS_CIRCULAR_SPOT_SHAPE)==0;     // all square spots have cookies
            float d0=0.65, angularAtt = smoothstep(0.0, 1.0-d0, 1.0-length(cookCoord));
            [branch]if(bHasCookie)
            {
                cookCoord = cookCoord*0.5 + 0.5;
                angularAtt = UNITY_SAMPLE_TEX2DARRAY_LOD(_spotCookieTextures, float3(cookCoord, lgtDat.iSliceIndex), 0.0).w;
            }
            atten *= angularAtt*(fProjVec>0.0);     // finally apply this to the dist att.

            UnityLight light;
            light.color.xyz = lgtDat.vCol.xyz*atten;
            light.dir.xyz = mul((float3x3) g_mViewToWorld, vL).xyz;     //unity_CameraToWorld
            // initialise every member before the struct is handed to the BRDF
            light.ndotl = saturate(dot(data.normalWorld, light.dir.xyz));

            ints += UNITY_BRDF_PBS (data.diffuseColor, data.specularColor, oneMinusReflectivity, data.smoothness, data.normalWorld, vWSpaceVDir, light, ind);

            ++l; uIndex = l<uNrLights ? FetchIndex(offs, l) : 0;
            uLgtType = l<uNrLights ? g_vLightData[uIndex].uLightType : 0;
        }

        // specialized loop for sphere lights
        while(l<uNrLights && uLgtType==SPHERE_LIGHT)
        {
            SFiniteLightData lgtDat = g_vLightData[uIndex];
            float3 vLp = lgtDat.vLpos.xyz;

            float3 toLight = vLp - vP;
            float dist = length(toLight);
            float3 vL = toLight / dist;
            float3 vLw = mul((float3x3) g_mViewToWorld, vL).xyz;     //unity_CameraToWorld

            // distance attenuation: lookup indexed by (dist/range)^2
            float attLookUp = dist*lgtDat.fRecipRange; attLookUp *= attLookUp;
            float atten = tex2Dlod(_LightTextureB0, float4(attLookUp.rr, 0.0, 0.0)).UNITY_ATTEN_CHANNEL;

            const bool bHasCookie = (lgtDat.flags&HAS_COOKIE_TEXTURE)!=0;
            [branch]if(bHasCookie)
            {
                atten *= UNITY_SAMPLE_TEXCUBEARRAY_LOD(_pointCookieTextures, float4(-vLw, lgtDat.iSliceIndex), 0.0).w;
            }

            UnityLight light;
            light.color.xyz = lgtDat.vCol.xyz*atten;
            light.dir.xyz = vLw;
            // initialise every member before the struct is handed to the BRDF
            light.ndotl = saturate(dot(data.normalWorld, light.dir.xyz));

            ints += UNITY_BRDF_PBS (data.diffuseColor, data.specularColor, oneMinusReflectivity, data.smoothness, data.normalWorld, vWSpaceVDir, light, ind);

            ++l; uIndex = l<uNrLights ? FetchIndex(offs, l) : 0;
            uLgtType = l<uNrLights ? g_vLightData[uIndex].uLightType : 0;
        }

#if !defined(XBONE) && !defined(PLAYSTATION4)
        // skip any entry of another type so the outer loop always advances
        //if(uLgtType>=MAX_TYPES) ++l;
        if(uLgtType!=SPOT_LIGHT && uLgtType!=SPHERE_LIGHT) ++l;
#endif
    }

    return ints;
}
// Blends a debug colour over c according to how many lights a tile holds:
// the count is mapped logarithmically onto a 12 entry colour ramp, anything
// beyond the ramp becomes white. A count of 0 leaves c unchanged.
float3 OverlayHeatMap(uint uNumLights, float3 c)
{
    // rgb = ramp colour, w = blend strength
    const float4 kRadarColors[12] =
    {
        float4(0.0,0.0,0.0,0.0),   // black
        float4(0.0,0.0,0.6,0.5),   // dark blue
        float4(0.0,0.0,0.9,0.5),   // blue
        float4(0.0,0.6,0.9,0.5),   // light blue
        float4(0.0,0.9,0.9,0.5),   // cyan
        float4(0.0,0.9,0.6,0.5),   // blueish green
        float4(0.0,0.9,0.0,0.5),   // green
        float4(0.6,0.9,0.0,0.5),   // yellowish green
        float4(0.9,0.9,0.0,0.5),   // yellow
        float4(0.9,0.6,0.0,0.5),   // orange
        float4(0.9,0.0,0.0,0.5),   // red
        float4(1.0,0.0,0.0,0.9)    // strong red
    };

    float fMaxNrLightsPerTile = 24;

    // logarithmic bucket: 0 for an empty tile, otherwise 1 + floor(10*log(n)/log(max))
    int rampIndex = 0;
    if(uNumLights!=0)
    {
        rampIndex = 1 + (int) floor(10 * (log2((float)uNumLights) / log2(fMaxNrLightsPerTile)));
    }
    if(rampIndex<0)
    {
        rampIndex = 0;
    }

    const float4 overlay = rampIndex>11 ? float4(1.0,1.0,1.0,1.0) : kRadarColors[rampIndex];

    return lerp(c, pow(overlay.xyz, 2.2), 0.3*overlay.w);
}
ENDCG
}
}
Fallback Off
}

9
Assets/ScriptableRenderLoop/fptl/Internal-DeferredShading.shader.meta


fileFormatVersion: 2
guid: 1c102a89f3460254a8c413dbdcd63a2a
timeCreated: 1467917168
licenseType: Pro
ShaderImporter:
defaultTextures: []
userData:
assetBundleName:
assetBundleVariant:

86
Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs


//#define LEFT_HAND_COORDINATES
// Per-light record shared between the C# side and the shaders (this file is also
// #included from HLSL, where it is read through StructuredBuffer<SFiniteLightData>).
// NOTE(review): every Vec3 is followed by one scalar, presumably to keep 16-byte
// groups; confirm before reordering or inserting members.
public struct SFiniteLightData
{
// setup constant buffer
public float fPenumbra;
public int flags; // bit mask of IS_CIRCULAR_SPOT_SHAPE / HAS_COOKIE_TEXTURE / IS_BOX_PROJECTED
public uint uLightType; // SPOT_LIGHT, SPHERE_LIGHT or BOX_LIGHT
public uint uLightModel; // DIRECT_LIGHT=0, REFLECTION_LIGHT=1
public Vec3 vLpos; // light/probe position; the shaders combine it with view-space pixel positions
public float fLightIntensity; // passed as hdr.x when sampling a reflection probe
public Vec3 vLaxisX;
public float fRecipRange; // multiplied with the distance to the light to index the attenuation lookup
public Vec3 vLaxisY;
public float fSphRadiusSq; // compared against the squared distance to the light during pruning
public Vec3 vLaxisZ; // spot +Z axis
public float cotan; // scales the spot cookie coordinate and the spot cone test
public Vec3 vCol; // light colour, multiplied by the attenuation
public int iSliceIndex; // slice used when sampling the cookie / reflection texture arrays
public Vec3 vBoxInnerDist; // box half extents along vLaxisX/Y/Z (used as +/- bounds)
public float fDecodeExp; // passed as hdr.y when sampling a reflection probe
public Vec3 vBoxInvRange; // scales the distance outside the inner box in the box pruning test
public float fPad0; // NOTE(review): appears to be padding only; not read in the visible shaders
public Vec3 vProbeBoxOffset; // negated and passed as the probe position to BoxProjectedCubemapDirection
public float fProbeBlendDistance; // distance over which a probe fades out outside its inner box
};
// Bounding volume description of a light.
// NOTE(review): no consumer of this struct is visible in this part of the change;
// the member comments below only restate the names. Confirm against scrbound.compute.
public struct SFiniteLightBound
{
public Vec3 vBoxAxisX; // box axes
public Vec3 vBoxAxisY;
public Vec3 vBoxAxisZ;
public Vec3 vCen; // a center in camera space inside the bounding volume of the light source.
public Vec2 vScaleXY;
public float fRadius;
};
// Constants shared by C# and HLSL. When compiled as C# they are wrapped in the
// LightDefinitions class; when __HLSL is set the class wrapper is compiled out
// and the same declarations are left at file scope.
#if !__HLSL
public class LightDefinitions
{
#endif
public static int MAX_NR_LIGHTS_PER_CAMERA = 1024;
public static float VIEWPORT_SCALE_Z = 1.0f; // depth values below this are treated as geometry (not skydome) by the tile builder
// flags
// bit values for SFiniteLightData.flags
public static int IS_CIRCULAR_SPOT_SHAPE = 1;
public static int HAS_COOKIE_TEXTURE = 2;
public static int IS_BOX_PROJECTED = 4;
// types
// values for SFiniteLightData.uLightType; MAX_TYPES is the number of types
public static int MAX_TYPES = 3;
public static int SPOT_LIGHT = 0;
public static int SPHERE_LIGHT = 1;
public static int BOX_LIGHT = 2;
// direct lights and reflection probes for now
// values for SFiniteLightData.uLightModel; NR_LIGHT_MODELS is the number of models
public static int NR_LIGHT_MODELS = 2;
public static int DIRECT_LIGHT = 0;
public static int REFLECTION_LIGHT = 1;
#if !__HLSL
}
#endif
#if __HLSL
// Loads the depth value stored for pixel pixCoord from mip 0 of depthTexture.
float FetchDepth(Texture2D depthTexture, uint2 pixCoord)
{
    const uint3 texel = uint3(pixCoord.xy, 0);   // z selects the mip level
    return /*1-*/depthTexture.Load( texel ).x;
}
#endif

12
Assets/ScriptableRenderLoop/fptl/LightDefinitions.cs.meta


fileFormatVersion: 2
guid: b796ac9ec090af44aba9d5cf983c21b3
timeCreated: 1467917164
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

457
Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute


#pragma kernel TileLightListGen
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs"
#define FINE_PRUNING_ENABLED
uniform int g_iNrVisibLights;
uniform float4x4 g_mInvScrProjection;
uniform float4x4 g_mScrProjection;
Texture2D g_depth_tex : register( t0 );
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
#define NR_THREADS 64
// output buffer
//RWBuffer<uint4> g_vLightList : register( u0 );
RWStructuredBuffer<uint> g_vLightList : register( u0 );
#define MAX_NR_COARSE_ENTRIES 64
#define MAX_NR_PRUNED_ENTRIES 24
groupshared unsigned int coarseList[MAX_NR_COARSE_ENTRIES];
groupshared unsigned int prunedList[MAX_NR_COARSE_ENTRIES]; // temporarily support room for all 64 while in LDS
groupshared uint ldsZMin;
groupshared uint ldsZMax;
groupshared uint lightOffs;
#ifdef FINE_PRUNING_ENABLED
groupshared uint ldsDoesLightIntersect[2];
#endif
groupshared int ldsNrLightsFinal;
groupshared int ldsModelListCount[2]; // since NR_LIGHT_MODELS is 2
// Un-projects a screen-space point (pixel x, pixel y, depth-buffer value) with the
// inverse screen projection and returns z/w of the result.
float GetLinearDepth(float3 vP)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(vP,1.0));
    return unprojected.z / unprojected.w;
}
// Rebuilds a view-space position from a pixel position and a linear depth by
// inverting the scale and offset terms of the screen projection matrix.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    // scale terms sit on the diagonal, offset terms in the third element of rows 0 and 1
    const float scaleX  = g_mScrProjection[0].x;
    const float centerX = g_mScrProjection[0].z;
    const float scaleY  = g_mScrProjection[1].y;
    const float centerY = g_mScrProjection[1].z;

#ifdef LEFT_HAND_COORDINATES
    const float rayX = ((v2ScrPos.x-centerX)/scaleX);
    const float rayY = ((v2ScrPos.y-centerY)/scaleY);
#else
    const float rayX = -((v2ScrPos.x+centerX)/scaleX);
    const float rayY = -((v2ScrPos.y+centerY)/scaleY);
#endif

    // ray through the pixel at unit depth, scaled out to the requested depth
    return fLinDepth*float3( rayX, rayY, 1.0 );
}
void sortLightList(int localThreadID, int n);
// Builds the per-tile light lists. One thread group of NR_THREADS (64) threads
// handles one 16x16 pixel tile (tile index = SV_GroupID.xy), each thread covering
// 4 of the tile's 256 pixels. Steps:
//   1. find the min/max depth of the tile,
//   2. coarse culling: test every visible light's screen-space AABB against the tile,
//   3. fine pruning (FINE_PRUNING_ENABLED): per-pixel test against the light shape,
//   4. count the survivors per light model and sort the list,
//   5. write one packed list per light model to g_vLightList (16 dwords per tile,
//      the lists of model m+1 start nrTilesX*nrTilesY tiles after those of model m).
// The group barriers are compiled out on XBONE/PLAYSTATION4.
[numthreads(NR_THREADS, 1, 1)]
void TileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint2 tileIDX = u3GroupID.xy;
uint t=threadID;
if(t<MAX_NR_COARSE_ENTRIES)
prunedList[t]=0;
uint iWidth;
uint iHeight;
g_depth_tex.GetDimensions(iWidth, iHeight);
uint nrTilesX = (iWidth+15)/16;
uint nrTilesY = (iHeight+15)/16;
// build tile scr boundary
const uint uFltMax = 0x7f7fffff; // FLT_MAX as a uint
// thread 0 resets the shared min/max depth and the coarse list counter
if(t==0)
{
ldsZMin = uFltMax;
ldsZMax = 0;
lightOffs = 0;
}
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
// pixel coordinate of the first pixel of the tile
uint2 viTilLL = 16*tileIDX;
// establish min and max depth first
float dpt_mi=asfloat(uFltMax), dpt_ma=0.0;
for(int idx=t; idx<256; idx+=NR_THREADS)
{
// pixel idx of the tile, clamped to the texture so partial border tiles stay in range
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
const float fDpth = FetchDepth(g_depth_tex, uCrd);
if(fDpth<VIEWPORT_SCALE_Z) // if not skydome
{
dpt_mi = min(fDpth, dpt_mi);
dpt_ma = max(fDpth, dpt_ma);
}
}
// merge the per-thread results; the float bit patterns are compared as uints
InterlockedMax(ldsZMax, asuint(dpt_ma) );
InterlockedMin(ldsZMin, asuint(dpt_mi) );
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
// tile bounds: xy as a fraction of the texture size, z as the depth range found above
float3 vTileLL = float3(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight, asfloat(ldsZMin));
float3 vTileUR = float3((viTilLL.x+16)/(float) iWidth, (viTilLL.y+16)/(float) iHeight, asfloat(ldsZMax));
vTileUR.xy = min(vTileUR.xy,float2(1.0,1.0)).xy;
// build coarse list using AABB
for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
{
// g_vBoundsBuffer holds all minima first, followed by all maxima
const float3 vMi = g_vBoundsBuffer[l];
const float3 vMa = g_vBoundsBuffer[l+g_iNrVisibLights];
if( all(vMa>vTileLL) && all(vMi<vTileUR))
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(lightOffs, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) coarseList[uIndex] = l; // add to light list
}
}
#ifdef FINE_PRUNING_ENABLED
if(t<2) ldsDoesLightIntersect[t] = 0;
#endif
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
// lightOffs keeps counting past the capacity, so clamp it
int iNrCoarseLights = lightOffs<MAX_NR_COARSE_ENTRIES ? lightOffs : MAX_NR_COARSE_ENTRIES;
#ifndef FINE_PRUNING_ENABLED
{
// no fine pruning: copy the first MAX_NR_PRUNED_ENTRIES coarse entries
int iNrLightsOut = iNrCoarseLights<MAX_NR_PRUNED_ENTRIES ? iNrCoarseLights : MAX_NR_PRUNED_ENTRIES;
if((int)t<iNrLightsOut) prunedList[t] = coarseList[t];
if(t==0) ldsNrLightsFinal=iNrLightsOut;
}
#else
{
// linear depth of the 4 pixels this thread is responsible for
float4 vLinDepths;
[unroll]for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uCrd = min( uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1) );
float3 v3ScrPos = float3(uCrd.x+0.5, uCrd.y+0.5, FetchDepth(g_depth_tex, uCrd));
vLinDepths[i] = GetLinearDepth(v3ScrPos);
}
// one bit per coarse list entry (64 entries -> 2 dwords): set when any of this thread's pixels is hit
uint uLightsFlags[2] = {0,0};
int l=0;
// we need this outer loop for when we cannot assume a wavefront is 64 wide
// since in this case we cannot assume the lights will remain sorted by type
#if !defined(XBONE) && !defined(PLAYSTATION4)
while(l<iNrCoarseLights)
#endif
{
// fetch light
int idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uint uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].uLightType : 0;
// spot
while(l<iNrCoarseLights && uLgtType==SPOT_LIGHT)
{
SFiniteLightData lgtDat = g_vLightData[idxCoarse];
const bool bIsSpotDisc = (lgtDat.flags&IS_CIRCULAR_SPOT_SHAPE)!=0;
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 fromLight = vVPos-lgtDat.vLpos.xyz;
float distSq = dot(fromLight,fromLight);
const float fSclProj = dot(fromLight, lgtDat.vLaxisZ.xyz); // spotDir = lgtDat.vLaxisZ.xyz
float2 V = abs( float2( dot(fromLight, lgtDat.vLaxisX.xyz), dot(fromLight, lgtDat.vLaxisY.xyz) ) );
// circular spots use the radial distance, square spots the larger of the two axis distances
float fDist2D = bIsSpotDisc ? length(V) : max(V.x,V.y);
// hit when inside the light's range and inside the cone/pyramid
if( all( float2(lgtDat.fSphRadiusSq, fSclProj) > float2(distSq, fDist2D*lgtDat.cotan) ) ) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].uLightType : 0;
}
// sphere
while(l<iNrCoarseLights && uLgtType==SPHERE_LIGHT)
{
SFiniteLightData lgtDat = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 vLp = lgtDat.vLpos.xyz;
float3 toLight = vLp - vVPos;
float distSq = dot(toLight,toLight);
if(lgtDat.fSphRadiusSq>distSq) uVal = 1;
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].uLightType : 0;
}
// Box
while(l<iNrCoarseLights && uLgtType==BOX_LIGHT)
{
SFiniteLightData lgtDat = g_vLightData[idxCoarse];
// serially check 4 pixels
uint uVal = 0;
for(int i=0; i<4; i++)
{
int idx = t + i*NR_THREADS;
uint2 uPixLoc = min(uint2(viTilLL.x+(idx&0xf), viTilLL.y+(idx>>4)), uint2(iWidth-1, iHeight-1));
float3 vVPos = GetViewPosFromLinDepth(uPixLoc + float2(0.5,0.5), vLinDepths[i]);
// check pixel
float3 toLight = lgtDat.vLpos.xyz - vVPos;
float3 dist = float3( dot(toLight, lgtDat.vLaxisX), dot(toLight, lgtDat.vLaxisY), dot(toLight, lgtDat.vLaxisZ) );
dist = (abs(dist) - lgtDat.vBoxInnerDist) * lgtDat.vBoxInvRange; // not as efficient as it could be
if( max(max(dist.x, dist.y), dist.z)<1 ) uVal = 1; // but allows us to not write out OuterDists
}
uLightsFlags[l<32 ? 0 : 1] |= (uVal<<(l&31));
++l; idxCoarse = l<iNrCoarseLights ? coarseList[l] : 0;
uLgtType = l<iNrCoarseLights ? g_vLightData[idxCoarse].uLightType : 0;
}
#if !defined(XBONE) && !defined(PLAYSTATION4)
// in case we have some corrupt data make sure we terminate
if(uLgtType>=MAX_TYPES) ++l;
#endif
}
// combine the hit masks of all threads
InterlockedOr(ldsDoesLightIntersect[0], uLightsFlags[0]);
InterlockedOr(ldsDoesLightIntersect[1], uLightsFlags[1]);
if(t==0) ldsNrLightsFinal = 0;
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
// thread t owns coarse entry t and appends it to the pruned list when its bit is set
if(t<(uint) iNrCoarseLights && (ldsDoesLightIntersect[t<32 ? 0 : 1]&(1<<(t&31)))!=0 )
{
unsigned int uInc = 1;
unsigned int uIndex;
InterlockedAdd(ldsNrLightsFinal, uInc, uIndex);
if(uIndex<MAX_NR_COARSE_ENTRIES) prunedList[uIndex] = coarseList[t]; // we allow up to 64 pruned lights while stored in LDS.
}
}
#endif
//
if(t<NR_LIGHT_MODELS) ldsModelListCount[t]=0;
#if !defined(XBONE) && !defined(PLAYSTATION4)
GroupMemoryBarrierWithGroupSync();
#endif
int nrLightsCombinedList = ldsNrLightsFinal<MAX_NR_COARSE_ENTRIES ? ldsNrLightsFinal : MAX_NR_COARSE_ENTRIES;
// count how many of the surviving lights belong to each light model
for(int i=t; i<nrLightsCombinedList; i+=NR_THREADS)
{
InterlockedAdd(ldsModelListCount[ g_vLightData[ prunedList[i] ].uLightModel ], 1);
}
// sort lights
#if !defined(XBONE) && !defined(PLAYSTATION4)
sortLightList((int) t, nrLightsCombinedList);
#endif
// write lights to global buffers
// localOffs walks through prunedList one light model at a time
// NOTE(review): this relies on the sorted list being grouped by light model - confirm
// that the C# side orders g_vLightData accordingly.
int localOffs=0;
int offs = tileIDX.y*nrTilesX + tileIDX.x;
for(int m=0; m<NR_LIGHT_MODELS; m++)
{
int nrLightsFinal = ldsModelListCount[ m ];
int nrLightsFinalClamped = nrLightsFinal<MAX_NR_PRUNED_ENTRIES ? nrLightsFinal : MAX_NR_PRUNED_ENTRIES;
// 16-bit slots needed: the count plus the entries, rounded up to whole dwords
const int nrDWords = ((nrLightsFinalClamped+1)+1)>>1;
for(l=(int) t; l<(int) nrDWords; l += NR_THREADS)
{
// dword 0 carries the count in its low half; every other half-word is a light index
uint uLow = l==0 ? nrLightsFinalClamped : prunedList[2*l-1+localOffs];
uint uHigh = prunedList[2*l+0+localOffs];
g_vLightList[16*offs + l] = (uLow&0xffff) | (uHigh<<16);
}
localOffs += nrLightsFinal;
offs += (nrTilesX*nrTilesY);
}
}
// original version
//float2 vRay2D = float2(max(V.x,V.y), fSclProj);
//float distSqB = bIsSpotDisc ? distSq : dot(vRay2D,vRay2D);
//if( all( float3(lgtDat.fSphRadiusSq, fSclProj, fSclProj) > float3(distSq, sqrt(distSqB)*lgtDat.fPenumbra, 0.0) ) ) uVal = 1;
// previous new version
//float fDist2DSqr = bIsSpotDisc ? dot(V,V) : (maC*maC);
//if( all( float3(lgtDat.fSphRadiusSq, (fSclProj*fSclProj), fSclProj) > float3(distSq, fDist2DSqr*cotaSqr, fSpotNearPlane) ) ) uVal = 1;
#if 0
void merge(int l, int m, int r);
void sortLightList(int localThreadID, int n)
{
for(int curr_size=1; curr_size<=n-1; curr_size = 2*curr_size)
{
for(int left_start=localThreadID*(2*curr_size); left_start<(n-1); left_start+=NR_THREADS*(2*curr_size))
{
int mid = left_start + curr_size - 1;
int right_end = min(left_start + 2*curr_size - 1, n-1);
merge(left_start, mid, right_end);
}
GroupMemoryBarrierWithGroupSync();
}
}
//groupshared unsigned int tmpBuffer[MAX_NR_COARSE_ENTRIES];
void merge(int l, int m, int r)
{
int i, j, k;
int ol = l;
int or = m+1;
int sl = m - l + 1; // capacity is size of left list = m - l + 1;
int sr = r - m; // capacity is size of right list = r - m
unsigned int tmpBuffer[] = coarseList; // re use coarse list buffer as temp buffer.
// could do this copy more efficiently before the if-statement
// in sortLightList() but this requires another GroupMemoryBarrierWithGroupSync()
for(int i=l; i<=r; i++) tmpBuffer[i] = prunedList[i];
i = 0;
j = 0;
k = l;
while (i < sl && j < sr)
{
const uint lVal = tmpBuffer[ol+i];
const uint rVal = tmpBuffer[or+j];
bool pickLeft = lVal <= rVal;
i = pickLeft ? (i+1) : i;
j = pickLeft ? j : (j+1);
prunedList[k] = pickLeft ? lVal : rVal;
k++;
}
while (i < sl)
{
prunedList[k] = tmpBuffer[ol+i];
i++; k++;
}
while (j < sr)
{
prunedList[k] = tmpBuffer[or+j];
j++; k++;
}
}
#else
// Returns the smallest power of two that is >= value_in, limited to the
// largest power of two that is <= maxValue.
//   value_in : value to round up.
//   maxValue : upper limit for the result.
// NOTE! returns 1 when value_in==0 (and also whenever maxValue < 2).
unsigned int LimitPow2AndClamp(unsigned int value_in, unsigned int maxValue)
{
    unsigned int value = 1;
    // "value <= (maxValue>>1)" is the overflow-free form of
    // "(value<<1) <= maxValue": the shifted value wraps to 0 once value
    // reaches 0x80000000, which would otherwise keep the loop running forever
    // for value_in > 0x80000000 combined with a large maxValue.
    while(value<value_in && value<=(maxValue>>1))
        value<<=1;
    return value;
}
// Sorts the first 'length' entries of prunedList into ascending order with a
// bitonic sorting network that is executed cooperatively by the thread group.
//   localThreadID : index of the calling thread inside the thread group.
//   length        : number of valid entries in prunedList.
// Every thread of the group has to call this function, since it contains
// group-wide barriers.
void sortLightList(int localThreadID, int length)
{
    // The network needs a power-of-two element count: round length up
    // (limited by MAX_NR_COARSE_ENTRIES).
    const int paddedLen = (int) LimitPow2AndClamp((unsigned int) length, MAX_NR_COARSE_ENTRIES);

    // Pad the tail with a value larger than any real index so that the
    // padding ends up behind the valid entries after sorting.
    for(int pad=length+localThreadID; pad<paddedLen; pad+=NR_THREADS)
    {
        prunedList[pad] = MAX_NR_COARSE_ENTRIES;
    }
    GroupMemoryBarrierWithGroupSync();

    // span: size of the bitonic sequences merged in this stage.
    for(int span=2; span<=paddedLen; span=2*span)
    {
        // stride: distance between the two elements of a compare/exchange pair.
        for(int stride=span>>1; stride>0; stride=stride>>1)
        {
            for(int lo=localThreadID; lo<paddedLen; lo+=NR_THREADS)
            {
                const int hi = lo^stride;
                // Each pair is handled once, by the thread owning its lower index.
                if(hi>lo)
                {
                    const unsigned int a = prunedList[lo];
                    const unsigned int b = prunedList[hi];
                    // Blocks with (lo&span)!=0 are sorted descending, the others ascending.
                    const bool descending = (lo&span)!=0;
                    const bool aGreater = (a>b);
                    if((descending != aGreater) && (a != b))
                    {
                        prunedList[lo] = b;
                        prunedList[hi] = a;
                    }
                }
            }
            // The exchanges of this step must be visible before the next step.
            GroupMemoryBarrierWithGroupSync();
        }
    }
}
#endif

9
Assets/ScriptableRenderLoop/fptl/lightlistbuild.compute.meta


fileFormatVersion: 2
guid: f54ef7cb596a714488693ef9cdaf63fb
timeCreated: 1467917168
licenseType: Pro
ComputeShaderImporter:
currentAPIMask: 4
userData:
assetBundleName:
assetBundleVariant:

20
Assets/ScriptableRenderLoop/fptl/renderloopfptl.asset


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!114 &11400000
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: ad5bf4f8e45bdd1429eadc3445df2c89, type: 3}
m_Name: renderloopfptl
m_EditorClassIdentifier:
m_DeferredShader: {fileID: 4800000, guid: 1c102a89f3460254a8c413dbdcd63a2a, type: 3}
m_DeferredReflectionShader: {fileID: 4800000, guid: 3899e06d641c2cb4cbff794df0da536b,
type: 3}
m_BuildScreenAABBShader: {fileID: 7200000, guid: e7a739144e735934b89a42a4b9d9e23c,
type: 3}
m_BuildPerTileLightListShader: {fileID: 7200000, guid: f54ef7cb596a714488693ef9cdaf63fb,
type: 3}

8
Assets/ScriptableRenderLoop/fptl/renderloopfptl.asset.meta


fileFormatVersion: 2
guid: 4dc14083116279a438e1b2de3830999f
timeCreated: 1468020641
licenseType: Pro
NativeFormatImporter:
userData:
assetBundleName:
assetBundleVariant:

497
Assets/ScriptableRenderLoop/fptl/scrbound.compute


#pragma kernel ScreenBoundsAABB
#include "..\common\ShaderBase.h"
#include "LightDefinitions.cs"
// Number of lights to process; light indices at or above this count are ignored by the kernel.
uniform int g_iNrVisibLights;
// Inverse projection matrix and projection matrix (the kernel multiplies
// view-space points by g_mProjection to obtain clip-space positions).
uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
// Input: one bounding volume description per light.
StructuredBuffer<SFiniteLightBound> g_data : register( t0 );
#define FLT_EPSILON 1.192092896e-07F // smallest such that 1.0+FLT_EPSILON != 1.0
// Threads per group: 8 lights per group, 8 threads per light.
#define NR_THREADS 64
// output buffer
// Layout: AABB minimum of light i at [i], AABB maximum at [i + g_iNrVisibLights].
RWStructuredBuffer<float3> g_vBoundsBuffer : register( u0 );
#define MAX_PNTS 9 // strictly this should be 10=6+4 but we get more wavefronts and 10 seems to never hit (fingers crossed)
// However, worst case the plane that would be skipped if such an extreme case ever happened would be backplane
// clipping gets skipped which doesn't cause any errors.
// LDS (2496 bytes)
// Per light: two scratch polygons of MAX_PNTS vertices each (source/destination
// of the clipper), stored as separate x/y/z/w arrays; 8 lights per group.
groupshared float posX[MAX_PNTS*8*2];
groupshared float posY[MAX_PNTS*8*2];
groupshared float posZ[MAX_PNTS*8*2];
groupshared float posW[MAX_PNTS*8*2];
// One packed word of clip flags per hull face: 8 lights * 6 faces.
groupshared unsigned int clipFlags[48];
// Prototypes of the helpers that are defined after the kernel.
unsigned int GetClip(const float4 P);
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p);
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r);
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex);
// Kernel: computes a screen-space AABB for every light in g_data and writes
// it to g_vBoundsBuffer (minimum at [lgtIndex], maximum at
// [lgtIndex + g_iNrVisibLights]; x/y remapped from [-1;1] to [0;1], z scaled
// by VIEWPORT_SCALE_Z).
// Work distribution: each group of NR_THREADS threads handles 8 lights with
// 8 threads per light. Phase 1 uses 6 of those 8 threads (one per face of the
// light's convex hull); phase 2 runs on a single thread per light.
//   threadID  : flattened thread index inside the group.
//   u3GroupID : group id; only .x is used.
[numthreads(NR_THREADS, 1, 1)]
void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
uint groupID = u3GroupID.x;
//uint vindex = groupID * NR_THREADS + threadID;
unsigned int g = groupID;
unsigned int t = threadID;
// subLigt: light slot inside this group (0..7); sideIndex: thread slot inside the light (0..7).
const int subLigt = (int) (t/8);
const int lgtIndex = subLigt+(int) g*8;
const int sideIndex = (int) (t%8);
// NOTE(review): read before lgtIndex is range checked; the results are only
// used under the lgtIndex<g_iNrVisibLights tests below.
SFiniteLightBound lgtDat = g_data[lgtIndex];
const float3 vBoxX = lgtDat.vBoxAxisX.xyz;
const float3 vBoxY = lgtDat.vBoxAxisY.xyz;
const float3 vBoxZ = -lgtDat.vBoxAxisZ.xyz; // flip an axis to make it right handed since Determinant(worldToView)<0
const float3 vCen = lgtDat.vCen.xyz;
const float fRadius = lgtDat.fRadius;
const float2 vScaleXY = lgtDat.vScaleXY;
// ---- Phase 1: one thread per hull face ----
{
if(sideIndex<6 && lgtIndex<(int) g_iNrVisibLights) // mask 2 out of 8 threads
{
float3 q0, q1, q2, q3;
GetQuad(q0, q1, q2, q3, vBoxX, vBoxY, vBoxZ, vCen, vScaleXY, sideIndex);
const float4 vP0 = mul(g_mProjection, float4(q0, 1));
const float4 vP1 = mul(g_mProjection, float4(q1, 1));
const float4 vP2 = mul(g_mProjection, float4(q2, 1));
const float4 vP3 = mul(g_mProjection, float4(q3, 1));
// test vertices of one quad (of the convex hull) for intersection
const unsigned int uFlag0 = GetClip(vP0);
const unsigned int uFlag1 = GetClip(vP1);
const unsigned int uFlag2 = GetClip(vP2);
const unsigned int uFlag3 = GetClip(vP3);
const float4 vPnts[] = {vP0, vP1, vP2, vP3};
// screen-space AABB of one quad (assuming no intersection)
float3 vMin, vMax;
for(int k=0; k<4; k++)
{
// Keep |w| at least FLT_EPSILON (sign preserved) so the divide below is safe.
float fW = vPnts[k].w;
float fS = fW<0 ? -1 : 1;
float fWabs = fW<0 ? (-fW) : fW;
fW = fS * (fWabs<FLT_EPSILON ? FLT_EPSILON : fWabs);
float3 vP = float3(vPnts[k].x/fW, vPnts[k].y/fW, vPnts[k].z/fW);
if(k==0) { vMin=vP; vMax=vP; }
vMax = max(vMax, vP); vMin = min(vMin, vP);
}
// Pack the four 6-bit clip codes of this face into one word (6 bits per vertex).
clipFlags[subLigt*6+sideIndex] = (uFlag0<<0) | (uFlag1<<6) | (uFlag2<<12) | (uFlag3<<18);
// store in clip buffer (only use these vMin and vMax if light is 100% visible in which case clipping isn't needed)
// Face minimum goes to slot [sideIndex], face maximum to slot [sideIndex+6].
posX[subLigt*MAX_PNTS*2 + sideIndex] = vMin.x;
posY[subLigt*MAX_PNTS*2 + sideIndex] = vMin.y;
posZ[subLigt*MAX_PNTS*2 + sideIndex] = vMin.z;
posX[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.x;
posY[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.y;
posZ[subLigt*MAX_PNTS*2 + sideIndex + 6] = vMax.z;
}
}
// if not XBONE and not PLAYSTATION4 we need a memorybarrier here
// since we can't rely on the gpu cores being 64 wide.
// We need a pound define around this.
GroupMemoryBarrierWithGroupSync();
// ---- Phase 2: one thread per light combines the six faces ----
{
int f=0;
if(sideIndex==0 && lgtIndex<(int) g_iNrVisibLights)
{
// quick acceptance or rejection
// uCollectiveAnd: clip bits set on every one of the 24 face vertices.
// uCollectiveOr : clip bits set on at least one vertex.
unsigned int uCollectiveAnd = (unsigned int) -1;
unsigned int uCollectiveOr = 0;
for(f=0; f<6; f++)
{
unsigned int uFlagAnd = clipFlags[subLigt*6+f]&0x3f;
unsigned int uFlagOr = uFlagAnd;
for(int i=1; i<4; i++)
{
unsigned int uClipBits = (clipFlags[subLigt*6+f]>>(i*6))&0x3f;
uFlagAnd &= uClipBits;
uFlagOr |= uClipBits;
}
uCollectiveAnd &= uFlagAnd;
uCollectiveOr |= uFlagOr;
}
// bSetBoundYet stays false when nothing contributes to the AABB; the light
// is then given an off-screen AABB further below.
bool bSetBoundYet = false;
float3 vMin=0.0, vMax=0.0;
if(uCollectiveAnd!=0 || uCollectiveOr==0) // all invisible or all visible (early out)
{
if(uCollectiveOr==0) // all visible
{
// No clipping needed: merge the per-face min/max stored by phase 1.
for(f=0; f<6; f++)
{
// Shadows the outer sideIndex on purpose so the phase 1 indexing expressions can be reused.
const int sideIndex = f;
float3 vFaceMi = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 0], posY[subLigt*MAX_PNTS*2 + sideIndex + 0], posZ[subLigt*MAX_PNTS*2 + sideIndex + 0]);
float3 vFaceMa = float3(posX[subLigt*MAX_PNTS*2 + sideIndex + 6], posY[subLigt*MAX_PNTS*2 + sideIndex + 6], posZ[subLigt*MAX_PNTS*2 + sideIndex + 6]);
for(int k=0; k<2; k++)
{
float3 vP = k==0 ? vFaceMi : vFaceMa;
if(f==0 && k==0) { vMin=vP; vMax=vP; }
vMax = max(vMax, vP); vMin = min(vMin, vP);
}
}
bSetBoundYet=true;
}
}
else // :( need true clipping
{
for(f=0; f<6; f++)
{
float3 q0, q1, q2, q3;
GetQuad(q0, q1, q2, q3, vBoxX, vBoxY, vBoxZ, vCen, vScaleXY, f);
// 4 vertices to a quad of the convex hull in post projection space
const float4 vP0 = mul(g_mProjection, float4(q0, 1));
const float4 vP1 = mul(g_mProjection, float4(q1, 1));
const float4 vP2 = mul(g_mProjection, float4(q2, 1));
const float4 vP3 = mul(g_mProjection, float4(q3, 1));
// iSrcIndex selects which of the light's two scratch polygons is the clipper input.
int iSrcIndex = 0;
int offs = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
// fill up source clip buffer with the quad
posX[offs+0]=vP0.x; posX[offs+1]=vP1.x; posX[offs+2]=vP2.x; posX[offs+3]=vP3.x;
posY[offs+0]=vP0.y; posY[offs+1]=vP1.y; posY[offs+2]=vP2.y; posY[offs+3]=vP3.y;
posZ[offs+0]=vP0.z; posZ[offs+1]=vP1.z; posZ[offs+2]=vP2.z; posZ[offs+3]=vP3.z;
posW[offs+0]=vP0.w; posW[offs+1]=vP1.w; posW[offs+2]=vP2.w; posW[offs+3]=vP3.w;
int iNrSrcVerts = 4;
// do true clipping
// Clip against the 6 planes in turn; source and destination swap after each plane.
for(int p=0; p<6; p++)
{
const int nrVertsDst = ClipAgainstPlane(iSrcIndex, iNrSrcVerts, subLigt, p);
iSrcIndex = 1-iSrcIndex;
iNrSrcVerts = nrVertsDst;
// Stop when the polygon has degenerated or the scratch polygon is full.
if(iNrSrcVerts<3 || iNrSrcVerts>=MAX_PNTS) break;
}
// final clipped convex primitive is in src buffer
if(iNrSrcVerts>2)
{
int offs_src = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
for(int k=0; k<iNrSrcVerts; k++)
{
float4 vCur = float4(posX[offs_src+k], posY[offs_src+k], posZ[offs_src+k], posW[offs_src+k]);
// project and apply toward AABB
// NOTE(review): unlike phase 1 there is no guard against w==0 here - confirm this cannot occur after clipping.
float3 vP = float3(vCur.x/vCur.w, vCur.y/vCur.w, vCur.z/vCur.w);
if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }
vMax = max(vMax, vP); vMin = min(vMin, vP);
}
}
}
////////////////////// look for camera frustum verts that need to be included. That is frustum vertices inside the convex hull for the light
int i=0;
for(i=0; i<8; i++) // establish 8 camera frustum vertices
{
// Corner i in post-projection space: x,y in {-1,1}, z in {0,1}; transformed with g_mInvProjection.
float3 vVertPSpace = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);
float4 v4ViewSpace = mul(g_mInvProjection, float4(vVertPSpace,1));
float3 vViewSpace = float3(v4ViewSpace.x/v4ViewSpace.w, v4ViewSpace.y/v4ViewSpace.w, v4ViewSpace.z/v4ViewSpace.w);
// Reuses the light's scratch area (clipping is finished at this point).
posX[subLigt*MAX_PNTS*2 + i] = vViewSpace.x;
posY[subLigt*MAX_PNTS*2 + i] = vViewSpace.y;
posZ[subLigt*MAX_PNTS*2 + i] = vViewSpace.z;
}
// determine which camera frustum vertices are inside the convex hull
// Bit i of uVisibFl stays set only while corner i passes the side test of every face.
uint uVisibFl = 0xff;
for(f=0; f<6; f++)
{
float3 vP0, vP1, vP2, vP3;
GetQuad(vP0, vP1, vP2, vP3, vBoxX, vBoxY, vBoxZ, vCen, vScaleXY, f);
// one edge might be zero length so we do all 4
float3 vN = cross(vP1-vP0, vP3-vP0) + cross(vP2-vP1, vP0-vP1) + cross(vP3-vP2, vP1-vP2) + cross(vP0-vP3, vP2-vP3);
float fLen = length(vN);
// Only the sign of the dot product below is used, so skipping the normalization does not change the test.
if(fLen>1) vN = normalize(vN); // this won't necessarily be a non zero vector (spot lights have all 4 top points as the same)
for(i=0; i<8; i++)
{
float3 vViewSpace = float3(posX[subLigt*MAX_PNTS*2 + i], posY[subLigt*MAX_PNTS*2 + i], posZ[subLigt*MAX_PNTS*2 + i]);
#ifdef LEFT_HAND_COORDINATES
uVisibFl &= ( dot(vViewSpace-vP0, vN)<0 ? 0xff : (~(1<<i)) );
#else
uVisibFl &= ( dot(vViewSpace-vP0, vN)>0 ? 0xff : (~(1<<i)) );
#endif
}
}
// apply camera frustum vertices inside the convex hull to the AABB
for(i=0; i<8; i++)
{
if((uVisibFl&(1<<i))!=0)
{
// Same post-projection corner as constructed above.
float3 vP = float3((i&1)!=0 ? 1 : (-1), (i&2)!=0 ? 1 : (-1), (i&4)!=0 ? 1 : 0);
if(!bSetBoundYet) { vMin=vP; vMax=vP; bSetBoundYet=true; }
vMax = max(vMax, vP); vMin = min(vMin, vP);
}
}
}
// determine AABB bound in [-1;1]x[-1;1] screen space using bounding sphere.
// Use the result to make our already established AABB from the convex hull
// potentially tighter.
if(!bSetBoundYet)
{
// set the AABB off-screen
vMin = float3(-3,-3,-3);
vMax = float3(-2,-2,-2);
}
else
{
//if((vCen.z+fRadius)<0.0)
// The sphere based x/y bound is only evaluated when the origin lies outside the sphere.
if( length(vCen)>fRadius)
{
float2 vMi, vMa;
bool2 bMi, bMa;
CalcBound(bMi, bMa, vMi, vMa, g_mInvProjection, vCen, fRadius);
// Tighten each axis only where CalcBound flagged its result as valid.
vMin.xy = bMi ? max(vMin.xy, vMi) : vMin.xy;
vMax.xy = bMa ? min(vMax.xy, vMa) : vMax.xy;
}
// Tighten the depth range with the projected z extents of the sphere.
#ifdef LEFT_HAND_COORDINATES
if((vCen.z-fRadius)>0.0)
{
float4 vPosF = mul(g_mProjection, float4(0,0,vCen.z-fRadius,1));
vMin.z = max(vMin.z, vPosF.z/vPosF.w);
}
if((vCen.z+fRadius)>0.0)
{
float4 vPosB = mul(g_mProjection, float4(0,0,vCen.z+fRadius,1));
vMax.z = min(vMax.z, vPosB.z/vPosB.w);
}
#else
if((vCen.z+fRadius)<0.0)
{
float4 vPosF = mul(g_mProjection, float4(0,0,vCen.z+fRadius,1));
vMin.z = max(vMin.z, vPosF.z/vPosF.w);
}
if((vCen.z-fRadius)<0.0)
{
float4 vPosB = mul(g_mProjection, float4(0,0,vCen.z-fRadius,1));
vMax.z = min(vMax.z, vPosB.z/vPosB.w);
}
#endif
// This else pairs with the last 'if' of whichever preprocessor branch is
// active: when that test fails the AABB is moved off-screen.
else
{
vMin = float3(-3,-3,-3);
vMax = float3(-2,-2,-2);
}
}
// we should consider doing a look-up here into a max depth mip chain
// to see if the light is occluded: vMin.z*VIEWPORT_SCALE_Z > MipTexelMaxDepth
//g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, -0.5*vMax.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
//g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
// changed for unity
// x/y are remapped from [-1;1] to [0;1] without the y flip of the commented-out variant above.
g_vBoundsBuffer[lgtIndex+0] = float3(0.5*vMin.x+0.5, 0.5*vMin.y+0.5, vMin.z*VIEWPORT_SCALE_Z);
g_vBoundsBuffer[lgtIndex+(int) g_iNrVisibLights] = float3(0.5*vMax.x+0.5, 0.5*vMax.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
}
}
}
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p);

// Clips one polygon stored in the posX/posY/posZ/posW scratch arrays against
// clip plane 'p' and writes the result to the light's other scratch polygon.
//   iSrcIndex   : 0 or 1, selects which of the light's two scratch polygons is the input.
//   iNrSrcVerts : number of vertices in the input polygon.
//   subLigt     : light slot inside the thread group (selects the scratch region).
//   p           : plane index; bit (1<<p) of GetClip() marks a vertex as outside.
// Returns the number of vertices written. Vertices that would exceed MAX_PNTS
// are dropped silently.
int ClipAgainstPlane(const int iSrcIndex, const int iNrSrcVerts, const int subLigt, const int p)
{
    const int srcBase = iSrcIndex*MAX_PNTS+subLigt*MAX_PNTS*2;
    const int dstBase = (1-iSrcIndex)*MAX_PNTS+subLigt*MAX_PNTS*2;
    const unsigned int planeBit = (1<<p);

    // Start with the last vertex as the tail of the first edge so that the
    // closing edge of the polygon is processed as well.
    const int lastVert = iNrSrcVerts-1;
    float4 vTail = float4(posX[srcBase+lastVert], posY[srcBase+lastVert], posZ[srcBase+lastVert], posW[srcBase+lastVert]);
    bool bTailInside = (GetClip(vTail)&planeBit)==0;

    int numWritten = 0;
    for(int v=0; v<iNrSrcVerts; v++)
    {
        const float4 vHead = float4(posX[srcBase+v], posY[srcBase+v], posZ[srcBase+v], posW[srcBase+v]);
        const bool bHeadInside = (GetClip(vHead)&planeBit)==0;

        // The edge crosses the plane: emit the intersection point first.
        if(bHeadInside != bTailInside)
        {
            if(numWritten<MAX_PNTS)
            {
                float4 vCross;
                if(bHeadInside) { vCross = GenNewVert(vHead, vTail, p); }
                else { vCross = GenNewVert(vTail, vHead, p); }
                posX[dstBase+numWritten] = vCross.x;
                posY[dstBase+numWritten] = vCross.y;
                posZ[dstBase+numWritten] = vCross.z;
                posW[dstBase+numWritten] = vCross.w;
                ++numWritten;
            }
        }

        // Vertices on the visible side are carried over unchanged.
        if(bHeadInside)
        {
            if(numWritten<MAX_PNTS)
            {
                posX[dstBase+numWritten] = vHead.x;
                posY[dstBase+numWritten] = vHead.y;
                posZ[dstBase+numWritten] = vHead.z;
                posW[dstBase+numWritten] = vHead.w;
                ++numWritten;
            }
        }

        vTail = vHead;
        bTailInside = bHeadInside;
    }
    return numWritten;
}
// Returns a 6-bit outcode for the homogeneous point P. A set bit means that
// P violates the corresponding clip condition:
//   bit 0 (1)  : x < -w      bit 1 (2)  : x > w
//   bit 2 (4)  : y < -w      bit 3 (8)  : y > w
//   bit 4 (16) : z < 0       bit 5 (32) : z > w
// A return value of 0 means that no condition is violated.
unsigned int GetClip(const float4 P)
{
    unsigned int uCode = 0;
    if(P.x < -P.w) { uCode |= 1; }
    if(P.x >  P.w) { uCode |= 2; }
    if(P.y < -P.w) { uCode |= 4; }
    if(P.y >  P.w) { uCode |= 8; }
    if(P.z <  0)   { uCode |= 16; }
    if(P.z >  P.w) { uCode |= 32; }
    return uCode;
}
// Returns the point where the edge between vVisib and vInvisib crosses clip
// plane 'p'.
//   vVisib   : edge end point on the visible side of the plane.
//   vInvisib : edge end point on the clipped side of the plane.
//   p        : plane index; p/2 selects the coordinate (0=x, 1=y, otherwise z).
// The plane is "coordinate = fSign*w" with fSign = -1 for even p, +1 for odd p
// and 0 for p==4 (i.e. z = 0).
float4 GenNewVert(const float4 vVisib, const float4 vInvisib, const int p)
{
    float fSign;
    if(p==4) { fSign = 0; }
    else { fSign = ((p&1)==0) ? -1 : 1; }

    const int iAxis = ((uint) p)/2;

    // Pick the coordinate that is being clipped from both end points.
    float fVisCoord, fInvCoord;
    if(iAxis==0) { fVisCoord = vVisib.x; fInvCoord = vInvisib.x; }
    else if(iAxis==1) { fVisCoord = vVisib.y; fInvCoord = vInvisib.y; }
    else { fVisCoord = vVisib.z; fInvCoord = vInvisib.z; }

    // Solve fSign*((vVisib.w-vInvisib.w)*t + vInvisib.w) = (fVisCoord-fInvCoord)*t + fInvCoord for t.
    const float fParam = (fSign*vInvisib.w-fInvCoord)/((fVisCoord-fInvCoord) - fSign*(vVisib.w-vInvisib.w));
    float4 vResult = vVisib*fParam + vInvisib*(1-fParam);

    // Snap the clipped coordinate exactly onto the plane to remove rounding error.
    if(iAxis==0) { vResult.x = fSign*vResult.w; }
    else if(iAxis==1) { vResult.y = fSign*vResult.w; }
    else { vResult.z = fSign*vResult.w; }
    return vResult;
}
// Builds the four corners of one face of a light's bounding hull.
//   p0..p3              : (out) the corners of the face.
//   vBoxX, vBoxY, vBoxZ : the three box axes.
//   vCen                : box center.
//   vScaleXY            : scale applied to one end of the box.
//   sideIndex           : 0/1 select the X faces, 2/3 the Y faces, any other
//                         value the Z faces; odd indices use sign +1, even -1.
// For the X and Y faces only p1 and p2 are affected by vScaleXY; for the odd
// Z face all four corners are.
void GetQuad(out float3 p0, out float3 p1, out float3 p2, out float3 p3, const float3 vBoxX, const float3 vBoxY, const float3 vBoxZ, const float3 vCen, const float2 vScaleXY, const int sideIndex)
{
    const bool bOddSide = (sideIndex & 1) != 0;
    const float fSign = bOddSide ? 1 : (-1);

    int iAxis = 2;
    if (sideIndex == 0 || sideIndex == 1) { iAxis = 0; }
    else if (sideIndex == 2 || sideIndex == 3) { iAxis = 1; }

    // vOut points from the center to the face, vSide and vUp span the face.
    float3 vOut, vSide, vUp;
    if (iAxis == 0)
    {
        vOut = fSign*vBoxX;
        vSide = fSign*(-vBoxY);
        vUp = vBoxZ;
    }
    else if (iAxis == 1)
    {
        vOut = fSign*(-vBoxY);
        vSide = fSign*(-vBoxX);
        vUp = vBoxZ;
    }
    else
    {
        vOut = fSign*vBoxZ;
        vSide = fSign*(-vBoxY);
        vUp = -vBoxX;
    }

    // Odd Z face: the whole face is scaled.
    if (iAxis == 2 && bOddSide)
    {
        vSide *= vScaleXY.y;
        vUp *= vScaleXY.x;
    }

    // X/Y faces: only the edge used by p1 and p2 is scaled.
    float3 vOutScaled = vOut;
    float3 vSideScaled = vSide;
    if (iAxis == 0)
    {
        vOutScaled *= vScaleXY.x;
        vSideScaled *= vScaleXY.y;
    }
    else if (iAxis == 1)
    {
        vOutScaled *= vScaleXY.y;
        vSideScaled *= vScaleXY.x;
    }

    // vCen + vOut is the center of the face when vScaleXY is 1.0.
    p0 = vCen + vOut + vSide - vUp;
    p1 = vCen + vOutScaled + vSideScaled + vUp;
    p2 = vCen + vOutScaled - vSideScaled + vUp;
    p3 = vCen + vOut - vSide - vUp;
}
// Transforms a plane equation by multiplying it, as a row vector, with
// InvProjection.
//   InvProjection : matrix the plane is multiplied with.
//   plane         : plane coefficients (a, b, c, d).
// Returns the transformed plane coefficients.
float4 TransformPlaneToPostSpace(float4x4 InvProjection, float4 plane)
{
    const float4 vPostPlane = mul(plane, InvProjection);
    return vPostPlane;
}
// Computes two 2D normals n (returned as .xy and .zw) that each satisfy
// dot(n, posXY_in) = -r.
//   posXY_in : 2D position (the caller passes circle centers).
//   r        : radius.
// The computation contains sqrt(dot(posXY_in,posXY_in) - r*r), so the result
// is only meaningful when the origin lies outside the circle.
float4 EvalPlanePair(float2 posXY_in, float r)
{
    // Work in a frame rotated by 90 degrees when |x| > |y| so that the
    // divisions below use the component with the larger magnitude.
    const bool bRotated = abs(posXY_in.y)<abs(posXY_in.x);
    float2 vPos = posXY_in;
    if (bRotated) { vPos = float2(-posXY_in.y, posXY_in.x); }

    const float fDistSq = dot(vPos, vPos);
    const float fDisc = vPos.y * sqrt(fDistSq - r*r);

    // First components of the two solutions ...
    float4 vNormals;
    vNormals.x = (-r*vPos.x - fDisc) / fDistSq;
    vNormals.z = (-r*vPos.x + fDisc) / fDistSq;
    // ... and the matching second components from dot(n, vPos) = -r.
    vNormals.y = (-r-vNormals.x*vPos.x) / vPos.y;
    vNormals.w = (-r-vNormals.z*vPos.x) / vPos.y;

    // Undo the 90 degree rotation.
    if (bRotated) { vNormals = Vec4(vNormals.y, -vNormals.x, vNormals.w, -vNormals.z); }
    return vNormals;
}
// Computes a 2D bound in post-projection space for a sphere given in view space.
//   bIsMinValid / bIsMaxValid : (out) per-axis flags telling whether the
//                               matching component of vMin / vMax may be used.
//   vMin / vMax               : (out) lower / upper bound in x and y.
//   InvProjection             : matrix handed to TransformPlaneToPostSpace().
//   pos_view_space            : sphere center in view space.
//   r                         : sphere radius.
void CalcBound(out bool2 bIsMinValid, out bool2 bIsMaxValid, out float2 vMin, out float2 vMax, float4x4 InvProjection, float3 pos_view_space, float r)
{
    // One pair of bounding planes in the x/z slice and one in the y/z slice.
    float4 vPairX = EvalPlanePair(float2(pos_view_space.x, pos_view_space.z), r);
    float4 vPairY = EvalPlanePair(float2(pos_view_space.y, pos_view_space.z), r);
#ifdef LEFT_HAND_COORDINATES
    // need to swap left/right and top/bottom planes when using left hand system
    vPairX = vPairX.zwxy;
    vPairY = vPairY.zwxy;
#endif

    bIsMinValid = bool2(vPairX.z<0, vPairY.z<0);
    bIsMaxValid = bool2((-vPairX.x)<0, (-vPairY.x)<0);

    // Expand the 2D normals to full plane equations and transform them.
    // Hopefully the compiler takes the zeros into account, which should be the
    // case since TransformPlaneToPostSpace() is done with multiply-adds and
    // not dot product instructions.
    const float4 vPlaneMaxX = TransformPlaneToPostSpace(InvProjection, float4(vPairX.x, 0, vPairX.y, 0));
    const float4 vPlaneMinX = TransformPlaneToPostSpace(InvProjection, float4(vPairX.z, 0, vPairX.w, 0));
    const float4 vPlaneMaxY = TransformPlaneToPostSpace(InvProjection, float4(0, vPairY.x, vPairY.y, 0));
    const float4 vPlaneMinY = TransformPlaneToPostSpace(InvProjection, float4(0, vPairY.z, vPairY.w, 0));

    // Normalize the planes to the forms (1,0,0,D) and (0,1,0,D); the bound is -D.
    vMax = -float2(vPlaneMaxX.w / vPlaneMaxX.x, vPlaneMaxY.w / vPlaneMaxY.y);
    vMin = -float2(vPlaneMinX.w / vPlaneMinX.x, vPlaneMinY.w / vPlaneMinY.y);
}

9
Assets/ScriptableRenderLoop/fptl/scrbound.compute.meta


fileFormatVersion: 2
guid: e7a739144e735934b89a42a4b9d9e23c
timeCreated: 1467917168
licenseType: Pro
ComputeShaderImporter:
currentAPIMask: 4
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存