浏览代码

Build probability tables using only 2D textures (cubemaps as arrays)

/main
Evgenii Golubev 8 年前
当前提交
c31ad089
共有 6 个文件被更改,包括 251 次插入57 次删除
  1. 207
      Assets/ScriptableRenderLoop/HDRenderLoop/Sky/Resources/BuildProbabilityTables.compute
  2. 7
      Assets/ScriptableRenderLoop/HDRenderLoop/Sky/Resources/GGXConvolve.shader
  3. 66
      Assets/ScriptableRenderLoop/HDRenderLoop/Sky/SkyManager.cs
  4. 2
      Assets/ScriptableRenderLoop/ShaderLibrary/API/D3D11.hlsl
  5. 24
      Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl
  6. 2
      Assets/ScriptableRenderLoop/ShaderLibrary/Fibonacci.hlsl

207
Assets/ScriptableRenderLoop/HDRenderLoop/Sky/Resources/BuildProbabilityTables.compute


// TODO: add description
// Given a cube map (passed as a 2D array), builds CDFs of two distributions:
// 1. 1D texture with marginal densities, telling us the likelihood of selecting a particular row,
// 2. 2D texture with conditional densities, which correspond to the PDF of the texel given its row.
// Ref: PBRT v3, 13.6.7 "Piecewise-Constant 2D Distributions".
#define textureSize 128 // The size of the MIP level 1 of the input texture
int cubeFaceId; // Cubemap face index
// The size of the input texture (each cubemap face is TEXTURE_SIZE x TEXTURE_SIZE texels).
#define TEXTURE_SIZE 256
// The size of the MIP level 1 of the input texture.
// The expansion is parenthesized so that the macro behaves as a single operand
// inside larger expressions such as 'x / MIP1_SIZE' or 'x % MIP1_SIZE'.
#define MIP1_SIZE (TEXTURE_SIZE / 2)
TEXTURE2D(envMap) // Cubemap face (s.t. MIP 1: [textureSize x textureSize])
TEXTURE2D_ARRAY(envMap) // Cubemap as an array: [TEXTURE_SIZE x TEXTURE_SIZE x 6]
RWTexture2D<float> marginalRowDensities; // One row per face: [textureSize x 8]
RWTexture2D<float> conditionalDensities; // Cubemap face: [textureSize x textureSize]
RWTexture2D<float> marginalRowDensities; // 1D texture: [(6 * MIP1_SIZE + 1) x 1]
RWTexture2D<float> conditionalDensities; // Array: [MIP1_SIZE x (6 * MIP1_SIZE)]
/* --- Shared --- */
/* --- Implementation --- */
groupshared float rowIntegralValues[textureSize];
// Creates an access pattern which avoids shared memory bank conflicts.
#define NUM_BANKS 32
#define SHARED_MEM(x) ((x) + (x) / NUM_BANKS)
/* --- Implementation --- */
#pragma kernel ComputeConditionalDensities
#pragma kernel BuildProabilityTables
groupshared float rowVals[SHARED_MEM(MIP1_SIZE)];
[numthreads(1, textureSize, 1)]
void BuildProabilityTables(uint3 groupId : SV_GroupID,
uint3 groupThreadId : SV_GroupThreadID,
uint3 dispatchThreadId : SV_DispatchThreadID,
uint groupIndex : SV_GroupIndex)
[numthreads(MIP1_SIZE / 2, 1, 1)]
void ComputeConditionalDensities(uint3 groupId : SV_GroupID,
uint3 groupThreadId : SV_GroupThreadID)
// A single thread group processes a row of 'textureSize' texels.
const int j = groupThreadId.y;
// There are (MIP1_SIZE x 6) thread groups.
// A single thread group processes a row of MIP1_SIZE texels (2 per thread).
const uint n = MIP1_SIZE;
const uint i = groupThreadId.x;
const uint j = groupId.x;
const uint k = groupId.y;
const uint jk = Mad24(k, n, j);
const uint i1 = i;
const uint i2 = i + n / 2;
// TODO: reduce storage requirements.
/* HUGE */ float temp[textureSize];
// --------------------------------------------------------------------
// Compute the integral of the step function (row values).
// Perform a block-level parallel scan.
// Ref: GPU Gems 3, Chapter 39: "Parallel Prefix Sum (Scan) with CUDA".
// TODO: process 4 texels per thread, and manually unroll.
// --------------------------------------------------------------------
// Compute the integral of the step function.
float rowIntegralValue = 0.0;
// Step 1: load the row of data into shared memory.
// We use MIP level 1 to account for interpolation during light sampling.
// Ref: PBRT v3, page 847.
float3 c1 = LOAD_TEXTURE2D_ARRAY_LOD(envMap, uint2(i1, j), k, 1).rgb;
float3 c2 = LOAD_TEXTURE2D_ARRAY_LOD(envMap, uint2(i2, j), k, 1).rgb;
rowVals[SHARED_MEM(i1)] = c1.r + c1.g + c1.b;
rowVals[SHARED_MEM(i2)] = c2.r + c2.g + c2.b;
// Suppress the D3D compiler warning.
int i;
uint offset;
// TODO: run in parallel.
for (i = 0; i < textureSize; i++)
// Step 2: execute the up-sweep phase.
for (offset = 1; offset <= n / 2; offset *= 2)
temp[i] = rowIntegralValue;
GroupMemoryBarrierWithGroupSync();
// We use MIP level 1 to account for interpolation during light sampling.
// Ref: PBRT v3, page 847.
float3 color = LOAD_TEXTURE2D_LOD(envMap, int2(i, j), 1).rgb;
float intensity = color.r + color.g + color.b;
/// a1 = (2 * i + 1) * offset - 1;
uint a1 = Mad24(Mad24(2, i, 1), offset, -1);
uint a2 = a1 + offset;
rowIntegralValue += intensity / textureSize;
if (a2 < n)
{
rowVals[SHARED_MEM(a2)] += rowVals[SHARED_MEM(a1)];
}
GroupMemoryBarrierWithGroupSync();
rowIntegralValue = max(rowIntegralValue, FLT_MIN);
float rowValSum = max(rowVals[SHARED_MEM(n - 1)], FLT_MIN);
// Compute the CDF. Note: the value at (i = textureSize) is implicitly 1.
// TODO: run in parallel.
for (i = 0; i < textureSize; i++)
if (i == 0)
conditionalDensities[int2(i, j)] = temp[i] / rowIntegralValue;
float rowIntegralValue = rowValSum / n;
marginalRowDensities[uint2(jk, 0)] = rowIntegralValue;
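// Clear the last element (the root of the scan tree): the down-sweep moves this 0 to the 1st element, as the exclusive scan requires.
// NOTE(review): every thread reads 'rowValSum' from this same element just above, with no barrier before this write - looks like a race; confirm.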
rowVals[SHARED_MEM(n - 1)] = 0.0;
}
// Step 3: execute the down-sweep phase.
for (offset = n / 2; offset > 0; offset /= 2)
{
GroupMemoryBarrierWithGroupSync();
/// a1 = (2 * i + 1) * offset - 1;
uint a1 = Mad24(Mad24(2, i, 1), offset, -1);
uint a2 = a1 + offset;
if (a2 < n)
{
float t1 = rowVals[SHARED_MEM(a1)];
rowVals[SHARED_MEM(a1)] = rowVals[SHARED_MEM(a2)];
rowVals[SHARED_MEM(a2)] += t1;
}
// Store the value of the integral.
rowIntegralValues[j] = rowIntegralValue;
if (groupIndex == 0)
// Compute the CDF. Note: the value at (i = n) is implicitly 1.
conditionalDensities[uint2(i1, jk)] = rowVals[SHARED_MEM(i1)] / rowValSum;
conditionalDensities[uint2(i2, jk)] = rowVals[SHARED_MEM(i2)] / rowValSum;
}
#pragma kernel ComputeMarginalRowDensities
groupshared float rowInts[SHARED_MEM(8 * MIP1_SIZE)];
[numthreads(8 * MIP1_SIZE / 2, 1, 1)]
void ComputeMarginalRowDensities(uint3 groupThreadId : SV_GroupThreadID)
{
// The size of the input is (6 * MIP1_SIZE).
// However, the algorithm only works with inputs of sizes which are powers of 2,
// therefore there is a single thread group processing (8 * MIP1_SIZE) texels (2 per thread).
const uint sz = 6 * MIP1_SIZE;
const uint n = 8 * MIP1_SIZE;
const uint i = groupThreadId.x;
const uint i1 = i;
const uint i2 = i + n / 2;
// --------------------------------------------------------------------
// Compute the integral of the step function (row integrals).
// Perform a block-level parallel scan.
// Ref: GPU Gems 3, Chapter 39: "Parallel Prefix Sum (Scan) with CUDA".
// TODO: process 4 texels per thread, and manually unroll.
// --------------------------------------------------------------------
// Step 1: load the row of data into shared memory.
rowInts[SHARED_MEM(i1)] = (i1 < sz) ? marginalRowDensities[uint2(i1, 0)] : 0.0;
rowInts[SHARED_MEM(i2)] = (i2 < sz) ? marginalRowDensities[uint2(i2, 0)] : 0.0;
uint offset;
// Step 2: execute the up-sweep phase.
for (offset = 1; offset <= n / 2; offset *= 2)
// Compute the integral of the step function.
float imgIntegralValue = 0.0;
GroupMemoryBarrierWithGroupSync();
/// a1 = (2 * i + 1) * offset - 1;
uint a1 = Mad24(Mad24(2, i, 1), offset, -1);
uint a2 = a1 + offset;
// TODO: run in parallel.
for (i = 0; i < textureSize; i++)
if (a2 < n)
temp[i] = imgIntegralValue;
rowInts[SHARED_MEM(a2)] += rowInts[SHARED_MEM(a1)];
}
}
GroupMemoryBarrierWithGroupSync();
// Prevent NaNs arising from the division of 0 by 0.
float rowIntSum = max(rowInts[SHARED_MEM(n - 1)], FLT_MIN);
if (i == 0)
{
float imgIntegralValue = rowIntSum / sz;
marginalRowDensities[uint2(sz, 0)] = imgIntegralValue;
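// Clear the last element (the root of the scan tree): the down-sweep moves this 0 to the 1st element, as the exclusive scan requires.
// NOTE(review): every thread reads 'rowIntSum' from this same element just above, with no barrier before this write - looks like a race; confirm.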
rowInts[SHARED_MEM(n - 1)] = 0.0;
}
// Step 3: execute the down-sweep phase.
for (offset = n / 2; offset > 0; offset /= 2)
{
GroupMemoryBarrierWithGroupSync();
imgIntegralValue += rowIntegralValues[i] / textureSize;
}
/// a1 = (2 * i + 1) * offset - 1;
uint a1 = Mad24(Mad24(2, i, 1), offset, -1);
uint a2 = a1 + offset;
// Compute the CDF. Note: the value at (i = textureSize) is implicitly 1.
// TODO: run in parallel.
for (i = 0; i < textureSize; i++)
if (a2 < n)
marginalRowDensities[int2(i, cubeFaceId)] = temp[i] / imgIntegralValue;
float t1 = rowInts[SHARED_MEM(a1)];
rowInts[SHARED_MEM(a1)] = rowInts[SHARED_MEM(a2)];
rowInts[SHARED_MEM(a2)] += t1;
// Store the value of the integral of the entire image.
// TODO: find a better place for this.
marginalRowDensities[int2(0, 6)] = imgIntegralValue;
GroupMemoryBarrierWithGroupSync();
// Compute the CDF. Note: the value at (i = n) is implicitly 1.
if (i1 < sz) { marginalRowDensities[uint2(i1, 0)] = rowInts[SHARED_MEM(i1)] / rowIntSum; }
if (i2 < sz) { marginalRowDensities[uint2(i2, 0)] = rowInts[SHARED_MEM(i2)] / rowIntSum; }
}

7
Assets/ScriptableRenderLoop/HDRenderLoop/Sky/Resources/GGXConvolve.shader


// Input cubemap and its sampler.
TEXTURECUBE(_MainTex);
SAMPLERCUBE(sampler_MainTex);
// Conditional density table and its sampler.
TEXTURE2D(_ConditionalDensities);
// Terminated with ';' like every other declaration in this list.
SAMPLER2D(sampler_ConditionalDensities);
// Marginal row density table and its sampler.
TEXTURE2D(_MarginalRowDensities);
SAMPLER2D(sampler_MarginalRowDensities);
// Scalar material parameters.
float _Level;
float _InvOmegaP;

66
Assets/ScriptableRenderLoop/HDRenderLoop/Sky/SkyManager.cs


using System.Collections.Generic;
using System;
namespace UnityEngine.Experimental.ScriptableRenderLoop
{
[Serializable]

{
RenderTexture m_SkyboxCubemapRT = null;
RenderTexture m_SkyboxGGXCubemapRT = null;
RenderTexture m_SkyboxMarginalRowCdfRT = null;
RenderTexture m_SkyboxConditionalCdfRT = null;
ComputeShader m_BuildProbabilityTablesCS = null;
int m_ConditionalDensitiesKernel = -1;
int m_MarginalRowDensitiesKernel = -1;
Vector4 m_CubemapScreenSize;
Matrix4x4[] m_faceCameraViewProjectionMatrix = new Matrix4x4[6];

{
Utilities.Destroy(m_SkyboxCubemapRT);
Utilities.Destroy(m_SkyboxGGXCubemapRT);
Utilities.Destroy(m_SkyboxMarginalRowCdfRT);
Utilities.Destroy(m_SkyboxConditionalCdfRT);
m_UpdateRequired = true; // Special case. Even if update mode is set to OnDemand, we need to regenerate the environment after destroying the texture.
}

m_SkyboxGGXCubemapRT.autoGenerateMips = false;
m_SkyboxGGXCubemapRT.filterMode = FilterMode.Trilinear;
m_SkyboxGGXCubemapRT.Create();
// + 1 because we store the value of the integral of the cubemap at the end of the texture.
m_SkyboxMarginalRowCdfRT = new RenderTexture(6 * resolution / 2 + 1, 1, 1, RenderTextureFormat.RFloat);
m_SkyboxMarginalRowCdfRT.dimension = TextureDimension.Tex2D;
m_SkyboxMarginalRowCdfRT.useMipMap = false;
m_SkyboxMarginalRowCdfRT.autoGenerateMips = false;
m_SkyboxMarginalRowCdfRT.enableRandomWrite = true;
m_SkyboxMarginalRowCdfRT.filterMode = FilterMode.Point;
m_SkyboxMarginalRowCdfRT.Create();
m_SkyboxConditionalCdfRT = new RenderTexture(resolution / 2, 6 * resolution / 2, 1, RenderTextureFormat.RFloat);
m_SkyboxConditionalCdfRT.dimension = TextureDimension.Tex2D;
m_SkyboxConditionalCdfRT.useMipMap = false;
m_SkyboxConditionalCdfRT.autoGenerateMips = false;
m_SkyboxConditionalCdfRT.enableRandomWrite = true;
m_SkyboxConditionalCdfRT.filterMode = FilterMode.Point;
m_SkyboxConditionalCdfRT.Create();
}
m_CubemapScreenSize = new Vector4((float)resolution, (float)resolution, 1.0f / (float)resolution, 1.0f / (float)resolution);

// TODO: We need to have an API to send our sky information to Enlighten. For now use a workaround through skybox/cubemap material...
m_StandardSkyboxMaterial = Utilities.CreateEngineMaterial("Skybox/Cubemap");
m_GGXConvolveMaterial = Utilities.CreateEngineMaterial("Hidden/HDRenderLoop/GGXConvolve");
m_BuildProbabilityTablesCS = Resources.Load<ComputeShader>("BuildProbabilityTables");
m_ConditionalDensitiesKernel = m_BuildProbabilityTablesCS.FindKernel("ComputeConditionalDensities");
m_MarginalRowDensitiesKernel = m_BuildProbabilityTablesCS.FindKernel("ComputeMarginalRowDensities");
m_CurrentUpdateTime = 0.0f;
}

Utilities.Destroy(m_GGXConvolveMaterial);
Utilities.Destroy(m_SkyboxCubemapRT);
Utilities.Destroy(m_SkyboxGGXCubemapRT);
Utilities.Destroy(m_SkyboxMarginalRowCdfRT);
Utilities.Destroy(m_SkyboxConditionalCdfRT);
if(m_Renderer != null)
m_Renderer.Cleanup();

}
}
/// <summary>
/// Dispatches the two kernels of the BuildProbabilityTables compute shader:
/// first the conditional densities kernel, then the marginal row densities kernel.
/// </summary>
/// <param name="renderLoop">Render loop used to execute the recorded command buffer.</param>
private void BuildProbabilityTables(RenderLoop renderLoop)
{
    // Bind the input cubemap as a Texture2DArray.
    // TODO: for some reason, Unity only binds the first face...
    m_BuildProbabilityTablesCS.SetTexture(m_ConditionalDensitiesKernel, "envMap", m_SkyboxCubemapRT);

    // Bind the outputs. The marginal table is bound to both kernels.
    m_BuildProbabilityTablesCS.SetTexture(m_ConditionalDensitiesKernel, "marginalRowDensities", m_SkyboxMarginalRowCdfRT);
    m_BuildProbabilityTablesCS.SetTexture(m_ConditionalDensitiesKernel, "conditionalDensities", m_SkyboxConditionalCdfRT);
    m_BuildProbabilityTablesCS.SetTexture(m_MarginalRowDensitiesKernel, "marginalRowDensities", m_SkyboxMarginalRowCdfRT);

    // TODO: the shader has 'TEXTURE_SIZE' hard-coded to 256!
    int mip1Size = (int)m_SkyParameters.resolution / 2;

    // 'using' guarantees the command buffer is released even if recording or execution throws.
    using (var cmd = new CommandBuffer() { name = "" })
    {
        // (mip1Size x 6) thread groups for the first kernel.
        cmd.DispatchCompute(m_BuildProbabilityTablesCS, m_ConditionalDensitiesKernel, mip1Size, 6, 1);
        // A single thread group for the second kernel.
        cmd.DispatchCompute(m_BuildProbabilityTablesCS, m_MarginalRowDensitiesKernel, 1, 1, 1);
        renderLoop.ExecuteCommandBuffer(cmd);
    }
}
bool useMIS = false;
using (new Utilities.ProfilingSample("Sky Pass: GGX Convolution", renderLoop))
{
int mipCount = 1 + (int)Mathf.Log(input.width, 2.0f);

return;
}
if (useMIS)
{
BuildProbabilityTables(renderLoop);
}
// Copy the first mip.

m_GGXConvolveMaterial.SetTexture("_MainTex", input);
m_GGXConvolveMaterial.SetFloat("_InvOmegaP", invOmegaP);
if (useMIS)
{
m_GGXConvolveMaterial.SetTexture("_ConditionalDensities", m_SkyboxConditionalCdfRT);
m_GGXConvolveMaterial.SetTexture("_MarginalRowDensities", m_SkyboxMarginalRowCdfRT);
}
for (int mip = 1; mip < ((int)EnvConstants.SpecCubeLodStep + 1); ++mip)
{

2
Assets/ScriptableRenderLoop/ShaderLibrary/API/D3D11.hlsl


// Texel loads addressed by unnormalized integer coordinates; no sampler is involved.
// Loads 'unCoord2' with the last Load() coordinate component fixed to 0.
#define LOAD_TEXTURE2D(textureName, unCoord2) textureName.Load(int3(unCoord2, 0))
// Same as above, with 'lod' passed as the last Load() coordinate component.
#define LOAD_TEXTURE2D_LOD(textureName, unCoord2, lod) textureName.Load(int3(unCoord2, lod))
// Loads a single sample ('sampleIndex') at 'unCoord2'.
#define LOAD_TEXTURE2D_MSAA(textureName, unCoord2, sampleIndex) textureName.Load(unCoord2, sampleIndex)
// Array variants: 'index' is packed as the third coordinate component, followed by 0 or 'lod'.
#define LOAD_TEXTURE2D_ARRAY(textureName, unCoord2, index) textureName.Load(int4(unCoord2, index, 0))
#define LOAD_TEXTURE2D_ARRAY_LOD(textureName, unCoord2, index, lod) textureName.Load(int4(unCoord2, index, lod))
// Gather wrappers: forward to the texture's Gather() with the given sampler and coordinates.
#define GATHER_TEXTURE2D(textureName, samplerName, coord2) textureName.Gather(samplerName, coord2)
// Array variant: 'index' is appended to 'coord2' as the third coordinate component.
#define GATHER_TEXTURE2D_ARRAY(textureName, samplerName, coord2, index) textureName.Gather(samplerName, float3(coord2, index))

24
Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl


#define Clamp clamp
#endif // INTRINSIC_CLAMP
#ifndef INTRINSIC_MUL24
// Signed fallback for Mul24: an ordinary integer multiplication of 'a' and 'b'.
int Mul24(int a, int b)
{
    const int product = a * b;
    return product;
}
// Unsigned fallback for Mul24: an ordinary integer multiplication of 'a' and 'b'.
uint Mul24(uint a, uint b)
{
    const uint product = a * b;
    return product;
}
#endif // INTRINSIC_MUL24
#ifndef INTRINSIC_MAD24
// Signed fallback for Mad24: multiplies 'a' by 'b', then adds 'c'.
int Mad24(int a, int b, int c)
{
    const int product = a * b;
    return product + c;
}
// Unsigned fallback for Mad24: multiplies 'a' by 'b', then adds 'c'.
uint Mad24(uint a, uint b, uint c)
{
    const uint product = a * b;
    return product + c;
}
#endif // INTRINSIC_MAD24
#ifndef INTRINSIC_MED3
float Med3(float a, float b, float c)
{

2
Assets/ScriptableRenderLoop/ShaderLibrary/Fibonacci.hlsl


int fibN2 = sampleCount;
// These are all constants, so this loop will be optimized away.
for (int j = 0; j < 16; j++)
for (int j = 1; j < 16; j++)
{
if (k_FibonacciSeq[j] == fibN1)
{

正在加载...
取消
保存