您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
160 行
7.1 KiB
160 行
7.1 KiB
// Given a cube map (bound below as the TEXTURECUBE 'envMap'), builds CDFs of two distributions:
|
|
// 1. 1D texture with marginal densities, telling us the likelihood of selecting a particular row,
|
|
// 2. 2D texture with conditional densities, which correspond to the PDF of the texel given its row.
|
|
// Ref: PBRT v3, 13.6.7 "Piecewise-Constant 2D Distributions".
|
|
// Note that we use the equiareal sphere-to-square mapping instead of the latitude-longitude one.
|
|
|
|
#include "Common.hlsl"
|
|
#include "ImageBasedLighting.hlsl"
|
|
|
|
/* --- Input --- */
|
|
|
|
// Resolution of the equiareal (sphere-to-square) map over which the distributions are tabulated.
// The kernels below launch TEXTURE_WIDTH / 2 and TEXTURE_HEIGHT / 2 threads per group respectively.
// TEXTURE_WIDTH is parenthesized so that it expands as a single value inside any expression
// (e.g. 'x / TEXTURE_WIDTH' or 'x % TEXTURE_WIDTH').
#define TEXTURE_HEIGHT 256                  // Equiareal texture map: cos(theta) = 1.0 - 2.0 * v
#define TEXTURE_WIDTH  (2 * TEXTURE_HEIGHT) // Equiareal texture map: phi = TWO_PI * (1.0 - u)
|
|
|
|
// Environment cube map from which the distributions are built; ComputeConditionalDensities samples it at LOD 0.
TEXTURECUBE(envMap); // Input cubemap
|
|
// Sampler passed together with 'envMap' to SAMPLE_TEXTURECUBE_LOD.
SAMPLERCUBE(sampler_envMap);
|
|
|
|
/* --- Output --- */
|
|
|
|
// ComputeConditionalDensities stores the integral of row j at (j, 0).
// ComputeMarginalRowDensities reads those TEXTURE_HEIGHT values back, overwrites them in place with their
// normalized exclusive prefix sum (the row CDF), and stores the image integral at (TEXTURE_HEIGHT, 0).
RWTexture2D<float> marginalRowDensities; // [(TEXTURE_HEIGHT + 1) x 1] (+ 1 for the image integral)
|
|
// Row j holds the exclusive prefix sum of that row's texel values (R + G + B), divided by the row total,
// i.e. the per-row CDF. The final CDF value (1) is implicit and not stored.
RWTexture2D<float> conditionalDensities; // [TEXTURE_WIDTH x TEXTURE_HEIGHT]
|
|
|
|
/* --- Implementation --- */
|
|
|
|
// Creates an access pattern which avoids shared memory bank conflicts.
|
|
// Number of consecutive logical indices after which SHARED_MEM() skips one padding slot.
// NOTE(review): presumably chosen to match the shared memory bank count of the target GPUs - confirm.
#define NUM_BANKS 32
|
|
// Maps a logical element index 'x' to a padded index: one extra slot is skipped per NUM_BANKS elements.
// Arrays addressed through this macro are also sized with it (see 'rowVals' and 'rowInts').
#define SHARED_MEM(x) ((x) + (x) / NUM_BANKS)
|
|
|
|
// Performs a block-level parallel exclusive scan (prefix sum), in place, over the first 'n' elements of 'temp'.
// Ref: GPU Gems 3, Chapter 39: "Parallel Prefix Sum (Scan) with CUDA".
//
// Parameters (substituted textually):
//   i    - per-thread index; the kernels in this file pass the group thread index.
//          Each thread combines at most one pair of elements per tree level.
//   n    - number of elements to scan.
//          NOTE(review): the sweeps look like they assume a power of two (the kernels in this
//          file pass 512 and 256) - confirm before using other sizes.
//   temp - array holding the input values, always addressed through SHARED_MEM().
//          On exit, temp[SHARED_MEM(k)] holds the sum of the inputs with index < k.
//   sum  - float lvalue; receives the total of all 'n' inputs, clamped from below to FLT_MIN.
//
// The macro calls GroupMemoryBarrierWithGroupSync(), so every thread of the group must execute it.
// A barrier is issued before the first read of 'temp' and after the last write, so the caller
// needs no extra barrier around the macro.
// 'i' and 'n' are parenthesized wherever operator precedence could matter, so compound expressions
// (e.g. 'a + b') can be passed safely.
#define PARALLEL_SCAN(i, n, temp, sum) \
{ \
    uint offset; \
    \
    /* Execute the up-sweep phase. */ \
    for (offset = 1; offset <= (n) / 2; offset *= 2) \
    { \
        GroupMemoryBarrierWithGroupSync(); \
        \
        /*** a1 = (2 * i + 1) * offset - 1 */ \
        uint a1 = Mad24(Mad24(2, i, 1), offset, -1); \
        uint a2 = a1 + offset; \
        \
        if (a2 < (n)) \
        { \
            temp[SHARED_MEM(a2)] += temp[SHARED_MEM(a1)]; \
        } \
    } \
    \
    GroupMemoryBarrierWithGroupSync(); \
    \
    /* The last element now holds the total of all inputs. */ \
    /* Prevent NaNs arising from the division of 0 by 0. */ \
    sum = max(temp[SHARED_MEM((n) - 1)], FLT_MIN); \
    \
    /* Every thread must have read the total before it is overwritten below. */ \
    GroupMemoryBarrierWithGroupSync(); \
    \
    /* The exclusive scan requires the last element to be 0. */ \
    if ((i) == 0) \
    { \
        temp[SHARED_MEM((n) - 1)] = 0.0; \
    } \
    \
    /* Execute the down-sweep phase. */ \
    for (offset = (n) / 2; offset > 0; offset /= 2) \
    { \
        GroupMemoryBarrierWithGroupSync(); \
        \
        /*** a1 = (2 * i + 1) * offset - 1 */ \
        uint a1 = Mad24(Mad24(2, i, 1), offset, -1); \
        uint a2 = a1 + offset; \
        \
        if (a2 < (n)) \
        { \
            /* Pass the parent's value down to a1, and parent + old a1 to a2. */ \
            float t1 = temp[SHARED_MEM(a1)]; \
            temp[SHARED_MEM(a1)] = temp[SHARED_MEM(a2)]; \
            temp[SHARED_MEM(a2)] += t1; \
        } \
    } \
    \
    /* Make the final results visible to all threads of the group. */ \
    GroupMemoryBarrierWithGroupSync(); \
}
|
|
|
|
#pragma kernel ComputeConditionalDensities
|
|
|
|
// Group-shared scratch buffer for one texture row: one slot per texel, plus the padding added by SHARED_MEM().
groupshared float rowVals[SHARED_MEM(TEXTURE_WIDTH)];
|
|
|
|
[numthreads(TEXTURE_WIDTH / 2, 1, 1)]
void ComputeConditionalDensities(uint3 groupId       : SV_GroupID,
                                 uint3 groupThreadId : SV_GroupThreadID)
{
    // Each thread group handles one texture row (TEXTURE_HEIGHT groups in total).
    // Each thread handles two texels: one in the first half of the row and
    // the one at the same position in the second half.
    const uint texelCount = TEXTURE_WIDTH;
    const uint tid        = groupThreadId.x;
    const uint row        = groupId.x;
    const uint colA       = tid;
    const uint colB       = tid + texelCount / 2;

    // Coordinates of the texel centers in the equiareal map.
    float width      = TEXTURE_WIDTH;
    float height     = TEXTURE_HEIGHT;
    float halfTexelU = 0.5 / width;
    float halfTexelV = 0.5 / height;
    float uA         = colA / width  + halfTexelU;
    float uB         = colB / width  + halfTexelU;
    float v          = row  / height + halfTexelV;

    // Look up the environment map (LOD 0) along the directions of both texels.
    float3 dirA = ConvertEquiarealToCubemap(uA, v);
    float3 dirB = ConvertEquiarealToCubemap(uB, v);
    float3 rgbA = SAMPLE_TEXTURECUBE_LOD(envMap, sampler_envMap, dirA, 0).rgb;
    float3 rgbB = SAMPLE_TEXTURECUBE_LOD(envMap, sampler_envMap, dirB, 0).rgb;

    // The value of a texel is the sum of its color channels.
    // TODO: process 4 texels per thread, and manually unroll.
    rowVals[SHARED_MEM(colA)] = rgbA.r + rgbA.g + rgbA.b;
    rowVals[SHARED_MEM(colB)] = rgbB.r + rgbB.g + rgbB.b;

    // Integrate the step function: exclusive prefix sum over the row.
    float rowTotal;

    PARALLEL_SCAN(tid, texelCount, rowVals, rowTotal)

    // Normalizing by the row total yields the CDF. Note: the value at (i = n) is implicitly 1.
    conditionalDensities[uint2(colA, row)] = rowVals[SHARED_MEM(colA)] / rowTotal;
    conditionalDensities[uint2(colB, row)] = rowVals[SHARED_MEM(colB)] / rowTotal;

    // A single thread per group publishes the integral of the row.
    if (tid != 0)
    {
        return;
    }

    float rowIntegral = rowTotal / texelCount;
    marginalRowDensities[uint2(row, 0)] = rowIntegral;
}
|
|
|
|
#pragma kernel ComputeMarginalRowDensities
|
|
|
|
// Group-shared scratch buffer for the row integrals: one slot per row, plus the padding added by SHARED_MEM().
groupshared float rowInts[SHARED_MEM(TEXTURE_HEIGHT)];
|
|
|
|
[numthreads(TEXTURE_HEIGHT / 2, 1, 1)]
void ComputeMarginalRowDensities(uint3 groupThreadId : SV_GroupThreadID)
{
    // A single thread group does all the work; each thread handles two rows:
    // one in the first half of the table and the matching one in the second half.
    const uint  rowCount = TEXTURE_HEIGHT;
    const uint  tid      = groupThreadId.x;
    const uint2 slotA    = uint2(tid, 0);
    const uint2 slotB    = uint2(tid + rowCount / 2, 0);

    // Fetch the row integrals stored in 'marginalRowDensities'.
    // TODO: process 4 texels per thread, and manually unroll.
    rowInts[SHARED_MEM(slotA.x)] = marginalRowDensities[slotA];
    rowInts[SHARED_MEM(slotB.x)] = marginalRowDensities[slotB];

    // Integrate the step function: exclusive prefix sum over the row integrals.
    float imageTotal;

    PARALLEL_SCAN(tid, rowCount, rowInts, imageTotal)

    // Normalizing by the total yields the CDF. Note: the value at (i = n) is implicitly 1.
    marginalRowDensities[slotA] = rowInts[SHARED_MEM(slotA.x)] / imageTotal;
    marginalRowDensities[slotB] = rowInts[SHARED_MEM(slotB.x)] / imageTotal;

    // A single thread publishes the integral of the whole image in the extra last element.
    if (tid != 0)
    {
        return;
    }

    float imageIntegral = imageTotal / rowCount;
    marginalRowDensities[uint2(rowCount, 0)] = imageIntegral;
}
|