浏览代码

HDRenderLoop: Clean packed Gbuffer code in Uint16 + packing functions

- It still doesn't work as UINT16 RT is not supported by Unity :(
- Packing.hlsl needs some convention/naming cleanup
/main
Sebastien Lagarde 8 年前
当前提交
efa7c787
共有 7 个文件被更改,包括 138 次插入131 次删除
  1. 1
      Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.cs
  2. 65
      Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.hlsl
  3. 1
      Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Material.hlsl
  4. 4
      Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/ShaderConfig.cs
  5. 70
      Assets/ScriptableRenderLoop/ShaderLibrary/Color.hlsl
  6. 4
      Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl
  7. 124
      Assets/ScriptableRenderLoop/ShaderLibrary/Packing.hlsl

1
Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.cs


#pragma warning disable 162 // warning CS0162: Unreachable code detected
// When ShaderConfig.PackgbufferInU16 is set to 1, override the format and read/write mode
// of render targets 0 and 1.
if (ShaderConfig.PackgbufferInU16 == 1)
{
// TODO: Just discovered that Unity doesn't support an unsigned 16-bit RT format.
// NOTE(review): ARGBInt is presumably used below as a stand-in until such a format exists - confirm.
RTFormat[0] = RenderTextureFormat.ARGBInt; RTReadWrite[0] = RenderTextureReadWrite.Linear;
RTFormat[1] = RenderTextureFormat.ARGBInt; RTReadWrite[1] = RenderTextureReadWrite.Linear;
}

65
Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.hlsl


#if SHADEROPTIONS_PACK_GBUFFER_IN_U16
// Now pack all buffer into 2 uint buffer
// TODO: It would be more efficient to pack the data directly into uint format rather than going through outGBuffer, but the current approach is easier to maintain if the layout changes
// We don't have hardware sRGB, so just sqrt the baseColor value instead
data.outGBuffer0.xyz = sqrt(data.outGBuffer0.xyz);
// We don't have hardware sRGB to store the base color when we pack into u16, so rather than performing a full sRGB encoding we just use the cheap gamma 2.0 approximation
// TODO: test alternative like FastLinearToSRGB to better match unpacked gbuffer
outGBuffer0.xyz = LinearToGamma20(outGBuffer0.xyz);
uint outGBuffer0X = uint(saturate(data.outGBuffer0.x) * 255.5);
uint outGBuffer0Y = uint(saturate(data.outGBuffer0.y) * 255.5);
uint outGBuffer0Z = uint(saturate(data.outGBuffer0.z) * 255.5);
uint outGBuffer0W = uint(saturate(data.outGBuffer0.w) * 255.5);
uint packedGBuffer1 = PackR10G10B10A2(outGBuffer1);
outGBufferU0 = uint4( PackFloatToUInt(outGBuffer1.x, 10, 0) | PackFloatToUInt(outGBuffer1.w, 2, 10) | PackNUpperbitFromU8(outGBuffer0Z, 2, 12) | PackNUpperbitFromU8(outGBuffer0W, 2, 14),
PackFloatToUInt(outGBuffer1.y, 10, 0) | PackNLowerbitFromU8(outGBuffer0Z, 6, 10),
PackFloatToUInt(outGBuffer1.z, 10, 0) | PackNLowerbitFromU8(outGBuffer0W, 6, 10),
outGBuffer0X | outGBuffer0Y << 8
);
outGBufferU0 = uint4( PackFloatToUInt(outGBuffer0.x, 8, 0) | PackFloatToUInt(outGBuffer0.y, 8, 8),
PackFloatToUInt(outGBuffer0.z, 8, 0) | PackFloatToUInt(outGBuffer0.w, 8, 8),
(packedGBuffer1 & 0x0000FFFF),
(packedGBuffer1 & 0xFFFF0000) >> 16);
uint outGBuffer2X = uint(saturate(data.outGBuffer2.x) * 255.5);
uint outGBuffer2Y = uint(saturate(data.outGBuffer2.y) * 255.5);
uint outGBuffer2Z = uint(saturate(data.outGBuffer2.z) * 255.5);
uint outGBuffer2W = uint(saturate(data.outGBuffer2.w) * 255.5);
uint packedGBuffer3 = PackR11G11B10f(outGBuffer3.xyz);
// TODO: This doesn't work for the lighting buffer because the encoded format is float, i.e. it means that we must first convert to the 11:11:10 float format (TODO: look at the code, maybe it is not so expensive?)
// before storing its binary representation as uint. An alternative is to use RGBM/LogLuv.
outGBufferU1 = uint4( PackFloatToUInt(outGBuffer3.x, 11, 0) | PackNUpperbitFromU8(outGBuffer2Z, 3, 11) | PackNUpperbitFromU8(outGBuffer2W, 2, 14),
PackFloatToUInt(outGBuffer3.z, 11, 0) | PackNLowerbitFromU8(outGBuffer2Z, 5, 11),
PackFloatToUInt(outGBuffer3.x, 10, 0) | PackNLowerbitFromU8(outGBuffer2W, 6, 10),
outGBuffer2X | outGBuffer2Y << 8
);
outGBufferU1 = uint4( PackFloatToUInt(outGBuffer2.x, 8, 0) | PackFloatToUInt(outGBuffer2.y, 8, 8),
PackFloatToUInt(outGBuffer2.z, 8, 0) | PackFloatToUInt(outGBuffer2.w, 8, 8),
(packedGBuffer3 & 0x0000FFFF),
(packedGBuffer3 & 0xFFFF0000) >> 16);
#endif
}

#if SHADEROPTIONS_PACK_GBUFFER_IN_U16
// Rebuild the four float4 GBuffer values from the two packed uint4 inputs (inGBufferU0 / inGBufferU1).
// NOTE(review): this listing appears to contain both an older bit-sliced unpacking and a newer
// 8:8 / R10G10B10A2 / R11G11B10f unpacking of the same values. Where a component is assigned twice,
// the later assignment is the one that takes effect - confirm which statements are meant to be live.
float4 inGBuffer0, inGBuffer1, inGBuffer2, inGBuffer3;
inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.w, 8, 0);
inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.w, 8, 8);
inGBuffer0.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 12)) / 255.0;
inGBuffer0.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.x, 8, 0);
inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.x, 8, 8);
inGBuffer0.z = UnpackUIntToFloat(inGBufferU0.y, 8, 0);
inGBuffer0.w = UnpackUIntToFloat(inGBufferU0.y, 8, 8);
// Undo the gamma 2.0 encoding applied to the base color on the packing side.
inGBuffer0.xyz = Gamma20ToLinear(inGBuffer0.xyz);
inGBuffer1.x = UnpackUIntToFloat(inGBufferU0.x, 10, 0);
inGBuffer1.y = UnpackUIntToFloat(inGBufferU0.y, 10, 0);
inGBuffer1.z = UnpackUIntToFloat(inGBufferU0.z, 10, 0);
inGBuffer1.w = UnpackUIntToFloat(inGBufferU0.x, 2, 10);
// GBuffer1 is a 32-bit value split across inGBufferU0.z (low 16 bits) and inGBufferU0.w (high 16 bits).
uint packedGBuffer1 = inGBufferU0.z | inGBufferU0.w << 16;
inGBuffer1 = UnpackR10G10B10A2(packedGBuffer1);
inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.w, 8, 0);
inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.w, 8, 8);
inGBuffer2.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 5, 11) | UnpackNUpperbitFromU8(inGBufferU1.x, 3, 11)) / 255.0;
inGBuffer2.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.x, 8, 0);
inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.x, 8, 8);
inGBuffer2.z = UnpackUIntToFloat(inGBufferU1.y, 8, 0);
inGBuffer2.w = UnpackUIntToFloat(inGBufferU1.y, 8, 8);
inGBuffer3.x = UnpackUIntToFloat(inGBufferU1.x, 11, 0);
inGBuffer3.y = UnpackUIntToFloat(inGBufferU1.y, 11, 0);
inGBuffer3.z = UnpackUIntToFloat(inGBufferU1.z, 10, 0);
// GBuffer3 is a 32-bit value split across inGBufferU1.z (low 16 bits) and inGBufferU1.w (high 16 bits).
uint packedGBuffer3 = inGBufferU1.z | inGBufferU1.w << 16;
// Decode from packedGBuffer3; packedGBuffer1 holds GBuffer1's bits and must not be used here.
inGBuffer3.xyz = UnpackR11G11B10f(packedGBuffer3);
inGBuffer3.w = 0.0;
#endif

1
Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Material.hlsl


#ifndef UNITY_MATERIAL_INCLUDED
#define UNITY_MATERIAL_INCLUDED
#include "Color.hlsl"
#include "Packing.hlsl"
#include "BSDF.hlsl"
#include "Debug.hlsl"

4
Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/ShaderConfig.cs


{
// TODO: This feature cannot be used yet: previousPositionCS must first be provided to the vertex shader as part of Attribute for the GBuffer pass
// TODO: How can this feature be enabled only for meshes that actually require it, such as skinned and moving meshes? (Others can be handled with depth reprojection, but TAA can be an issue.)
VelocityInGBuffer = 0, // Change to 1 to enable the feature
VelocityInGBuffer = 0, // Change to 1 to enable the feature, then regenerate hlsl headers.
// Note: #define can't be use in include file in C# so we choes this way to configure both C# and hlsl
// Note: #define can't be use in include file in C# so we chose this way to configure both C# and hlsl
// Changing a value in this Config enum requires regenerating the hlsl include and recompiling the C# code and shaders
public class ShaderConfig
{

70
Assets/ScriptableRenderLoop/ShaderLibrary/Color.hlsl


return dot(linearRgb, float3(0.2126729f, 0.7151522f, 0.0721750f));
}
// Ref: http://realtimecollisiondetection.net/blog/?p=15
// Encodes an RGB color into a float4 LogLuv value:
//   .xy = first two components of the transformed color divided by the third,
//   .zw = two-part encoding of 2 * log2(second component) + 127.
float4 PackLogLuv(float3 vRGB)
{
    // Encoding matrix ("M" in the reference above)
    const float3x3 kEncodeM = float3x3(
        0.2209, 0.3390, 0.4184,
        0.1138, 0.6780, 0.7319,
        0.0102, 0.1130, 0.2969);

    // Keep every component strictly positive so the divide and log2 below stay well defined.
    float3 transformed = max(mul(vRGB, kEncodeM), float3(1e-6, 1e-6, 1e-6));

    float logValue = 2.0 * log2(transformed.y) + 127.0;
    float logFrac = frac(logValue);

    float4 encoded;
    encoded.xy = transformed.xy / transformed.z;
    encoded.w = logFrac;
    encoded.z = (logValue - (floor(logFrac*255.0f))/255.0f)/255.0f;
    return encoded;
}
// Decodes a float4 LogLuv value (the layout written by PackLogLuv) back to RGB.
// Negative components of the result are clamped to zero.
float3 UnpackLogLuv(float4 vLogLuv)
{
    // Decoding matrix (inverse of the encoding matrix "M")
    const float3x3 kDecodeM = float3x3(
        6.0014, -2.7008, -1.7996,
        -1.3320, 3.1029, -5.7721,
        0.3008, -1.0882, 5.6268);

    // Recombine the two-part log value stored in .zw.
    float logValue = vLogLuv.z * 255.0 + vLogLuv.w;

    float secondComp = exp2((logValue - 127.0) / 2.0);
    float thirdComp = secondComp / vLogLuv.y;
    float firstComp = vLogLuv.x * thirdComp;

    float3 decoded = mul(float3(firstComp, secondComp, thirdComp), kDecodeM);
    return max(decoded, float3(0.0, 0.0, 0.0));
}
// TODO: This function is used by the LightTransport pass to encode lightmap or emissive values
// Encodes rgb as RGBM: .rgb holds the color divided by the shared multiplier stored in .a.
// rgb is first scaled by 1 / maxRGBM; the multiplier is rounded up to a multiple of 1/255.
float4 PackRGBM(float3 rgb, float maxRGBM)
{
    const float kMinMultiplier = 2.0 * 1e-2;
    float kOneOverRGBMMaxRange = 1.0 / maxRGBM;

    float3 scaledRgb = rgb * kOneOverRGBMMaxRange;

    float multiplier = max(max(scaledRgb.r, scaledRgb.g), max(scaledRgb.b, kMinMultiplier));
    multiplier = ceil(multiplier * 255.0) / 255.0;

    // d3d9 emits a division-by-zero warning otherwise, so clamp again to keep the compiler happy.
    multiplier = max(multiplier, kMinMultiplier);

    return float4(scaledRgb / multiplier, multiplier);
}
// Alternative...
#define RGBMRANGE (8.0)
// Encodes color as RGBM using the fixed range RGBMRANGE.
// The multiplier (.a) is the largest scaled component, floored at 1e-6, saturated to [0, 1],
// and rounded up to a multiple of 1/255; .rgb is the scaled color divided by that multiplier.
float4 PackRGBM(float3 color)
{
    float3 scaledColor = color * (1.0 / RGBMRANGE);

    float multiplier = saturate(max(max(scaledColor.r, scaledColor.g), max(scaledColor.b, 1e-6)));
    multiplier = ceil(multiplier * 255.0) / 255.0;

    return float4(scaledColor / multiplier, multiplier);
}
// Decodes an RGBM value: scales .rgb by RGBMRANGE and by the multiplier stored in .a.
float3 UnpackRGBM(float4 rgbm)
{
    float3 rangeScaled = RGBMRANGE * rgbm.rgb;
    return rangeScaled * rgbm.a;
}
// Ref: http://www.nvidia.com/object/real-time-ycocg-dxt-compression.html
#define CHROMA_BIAS (0.5 * 256.0 / 255.0)
float3 RGBToYCoCg(float3 rgb)

4
Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl


#ifndef INTRINSIC_BITFIELD_EXTRACT
// unsigned integer bit field extract implementation
uint BitFieldExtract(uint inData, uint inSize, uint inOffset)
uint BitFieldExtract(uint data, uint size, uint offset)
return (inData >> inOffset) & ((1u << inSize) - 1u);
return (data >> offset) & ((1u << size) - 1u);
}
#endif // INTRINSIC_BITFIELD_EXTRACT

124
Assets/ScriptableRenderLoop/ShaderLibrary/Packing.hlsl


}
//-----------------------------------------------------------------------------
// HDR packing
//-----------------------------------------------------------------------------
// Ref: http://realtimecollisiondetection.net/blog/?p=15
// Encodes an RGB color into a float4 LogLuv value:
//   .xy = first two components of the transformed color divided by the third,
//   .zw = two-part encoding of 2 * log2(second component) + 127.
float4 PackLogLuv(float3 vRGB)
{
    // Encoding matrix ("M" in the reference above)
    const float3x3 kEncodeM = float3x3(
        0.2209, 0.3390, 0.4184,
        0.1138, 0.6780, 0.7319,
        0.0102, 0.1130, 0.2969);

    // Keep every component strictly positive so the divide and log2 below stay well defined.
    float3 transformed = max(mul(vRGB, kEncodeM), float3(1e-6, 1e-6, 1e-6));

    float logValue = 2.0 * log2(transformed.y) + 127.0;
    float logFrac = frac(logValue);

    float4 encoded;
    encoded.xy = transformed.xy / transformed.z;
    encoded.w = logFrac;
    encoded.z = (logValue - (floor(logFrac*255.0f)) / 255.0f) / 255.0f;
    return encoded;
}
// Decodes a float4 LogLuv value (the layout written by PackLogLuv) back to RGB.
// Negative components of the result are clamped to zero.
float3 UnpackLogLuv(float4 vLogLuv)
{
    // Decoding matrix (inverse of the encoding matrix "M")
    const float3x3 kDecodeM = float3x3(
        6.0014, -2.7008, -1.7996,
        -1.3320, 3.1029, -5.7721,
        0.3008, -1.0882, 5.6268);

    // Recombine the two-part log value stored in .zw.
    float logValue = vLogLuv.z * 255.0 + vLogLuv.w;

    float secondComp = exp2((logValue - 127.0) / 2.0);
    float thirdComp = secondComp / vLogLuv.y;
    float firstComp = vLogLuv.x * thirdComp;

    float3 decoded = mul(float3(firstComp, secondComp, thirdComp), kDecodeM);
    return max(decoded, float3(0.0, 0.0, 0.0));
}
// TODO: This function is used by the LightTransport pass to encode lightmap or emissive values
// Encodes rgb as RGBM: .rgb holds the color divided by the shared multiplier stored in .a.
// rgb is first scaled by 1 / maxRGBM; the multiplier is rounded up to a multiple of 1/255.
float4 PackRGBM(float3 rgb, float maxRGBM)
{
    const float kMinMultiplier = 2.0 * 1e-2;
    float kOneOverRGBMMaxRange = 1.0 / maxRGBM;

    float3 scaledRgb = rgb * kOneOverRGBMMaxRange;

    float multiplier = max(max(scaledRgb.r, scaledRgb.g), max(scaledRgb.b, kMinMultiplier));
    multiplier = ceil(multiplier * 255.0) / 255.0;

    // d3d9 emits a division-by-zero warning otherwise, so clamp again to keep the compiler happy.
    multiplier = max(multiplier, kMinMultiplier);

    return float4(scaledRgb / multiplier, multiplier);
}
// Alternative...
#define RGBMRANGE (8.0)
// Encodes color as RGBM using the fixed range RGBMRANGE.
// The multiplier (.a) is the largest scaled component, floored at 1e-6, saturated to [0, 1],
// and rounded up to a multiple of 1/255; .rgb is the scaled color divided by that multiplier.
float4 PackRGBM(float3 color)
{
    float3 scaledColor = color * (1.0 / RGBMRANGE);

    float multiplier = saturate(max(max(scaledColor.r, scaledColor.g), max(scaledColor.b, 1e-6)));
    multiplier = ceil(multiplier * 255.0) / 255.0;

    return float4(scaledColor / multiplier, multiplier);
}
// Decodes an RGBM value: scales .rgb by RGBMRANGE and by the multiplier stored in .a.
float3 UnpackRGBM(float4 rgbm)
{
    float3 rangeScaled = RGBMRANGE * rgbm.rgb;
    return rangeScaled * rgbm.a;
}
// The standard 32-bit HDR color format
// Packs rgb into one uint built from the f32tof16() bit patterns of each channel:
//   bits 31..21 = R (11 bits), bits 20..10 = G (11 bits), bits 9..0 = B (10 bits).
// Bit 15 of each 16-bit pattern and its lowest bits (4 for R/G, 5 for B) are discarded.
uint PackR11G11B10f(float3 rgb)
{
    uint halfR = f32tof16(rgb.x);
    uint halfG = f32tof16(rgb.y);
    uint halfB = f32tof16(rgb.z);

    uint packed = (halfR << 17) & 0xFFE00000;
    packed |= (halfG << 6) & 0x001FFC00;
    packed |= (halfB >> 5) & 0x000003FF;
    return packed;
}
// Unpacks a uint laid out as R (bits 31..21), G (bits 20..10), B (bits 9..0) into a float3.
// Each field is moved back to bits 14..4 (R, G) or 14..5 (B) of a 16-bit pattern and
// converted with f16tof32(); the discarded low bits come back as zero.
float3 UnpackR11G11B10f(uint rgb)
{
    uint halfR = (rgb >> 17) & 0x7FF0;
    uint halfG = (rgb >> 6) & 0x7FF0;
    uint halfB = (rgb << 5) & 0x7FE0;

    return float3(f16tof32(halfR), f16tof32(halfG), f16tof32(halfB));
}
//-----------------------------------------------------------------------------
// Quaternion packing
//-----------------------------------------------------------------------------

// float packing to sint/uint
//-----------------------------------------------------------------------------
// src must be between 0.0 and 1.0
const uint MAX_VALUE = (1 << size) - 1;
return Clamp(uint(src * MAX_VALUE), uint(0), uint(MAX_VALUE)) << offset;
const float maxValue = float((1u << size) - 1u) + 0.5; // Shader compiler should be able to remove this
return uint(src * maxValue) << offset;
const uint MAX_VALUE = (1 << size) - 1;
const float invMaxValue = 1.0 / float((1 << size) - 1);
return BitFieldExtract(src, size, offset) / float(MAX_VALUE);
return float(BitFieldExtract(src, size, offset)) * invMaxValue;
uint PackNUpperbitFromU8(uint src, uint nbit, uint offset)
uint PackR10G10B10A2(float4 rgba)
return ( (src & (((1 << nbit) - 1) << (8 - nbit))) >> (8 - nbit) ) << offset;
return (PackFloatToUInt(rgba.x, 10, 0) | PackFloatToUInt(rgba.y, 10, 10) | PackFloatToUInt(rgba.z, 10, 20) | PackFloatToUInt(rgba.w, 2, 30));
uint PackNLowerbitFromU8(uint src, uint nbit, uint offset)
float4 UnpackR10G10B10A2(uint rgba)
return (src & ((1 << nbit) - 1)) << offset;
}
uint UnpackNUpperbitToU8(uint src, uint nbit, uint offset)
{
return ((src & (((1 << nbit) - 1) << offset)) >> offset) << (8 - nbit);
}
uint UnpackNLowerbitToU8(uint src, uint nbit, uint offset)
{
return (src & (((1 << nbit) - 1) << offset)) >> offset;
float4 ouput;
ouput.x = UnpackUIntToFloat(rgba, 10, 0);
ouput.y = UnpackUIntToFloat(rgba, 10, 10);
ouput.z = UnpackUIntToFloat(rgba, 10, 20);
ouput.w = UnpackUIntToFloat(rgba, 2, 30);
return ouput;
}
正在加载...
取消
保存