HDRenderLoop: Clean packed Gbuffer code in Uint16 + packing functions

- It still doesn't work, as UINT16 render targets are not supported by Unity :( - Packing.hlsl still needs some convention/naming cleanup
8 年前 · efa7c787
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.cs
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.cs
 #pragma warning disable 162 // warning CS0162: Unreachable code detected
                if (ShaderConfig.PackgbufferInU16 == 1)
                {
+                    // TODO: Unity doesn't support an unsigned 16-bit integer RT format, so the packed GBuffer path does not work yet.
                    RTFormat[0] = RenderTextureFormat.ARGBInt; RTReadWrite[0] = RenderTextureReadWrite.Linear; // NOTE(review): ARGBInt is presumably a stand-in for the missing u16 format - confirm
                    RTFormat[1] = RenderTextureFormat.ARGBInt; RTReadWrite[1] = RenderTextureReadWrite.Linear; // second packed target, same format and linear read/write
                }
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.hlsl
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Lit/Lit.hlsl

    #if SHADEROPTIONS_PACK_GBUFFER_IN_U16
    // Now pack all buffers into 2 uint buffers
-    // TODO: should be more efficient to pack data directly in uint format rather than going through outGBuffer but easier to maintain in case of change
-    // We don't have hardware sRGB, so just sqrt the baseColor value instead
-    data.outGBuffer0.xyz = sqrt(data.outGBuffer0.xyz);
+    // We don't have hardware sRGB to store base color when packing into u16, so rather than performing a full sRGB encoding just use the cheap gamma 2.0 (LinearToGamma20)
+    // TODO: test alternatives like FastLinearToSRGB to better match the unpacked gbuffer
+    outGBuffer0.xyz = LinearToGamma20(outGBuffer0.xyz);
-    uint outGBuffer0X = uint(saturate(data.outGBuffer0.x) * 255.5);
-    uint outGBuffer0Y = uint(saturate(data.outGBuffer0.y) * 255.5);
-    uint outGBuffer0Z = uint(saturate(data.outGBuffer0.z) * 255.5);
-    uint outGBuffer0W = uint(saturate(data.outGBuffer0.w) * 255.5);
+    uint packedGBuffer1 = PackR10G10B10A2(outGBuffer1); // GBuffer1 becomes one 32-bit word, split into two 16-bit halves below
-    outGBufferU0 = uint4(   PackFloatToUInt(outGBuffer1.x, 10, 0) | PackFloatToUInt(outGBuffer1.w, 2, 10) | PackNUpperbitFromU8(outGBuffer0Z, 2, 12) | PackNUpperbitFromU8(outGBuffer0W, 2, 14),
-                            PackFloatToUInt(outGBuffer1.y, 10, 0) | PackNLowerbitFromU8(outGBuffer0Z, 6, 10),
-                            PackFloatToUInt(outGBuffer1.z, 10, 0) | PackNLowerbitFromU8(outGBuffer0W, 6, 10),
-                            outGBuffer0X | outGBuffer0Y << 8
-                        );
+    outGBufferU0 = uint4(   PackFloatToUInt(outGBuffer0.x, 8, 0)  | PackFloatToUInt(outGBuffer0.y, 8, 8), // x: GBuffer0 x (low byte) and y (high byte)
+                            PackFloatToUInt(outGBuffer0.z, 8, 0)  | PackFloatToUInt(outGBuffer0.w, 8, 8), // y: GBuffer0 z (low byte) and w (high byte)
+                            (packedGBuffer1 & 0x0000FFFF), // z: low 16 bits of the packed GBuffer1 word
+                            (packedGBuffer1 & 0xFFFF0000) >> 16); // w: high 16 bits of the packed GBuffer1 word
-    uint outGBuffer2X = uint(saturate(data.outGBuffer2.x) * 255.5);
-    uint outGBuffer2Y = uint(saturate(data.outGBuffer2.y) * 255.5);
-    uint outGBuffer2Z = uint(saturate(data.outGBuffer2.z) * 255.5);
-    uint outGBuffer2W = uint(saturate(data.outGBuffer2.w) * 255.5);
+    uint packedGBuffer3 = PackR11G11B10f(outGBuffer3.xyz); // GBuffer3 rgb becomes one 32-bit word; its w component is not stored
-    // TODO: This doesn't work for lighting buffer as the encoded format is float. i.e it mean that we must convert first to 111110Float format (TODO: Look at the code maybe not so expensive ?)
-    // before storing as uint the binary representation. Alternative is to use RGBM/LogLuv.
-    outGBufferU1 = uint4(   PackFloatToUInt(outGBuffer3.x, 11, 0) | PackNUpperbitFromU8(outGBuffer2Z, 3, 11) | PackNUpperbitFromU8(outGBuffer2W, 2, 14),
-                            PackFloatToUInt(outGBuffer3.z, 11, 0) | PackNLowerbitFromU8(outGBuffer2Z, 5, 11),
-                            PackFloatToUInt(outGBuffer3.x, 10, 0) | PackNLowerbitFromU8(outGBuffer2W, 6, 10),
-                            outGBuffer2X | outGBuffer2Y << 8
-                        );
+    outGBufferU1 = uint4(   PackFloatToUInt(outGBuffer2.x, 8, 0)  | PackFloatToUInt(outGBuffer2.y, 8, 8), // x: GBuffer2 x (low byte) and y (high byte)
+                            PackFloatToUInt(outGBuffer2.z, 8, 0)  | PackFloatToUInt(outGBuffer2.w, 8, 8), // y: GBuffer2 z (low byte) and w (high byte)
+                            (packedGBuffer3 & 0x0000FFFF), // z: low 16 bits of the packed GBuffer3 word
+                            (packedGBuffer3 & 0xFFFF0000) >> 16); // w: high 16 bits of the packed GBuffer3 word
    #endif
 }

    #if SHADEROPTIONS_PACK_GBUFFER_IN_U16
    float4 inGBuffer0, inGBuffer1, inGBuffer2, inGBuffer3;
    
-    inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.w, 8, 0);
-    inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.w, 8, 8);
-    inGBuffer0.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 12)) / 255.0;
-    inGBuffer0.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
+    // GBuffer0: four 8-bit channels, two per component of inGBufferU0.xy (low byte first)
+    inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.x, 8, 0);
+    inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.x, 8, 8);
+    inGBuffer0.z = UnpackUIntToFloat(inGBufferU0.y, 8, 0);
+    inGBuffer0.w = UnpackUIntToFloat(inGBufferU0.y, 8, 8);
+
+    inGBuffer0.xyz = Gamma20ToLinear(inGBuffer0.xyz); // undo the gamma 2.0 encoding applied to GBuffer0.xyz when packing
-    inGBuffer1.x = UnpackUIntToFloat(inGBufferU0.x, 10, 0);
-    inGBuffer1.y = UnpackUIntToFloat(inGBufferU0.y, 10, 0);
-    inGBuffer1.z = UnpackUIntToFloat(inGBufferU0.z, 10, 0);
-    inGBuffer1.w = UnpackUIntToFloat(inGBufferU0.x, 2, 10);
+    uint packedGBuffer1 = inGBufferU0.z | inGBufferU0.w << 16; // GBuffer1: rebuild the 32-bit word from its low (z) and high (w) 16-bit halves
+    inGBuffer1 = UnpackR10G10B10A2(packedGBuffer1);
-    inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.w, 8, 0);
-    inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.w, 8, 8);
-    inGBuffer2.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 5, 11) | UnpackNUpperbitFromU8(inGBufferU1.x, 3, 11)) / 255.0;
-    inGBuffer2.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
+    // GBuffer2: four 8-bit channels, two per component of inGBufferU1.xy (low byte first)
+    inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.x, 8, 0);
+    inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.x, 8, 8);
+    inGBuffer2.z = UnpackUIntToFloat(inGBufferU1.y, 8, 0);
+    inGBuffer2.w = UnpackUIntToFloat(inGBufferU1.y, 8, 8);
-    inGBuffer3.x = UnpackUIntToFloat(inGBufferU1.x, 11, 0);
-    inGBuffer3.y = UnpackUIntToFloat(inGBufferU1.y, 11, 0);
-    inGBuffer3.z = UnpackUIntToFloat(inGBufferU1.z, 10, 0);
+    uint packedGBuffer3 = inGBufferU1.z | inGBufferU1.w << 16; // GBuffer3: rebuild the 32-bit word from its low (z) and high (w) 16-bit halves
+    inGBuffer3.xyz = UnpackR11G11B10f(packedGBuffer3); // must decode the GBuffer3 word built just above, not the GBuffer1 word
    inGBuffer3.w = 0.0; // only rgb is packed for GBuffer3
    #endif

--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Material.hlsl
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/Material/Material.hlsl
 #ifndef UNITY_MATERIAL_INCLUDED
 #define UNITY_MATERIAL_INCLUDED

+#include "Color.hlsl"
 #include "Packing.hlsl"
 #include "BSDF.hlsl"
 #include "Debug.hlsl"
--- a/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/ShaderConfig.cs
+++ b/Assets/ScriptableRenderLoop/HDRenderLoop/Shaders/ShaderConfig.cs
    {
        // TODO: Currently it is not yet possible to use this feature, we need to provide previousPositionCS to the vertex shader as part of Attribute for GBuffer pass
        // TODO: How to enable this feature only on mesh that effectively require it like skinned and moving mesh (other can be done with depth reprojection. But TAA can be an issue)
-        VelocityInGBuffer = 0, // Change to 1 to enable the feature
+        VelocityInGBuffer = 0, // Change to 1 to enable the feature, then regenerate hlsl headers.
-    // Note: #define can't be use in include file in C# so we choes this way to configure both C# and hlsl
+    // Note: #define can't be used in an include file in C#, so we chose this way to configure both C# and hlsl
    // Changing a value in this enum Config here require to regenerate the hlsl include and recompile C# and shaders
    public class ShaderConfig
    {
--- a/Assets/ScriptableRenderLoop/ShaderLibrary/Color.hlsl
+++ b/Assets/ScriptableRenderLoop/ShaderLibrary/Color.hlsl
    return dot(linearRgb, float3(0.2126729f, 0.7151522f, 0.0721750f));
 }

-// Ref: http://realtimecollisiondetection.net/blog/?p=15
-float4 PackLogLuv(float3 vRGB)
-{
-    // M matrix, for encoding
-    const float3x3 M = float3x3(
-        0.2209, 0.3390, 0.4184,
-        0.1138, 0.6780, 0.7319,
-        0.0102, 0.1130, 0.2969);
-
-    float4 vResult;
-    float3 Xp_Y_XYZp = mul(vRGB, M);
-    Xp_Y_XYZp = max(Xp_Y_XYZp, float3(1e-6, 1e-6, 1e-6));
-    vResult.xy = Xp_Y_XYZp.xy / Xp_Y_XYZp.z;
-    float Le = 2.0 * log2(Xp_Y_XYZp.y) + 127.0;
-    vResult.w = frac(Le);
-    vResult.z = (Le - (floor(vResult.w*255.0f))/255.0f)/255.0f;
-    return vResult;
-}
-
-float3 UnpackLogLuv(float4 vLogLuv)
-{
-    // Inverse M matrix, for decoding
-    const float3x3 InverseM = float3x3(
-        6.0014, -2.7008, -1.7996,
-       -1.3320,  3.1029, -5.7721,
-        0.3008, -1.0882,  5.6268);
-
-    float Le = vLogLuv.z * 255.0 + vLogLuv.w;
-    float3 Xp_Y_XYZp;
-    Xp_Y_XYZp.y = exp2((Le - 127.0) / 2.0);
-    Xp_Y_XYZp.z = Xp_Y_XYZp.y / vLogLuv.y;
-    Xp_Y_XYZp.x = vLogLuv.x * Xp_Y_XYZp.z;
-    float3 vRGB = mul(Xp_Y_XYZp, InverseM);
-    return max(vRGB, float3(0.0, 0.0, 0.0));
-}
-
-// TODO: This function is used with the LightTransport pass to encode lightmap or emissive
-float4 PackRGBM(float3 rgb, float maxRGBM)
-{
-    float kOneOverRGBMMaxRange = 1.0 / maxRGBM;
-    const float kMinMultiplier = 2.0 * 1e-2;
-
-    float4 rgbm = float4(rgb * kOneOverRGBMMaxRange, 1.0);
-    rgbm.a = max(max(rgbm.r, rgbm.g), max(rgbm.b, kMinMultiplier));
-    rgbm.a = ceil(rgbm.a * 255.0) / 255.0;
-
-    // Division-by-zero warning from d3d9, so make compiler happy.
-    rgbm.a = max(rgbm.a, kMinMultiplier);
-
-    rgbm.rgb /= rgbm.a;
-    return rgbm;
-}
-
-// Alternative...
-#define RGBMRANGE (8.0)
-float4 PackRGBM(float3 color)
-{
-    float4 rgbm;
-    color *= (1.0 / RGBMRANGE);
-    rgbm.a = saturate( max( max( color.r, color.g ), max( color.b, 1e-6 ) ) );
-    rgbm.a = ceil( rgbm.a * 255.0 ) / 255.0;
-    rgbm.rgb = color / rgbm.a;
-    return rgbm;
-}
-
-float3 UnpackRGBM(float4 rgbm)
-{
-    return RGBMRANGE * rgbm.rgb * rgbm.a;
-}
-
 // Ref: http://www.nvidia.com/object/real-time-ycocg-dxt-compression.html
 #define CHROMA_BIAS (0.5 * 256.0 / 255.0)
 float3 RGBToYCoCg(float3 rgb)
--- a/Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl
+++ b/Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl

 #ifndef INTRINSIC_BITFIELD_EXTRACT
 // Unsigned integer bit field extract fallback: returns the 'size' bits of 'data' that start at bit 'offset', right-aligned
-uint BitFieldExtract(uint inData, uint inSize, uint inOffset)
+uint BitFieldExtract(uint data, uint size, uint offset)
-    return (inData >> inOffset) & ((1u << inSize) - 1u);
+    return (data >> offset) & ((1u << size) - 1u); // shift the field down, then mask 'size' low bits; NOTE(review): presumably requires size < 32 - confirm
 }
 #endif // INTRINSIC_BITFIELD_EXTRACT

--- a/Assets/ScriptableRenderLoop/ShaderLibrary/Packing.hlsl
+++ b/Assets/ScriptableRenderLoop/ShaderLibrary/Packing.hlsl
 }

 //-----------------------------------------------------------------------------
+// HDR packing
+//-----------------------------------------------------------------------------
+
+// Ref: http://realtimecollisiondetection.net/blog/?p=15 - encodes an RGB color into a 4-channel LogLuv value
+float4 PackLogLuv(float3 vRGB)
+{
+    // M matrix, for encoding
+    const float3x3 M = float3x3(
+        0.2209, 0.3390, 0.4184,
+        0.1138, 0.6780, 0.7319,
+        0.0102, 0.1130, 0.2969);
+
+    float4 vResult;
+    float3 Xp_Y_XYZp = mul(vRGB, M);
+    Xp_Y_XYZp = max(Xp_Y_XYZp, float3(1e-6, 1e-6, 1e-6)); // keep every component positive so the division and log2 below stay finite
+    vResult.xy = Xp_Y_XYZp.xy / Xp_Y_XYZp.z; // xy: first two components relative to the third
+    float Le = 2.0 * log2(Xp_Y_XYZp.y) + 127.0; // log2 encoding of the y component, biased by 127
+    vResult.w = frac(Le); // w: fractional part of Le
+    vResult.z = (Le - (floor(vResult.w*255.0f)) / 255.0f) / 255.0f; // z: Le minus the 8-bit quantized fraction, scaled by 1/255
+    return vResult;
+}
+
+float3 UnpackLogLuv(float4 vLogLuv) // decodes a 4-channel LogLuv value back to RGB
+{
+    // Inverse M matrix, for decoding
+    const float3x3 InverseM = float3x3(
+        6.0014, -2.7008, -1.7996,
+        -1.3320, 3.1029, -5.7721,
+        0.3008, -1.0882, 5.6268);
+
+    float Le = vLogLuv.z * 255.0 + vLogLuv.w; // recombine the two log channels
+    float3 Xp_Y_XYZp;
+    Xp_Y_XYZp.y = exp2((Le - 127.0) / 2.0); // invert Le = 2 * log2(y) + 127
+    Xp_Y_XYZp.z = Xp_Y_XYZp.y / vLogLuv.y; // vLogLuv.y holds y / z, so this recovers z
+    Xp_Y_XYZp.x = vLogLuv.x * Xp_Y_XYZp.z; // vLogLuv.x holds x / z, so this recovers x
+    float3 vRGB = mul(Xp_Y_XYZp, InverseM);
+    return max(vRGB, float3(0.0, 0.0, 0.0)); // never return negative channels
+}
+
+// TODO: This function is used with the LightTransport pass to encode lightmap or emissive
+float4 PackRGBM(float3 rgb, float maxRGBM) // returns rgb divided by a shared multiplier, with the multiplier in alpha; maxRGBM is the encodable range
+{
+    float kOneOverRGBMMaxRange = 1.0 / maxRGBM;
+    const float kMinMultiplier = 2.0 * 1e-2;
+
+    float4 rgbm = float4(rgb * kOneOverRGBMMaxRange, 1.0);
+    rgbm.a = max(max(rgbm.r, rgbm.g), max(rgbm.b, kMinMultiplier)); // multiplier = largest scaled channel, never below kMinMultiplier
+    rgbm.a = ceil(rgbm.a * 255.0) / 255.0; // round the multiplier up to a multiple of 1/255
+
+    // Division-by-zero warning from d3d9, so make compiler happy.
+    rgbm.a = max(rgbm.a, kMinMultiplier);
+
+    rgbm.rgb /= rgbm.a;
+    return rgbm;
+}
+
+// Alternative RGBM encoding that uses the fixed range RGBMRANGE instead of a caller-supplied maximum
+#define RGBMRANGE (8.0)
+float4 PackRGBM(float3 color)
+{
+    float4 rgbm;
+    color *= (1.0 / RGBMRANGE);
+    rgbm.a = saturate(max(max(color.r, color.g), max(color.b, 1e-6))); // multiplier = largest scaled channel, kept within [1e-6, 1]
+    rgbm.a = ceil(rgbm.a * 255.0) / 255.0; // round up to a multiple of 1/255, which also keeps the divisor below non-zero
+    rgbm.rgb = color / rgbm.a; // channels above RGBMRANGE give values greater than 1 here because the multiplier is saturated
+    return rgbm;
+}
+
+float3 UnpackRGBM(float4 rgbm) // decodes an RGBM value using the fixed RGBMRANGE scale
+{
+    return RGBMRANGE * rgbm.rgb * rgbm.a; // color = range * rgb * multiplier
+}
+
+// The standard 32-bit HDR color format: r in bits 31..21, g in bits 20..10, b in bits 9..0 of the returned word
+uint PackR11G11B10f(float3 rgb)
+{
+    uint r = (f32tof16(rgb.x) << 17) & 0xFFE00000; // half bits 14..4 (sign bit dropped) moved to bits 31..21
+    uint g = (f32tof16(rgb.y) << 6) & 0x001FFC00; // half bits 14..4 (sign bit dropped) moved to bits 20..10
+    uint b = (f32tof16(rgb.z) >> 5) & 0x000003FF; // half bits 14..5 (sign bit dropped) moved to bits 9..0
+    return r | g | b;
+}
+
+float3 UnpackR11G11B10f(uint rgb) // inverse of PackR11G11B10f: rebuilds three half-float bit patterns and converts them to float
+{
+    float r = f16tof32((rgb >> 17) & 0x7FF0); // bits 31..21 back to half bits 14..4
+    float g = f16tof32((rgb >> 6) & 0x7FF0); // bits 20..10 back to half bits 14..4
+    float b = f16tof32((rgb << 5) & 0x7FE0); // bits 9..0 back to half bits 14..5
+    return float3(r, g, b);
+}
+
+//-----------------------------------------------------------------------------
 // Quaternion packing
 //-----------------------------------------------------------------------------

 // float packing to sint/uint
 //-----------------------------------------------------------------------------

+// src must be between 0.0 and 1.0: it is no longer clamped here, so an out-of-range value would not stay inside its 'size'-bit field
-    const uint MAX_VALUE = (1 << size) - 1;
-
-    return Clamp(uint(src * MAX_VALUE), uint(0), uint(MAX_VALUE)) << offset;
+    const float maxValue = float((1u << size) - 1u) + 0.5; // largest 'size'-bit value plus 0.5; shader compiler should be able to remove this
+    return uint(src * maxValue) << offset; // convert to an integer, then move the field to its bit position
-    const uint MAX_VALUE = (1 << size) - 1;
+    const float invMaxValue = 1.0 / float((1 << size) - 1); // reciprocal of the largest 'size'-bit value
-    return BitFieldExtract(src, size, offset) / float(MAX_VALUE);
+    return float(BitFieldExtract(src, size, offset)) * invMaxValue; // extract the field and rescale it to [0, 1]
-uint PackNUpperbitFromU8(uint src, uint nbit, uint offset)
+uint PackR10G10B10A2(float4 rgba) // packs x, y, z on 10 bits each (bits 0..29) and w on 2 bits (bits 30..31)
-    return ( (src & (((1 << nbit) - 1) << (8 - nbit))) >> (8 - nbit) ) << offset;
+    return (PackFloatToUInt(rgba.x, 10, 0)
+          | PackFloatToUInt(rgba.y, 10, 10)
+          | PackFloatToUInt(rgba.z, 10, 20)
+          | PackFloatToUInt(rgba.w, 2, 30));
-uint PackNLowerbitFromU8(uint src, uint nbit, uint offset)
+float4 UnpackR10G10B10A2(uint rgba) // inverse of PackR10G10B10A2
-    return (src & ((1 << nbit) - 1)) << offset;
-}
-
-uint UnpackNUpperbitToU8(uint src, uint nbit, uint offset)
-{
-    return ((src & (((1 << nbit) - 1) << offset)) >> offset) << (8 - nbit);
-}
-
-uint UnpackNLowerbitToU8(uint src, uint nbit, uint offset)
-{
-    return (src & (((1 << nbit) - 1) << offset)) >> offset;
+    // Build the result directly: one normalized channel per bit field, in x, y, z, w order
+    return float4(UnpackUIntToFloat(rgba, 10, 0),
+                  UnpackUIntToFloat(rgba, 10, 10),
+                  UnpackUIntToFloat(rgba, 10, 20),
+                  UnpackUIntToFloat(rgba, 2, 30));
 }