
HDRenderLoop: Clean packed Gbuffer code in Uint16 + packing functions

- It still doesn't work as UINT16 RT is not supported by Unity :(
- packing.hlsl needs some convention/naming cleanup
Sebastien Lagarde 8 年前
#pragma warning disable 162 // warning CS0162: Unreachable code detected
if (ShaderConfig.PackgbufferInU16 == 1)
// TODO: Just discovered that Unity doesn't support unsigned 16-bit RT formats.
RTFormat[0] = RenderTextureFormat.ARGBInt; RTReadWrite[0] = RenderTextureReadWrite.Linear;
RTFormat[1] = RenderTextureFormat.ARGBInt; RTReadWrite[1] = RenderTextureReadWrite.Linear;


// Now pack all buffer into 2 uint buffer
// TODO: It would be more efficient to pack data directly in uint format rather than going through outGBuffer, but this way is easier to maintain in case of change
// We don't have hardware sRGB, so just sqrt the baseColor value instead
data.outGBuffer0.xyz = sqrt(data.outGBuffer0.xyz);
// We don't have hardware sRGB to store the base color when we pack into u16, so rather than performing a full sRGB encoding we just use the cheap gamma20
// TODO: test alternative like FastLinearToSRGB to better match unpacked gbuffer
outGBuffer0.xyz = LinearToGamma20(outGBuffer0.xyz);
uint outGBuffer0X = uint(saturate(data.outGBuffer0.x) * 255.5);
uint outGBuffer0Y = uint(saturate(data.outGBuffer0.y) * 255.5);
uint outGBuffer0Z = uint(saturate(data.outGBuffer0.z) * 255.5);
uint outGBuffer0W = uint(saturate(data.outGBuffer0.w) * 255.5);
uint packedGBuffer1 = PackR10G10B10A2(outGBuffer1);
outGBufferU0 = uint4( PackFloatToUInt(outGBuffer1.x, 10, 0) | PackFloatToUInt(outGBuffer1.w, 2, 10) | PackNUpperbitFromU8(outGBuffer0Z, 2, 12) | PackNUpperbitFromU8(outGBuffer0W, 2, 14),
PackFloatToUInt(outGBuffer1.y, 10, 0) | PackNLowerbitFromU8(outGBuffer0Z, 6, 10),
PackFloatToUInt(outGBuffer1.z, 10, 0) | PackNLowerbitFromU8(outGBuffer0W, 6, 10),
outGBuffer0X | outGBuffer0Y << 8
outGBufferU0 = uint4( PackFloatToUInt(outGBuffer0.x, 8, 0) | PackFloatToUInt(outGBuffer0.y, 8, 8),
PackFloatToUInt(outGBuffer0.z, 8, 0) | PackFloatToUInt(outGBuffer0.w, 8, 8),
(packedGBuffer1 & 0x0000FFFF),
(packedGBuffer1 & 0xFFFF0000) >> 16);
uint outGBuffer2X = uint(saturate(data.outGBuffer2.x) * 255.5);
uint outGBuffer2Y = uint(saturate(data.outGBuffer2.y) * 255.5);
uint outGBuffer2Z = uint(saturate(data.outGBuffer2.z) * 255.5);
uint outGBuffer2W = uint(saturate(data.outGBuffer2.w) * 255.5);
uint packedGBuffer3 = PackR11G11B10f(outGBuffer3.xyz);
// TODO: This doesn't work for the lighting buffer as the encoded format is float, i.e. it means that we must first convert to the 111110Float format (TODO: look at the code, maybe it is not so expensive?)
// before storing the binary representation as uint. An alternative is to use RGBM/LogLuv.
outGBufferU1 = uint4( PackFloatToUInt(outGBuffer3.x, 11, 0) | PackNUpperbitFromU8(outGBuffer2Z, 3, 11) | PackNUpperbitFromU8(outGBuffer2W, 2, 14),
PackFloatToUInt(outGBuffer3.z, 11, 0) | PackNLowerbitFromU8(outGBuffer2Z, 5, 11),
PackFloatToUInt(outGBuffer3.x, 10, 0) | PackNLowerbitFromU8(outGBuffer2W, 6, 10),
outGBuffer2X | outGBuffer2Y << 8
outGBufferU1 = uint4( PackFloatToUInt(outGBuffer2.x, 8, 0) | PackFloatToUInt(outGBuffer2.y, 8, 8),
PackFloatToUInt(outGBuffer2.z, 8, 0) | PackFloatToUInt(outGBuffer2.w, 8, 8),
(packedGBuffer3 & 0x0000FFFF),
(packedGBuffer3 & 0xFFFF0000) >> 16);

float4 inGBuffer0, inGBuffer1, inGBuffer2, inGBuffer3;
inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.w, 8, 0);
inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.w, 8, 8);
inGBuffer0.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 12)) / 255.0;
inGBuffer0.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
inGBuffer0.x = UnpackUIntToFloat(inGBufferU0.x, 8, 0);
inGBuffer0.y = UnpackUIntToFloat(inGBufferU0.x, 8, 8);
inGBuffer0.z = UnpackUIntToFloat(inGBufferU0.y, 8, 0);
inGBuffer0.w = UnpackUIntToFloat(inGBufferU0.y, 8, 8);
inGBuffer0.xyz = Gamma20ToLinear(inGBuffer0.xyz);
inGBuffer1.x = UnpackUIntToFloat(inGBufferU0.x, 10, 0);
inGBuffer1.y = UnpackUIntToFloat(inGBufferU0.y, 10, 0);
inGBuffer1.z = UnpackUIntToFloat(inGBufferU0.z, 10, 0);
inGBuffer1.w = UnpackUIntToFloat(inGBufferU0.x, 2, 10);
uint packedGBuffer1 = inGBufferU0.z | inGBufferU0.w << 16;
inGBuffer1 = UnpackR10G10B10A2(packedGBuffer1);
inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.w, 8, 0);
inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.w, 8, 8);
inGBuffer2.z = (UnpackNLowerbitFromU8(inGBufferU1.y, 5, 11) | UnpackNUpperbitFromU8(inGBufferU1.x, 3, 11)) / 255.0;
inGBuffer2.w = (UnpackNLowerbitFromU8(inGBufferU1.z, 6, 10) | UnpackNUpperbitFromU8(inGBufferU1.x, 2, 14)) / 255.0;
inGBuffer2.x = UnpackUIntToFloat(inGBufferU1.x, 8, 0);
inGBuffer2.y = UnpackUIntToFloat(inGBufferU1.x, 8, 8);
inGBuffer2.z = UnpackUIntToFloat(inGBufferU1.y, 8, 0);
inGBuffer2.w = UnpackUIntToFloat(inGBufferU1.y, 8, 8);
inGBuffer3.x = UnpackUIntToFloat(inGBufferU1.x, 11, 0);
inGBuffer3.y = UnpackUIntToFloat(inGBufferU1.y, 11, 0);
inGBuffer3.z = UnpackUIntToFloat(inGBufferU1.z, 10, 0);
// Reassemble the 32-bit packed word for GBuffer3 from its two 16-bit halves:
// the low half is stored in inGBufferU1.z and the high half in inGBufferU1.w.
uint packedGBuffer3 = inGBufferU1.z | (inGBufferU1.w << 16);
// Decode the word assembled just above. Passing packedGBuffer1 here would decode
// the GBuffer1 word instead and leave packedGBuffer3 unused.
inGBuffer3.xyz = UnpackR11G11B10f(packedGBuffer3);
// Only xyz is packed for GBuffer3; w is set to zero.
inGBuffer3.w = 0.0;


#include "Color.hlsl"
#include "Packing.hlsl"
#include "BSDF.hlsl"
#include "Debug.hlsl"


// TODO: Currently it is not yet possible to use this feature; we need to provide previousPositionCS to the vertex shader as part of the Attributes for the GBuffer pass
// TODO: How do we enable this feature only on meshes that effectively require it, like skinned and moving meshes? (Others can be done with depth reprojection, but TAA can be an issue.)
VelocityInGBuffer = 0, // Change to 1 to enable the feature
VelocityInGBuffer = 0, // Change to 1 to enable the feature, then regenerate hlsl headers.
// Note: #define can't be use in include file in C# so we choes this way to configure both C# and hlsl
// Note: #define can't be used in include files in C#, so we chose this way to configure both C# and hlsl
// Changing a value in this Config enum requires regenerating the hlsl include and recompiling the C# and the shaders
public class ShaderConfig


// unsigned integer bit field extract implementation
uint BitFieldExtract(uint inData, uint inSize, uint inOffset)
uint BitFieldExtract(uint data, uint size, uint offset)
return (inData >> inOffset) & ((1u << inSize) - 1u);
return (data >> offset) & ((1u << size) - 1u);


// HDR packing
// Quaternion packing

// float packing to sint/uint
// src must be between 0.0 and 1.0
const uint MAX_VALUE = (1 << size) - 1;
return Clamp(uint(src * MAX_VALUE), uint(0), uint(MAX_VALUE)) << offset;
const float maxValue = float((1u << size) - 1u) + 0.5; // Shader compiler should be able to remove this
return uint(src * maxValue) << offset;
const uint MAX_VALUE = (1 << size) - 1;
const float invMaxValue = 1.0 / float((1 << size) - 1);
return BitFieldExtract(src, size, offset) / float(MAX_VALUE);
return float(BitFieldExtract(src, size, offset)) * invMaxValue;
uint PackNUpperbitFromU8(uint src, uint nbit, uint offset)
uint PackR10G10B10A2(float4 rgba)
return ( (src & (((1 << nbit) - 1) << (8 - nbit))) >> (8 - nbit) ) << offset;
return (PackFloatToUInt(rgba.x, 10, 0) | PackFloatToUInt(rgba.y, 10, 10) | PackFloatToUInt(rgba.z, 10, 20) | PackFloatToUInt(rgba.w, 2, 30));
uint PackNLowerbitFromU8(uint src, uint nbit, uint offset)
float4 UnpackR10G10B10A2(uint rgba)
return (src & ((1 << nbit) - 1)) << offset;
uint UnpackNUpperbitToU8(uint src, uint nbit, uint offset)
return ((src & (((1 << nbit) - 1) << offset)) >> offset) << (8 - nbit);
uint UnpackNLowerbitToU8(uint src, uint nbit, uint offset)
return (src & (((1 << nbit) - 1) << offset)) >> offset;
// Decode each channel from its bit field in the packed word:
// x, y and z take 10 bits each at offsets 0, 10 and 20; w takes 2 bits at offset 30.
return float4(UnpackUIntToFloat(rgba, 10, 0),
              UnpackUIntToFloat(rgba, 10, 10),
              UnpackUIntToFloat(rgba, 10, 20),
              UnpackUIntToFloat(rgba, 2, 30));