[BC6H] Added BC6H compression

7 年前 · 5a04f3fb
--- a/ScriptableRenderPipeline/Core/ShaderLibrary/Fibonacci.hlsl
+++ b/ScriptableRenderPipeline/Core/ShaderLibrary/Fibonacci.hlsl
 #ifndef UNITY_FIBONACCI_INCLUDED
 #define UNITY_FIBONACCI_INCLUDED

+#include "Common.hlsl"
+
 // Computes a point using the Fibonacci sequence of length N.
 // Input: Fib[N - 1], Fib[N - 2], and the index 'i' of the point.
 // Ref: Efficient Quadrature Rules for Illumination Integrals
--- a/ScriptableRenderPipeline/Core/ShaderLibrary/Sampling.hlsl
+++ b/ScriptableRenderPipeline/Core/ShaderLibrary/Sampling.hlsl
    return TransformGLtoDX(SphericalToCartesian(phi, cosTheta));
 }

+// Maps an integer texel coordinate of a cubemap face to a normalized
+// position in [-1..1]x[-1..1]: texel 0 maps to -1 and texel
+// (cubemapSize - 1) maps to +1. The divisor is kept at 1 or more so a
+// 1x1 face does not divide by zero.
+float2 CubemapTexelToNVC(uint2 unPositionTS, uint cubemapSize)
+{
+    float lastTexel = float(max(cubemapSize - 1, 1));
+    float2 doubled = 2.0 * float2(unPositionTS);
+    return doubled / lastTexel - 1.0;
+}
+
+// Per-face basis used to turn a 2D face position into a 3D vector.
+// For each face, entry [0] is the axis weighted by position.x, entry [1] the
+// axis weighted by position.y, and entry [2] the axis added unscaled
+// (see CubemapTexelToDirection).
+static const float3 CUBEMAP_FACE_BASIS_MAPPING[6][3] =
+{
+    // XPOS face
+    { float3( 0.0,  0.0, -1.0), float3(0.0, -1.0,  0.0), float3( 1.0,  0.0,  0.0) },
+    // XNEG face
+    { float3( 0.0,  0.0,  1.0), float3(0.0, -1.0,  0.0), float3(-1.0,  0.0,  0.0) },
+    // YPOS face
+    { float3( 1.0,  0.0,  0.0), float3(0.0,  0.0,  1.0), float3( 0.0,  1.0,  0.0) },
+    // YNEG face
+    { float3( 1.0,  0.0,  0.0), float3(0.0,  0.0, -1.0), float3( 0.0, -1.0,  0.0) },
+    // ZPOS face
+    { float3( 1.0,  0.0,  0.0), float3(0.0, -1.0,  0.0), float3( 0.0,  0.0,  1.0) },
+    // ZNEG face
+    { float3(-1.0,  0.0,  0.0), float3(0.0, -1.0,  0.0), float3( 0.0,  0.0, -1.0) }
+};
+
+// Builds a unit direction from a normalized face position and a face index.
+// positionTS weights the first two basis vectors of the selected face; the
+// third basis vector is added unscaled before normalizing.
+float3 CubemapTexelToDirection(float2 positionTS, uint faceId)
+{
+    float3 axisU = CUBEMAP_FACE_BASIS_MAPPING[faceId][0];
+    float3 axisV = CUBEMAP_FACE_BASIS_MAPPING[faceId][1];
+    float3 axisW = CUBEMAP_FACE_BASIS_MAPPING[faceId][2];
+
+    float3 unnormalized = axisU * positionTS.x + axisV * positionTS.y + axisW;
+    return normalize(unnormalized);
+}
+
 //-----------------------------------------------------------------------------
 // Sampling function
 // Reference : http://www.cs.virginia.edu/~jdl/bib/globillum/mis/shirley96.pdf + PBRT
--- a/ScriptableRenderPipeline/Core/BC6H.cs
+++ b/ScriptableRenderPipeline/Core/BC6H.cs
+using UnityEngine.Assertions;
+using UnityEngine.Rendering;
+
+namespace UnityEngine.Experimental.Rendering
+{
+    /// <summary>
+    /// Records the command buffer work that compresses a cubemap to BC6H with a compute shader.
+    /// </summary>
+    public class BC6H
+    {
+        /// <summary>
+        /// Instance used by the <c>CommandBuffer</c> extension method; this class never assigns it.
+        /// </summary>
+        public static BC6H DefaultInstance;
+
+        static readonly int _Source = Shader.PropertyToID("_Source");
+        static readonly int _Target = Shader.PropertyToID("_Target");
+        static readonly int _MipIndex = Shader.PropertyToID("_MipIndex");
+
+        // One temporary render target name per mip level. The number of entries
+        // bounds the highest mip index that can be encoded.
+        static readonly int[] __Tmp_RT =
+        {
+            Shader.PropertyToID("__Tmp_RT0"),
+            Shader.PropertyToID("__Tmp_RT1"),
+            Shader.PropertyToID("__Tmp_RT2"),
+            Shader.PropertyToID("__Tmp_RT3"),
+            Shader.PropertyToID("__Tmp_RT4"),
+            Shader.PropertyToID("__Tmp_RT5"),
+            Shader.PropertyToID("__Tmp_RT6"),
+            Shader.PropertyToID("__Tmp_RT7"),
+            Shader.PropertyToID("__Tmp_RT8"),
+            Shader.PropertyToID("__Tmp_RT9"),
+            Shader.PropertyToID("__Tmp_RT10"),
+            Shader.PropertyToID("__Tmp_RT11"),
+            Shader.PropertyToID("__Tmp_RT12"),
+            Shader.PropertyToID("__Tmp_RT13")
+        };
+
+        readonly ComputeShader m_Shader;
+        readonly int m_KEncodeFastCubemapMip;
+
+        /// <summary>
+        /// Creates an encoder bound to <paramref name="shader"/> and looks up its
+        /// <c>KEncodeFastCubemapMip</c> kernel.
+        /// </summary>
+        /// <param name="shader">Compute shader containing the encoding kernel; must not be null.</param>
+        public BC6H(ComputeShader shader)
+        {
+            Assert.IsNotNull(shader);
+
+            m_Shader = shader;
+            m_KEncodeFastCubemapMip = m_Shader.FindKernel("KEncodeFastCubemapMip");
+
+            // NOTE(review): the returned group sizes are not used anywhere in this class;
+            // the query is kept as-is in case it is relied on to validate the kernel - confirm.
+            uint x, y, z;
+            m_Shader.GetKernelThreadGroupSizes(m_KEncodeFastCubemapMip, out x, out y, out z);
+        }
+
+        // Highest mip of a cubemap of the given size whose faces still hold at least
+        // one 4x4 block: floor(log2(sourceSize)) - 2, never below 0. Computed with
+        // integer shifts so the result cannot be off by one due to float rounding.
+        static int MaxEncodableMip(int sourceSize)
+        {
+            var log2 = 0;
+            for (var remaining = sourceSize; remaining > 1; remaining >>= 1)
+                ++log2;
+
+            return Mathf.Max(0, log2 - 2);
+        }
+
+        /// <summary>
+        /// Records the encoding of mips <paramref name="fromMip"/>..<paramref name="toMip"/> (inclusive)
+        /// of the cubemap <paramref name="source"/> into <paramref name="target"/>.
+        /// Only mode 11 of the BC6H encoding is used.
+        /// </summary>
+        /// <param name="cmb">Command buffer that receives the commands.</param>
+        /// <param name="source">Cubemap bound to the kernel as <c>_Source</c>.</param>
+        /// <param name="sourceSize">Edge length in texels of mip 0 of <paramref name="source"/>; must be at least 4.</param>
+        /// <param name="target">Texture that receives the encoded blocks, one copy per face and mip.</param>
+        /// <param name="fromMip">First mip to encode; clamped to the encodable range.</param>
+        /// <param name="toMip">Last mip to encode; clamped to [fromMip, highest encodable mip].</param>
+        public void EncodeFastCubemap(CommandBuffer cmb, RenderTargetIdentifier source, int sourceSize, RenderTargetIdentifier target, int fromMip, int toMip)
+        {
+            Assert.IsTrue(sourceSize >= 4, "BC6H encodes 4x4 texel blocks: sourceSize must be at least 4.");
+
+            // A face smaller than one block would produce zero-sized temporary
+            // targets and zero-group dispatches, so nothing is recorded.
+            if (sourceSize < 4)
+                return;
+
+            // Also bounded by the number of temporary render target names available.
+            var maxMip = Mathf.Min(MaxEncodableMip(sourceSize), __Tmp_RT.Length - 1);
+            fromMip = Mathf.Clamp(fromMip, 0, maxMip);
+            toMip = Mathf.Min(maxMip, Mathf.Max(toMip, fromMip));
+
+            // Convert TextureCube source to Texture2DArray: one slice per face,
+            // one 128-bit integer texel per encoded 4x4 block.
+            var d = new RenderTextureDescriptor
+            {
+                autoGenerateMips = false,
+                bindMS = false,
+                colorFormat = RenderTextureFormat.ARGBInt,
+                depthBufferBits = 0,
+                dimension = TextureDimension.Tex2DArray,
+                enableRandomWrite = true,
+                msaaSamples = 1,
+                volumeDepth = 6,
+                sRGB = false,
+                useMipMap = false,
+            };
+
+            cmb.SetComputeTextureParam(m_Shader, m_KEncodeFastCubemapMip, _Source, source);
+
+            // Allocate one temporary target per mip, sized in blocks (texels / 4).
+            for (var mip = fromMip; mip <= toMip; ++mip)
+            {
+                var size = (sourceSize >> mip) >> 2;
+                d.width = size;
+                d.height = size;
+                cmb.GetTemporaryRT(__Tmp_RT[mip], d);
+            }
+
+            // Encode: one thread group per block and per face.
+            for (var mip = fromMip; mip <= toMip; ++mip)
+            {
+                var size = (sourceSize >> mip) >> 2;
+                cmb.SetComputeTextureParam(m_Shader, m_KEncodeFastCubemapMip, _Target, __Tmp_RT[mip]);
+                cmb.SetComputeIntParam(m_Shader, _MipIndex, mip);
+                cmb.DispatchCompute(m_Shader, m_KEncodeFastCubemapMip, size, size, 6);
+            }
+
+            // Copy each encoded slice into the matching face and mip of the target.
+            for (var mip = fromMip; mip <= toMip; ++mip)
+            {
+                for (var faceId = 0; faceId < 6; ++faceId)
+                    cmb.CopyTexture(__Tmp_RT[mip], faceId, 0, target, faceId, mip);
+            }
+
+            for (var mip = fromMip; mip <= toMip; ++mip)
+                cmb.ReleaseTemporaryRT(__Tmp_RT[mip]);
+        }
+    }
+
+    /// <summary>
+    /// <c>CommandBuffer</c> extension that forwards to <see cref="BC6H.DefaultInstance"/>.
+    /// </summary>
+    public static class BC6HExtensions
+    {
+        /// <summary>
+        /// Records a BC6H encoding of a cubemap through <see cref="BC6H.DefaultInstance"/>.
+        /// The arguments are passed unchanged to <see cref="BC6H.EncodeFastCubemap"/>.
+        /// </summary>
+        public static void BC6HEncodeFastCubemap(this CommandBuffer cmb, RenderTargetIdentifier source, int sourceSize, RenderTargetIdentifier target, int fromMip, int toMip)
+        {
+            // DefaultInstance is a plain static field that must be set by the caller's setup code;
+            // report a readable message instead of a bare NullReferenceException when it is missing.
+            Assert.IsNotNull(BC6H.DefaultInstance, "BC6H.DefaultInstance must be assigned before calling BC6HEncodeFastCubemap.");
+
+            BC6H.DefaultInstance.EncodeFastCubemap(cmb, source, sourceSize, target, fromMip, toMip);
+        }
+    }
+}
--- a/ScriptableRenderPipeline/Core/BC6H.cs.meta
+++ b/ScriptableRenderPipeline/Core/BC6H.cs.meta
+fileFormatVersion: 2
+guid: ef7e375d470b6404a9e355690703502b
+timeCreated: 1507290672
+licenseType: Pro
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/ScriptableRenderPipeline/Core/Resources/BC6H.compute
+++ b/ScriptableRenderPipeline/Core/Resources/BC6H.compute
+#include "../ShaderLibrary/BC6H.hlsl"
+#include "../ShaderLibrary/Sampling.hlsl"
+
+TextureCube<float4> _Source; // Cubemap to encode; the kernel samples it at mip level _MipIndex.
+RWTexture2DArray<uint4> _Target; // Output: one uint4 encoded block per element, indexed by (block x, block y, face).
+int _MipIndex; // Mip level of _Source being encoded.
+
+SamplerState s_point_clamp; // Sampler used for every _Source read. NOTE(review): presumably resolved by name to a point/clamp sampler - confirm.
+
+// Texel offsets inside a 4x4 block, listed row by row (x varies fastest).
+static const uint2 kOffsets[16] =
+{
+    uint2(0, 0), uint2(1, 0), uint2(2, 0), uint2(3, 0),   // row 0
+    uint2(0, 1), uint2(1, 1), uint2(2, 1), uint2(3, 1),   // row 1
+    uint2(0, 2), uint2(1, 2), uint2(2, 2), uint2(3, 2),   // row 2
+    uint2(0, 3), uint2(1, 3), uint2(2, 3), uint2(3, 3)    // row 3
+};
+
+#pragma kernel KEncodeFastCubemapMip
+// Encodes one 4x4 texel block of one cubemap face per thread.
+// dispatchThreadId.xy is the block coordinate inside the face and
+// dispatchThreadId.z the face index; the result is written to _Target at
+// that same coordinate.
+[numthreads(1, 1, 1)]
+void KEncodeFastCubemapMip(uint3 groupId : SV_GroupID, uint3 groupThreadId : SV_GroupThreadID, uint3 dispatchThreadId : SV_DispatchThreadID)
+{
+    // Edge length of the mip being encoded: the width reported for _Source
+    // shifted down by the mip index.
+    uint sourceWidth = 0;
+    uint sourceHeight = 0;
+    _Source.GetDimensions(sourceWidth, sourceHeight);
+    int mipSize = sourceWidth >> _MipIndex;
+
+    uint faceIndex = dispatchThreadId.z;
+    // First texel of the 4x4 block handled by this thread
+    uint2 blockOrigin = dispatchThreadId.xy << 2;
+
+    // Gather the 16 texels of the block
+    float3 blockTexels[16];
+    for (uint texel = 0u; texel < 16; ++texel)
+    {
+        float2 facePosition = CubemapTexelToNVC(blockOrigin + kOffsets[texel], mipSize);
+        float3 sampleDir = CubemapTexelToDirection(facePosition, faceIndex);
+        blockTexels[texel] = _Source.SampleLevel(s_point_clamp, sampleDir, _MipIndex).rgb;
+    }
+
+    // Encode the block; the error value returned by the encoder is not used here.
+    uint4 encoded = uint4(0, 0, 0, 0);
+    float encodedMSLE = 0;
+    EncodeMode11(encoded, encodedMSLE, blockTexels);
+
+    _Target[dispatchThreadId] = encoded;
+}
--- a/ScriptableRenderPipeline/Core/Resources/BC6H.compute.meta
+++ b/ScriptableRenderPipeline/Core/Resources/BC6H.compute.meta
+fileFormatVersion: 2
+guid: b69b95b3420fd904e8530b79f665a1f8
+timeCreated: 1507123133
+licenseType: Pro
+ComputeShaderImporter:
+  externalObjects: {}
+  currentAPIMask: 4
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/ScriptableRenderPipeline/Core/ShaderLibrary/BC6H.hlsl
+++ b/ScriptableRenderPipeline/Core/ShaderLibrary/BC6H.hlsl
+// Ref: https://github.com/knarkowicz/GPURealTimeBC6H/blob/master/bin/compress.hlsl
+// Doc: https://msdn.microsoft.com/en-us/library/windows/desktop/hh308952(v=vs.85).aspx
+
+#include "Common.hlsl"
+
+// Compression error between two colors: the squared log2 of the ratio
+// (b + 1) / (a + 1), summed over the three channels.
+float CalcMSLE(float3 a, float3 b)
+{
+    float3 logRatio = log2((b + 1.0f) / (a + 1.0f));
+    float3 squared = logRatio * logRatio;
+    return squared.x + squared.y + squared.z;
+}
+
+// Quantification Helpers
+// Converts each channel to its half-float bit pattern and rescales it by
+// 128 / (0x7bff + 1).
+float3 Quantize7(float3 x)
+{
+    float3 halfBits = f32tof16(x);
+    return (halfBits * 128.0f) / (0x7bff + 1.0f);
+}
+
+// Converts each channel to its half-float bit pattern and rescales it by
+// 512 / (0x7bff + 1).
+float3 Quantize9(float3 x)
+{
+    float3 halfBits = f32tof16(x);
+    return (halfBits * 512.0f) / (0x7bff + 1.0f);
+}
+
+// Converts each channel to its half-float bit pattern and rescales it by
+// 1024 / (0x7bff + 1).
+float3 Quantize10(float3 x)
+{
+    float3 halfBits = f32tof16(x);
+    return (halfBits * 1024.0f) / (0x7bff + 1.0f);
+}
+
+// First unquantization step for 7-bit values: (x * 65536 + 0x8000) / 128.
+float3 Unquantize7(float3 x)
+{
+    float3 expanded = x * 65536.0f + 0x8000;
+    return expanded / 128.0f;
+}
+
+// First unquantization step for 9-bit values: (x * 65536 + 0x8000) / 512.
+float3 Unquantize9(float3 x)
+{
+    float3 expanded = x * 65536.0f + 0x8000;
+    return expanded / 512.0f;
+}
+
+// First unquantization step for 10-bit values: (x * 65536 + 0x8000) / 1024.
+float3 Unquantize10(float3 x)
+{
+    float3 expanded = x * 65536.0f + 0x8000;
+    return expanded / 1024.0f;
+}
+
+// BC6H Helpers
+// Compute the 3-bit index (0..7) of a texel projected against endpoints.
+// The three arguments are scalar positions along the same axis; the result
+// is the rounded, clamped interpolation step between the two endpoints.
+uint ComputeIndex3( float texelPos, float endPoint0Pos, float endPoint1Pos )
+{
+    float range = endPoint1Pos - endPoint0Pos;
+
+    // Both endpoints project to the same position: dividing would yield
+    // NaN/Inf, whose conversion to uint is not portable. Index 0 selects
+    // endpoint 0.
+    if ( range == 0.0f )
+        return 0;
+
+    float r = ( texelPos - endPoint0Pos ) / range;
+    return (uint) clamp( r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f );
+}
+
+// Compute the 4-bit index (0..15) of a texel projected against endpoints.
+// The three arguments are scalar positions along the same axis; the result
+// is the rounded, clamped interpolation step between the two endpoints.
+uint ComputeIndex4( float texelPos, float endPoint0Pos, float endPoint1Pos )
+{
+    float range = endPoint1Pos - endPoint0Pos;
+
+    // Both endpoints project to the same position: dividing would yield
+    // NaN/Inf, whose conversion to uint is not portable. Index 0 selects
+    // endpoint 0.
+    if ( range == 0.0f )
+        return 0;
+
+    float r = ( texelPos - endPoint0Pos ) / range;
+    return (uint) clamp( r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f );
+}
+
+// Converts each channel of v1 to an integer, keeps only the bits selected
+// by 'mask', and additionally sets 'signFlag' on channels whose integer
+// value is negative. The result is written back to v1.
+void SignExtend( inout float3 v1, uint mask, uint signFlag )
+{
+    int3 channels = (int3) v1;
+
+    // Sign bits are decided from the values before masking.
+    uint signX = channels.x < 0 ? signFlag : 0;
+    uint signY = channels.y < 0 ? signFlag : 0;
+    uint signZ = channels.z < 0 ? signFlag : 0;
+
+    channels.x = ( channels.x & mask ) | signX;
+    channels.y = ( channels.y & mask ) | signY;
+    channels.z = ( channels.z & mask ) | signZ;
+
+    v1 = channels;
+}
+
+// Second unquantization step: blends the two unquantized endpoints with a
+// weight in 64ths, rescales by 31 / 4096, and reinterprets the truncated
+// result as half-float bits.
+float3 FinishUnquantize( float3 endpoint0Unq, float3 endpoint1Unq, float weight )
+{
+    float3 blended = endpoint0Unq * ( 64.0f - weight ) + endpoint1Unq * weight + 32.0f;
+    float3 halfBits = blended * ( 31.0f / 4096.0f );
+    return f16tof32( uint3( halfBits ) );
+}
+
+// BC6H Modes
+// Encodes a 4x4 block of RGB texels with BC6H mode 11: two endpoints
+// quantized to 10 bits per channel and one 4-bit index per texel.
+//   block     : receives the encoded block; it is fully overwritten.
+//   blockMSLE : receives the compression error, the sum of CalcMSLE over the 16 texels.
+//   texels    : the 16 texels of the block; texels[ 0 ] is the fix-up texel,
+//               whose index is stored on 3 bits.
+void EncodeMode11( inout uint4 block, inout float blockMSLE, float3 texels[ 16 ] )
+{
+    // Declared once for every loop below: reusing a counter declared inside
+    // an earlier 'for' statement depends on legacy scoping rules.
+    uint i;
+
+    // compute endpoints (min/max RGB bbox)
+    float3 blockMin = texels[ 0 ];
+    float3 blockMax = texels[ 0 ];
+    for ( i = 1; i < 16; ++i )
+    {
+        blockMin = min( blockMin, texels[ i ] );
+        blockMax = max( blockMax, texels[ i ] );
+    }
+
+    // refine endpoints in log2 RGB space:
+    // find the second smallest / second largest value of each channel
+    float3 refinedBlockMin = blockMax;
+    float3 refinedBlockMax = blockMin;
+    for ( i = 0; i < 16; ++i )
+    {
+        refinedBlockMin = min( refinedBlockMin, texels[ i ] == blockMin ? refinedBlockMin : texels[ i ] );
+        refinedBlockMax = max( refinedBlockMax, texels[ i ] == blockMax ? refinedBlockMax : texels[ i ] );
+    }
+
+    // pull each bound towards its refined value, by at most 1/32 of the log range
+    float3 logBlockMax          = log2( blockMax + 1.0f );
+    float3 logBlockMin          = log2( blockMin + 1.0f );
+    float3 logRefinedBlockMax   = log2( refinedBlockMax + 1.0f );
+    float3 logRefinedBlockMin   = log2( refinedBlockMin + 1.0f );
+    float3 logBlockMaxExt       = ( logBlockMax - logBlockMin ) * ( 1.0f / 32.0f );
+    logBlockMin += min( logRefinedBlockMin - logBlockMin, logBlockMaxExt );
+    logBlockMax -= min( logBlockMax - logRefinedBlockMax, logBlockMaxExt );
+    blockMin = exp2( logBlockMin ) - 1.0f;
+    blockMax = exp2( logBlockMax ) - 1.0f;
+
+    // projection axis, normalized so its components sum to 1
+    float3 blockDir = blockMax - blockMin;
+    float blockDirSum = blockDir.x + blockDir.y + blockDir.z;
+    if ( blockDirSum > 0.0f )
+    {
+        blockDir = blockDir / blockDirSum;
+    }
+    else
+    {
+        // Flat block (all texels equal): there is no axis to project on.
+        // Avoid the 0 / 0 division and use a null direction instead.
+        blockDir = float3( 0.0f, 0.0f, 0.0f );
+    }
+
+    float3 endpoint0    = Quantize10( blockMin );
+    float3 endpoint1    = Quantize10( blockMax );
+    float endPoint0Pos  = f32tof16( dot( blockMin, blockDir ) );
+    float endPoint1Pos  = f32tof16( dot( blockMax, blockDir ) );
+
+    // When both endpoints project to the same position the index computation
+    // would divide by zero; every texel then uses index 0 (endpoint 0).
+    bool degenerate = ( endPoint0Pos == endPoint1Pos );
+
+    // check if endpoint swap is required:
+    // the index of the fix-up texel is stored on 3 bits, so it must be <= 7
+    uint fixupIndex = 0;
+    if ( !degenerate )
+    {
+        float fixupTexelPos = f32tof16( dot( texels[ 0 ], blockDir ) );
+        fixupIndex = ComputeIndex4( fixupTexelPos, endPoint0Pos, endPoint1Pos );
+    }
+    if ( fixupIndex > 7 )
+    {
+        Swap( endPoint0Pos, endPoint1Pos );
+        Swap( endpoint0, endpoint1 );
+    }
+
+    // compute indices
+    uint indices[ 16 ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    if ( !degenerate )
+    {
+        for ( i = 0; i < 16; ++i )
+        {
+            float texelPos = f32tof16( dot( texels[ i ], blockDir ) );
+            indices[ i ] = ComputeIndex4( texelPos, endPoint0Pos, endPoint1Pos );
+        }
+    }
+
+    // compute compression error (MSLE)
+    float3 endpoint0Unq = Unquantize10( endpoint0 );
+    float3 endpoint1Unq = Unquantize10( endpoint1 );
+    float msle = 0.0f;
+    for ( i = 0; i < 16; ++i )
+    {
+        float weight = floor( ( indices[ i ] * 64.0f ) / 15.0f + 0.5f );
+        float3 texelUnc = FinishUnquantize( endpoint0Unq, endpoint1Unq, weight );
+
+        msle += CalcMSLE( texels[ i ], texelUnc );
+    }
+
+    // encode block for mode 11
+    blockMSLE = msle;
+
+    // Start from a clean block: only the mode bits are set, so the ORs below
+    // do not depend on what the caller passed in.
+    block = uint4( 0x03, 0, 0, 0 );
+
+    // endpoints: 10 bits per channel, packed right after the 5 mode bits
+    block.x |= (uint) endpoint0.x << 5;
+    block.x |= (uint) endpoint0.y << 15;
+    block.x |= (uint) endpoint0.z << 25;
+    block.y |= (uint) endpoint0.z >> 7;
+    block.y |= (uint) endpoint1.x << 3;
+    block.y |= (uint) endpoint1.y << 13;
+    block.y |= (uint) endpoint1.z << 23;
+    block.z |= (uint) endpoint1.z >> 9;
+
+    // indices: the fix-up texel takes 3 bits (bits 1..3 of block.z),
+    // every other texel 4 bits
+    block.z |= indices[ 0 ] << 1;
+    for ( i = 1; i < 8; ++i )
+    {
+        block.z |= indices[ i ] << ( 4 * i );
+    }
+    for ( i = 8; i < 16; ++i )
+    {
+        block.w |= indices[ i ] << ( 4 * ( i - 8 ) );
+    }
+}
--- a/ScriptableRenderPipeline/Core/ShaderLibrary/BC6H.hlsl.meta
+++ b/ScriptableRenderPipeline/Core/ShaderLibrary/BC6H.hlsl.meta
+fileFormatVersion: 2
+guid: 27d419a4917d0ea49978c236e058d464
+timeCreated: 1507282342
+licenseType: Pro
+ShaderImporter:
+  externalObjects: {}
+  defaultTextures: []
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: