// The reason is that for compute shaders we need to guarantee that the layout of CBs is consistent across kernels, something we can't control with the global namespace (uniforms get optimized out if not used, modifying the global CBuffer layout per kernel).
// Structure definitions that are shared between C# and HLSL.
// These structures need to be aligned on float4 to respect various packing rules from the shader language. This means that these structures need to be padded.
// Do not use "in", only "out" or "inout" as a qualifier; do not use the "inline" keyword either, it is useless.
// Headers from ShaderLibrary do not include "Common.hlsl"; it should be included by the .shader using it (or by Material.hlsl).
// Rules: When doing an array for constant buffer variables, we always use float4 to avoid any packing issue, particularly between compute shaders and pixel shaders, e.g.:
// uniform float4 packedArray[3];
// static float unpackedArray[12] = (float[12])packedArray;
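// For illustration, a minimal sketch of the float4-alignment rule for a struct shared with C# (the struct name and fields below are hypothetical, not part of this library):
// struct ExampleLightData           // mirrored 1:1 by a C# struct with the same field order
// {
//     float3 color;
//     float  intensity;             // packs into the same float4 slot as 'color'
//     float3 forward;
//     float  unused0;               // explicit padding so the struct size is a multiple of float4
// };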
#ifndef real
#define real float
#endif // #ifndef real

#ifndef INTRINSIC_CUBEMAP_FACE_ID
// TODO: implement this. Is the reference implementation of CubemapFaceID provided by AMD the reverse of ours?
/*
real CubemapFaceID(real3 dir)
{
    real faceID;

    if (abs(dir.z) >= abs(dir.x) && abs(dir.z) >= abs(dir.y))
    {
        faceID = (dir.z < 0.0) ? 5.0 : 4.0;
    }
*/
void GetCubeFaceID(real3 dir, out int faceIndex)
{
    real3 adir = abs(dir);

    // +Z -Z
    faceIndex = dir.z > 0.0 ? CUBEMAPFACE_NEGATIVE_Z : CUBEMAPFACE_POSITIVE_Z;
// PositivePow removes this warning when you know the value is positive, and avoids inf/NaN.
TEMPLATE_2_FLT(PositivePow, base, power, return pow(max(abs(base), FLT_EPS), power))
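// For illustration, a minimal usage sketch (the 'smoothness' and 'exponent' names are hypothetical):
// real gloss = PositivePow(smoothness, exponent); // avoids the "pow(f, e) will not work for negative f" warning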
// Computes (FastSign(s) * x) using 2x VALU.
// See the comment about FastSign() below.
real FastMulBySignOf(real s, real x, bool ignoreNegZero = true)
{
#if !defined(SHADER_API_GLES)
    uint negZero = 0x80000000u;
    uint signBit = negZero & asuint(s);
    return asfloat(signBit ^ asuint(float(x))); // asuint() does not work with half, so cast to float
#else
    return (s >= 0) ? x : -x;
#endif
}
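// For illustration, a minimal usage sketch (the 'NdotL' and 'thickness' names are hypothetical):
// real signedThickness = FastMulBySignOf(NdotL, thickness); // 'thickness' with the sign of 'NdotL', see the 2x VALU note above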
// Texture utilities
// ----------------------------------------------------------------------------
real ComputeTextureLOD(real2 uv)
{
    real2 ddx_ = ddx(uv);
    real2 ddy_ = ddy(uv);
    real  d    = max(dot(ddx_, ddx_), dot(ddy_, ddy_));

    return max(0.5 * log2(d), 0.0);
}
// texelSize.zw is the texture size in texels.
real ComputeTextureLOD(real2 uv, real4 texelSize)
{
    uv *= texelSize.zw;

    return ComputeTextureLOD(uv);
}
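// For illustration, a minimal explicit-LOD sampling sketch (the '_MainTex' texture, its '_MainTex_TexelSize' vector and the SAMPLE_TEXTURE2D_LOD macro are assumed to be declared elsewhere):
// real  lod   = ComputeTextureLOD(uv, _MainTex_TexelSize);
// real4 color = SAMPLE_TEXTURE2D_LOD(_MainTex, sampler_MainTex, uv, lod);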
// Texture format sampling
// ----------------------------------------------------------------------------
real2 DirectionToLatLongCoordinate(real3 unDir)
{
    real3 dir = normalize(unDir);
    // Coordinate frame is (-Z, X), meaning negative Z is the primary axis and X is the secondary axis.
    return real2(1.0 - 0.5 * INV_PI * atan2(dir.x, -dir.z), asin(dir.y) * INV_PI + 0.5);
}
real3 LatlongToDirectionCoordinate(real2 coord)
{
    real theta = coord.y * PI;
    real phi = (coord.x * 2.f * PI - PI * 0.5f);

    real cosTheta = cos(theta);
    real sinTheta = sqrt(1.0 - min(1.0, cosTheta * cosTheta));
    real cosPhi = cos(phi);
    real sinPhi = sin(phi);

    real3 direction = real3(sinTheta * cosPhi, cosTheta, sinTheta * sinPhi);
    direction.xy *= -1.0;
    return direction;
}
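// For illustration, a minimal round-trip sketch (the 'reflectionDir' variable is hypothetical):
// real2 uv  = DirectionToLatLongCoordinate(reflectionDir); // direction -> equirectangular UV
// real3 dir = LatlongToDirectionCoordinate(uv);            // UV -> direction (round trip, up to precision)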
// Z buffer to linear 0..1 depth (0 at near plane, 1 at far plane).
// Does not correctly handle oblique view frustums.
real Linear01DepthFromNear(real depth, real4 zBufferParam)
{
return 1.0 / (zBufferParam.x + zBufferParam.y / depth);
}
real Linear01Depth(real depth, real4 zBufferParam)
{
return 1.0 / (zBufferParam.x * depth + zBufferParam.y);
}
real LinearEyeDepth(real depth, real4 zBufferParam)
{
return 1.0 / (zBufferParam.z * depth + zBufferParam.w);
}
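// For illustration, a minimal usage sketch (the 'deviceDepth' value is assumed to come from the raw depth buffer, and '_ZBufferParams' is Unity's usual linearization vector, not declared in this file):
// real linear01 = Linear01Depth(deviceDepth, _ZBufferParams);  // 0 at camera, 1 at far plane
// real eyeDepth = LinearEyeDepth(deviceDepth, _ZBufferParams); // view space depth in world units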
// Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
real LinearEyeDepth(real2 positionNDC, real deviceDepth, real4 invProjParam)
{
    real4 positionCS = real4(positionNDC * 2.0 - 1.0, deviceDepth, 1.0);
    real  viewSpaceZ = rcp(dot(positionCS, invProjParam));

    // The view space uses a right-handed coordinate system.
    return -viewSpaceZ;
}
// Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
real LinearEyeDepth(real3 positionWS, real4x4 viewProjMatrix)
{
    return mul(viewProjMatrix, real4(positionWS, 1.0)).w;
}
// ----------------------------------------------------------------------------
static const real3x3 k_identity3x3 = {1, 0, 0,
                                      0, 1, 0,
                                      0, 0, 1};

static const real4x4 k_identity4x4 = {1, 0, 0, 0,
                                      0, 1, 0, 0,
                                      0, 0, 1, 0,
                                      0, 0, 0, 1};
real2 ComputeNormalizedDeviceCoordinates(real3 position, real4x4 clipSpaceTransform = k_identity4x4)
{
    real4 positionCS  = mul(clipSpaceTransform, real4(position, 1.0));
    real2 positionNDC = positionCS.xy * (rcp(positionCS.w) * 0.5) + 0.5;

#if UNITY_UV_STARTS_AT_TOP
    positionNDC.y = 1.0 - positionNDC.y;
#endif

    return positionNDC;
}
real4 ComputeClipSpacePosition(real2 positionNDC, real deviceDepth)
{
    return real4(positionNDC * 2.0 - 1.0, deviceDepth, 1.0);
}
real3 ComputeViewSpacePosition(real2 positionNDC, real deviceDepth, real4x4 invProjMatrix)
{
    real4 positionCS = ComputeClipSpacePosition(positionNDC, deviceDepth);
    real4 positionVS = mul(invProjMatrix, positionCS);
    // The view space uses a right-handed coordinate system.
    positionVS.z = -positionVS.z;
    return positionVS.xyz / positionVS.w;
}
real3 ComputeWorldSpacePosition(real2 positionNDC, real deviceDepth, real4x4 invViewProjMatrix)
{
    real4 positionCS  = ComputeClipSpacePosition(positionNDC, deviceDepth);
    real4 hpositionWS = mul(invViewProjMatrix, positionCS);
    return hpositionWS.xyz / hpositionWS.w;
}
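// For illustration, a minimal depth-to-world reconstruction sketch (the 'positionNDC', 'deviceDepth' and 'UNITY_MATRIX_I_VP' names are assumptions about the calling code, not declared in this file):
// real3 positionWS = ComputeWorldSpacePosition(positionNDC, deviceDepth, UNITY_MATRIX_I_VP);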
struct PositionInputs
{
    real3 positionWS;  // World space position (could be camera-relative)
    real2 positionNDC; // Normalized screen UVs          : [0, 1) (with the half-pixel offset)
    real  deviceDepth; // Depth from the depth buffer    : [0, 1] (typically reversed)
    real  linearDepth; // View space Z coordinate        : [Near, Far]
};
// This function provides an easy way to sample a screen texture, either from a pixel shader or a compute shader.
// Specify explicit tile coordinates so that we can easily make it lane-invariant for compute evaluation.
PositionInputs GetPositionInput(real2 positionSS, real2 invScreenSize, uint2 tileCoord)
{
    PositionInputs posInput;
    ZERO_INITIALIZE(PositionInputs, posInput);

    posInput.positionNDC = positionSS;
#if defined(SHADER_STAGE_COMPUTE)
    // In the compute shader case, an extra half-pixel offset is added to the screen position to shift the integer position to the pixel center.
    posInput.positionNDC.xy += real2(0.5, 0.5);
#endif
    posInput.positionNDC *= invScreenSize;

    return posInput;
}
PositionInputs GetPositionInput(real2 positionSS, real2 invScreenSize)
{
    return GetPositionInput(positionSS, invScreenSize, uint2(0, 0));
}
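// For illustration, a minimal fragment shader usage sketch (the 'input.positionCS' value and the '_ScreenSize' uniform are assumptions about the calling shader, not declared in this file; _ScreenSize.zw is assumed to hold 1/width, 1/height):
// PositionInputs posInput = GetPositionInput(input.positionCS.xy, _ScreenSize.zw);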
void UpdatePositionInput(real deviceDepth, real linearDepth, real3 positionWS, inout PositionInputs posInput)
{
    posInput.deviceDepth = deviceDepth;
    posInput.linearDepth = linearDepth;
    posInput.positionWS  = positionWS;
}
// From a deferred or compute shader:
// depth must be the depth from the raw depth buffer. This allows handling all kinds of depth automatically with the inverse view projection matrix.
// For information: in Unity, depth is always in the range 0..1 (even on OpenGL), but it can be reversed.
void UpdatePositionInput(real deviceDepth, real4x4 invViewProjMatrix, real4x4 viewProjMatrix, inout PositionInputs posInput)
{
    posInput.deviceDepth = deviceDepth;
    posInput.positionWS  = ComputeWorldSpacePosition(posInput.positionNDC, deviceDepth, invViewProjMatrix);
    posInput.linearDepth = mul(viewProjMatrix, real4(posInput.positionWS, 1.0)).w;
}
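// For illustration, a minimal deferred usage sketch (the '_CameraDepthTexture' texture, 'pixelCoord' value, LOAD_TEXTURE2D macro and 'UNITY_MATRIX_I_VP' / 'UNITY_MATRIX_VP' names are assumptions about the calling code):
// real deviceDepth = LOAD_TEXTURE2D(_CameraDepthTexture, pixelCoord).r;
// UpdatePositionInput(deviceDepth, UNITY_MATRIX_I_VP, UNITY_MATRIX_VP, posInput);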
void ApplyDepthOffsetPositionInput(real3 V, real depthOffsetVS, real4x4 viewProjMatrix, inout PositionInputs posInput)
{
    posInput.positionWS += depthOffsetVS * (-V);

    real4 positionCS = mul(viewProjMatrix, real4(posInput.positionWS, 1.0));
    posInput.linearDepth = positionCS.w;
    posInput.deviceDepth = positionCS.z / positionCS.w;
}
// Generates a triangle in homogeneous clip space, s.t.
// v0 = (-1, -1, 1), v1 = (3, -1, 1), v2 = (-1, 3, 1).
real2 GetFullScreenTriangleTexCoord(uint vertexID)
{
#if UNITY_UV_STARTS_AT_TOP
    return real2((vertexID << 1) & 2, 1.0 - (vertexID & 2));
#else
    return real2((vertexID << 1) & 2, vertexID & 2);
#endif
}
real4 GetFullScreenTriangleVertexPosition(uint vertexID, real z = UNITY_NEAR_CLIP_VALUE)
{
    real2 uv = real2((vertexID << 1) & 2, vertexID & 2);
    return real4(uv * 2.0 - 1.0, z, 1.0);
}
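// For illustration, a minimal full-screen pass vertex shader sketch (the 'Varyings' struct and 'FullScreenVert' name are hypothetical, not declared in this file):
// Varyings FullScreenVert(uint vertexID : SV_VertexID)
// {
//     Varyings output;
//     output.positionCS = GetFullScreenTriangleVertexPosition(vertexID);
//     output.texcoord   = GetFullScreenTriangleTexCoord(vertexID);
//     return output;
// }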
#if !defined(SHADER_API_GLES)
// LOD1 must use this function with ditherFactor 0..1
void LODDitheringTransition(uint2 positionSS, real ditherFactor)
{
    real p = GenerateHashedRandomFloat(positionSS);

    // We want to have a symmetry between the 0..0.5 and 0.5..1 ditherFactor ranges so that no pixels are transparent during the transition.
    // This is handled by this test, which reverses the pattern.