
Stereo-ize screen-space AABB generation (AABBBoundsBuffer)

In order to stereo-ize this job, the following work was needed:
* Dispatch twice as many thread groups. Use the GroupID.y to select the eye being processed.
* Submit one set of convex bounds per eye, so two bounds are generated for each light.
* Submit stereo-aware projection matrices, passed in as an array, and indexed with the stereo eye index in the compute shader.
* Output one set of AABBs for each eye.
* Increase size of compute buffers to support the stereo case (WIP)
This was verified with the test suite and manually via RenderDoc.
// Cached integer IDs for shader properties, resolved once via Shader.PropertyToID so the
// properties can be set by ID instead of by string name.
public static readonly int g_vBoundsBuffer = Shader.PropertyToID("g_vBoundsBuffer");
public static readonly int _LightVolumeData = Shader.PropertyToID("_LightVolumeData");
public static readonly int g_data = Shader.PropertyToID("g_data");
// Single projection / inverse-projection matrix properties.
public static readonly int g_mProjection = Shader.PropertyToID("g_mProjection");
public static readonly int g_mInvProjection = Shader.PropertyToID("g_mInvProjection");
// Array variants of the projection matrices; the screen-space AABB compute shader
// indexes these with the stereo eye index.
public static readonly int g_mProjectionArr = Shader.PropertyToID("g_mProjectionArr");
public static readonly int g_mInvProjectionArr = Shader.PropertyToID("g_mInvProjectionArr");
public static readonly int g_viDimensions = Shader.PropertyToID("g_viDimensions");
public static readonly int g_vLightList = Shader.PropertyToID("g_vLightList");


// Compile-time capacity limits for the light loop.
public const int k_MaxEnvLightsOnScreen = 64;
public const int k_MaxShadowOnScreen = 16;
public const int k_MaxCascadeCount = 4; //Should be not less than m_Settings.directionalLightCascadeCount;
// Number of eyes in stereo rendering; used as a multiplier when sizing the per-eye
// compute buffers (AABB bounds, convex bounds, light volume data).
public const int k_MaxStereoEyes = 2;
// (0.01, 0.01, 0.01). NOTE(review): presumably the minimum box extent used when culling
// box lights - confirm against the usage site, which is not visible here.
static readonly Vector3 k_BoxCullingExtentThreshold = Vector3.one * 0.01f;
// Static keyword is required here else we get a "DestroyBuffer can only be called from the main thread"

s_GenAABBKernel = buildScreenAABBShader.FindKernel("ScreenBoundsAABB");

// The bounds and light volumes are view-dependent, and the AABBs are additionally projection-dependent.
// The view and projection matrices are per eye in stereo, so each of these buffers holds one full set
// of entries per eye (k_MaxStereoEyes sets in total).
//
// Each buffer is allocated exactly once. Allocating a mono-sized buffer first and then overwriting the
// reference with the stereo-sized one would leak the first ComputeBuffer, because nothing would ever
// call Release() on it.
//
// TODO: Maybe in stereo, we will only support half as many lights total, in order to minimize buffer size waste.
// Alternatively, we could re-size these buffers if any stereo camera is active, instead of unilaterally increasing buffer size.

// Two float3 entries (AABB min and max) per light, per eye.
s_AABBBoundsBuffer = new ComputeBuffer(k_MaxStereoEyes * 2 * k_MaxLightsOnScreen, 3 * sizeof(float));
// One convex bound and one light volume entry per light, per eye.
s_ConvexBoundsBuffer = new ComputeBuffer(k_MaxStereoEyes * k_MaxLightsOnScreen, System.Runtime.InteropServices.Marshal.SizeOf(typeof(SFiniteLightBound)));
s_LightVolumeDataBuffer = new ComputeBuffer(k_MaxStereoEyes * k_MaxLightsOnScreen, System.Runtime.InteropServices.Marshal.SizeOf(typeof(LightVolumeData)));
// Three uint arguments per feature variant, for indirect dispatch.
s_DispatchIndirectBuffer = new ComputeBuffer(LightDefinitions.s_NumFeatureVariants * 3, sizeof(uint), ComputeBufferType.IndirectArguments);
// Cluster

temp.SetRow(1, new Vector4(0.0f, 1.0f, 0.0f, 0.0f));
// Row 2 maps z to 0.5 * z + 0.5 * w.
temp.SetRow(2, new Vector4(0.0f, 0.0f, 0.5f, 0.5f));
temp.SetRow(3, new Vector4(0.0f, 0.0f, 0.0f, 1.0f));

// Single-matrix properties: always built from eye 0's projection.
var projh = temp * projArr[0];
var invProjh = projh.inverse;

// Per-eye matrices. The array length must match the size of g_mProjectionArr /
// g_mInvProjectionArr declared in the compute shader (2).
const int maxEyes = 2;
var projhArr = new Matrix4x4[maxEyes];
var invProjhArr = new Matrix4x4[maxEyes];

// Fill one entry per active eye. Without stereo only index 0 is filled and index 1 keeps
// its default value; the kernel never reads it because only one row of groups is dispatched.
int activeEyes = m_FrameSettings.enableStereo ? maxEyes : 1;
for (int eyeIndex = 0; eyeIndex < activeEyes; eyeIndex++)
{
    projhArr[eyeIndex] = temp * projArr[eyeIndex];
    invProjhArr[eyeIndex] = projhArr[eyeIndex].inverse;
}

cmd.SetComputeMatrixParam(buildScreenAABBShader, HDShaderIDs.g_mProjection, projh);
cmd.SetComputeMatrixParam(buildScreenAABBShader, HDShaderIDs.g_mInvProjection, invProjh);
cmd.SetComputeMatrixArrayParam(buildScreenAABBShader, HDShaderIDs.g_mProjectionArr, projhArr);
cmd.SetComputeMatrixArrayParam(buildScreenAABBShader, HDShaderIDs.g_mInvProjectionArr, invProjhArr);

// In the stereo case, we have two sets of light bounds to iterate over (bounds are in per-eye
// view space) and we output two sets of AABB bounds. The kernel is dispatched exactly once:
// X covers the lights (8 per thread group) and Y carries the eye index, so stereo doubles the
// number of thread groups.
int tgY = activeEyes;
cmd.DispatchCompute(buildScreenAABBShader, s_GenAABBKernel, (m_lightCount + 7) / 8, tgY, 1);
// enable coarse 2D pass on 64x64 tiles (used for both fptl and clustered).


uniform int g_isOrthographic;
// Number of visible lights; also the length of each per-eye set in g_data and of each
// min/max run in g_vBoundsBuffer.
uniform int g_iNrVisibLights;
// Single projection / inverse-projection matrices.
uniform float4x4 g_mInvProjection;
uniform float4x4 g_mProjection;
// Per-eye projection / inverse-projection matrices, indexed by the stereo eye index (0 or 1).
uniform float4x4 g_mInvProjectionArr[2];
uniform float4x4 g_mProjectionArr[2];
// Convex light bounds. In stereo the kernel reads eye N's set starting at N * g_iNrVisibLights.
StructuredBuffer<SFiniteLightBound> g_data : register( t0 );

// Kernel entry point that generates the screen-space AABBs of the lights.
// threadID  : flattened index of the thread within its group (SV_GroupIndex).
// u3GroupID : thread group ID; x selects the group of lights, y selects the stereo eye.
void ScreenBoundsAABB(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
uint groupID = u3GroupID.x;
uint eyeIndex = u3GroupID.y; // can only be 0 or 1
// The g_ is preserved in order to make cross-pipeline (FPTL) updates easier
// These locals shadow the global g_mInvProjection / g_mProjection uniforms, so the code that
// follows uses the selected eye's matrices without being renamed.
float4x4 g_mInvProjection = g_mInvProjectionArr[eyeIndex];
float4x4 g_mProjection = g_mProjectionArr[eyeIndex];
//uint vindex = groupID * NR_THREADS + threadID;
unsigned int g = groupID;

// Light handled by this thread: 8 lights per thread group (g*8) plus the sub-light offset
// within the group (subLigt and t are computed above this excerpt).
const int lgtIndex = subLigt+(int) g*8;
const int sideIndex = (int) (t%8);
// g_data holds one set of g_iNrVisibLights bounds per eye, eye 0 first, so the lookup must be
// offset by the eye index. lgtDat is declared once, from the eye-adjusted index only: a second,
// un-offset declaration would be a redefinition and would read eye 0's bound for both eyes.
const int eyeAdjustedLgtIndex = lgtIndex + (eyeIndex * g_iNrVisibLights);
SFiniteLightBound lgtDat = g_data[eyeAdjustedLgtIndex];
const float3 boxX = lgtDat.boxAxisX.xyz;
const float3 boxY = lgtDat.boxAxisY.xyz;

//g_vBoundsBuffer[lgtIndex+g_iNrVisibLights] = float3(0.5*vMax.x+0.5, -0.5*vMin.y+0.5, vMax.z*VIEWPORT_SCALE_Z);
// changed for unity
// Each light's AABB is represented by two float3s, the min and max of the box.
// And for stereo, we have two sets of lights. Therefore, each eye has a set of mins, followed by
// a set of maxs, and each set is equal to g_iNrVisibLights.
// Resulting layout: [eye 0 mins | eye 0 maxs | eye 1 mins | eye 1 maxs].
//
// Only the eye-offset slots are written. Also writing to the un-offset lgtIndex slots would let
// the eye-1 thread groups overwrite eye 0's results. For eye 0 the offset is zero, so the mono
// layout is unchanged.
const int eyeBaseIndex = eyeIndex * g_iNrVisibLights * 2;
const int minIndex = eyeBaseIndex + lgtIndex + 0;
const int maxIndex = eyeBaseIndex + lgtIndex + (int)g_iNrVisibLights;
g_vBoundsBuffer[minIndex] = float3(0.5*vMin.x + 0.5, 0.5*vMin.y + 0.5, vMin.z*VIEWPORT_SCALE_Z);
g_vBoundsBuffer[maxIndex] = float3(0.5*vMax.x + 0.5, 0.5*vMax.y + 0.5, vMax.z*VIEWPORT_SCALE_Z);
