您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

78 行
2.6 KiB

// Each #kernel tells which function to compile; you can have many kernels
#pragma kernel PresenseMask
#pragma kernel ClearCounts
#pragma kernel ClearPresenceMask
#pragma kernel CombineCounts
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Packing.hlsl"
// Resources and constants used by the kernels in this file; they are bound by name from the host side.
// Segmentation image. It is only sampled here (never written); SampleSegmentationTexture
// decodes the four channels of a texel into a single 32-bit instance id.
Texture2D<float4> SegmentationTexture;
// Sampler state used when reading SegmentationTexture.
// NOTE(review): presumably point-filtered so ids are never blended between texels - confirm on the host side.
SamplerState sampler_SegmentationTexture;
// Lookup table indexed by instance id; the value is used as an index into ClassCounts.
StructuredBuffer<uint> InstanceIdToClassId;
// One uint per instance id: PresenseMask stores 1, ClearPresenceMask stores 0, CombineCounts reads it.
RWStructuredBuffer<uint> InstanceIdPresenceMask;
// One counter per class id: ClearCounts stores 0, CombineCounts increments it atomically.
RWStructuredBuffer<uint> ClassCounts;
// Divisors used to turn integer pixel coordinates into normalized texture coordinates.
// NOTE(review): assumed to equal the size of SegmentationTexture in texels - confirm against the caller.
uint Width;
uint Height;
// Reads the segmentation texel addressed by pixelCoord and decodes it into a 32-bit id.
//   pixelCoord: integer texel coordinate; converted to a normalized UV using Width and Height.
//   returns:    sum of the four channels, each converted with PackFloatToUInt using 8 bits and
//               shifted to offsets 0 (x), 8 (y), 16 (z) and 24 (w).
uint SampleSegmentationTexture(uint2 pixelCoord)
{
    // Shift by half a texel so the lookup hits the texel centre, then scale into UV space.
    float2 texelCenter = pixelCoord + float2(.5, .5);
    float2 inverseSize = float2(1/(float)Width, 1/(float)Height);
    float4 encodedId = SegmentationTexture.SampleLevel(sampler_SegmentationTexture, texelCenter * inverseSize, 0);

    // One byte of the id per colour channel.
    uint byte0 = PackFloatToUInt(encodedId.x, 0, 8);
    uint byte1 = PackFloatToUInt(encodedId.y, 8, 8);
    uint byte2 = PackFloatToUInt(encodedId.z, 16, 8);
    uint byte3 = PackFloatToUInt(encodedId.w, 24, 8);

    return byte0 + byte1 + byte2 + byte3;
}
// For each pixel in the segmentation image, set InstanceIdPresenceMask[pixelValue] to 1.
//   id.xy: pixel coordinate handled by this thread (one thread per pixel, 8x8 threads per group).
// A decoded id of 0 is skipped and never marked (presumably "no object" - confirm with the producer
// of the segmentation image).
// NOTE(review): instanceId is not range-checked against the length of InstanceIdPresenceMask; the
// buffer is assumed to be large enough for every id that can appear in the image.
[numthreads(8,8,1)]
void PresenseMask (uint3 id : SV_DispatchThreadID)
{
    // When the dispatch is rounded up to whole 8x8 groups, threads in the edge groups fall outside
    // the image. Skip them instead of sampling UVs beyond [0,1] and relying on the sampler's
    // address mode to produce something sensible.
    if (id.x >= Width || id.y >= Height)
        return;

    uint instanceId = SampleSegmentationTexture(id.xy);
    // Plain store, no atomic: every thread that hits the same slot writes the same value.
    if (instanceId != 0)
        InstanceIdPresenceMask[instanceId] = 1;

    // Abandoned alternative: pack presence into single bits. Good for memory, bad for perf due to InterlockedOr.
    //uint bitOffset = instanceId % 32;
    //uint maskOffset = instanceId / 32;
    //InterlockedOr(InstanceIdPresenceMask[maskOffset], 1u << bitOffset);
}
// Resets ClassCounts: each thread stores 0 into the entry selected by its dispatch-thread x index.
// NOTE(review): there is no bounds check; the dispatch size is assumed to match the buffer length
// (or the target API is assumed to ignore out-of-range writes) - confirm against the caller.
[numthreads(8,1,1)]
void ClearCounts (uint3 id : SV_DispatchThreadID)
{
    const uint classIndex = id.x;
    ClassCounts[classIndex] = 0u;
}
// Resets InstanceIdPresenceMask: each thread stores 0 into the entry selected by its dispatch-thread x index.
// NOTE(review): there is no bounds check; the dispatch size is assumed to match the buffer length
// (or the target API is assumed to ignore out-of-range writes) - confirm against the caller.
[numthreads(8,1,1)]
void ClearPresenceMask(uint3 id : SV_DispatchThreadID)
{
    const uint instanceIndex = id.x;
    InstanceIdPresenceMask[instanceIndex] = 0u;
}
// Folds the presence mask into per-class counts: one thread per instance id (id.x). When the mask
// entry for that id is non-zero, the counter of the class it maps to is incremented by one.
// Uses InterlockedAdd, which may be a significant bottleneck.
[numthreads(8,1,1)]
void CombineCounts (uint3 id : SV_DispatchThreadID)
{
    const uint instanceId = id.x;

    // Nothing to count for ids that were never marked as present.
    if (InstanceIdPresenceMask[instanceId] == 0)
        return;

    // Several instance ids can map to the same class, so the increment has to be atomic.
    const uint classId = InstanceIdToClassId[instanceId];
    InterlockedAdd(ClassCounts[classId], 1);

    // Abandoned alternative: presence packed into single bits (32 ids per mask word).
    // Good for memory, bad for perf due to InterlockedOr in PresenseMask(...).
    //uint mask = InstanceIdPresenceMask[id.x];
    //uint idStart = id.x * 32;
    //for (uint i = 0; i < 32; i++)
    //{
    //    bool isPresent = (mask & (1u << i)) != 0;
    //    if (isPresent)
    //    {
    //        InterlockedAdd(ClassCounts[InstanceIdToClassId[idStart + i]], 1);
    //    }
    //}
}