using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using Unity.Collections;
using Unity.Entities;
using Unity.Profiling;
namespace UnityEngine.Perception.GroundTruth
{
/// <summary>
/// Produces 3D bounding box ground truth each frame for all visible objects carrying a <see cref="Labeling"/> component.
/// </summary>
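/// <remarks>
/// A minimal usage sketch, assuming a <see cref="PerceptionCamera"/> instance named <c>perceptionCamera</c>
/// and an <see cref="IdLabelConfig"/> asset named <c>labelConfig</c> (both names are illustrative only):
/// <code>
/// perceptionCamera.AddLabeler(new BoundingBox3DLabeler(labelConfig));
/// </code>
/// </remarks>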
public class BoundingBox3DLabeler : CameraLabeler
{
EntityQuery m_EntityQuery;
/// <inheritdoc/>
public override string description
{
get => "Produces 3D bounding box ground truth data for all visible objects that bear a label defined in this labeler's associated label configuration.";
protected set {}
}
// ReSharper disable MemberCanBePrivate.Global
/// <summary>
/// The GUID to associate with the annotations produced by this labeler.
/// </summary>
public string annotationId = "0bfbe00d-00fa-4555-88d1-471b58449f5c";
/// <summary>
/// The <see cref="IdLabelConfig"/> which associates objects with labels.
/// </summary>
public IdLabelConfig idLabelConfig;
// ReSharper restore MemberCanBePrivate.Global
/// <summary>
/// Each 3D bounding box data record maps a tuple of (instance, label) to the translation, size, and rotation that define the 3D bounding box,
/// as well as the (optional) velocity and acceleration of the box. All location data is given with respect to the sensor coordinate system.
/// </summary>
/// <remarks>
/// Exporting velocity and acceleration is not currently supported; both values are reported as zero.
/// </remarks>
[SuppressMessage("ReSharper", "InconsistentNaming")]
[Serializable]
public struct BoxData
{
/// <summary>
/// Integer identifier of the label
/// </summary>
public int label_id;
/// <summary>
/// String identifier of the label
/// </summary>
public string label_name;
/// <summary>
/// Unique identifier of the instance
/// </summary>
public uint instance_id;
/// <summary>
/// 3D bounding box's center location in meters as center_x, center_y, center_z with respect to the sensor coordinate system
/// </summary>
public Vector3 translation;
/// <summary>
/// 3D bounding box size in meters as width, length, height
/// </summary>
public Vector3 size;
/// <summary>
/// 3D bounding box orientation as quaternion: w, x, y, z
/// </summary>
public Quaternion rotation;
/// <summary>
/// [optional]: 3D bounding box velocity in meters per second as v_x, v_y, v_z
/// </summary>
public Vector3 velocity;
/// <summary>
/// [optional]: 3D bounding box acceleration in meters per second^2 as a_x, a_y, a_z
/// </summary>
public Vector3 acceleration;
}
static ProfilerMarker s_BoundingBoxCallback = new ProfilerMarker("OnBoundingBoxes3DReceived");
AnnotationDefinition m_AnnotationDefinition;
Dictionary<int, AsyncAnnotation> m_AsyncAnnotations;
Dictionary<int, Dictionary<uint, BoxData>> m_BoundingBoxValues;
List<BoxData> m_ToReport;
int m_CurrentFrame;
/// <inheritdoc/>
protected override bool supportsVisualization => false;
/// <summary>
/// Fired when the bounding boxes are computed for a frame.
/// </summary>
public event Action<int, List<BoxData>> BoundingBoxComputed;
/// <summary>
/// Creates a new BoundingBox3DLabeler. Be sure to assign <see cref="idLabelConfig"/> before adding to a <see cref="PerceptionCamera"/>.
/// </summary>
public BoundingBox3DLabeler() {}
/// <summary>
/// Creates a new BoundingBox3DLabeler with the given <see cref="IdLabelConfig"/>.
/// </summary>
/// <param name="labelConfig">The label config for resolving the label for each object.</param>
public BoundingBox3DLabeler(IdLabelConfig labelConfig)
{
this.idLabelConfig = labelConfig;
}
/// <inheritdoc/>
protected override void Setup()
{
if (idLabelConfig == null)
throw new InvalidOperationException("BoundingBox3DLabeler's idLabelConfig field must be assigned");
m_AnnotationDefinition = DatasetCapture.RegisterAnnotationDefinition("bounding box 3D", idLabelConfig.GetAnnotationSpecification(),
"Bounding box for each labeled object visible to the sensor", id: new Guid(annotationId));
perceptionCamera.RenderedObjectInfosCalculated += OnRenderObjectInfosCalculated;
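// The query below matches every entity carrying both a Labeling component and ground truth info,
// i.e. every labeled object currently tracked by the ground truth system.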
m_EntityQuery = World.DefaultGameObjectInjectionWorld.EntityManager.CreateEntityQuery(typeof(Labeling), typeof(GroundTruthInfo));
m_AsyncAnnotations = new Dictionary<int, AsyncAnnotation>();
m_BoundingBoxValues = new Dictionary<int, Dictionary<uint, BoxData>>();
m_ToReport = new List<BoxData>();
}
static BoxData ConvertToBoxData(IdLabelEntry label, uint instanceId, Vector3 center, Vector3 extents, Quaternion rot)
{
return new BoxData
{
label_id = label.id,
label_name = label.label,
instance_id = instanceId,
translation = center,
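// Bounds.extents holds half-dimensions, so the full box size is extents * 2.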
size = extents * 2,
rotation = rot,
acceleration = Vector3.zero,
velocity = Vector3.zero
};
}
static Vector3[] GetBoxCorners(Bounds bounds, Quaternion rotation)
{
var boundsCenter = bounds.center;
var right = Vector3.right * bounds.extents.x;
var up = Vector3.up * bounds.extents.y;
var forward = Vector3.forward * bounds.extents.z;
right = rotation * right;
up = rotation * up;
forward = rotation * forward;
var doubleRight = right * 2;
var doubleUp = up * 2;
var doubleForward = forward * 2;
var corners = new Vector3[8];
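// corners[0..3] form the -forward (back) face: bottom-left, top-left, top-right, bottom-right.
// The loop below offsets each of them by the full forward vector to produce the +forward (front) face.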
corners[0] = boundsCenter - right - up - forward;
corners[1] = corners[0] + doubleUp;
corners[2] = corners[1] + doubleRight;
corners[3] = corners[0] + doubleRight;
for (var i = 0; i < 4; i++)
{
corners[i + 4] = corners[i] + doubleForward;
}
return corners;
}
/// <inheritdoc/>
protected override void OnBeginRendering()
{
m_CurrentFrame = Time.frameCount;
m_BoundingBoxValues[m_CurrentFrame] = new Dictionary<uint, BoxData>();
m_AsyncAnnotations[m_CurrentFrame] = perceptionCamera.SensorHandle.ReportAnnotationAsync(m_AnnotationDefinition);
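// Gather every labeled entity and compute its oriented bounds now; the results are keyed by instance id
// and matched against visibility information later in OnRenderObjectInfosCalculated.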
var entities = m_EntityQuery.ToEntityArray(Allocator.TempJob);
var entityManager = World.DefaultGameObjectInjectionWorld.EntityManager;
foreach (var entity in entities)
{
ProcessEntity(entityManager.GetComponentObject<Labeling>(entity));
}
entities.Dispose();
}
void OnRenderObjectInfosCalculated(int frameCount, NativeArray<RenderedObjectInfo> renderedObjectInfos)
{
if (!m_AsyncAnnotations.TryGetValue(frameCount, out var asyncAnnotation))
return;
if (!m_BoundingBoxValues.TryGetValue(frameCount, out var boxes))
return;
m_AsyncAnnotations.Remove(frameCount);
m_BoundingBoxValues.Remove(frameCount);
using (s_BoundingBoxCallback.Auto())
{
m_ToReport.Clear();
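// Only report boxes whose instance id appears in the rendered object info, i.e. objects actually visible this frame.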
for (var i = 0; i < renderedObjectInfos.Length; i++)
{
var objectInfo = renderedObjectInfos[i];
if (boxes.TryGetValue(objectInfo.instanceId, out var box))
{
m_ToReport.Add(box);
}
}
BoundingBoxComputed?.Invoke(frameCount, m_ToReport);
asyncAnnotation.ReportValues(m_ToReport);
}
}
void ProcessEntity(Labeling labeledEntity)
{
using (s_BoundingBoxCallback.Auto())
{
// Unfortunately to get the non-axis aligned bounding prism from a game object is not very
// straightforward. A game object's default bounding prism is always axis aligned. To find a "tight"
// fitting prism for a game object we must calculate the oriented bounds of all of the meshes in a
// game object. These meshes (in the object tree) may go through a series of transformations. We need
// to transform all of the child mesh bounds into the coordinate space of the "labeled" game object
// and then combine (encapsulate) all of those bounds. We then need to apply the "labeled" game object's
// transform to the combined bounds to transform the bounds into world space. Finally, we then need
// to take the bounds in world space and transform it to camera space to record it to json...
//
// Currently we are only reporting objects that are a) labeled and b) are visible based on the perception
// camera's rendered object info. In the future we plan on reporting how much of the object can be seen, including
// none if it is off camera
if (idLabelConfig.TryGetLabelEntryFromInstanceId(labeledEntity.instanceId, out var labelEntry))
{
var entityGameObject = labeledEntity.gameObject;
var meshFilters = entityGameObject.GetComponentsInChildren<MeshFilter>();
if (meshFilters == null || meshFilters.Length == 0) return;
var labelTransform = entityGameObject.transform;
var cameraTransform = perceptionCamera.transform;
var combinedBounds = new Bounds(Vector3.zero, Vector3.zero);
var areBoundsUnset = true;
// Need to convert all bounds into labeling mesh space...
foreach (var mesh in meshFilters)
{
var currentTransform = mesh.gameObject.transform;
// Grab the bounds of the game object from the mesh, although these bounds are axis-aligned,
// they are axis-aligned with respect to the current component's coordinate space. This, in theory,
// could still produce non-ideal fitting bounds (if the model is authored strangely; garbage in, garbage out)
var meshBounds = mesh.mesh.bounds;
var transformedBounds = new Bounds(meshBounds.center, meshBounds.size);
var transformedRotation = Quaternion.identity;
// Apply the transformations on this object until we reach the labeled transform
while (currentTransform != labelTransform)
{
transformedBounds.center += currentTransform.localPosition;
transformedBounds.extents = Vector3.Scale(transformedBounds.extents, currentTransform.localScale);
transformedRotation *= currentTransform.localRotation;
currentTransform = currentTransform.parent;
}
// Due to rotations that may be applied, we cannot simply use the extents of the bounds, but
// need to calculate all 8 corners of the bounds and combine them with the current combined
// bounds
var corners = GetBoxCorners(transformedBounds, transformedRotation);
// If this is the first time, create a new bounds struct
if (areBoundsUnset)
{
combinedBounds = new Bounds(corners[0], Vector3.zero);
areBoundsUnset = false;
}
// Go through each corner and add it to the bounds
foreach (var c2 in corners)
{
combinedBounds.Encapsulate(c2);
}
}
// Convert the combined bounds into world space
combinedBounds.center = labelTransform.TransformPoint(combinedBounds.center);
combinedBounds.extents = Vector3.Scale(combinedBounds.extents, labelTransform.localScale);
// Now convert all points into camera's space
var cameraCenter = cameraTransform.InverseTransformPoint(combinedBounds.center);
cameraCenter = Vector3.Scale(cameraTransform.localScale, cameraCenter);
// Rotation to go from label space to camera space
var cameraRotation = Quaternion.Inverse(cameraTransform.rotation) * labelTransform.rotation;
var converted = ConvertToBoxData(labelEntry, labeledEntity.instanceId, cameraCenter, combinedBounds.extents, cameraRotation);
m_BoundingBoxValues[m_CurrentFrame][labeledEntity.instanceId] = converted;
}
}
}
}
}