
3d bounding box visualization

/pyrception-integration
leopoldo-zugasti, 3 years ago
Current commit
30dc05b5
A total of 11 files were changed, including 747 insertions and 105 deletions.
  1. com.unity.perception/Editor/Pyrception/PyrceptionInstaller.cs (5 changes)
  2. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py (261 changes)
  3. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/pyrception.py (73 changes)
  4. com.unity.perception/Editor/Pyrception/pyrception-utils/requirements.txt (2 changes)
  5. com.unity.perception/Editor/Pyrception/pyrception-utils/setup.py (2 changes)
  6. com.unity.perception/Runtime/GroundTruth/Labelers/BoundingBox3DLabeler.cs (2 changes)
  7. com.unity.perception/Runtime/GroundTruth/PerceptionCamera.cs (3 changes)
  8. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py (311 changes)
  9. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py.meta (7 changes)
  10. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py (179 changes)
  11. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py.meta (7 changes)

com.unity.perception/Editor/Pyrception/PyrceptionInstaller.cs (5 changes)


[MenuItem("Window/Pyrception/Run")]
static void RunPyrception()
{
UnityEngine.Debug.Log(PlayerPrefs.GetInt("currentProcessId"));
if (RestartBrowser())
return;

#endif
ProcessStartInfo info = new ProcessStartInfo(shell, argument);
info.CreateNoWindow = !displayWindow || true;
info.UseShellExecute = !waitForExit && false;
info.CreateNoWindow = true;
info.UseShellExecute = false;
info.RedirectStandardOutput = redirectOutput && waitForExit && false;
info.RedirectStandardError = waitForExit;

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py (261 changes)


from PIL.Image import Image
from PIL.ImageDraw import ImageDraw
from pyrception_utils import PyrceptionDataset
from pyquaternion import Quaternion
from bbox import BBox3D
from bbox3d_plot import add_single_bbox3d_on_image
#--------------------------------Custom component-----------------------------------------------------------------------
# --------------------------------Custom component-----------------------------------------------------------------------
import streamlit.components.v1 as components

def item_selector_zoom(index, datasetSize, key=0):
return _item_selector_zoom(index=index, datasetSize=datasetSize, key=key, default=index)
#-------------------------------------END-------------------------------------------------------------------------------
# -------------------------------------END-------------------------------------------------------------------------------
def list_datasets(path) -> List:
"""

image_draw.text(
(box[0], box[1]), class_name, font=font, fill=colors[class_name]
)
#st.subheader(header)
#st.markdown(description)
#st.image(image, use_column_width=True)
# st.subheader(header)
# st.markdown(description)
# st.image(image, use_column_width=True)
def draw_image_with_semantic_segmentation(
def draw_image_with_segmentation(
image: Image,
height: int,
width: int,

"""
# image_draw = ImageDraw(segmentation)
rgba = np.array(segmentation.copy().convert("RGBA"))
r,g,b,a = rgba.T
r, g, b, a = rgba.T
rgba[...,0:4][black_areas.T] = (0,0,0,0)
rgba[...,-1][other_areas.T] = int(0.6 * 255)
rgba[..., 0:4][black_areas.T] = (0, 0, 0, 0)
rgba[..., -1][other_areas.T] = int(0.6 * 255)
image.paste(foreground,(0,0),foreground)
image.paste(foreground, (0, 0), foreground)
def draw_image_stacked(
image: Image,
classes: Dict,
labels: List,
boxes: List[List],
colors: Dict,
header: str,
description: str,
height: int,
width: int,
segmentation: Image,
def draw_image_with_keypoints(
image: Image,
keypoints,
dataset,
color_intensity = st.sidebar.slider('color intensity 2 (%)', 0, 100, 65)
alpha = color_intensity / 100
image_draw = ImageDraw(image)
radius = int(dataset.metadata.image_size[0] * 5/500)
for i in range(len(keypoints)):
keypoint = keypoints[i]
if keypoint["state"] != 2:
continue
coordinates = (keypoint["x"]-radius, keypoint["y"]-radius, keypoint["x"]+radius, keypoint["y"]+radius)
color = dataset.metadata.annotations[dataset.metadata.available_annotations['keypoints']]["spec"][0]["key_points"][i]["color"]
image_draw.ellipse(coordinates, fill=(int(255*color["r"]), int(255*color["g"]), int(255*color["b"]), 255))
skeleton = dataset.metadata.annotations[dataset.metadata.available_annotations['keypoints']]["spec"][0]["skeleton"]
for bone in skeleton:
if keypoints[bone["joint1"]]["state"] != 2 or keypoints[bone["joint2"]]["state"] != 2:
continue
joint1 = (keypoints[bone["joint1"]]["x"], keypoints[bone["joint1"]]["y"])
joint2 = (keypoints[bone["joint2"]]["x"], keypoints[bone["joint2"]]["y"])
r = bone["color"]["r"]
g = bone["color"]["g"]
b = bone["color"]["b"]
image_draw.line([joint1, joint2], fill=(int(255*r), int(255*g), int(255*b), 255), width=int(dataset.metadata.image_size[0] * 3/500))
return image
def plot_bboxes3d(image, bboxes, projection, color, orthographic):
""" Plot an image with 3D bounding boxes
Currently this method should only be used for ground truth images, and
doesn't support predictions. If a color is not provided as an
argument to this routine, the default color of green will be used.
Args:
image (PIL Image): a PIL image.
bboxes (list): a list of BBox3D objects
projection: The perspective projection of the camera which
captured the ground truth.
color: a color for the boxes. Defaults to None. If color is None,
all boxes will be drawn in green.
orthographic (bool): whether the camera used an orthographic projection.
Returns:
PIL image: a PIL image with bounding boxes drawn on it.
"""
np_image = np.array(image)
img_height, img_width, _ = np_image.shape
for x in range(0, width - 1):
for y in range(0, height - 1):
(seg_r, seg_g, seg_b) = segmentation.getpixel((x, y))
(r, g, b) = image.getpixel((x, y))
# if it isn't a black pixel in the segmentation image then highlight it with the segmentation color
if seg_r != 0 or seg_g != 0 or seg_b != 0:
image.putpixel((x, y),
(int((1 - alpha) * r + alpha * seg_r),
int((1 - alpha) * g + alpha * seg_g),
int((1 - alpha) * b + alpha * seg_b)))
for i, box in enumerate(bboxes):
add_single_bbox3d_on_image(np_image, box, projection, color, orthographic=orthographic)
return PIL.Image.fromarray(np_image)
def read_bounding_box_3d(bounding_boxes_metadata):
bboxes = []
image_draw = ImageDraw(image)
# draw bounding boxes
path_to_font = pathlib.Path(__file__).parent.absolute()
font = ImageFont.truetype(f"{path_to_font}/NairiNormal-m509.ttf", 15)
for b in bounding_boxes_metadata:
label_id = b['label_id']
translation = (b["translation"]["x"],b["translation"]["y"],b["translation"]["z"])
size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
rotation = b["rotation"]
rotation = Quaternion(
x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
)
for label, box in zip(labels, boxes):
label = label - 1
class_name = classes[label]
image_draw.rectangle(box, outline=colors[class_name], width=2)
image_draw.text(
(box[0], box[1]), class_name, font=font, fill=colors[class_name]
#if label_mappings and label_id not in label_mappings:
# continue
box = BBox3D(
translation=translation,
size=size,
label=label_id,
sample_token=0,
score=1,
rotation=rotation,
bboxes.append(box)
st.subheader(header)
st.markdown(description)
st.image(image, use_column_width=True)
return bboxes
def draw_image_with_box_3d(image, sensor, values, colors):
if 'camera_intrinsic' in sensor:
projection = np.array(sensor["camera_intrinsic"])
else:
projection = np.array([[1,0,0],[0,1,0],[0,0,1]])
boxes = read_bounding_box_3d(values)
img_with_boxes = plot_bboxes3d(image, boxes, projection, None, orthographic=(sensor["projection"]=="\"orthographic\""))
return img_with_boxes
def display_count(
header: str,
description: str,
):
"""
:param header: Image header
:type str:
:param description: Image description
:type str:
"""
return
@st.cache(show_spinner=True, allow_output_mutation=True)
def load_perception_dataset(path: str) -> Tuple:
"""

:param base_dataset_dir: The directory that contains the perceptions datasets.
:type str:
"""
#st.markdown("# Synthetic Dataset Preview\n ## Unity Technologies ")
# st.markdown("# Synthetic Dataset Preview\n ## Unity Technologies ")
labelers = {'semantic_segmentation': st.sidebar.checkbox("Semantic Segmentation", key="ss"),
'bounding_boxes_2d': st.sidebar.checkbox("Bounding Boxes", key="bb2d")}
available_labelers = [a["name"] for a in dataset.metadata.annotations]
labelers = {}
if 'bounding box' in available_labelers:
labelers['bounding box'] = st.sidebar.checkbox("Bounding Boxes 2D", key="bb2d")
if 'bounding box 3D' in available_labelers:
labelers['bounding box 3D'] = st.sidebar.checkbox("Bounding Boxes 3D", key="bb3d")
if 'keypoints' in available_labelers:
labelers['keypoints'] = st.sidebar.checkbox("Key Points", key="kp")
if 'instance segmentation' in available_labelers and 'semantic segmentation' in available_labelers:
if st.sidebar.checkbox('Segmentation'):
selected_segmentation = st.sidebar.radio("Select the segmentation type:", ['Semantic Segmentation', 'Instance Segmentation'], index=0)
if selected_segmentation == 'Semantic Segmentation':
labelers['semantic segmentation'] = True
elif selected_segmentation == 'Instance Segmentation':
labelers['instance segmentation'] = True
elif 'semantic segmentation' in available_labelers:
labelers['semantic segmentation'] = st.sidebar.checkbox("Semantic Segmentation", key="ss")
elif 'instance segmentation' in available_labelers:
labelers['instance segmentation'] = st.sidebar.checkbox("Instance Segmentation", key="is")
session_state = SessionState.get(image='-1', start_at='0', num_cols='3')
index = int(session_state.image)
if index >= 0:

grid_view(num_rows, colors, dataset, session_state, labelers)
def get_image_with_labelers(image_and_labelers, dataset, colors, labelers_to_use):
classes = dataset.classes
image = image_and_labelers['image']
if 'semantic segmentation' in labelers_to_use and labelers_to_use['semantic segmentation']:
semantic = image_and_labelers["semantic segmentation"]
image = draw_image_with_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], semantic,
"Semantic Segmentation Preview", ""
)
if 'instance segmentation' in labelers_to_use and labelers_to_use['instance segmentation']:
instance = image_and_labelers['instance segmentation']
image = draw_image_with_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], instance,
"Semantic Segmentation Preview", ""
)
if 'bounding box' in labelers_to_use and labelers_to_use['bounding box']:
target = image_and_labelers["bounding box"]
labels = target["labels"]
boxes = target["boxes"]
image = draw_image_with_boxes(
image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
)
if 'keypoints' in labelers_to_use and labelers_to_use['keypoints']:
keypoints = image_and_labelers["keypoints"]
image = draw_image_with_keypoints(
image, keypoints, dataset
)
if 'bounding box 3D' in labelers_to_use and labelers_to_use['bounding box 3D']:
sensor, values = image_and_labelers['bounding box 3D']
image = draw_image_with_box_3d(image, sensor, values, colors)
return image
header = st.beta_columns([2/3, 1/3])
num_cols = header[1].slider(label="Image per row: ", min_value=1, max_value=5, step=1, value=int(session_state.num_cols))
session_state.num_cols = num_cols
header = st.beta_columns([2 / 3, 1 / 3])
num_cols = header[1].slider(label="Image per row: ", min_value=1, max_value=5, step=1,
value=int(session_state.num_cols))
if not num_cols == session_state.num_cols:
session_state.num_cols = num_cols
st.experimental_rerun()
with header[0]:
start_at = item_selector(int(session_state.start_at), num_cols * num_rows, len(dataset))
session_state.start_at = start_at

for i in range(start_at, min(start_at + (num_cols * num_rows), len(dataset))):
classes = dataset.classes
image, segmentation, target = dataset[i]
labels = target["labels"]
boxes = target["boxes"]
image = get_image_with_labelers(dataset[i], dataset, colors, labelers)
if labelers['semantic_segmentation']:
image = draw_image_with_semantic_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], segmentation, "Semantic Segmentation Preview", ""
)
if labelers['bounding_boxes_2d']:
image = draw_image_with_boxes(
image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
)
expand_image = container.button(label="Expand image", key="exp"+str(i))
expand_image = container.button(label="Expand image", key="exp" + str(i))
container.image(image, caption=str(i), use_column_width=True)
if expand_image:
session_state.image = i

def zoom(index, colors, dataset, session_state, labelers, dataset_path):
classes = dataset.classes
image, segmentation, target = dataset[index]
labels = target["labels"]
boxes = target["boxes"]
header = st.beta_columns([0.2, 0.6, 0.2])
if header[0].button('< Back to Grid view'):

session_state.image = new_index
st.experimental_rerun()
if labelers['semantic_segmentation']:
image = draw_image_with_semantic_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], segmentation, "Semantic Segmentation Preview", ""
)
if labelers['bounding_boxes_2d']:
image = draw_image_with_boxes(
image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
)
image = get_image_with_labelers(dataset[index], dataset, colors, labelers)
layout = st.beta_columns([0.7, 0.3])
layout[0].image(image, use_column_width=True)

raise ValueError("Please specify the path to the main dataset directory!")
#st.markdown('<script type="application/javascript"> function resizeIFrameToFitContent( iFrameme ) { iFrame.width = '
# st.markdown('<script type="application/javascript"> function resizeIFrameToFitContent( iFrameme ) { iFrame.width = '
# 'iFrame.contentWindow.document.body.scrollWidth;iFrame.height = '
# 'iFrame.contentWindow.document.body.scrollHeight;} window.addEventListener(\'DOMContentLoaded\', '
# 'function(e) { var iFrame = document.getElementById( \'iFrame1\' ); resizeIFrameToFitContent( iFrame '
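For reference, the new 3D-box helpers in preview.py can be exercised end to end. The sketch below is illustrative only and not part of this commit; the image path and the sensor and box values are made up, but the dictionary fields match what read_bounding_box_3d and draw_image_with_box_3d read above.

from PIL import Image

# Hypothetical capture: one 1 m cube placed 5 m in front of a perspective camera.
capture_image = Image.open("rgb_2.png").convert("RGB")
sensor = {
    "camera_intrinsic": [[1.299, 0.0, 0.0], [0.0, 1.732, 0.0], [0.0, 0.0, -1.0006]],
    "projection": "perspective",
}
values = [{
    "label_id": 1,
    "translation": {"x": 0.0, "y": 0.0, "z": 5.0},
    "size": {"x": 1.0, "y": 1.0, "z": 1.0},
    "rotation": {"x": 0.0, "y": 0.0, "z": 0.0, "w": 1.0},
}]
preview = draw_image_with_box_3d(capture_image, sensor, values, colors=None)
preview.show()

Note that draw_image_with_box_3d only takes the orthographic path when sensor["projection"] matches the quoted string it checks for; any other value falls back to the perspective projection.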

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/pyrception.py (73 changes)


# Extract the class labels
self.classes = []
for label in self.annotations[0]["spec"]:
self.classes.append(label["label_name"])
if "label_name" in label:
self.classes.append(label["label_name"])
# Set the number of classes
self.num_classes = len(self.classes)

)
self.last_file_index = None
def __getitem__(self, index: int) -> Tuple:
def __getitem__(self, index: int) -> dict:
:return: Returns a dictionary containing the image and the available labeler data, keyed by labeler name
:rtype: dict
"""
if index > self.metadata.length - 1:

image_and_labelers = {}
# Image
segmentation = Image.open(
os.path.join(self.metadata.data_dir, self.data[sub_index]["annotations"][1]["filename"])
).convert("RGB")
image_and_labelers["image"] = image
# Assumes that the order is the same for the annotations in metadata as in the captures_***.json file
annotations = {}
for i in range(len(self.metadata.annotations)):
a = self.metadata.annotations[i]
for j in range(len(self.data[sub_index]["annotations"])):
if self.data[sub_index]["annotations"][j]["annotation_definition"] == a["id"]:
annotations[a["name"]] = j
break
self.metadata.available_annotations = annotations
# Bounding Boxes
if "bounding box" in annotations:
image_and_labelers["bounding box"] = self.get_bounding_boxes(sub_index, annotations["bounding box"])
# Bounding Boxes 3d
if "bounding box 3D" in annotations:
image_and_labelers["bounding box 3D"] = self.get_bounding_box_3d(sub_index, annotations["bounding box 3D"])
# Semantic Segmentation
if "semantic segmentation" in annotations:
image_and_labelers["semantic segmentation"] = self.get_segmentation(sub_index, annotations[
"semantic segmentation"])
# Instance Segmentation
if "instance segmentation" in annotations:
image_and_labelers["instance segmentation"] = self.get_segmentation(sub_index, annotations[
"instance segmentation"])
# Keypoints
if "keypoints" in annotations:
image_and_labelers["keypoints"] = self.get_keypoints(sub_index, annotations["keypoints"])
return image_and_labelers
def get_keypoints(self, sub_index, ann_index):
image_ann = self.data[sub_index]
keypoints = image_ann["annotations"][ann_index]["values"][0]["keypoints"]
return keypoints
def get_segmentation(self, sub_index, ann_index):
return Image.open(
os.path.join(self.metadata.data_dir, self.data[sub_index]["annotations"][ann_index]["filename"])
).convert("RGB")
def get_bounding_box_3d(self, sub_index, ann_index):
sensor = self.data[sub_index]["sensor"]
values = self.data[sub_index]["annotations"][ann_index]["values"]
return sensor, values
def get_bounding_boxes(self, sub_index, ann_index):
for value in image_ann["annotations"][0]["values"]:
for value in image_ann["annotations"][ann_index]["values"]:
box = [
value["x"],
value["y"],

# assumes that the image id naming convention is
# RGB<uuid>/rgb_<image_id>.png
image_id = self.data[sub_index]["filename"][44:-4]
target = {"image_id": image_id, "labels": labels, "boxes": boxes}
return image, segmentation, target
return {"image_id": image_id, "labels": labels, "boxes": boxes}
def __len__(self) -> int:
"""

com.unity.perception/Editor/Pyrception/pyrception-utils/requirements.txt (2 changes)


pytest-html==3.1.1
pytest-datadir==1.3.1
coverage==5.5
opencv-python>=4.5
pyquaternion>=0.9.9

com.unity.perception/Editor/Pyrception/pyrception-utils/setup.py (2 changes)


"streamlit==0.75.0",
"google-cloud-storage==1.19.0",
"gcsfs==0.7.1",
"pyquaternion>=0.9.9",
"opencv-python>=4.5"
],
entry_points={"console_scripts": ["pyrception-utils=pyrception_utils.cli:main"]},
)

com.unity.perception/Runtime/GroundTruth/Labelers/BoundingBox3DLabeler.cs (2 changes)


var forward = box.rotation * Vector3.forward;
var s = box.size * 0.5f;
var bbl = CalculateRotatedPoint(cam, t,right, up, forward,-s.x,-s.y, -s.z);
var bbl = CalculateRotatedPoint (cam, t,right, up, forward,-s.x,-s.y, -s.z);
var btl = CalculateRotatedPoint(cam, t,right, up, forward,-s.x, s.y, -s.z);
var btr = CalculateRotatedPoint(cam, t,right, up, forward,s.x, s.y, -s.z);
var bbr = CalculateRotatedPoint(cam, t,right, up, forward,s.x, -s.y, -s.z);

com.unity.perception/Runtime/GroundTruth/PerceptionCamera.cs (3 changes)


// Record the camera's projection matrix
SetPersistentSensorData("camera_intrinsic", ToProjectionMatrix3x3(cam.projectionMatrix));
// Record the camera's projection type (orthographic or perspective)
SetPersistentSensorData("projection", cam.orthographic ? "orthographic" : "perspective");
var captureFilename = $"{Manager.Instance.GetDirectoryFor(rgbDirectory)}/{k_RgbFilePrefix}{Time.frameCount}.png";
var dxRootPath = $"{rgbDirectory}/{k_RgbFilePrefix}{Time.frameCount}.png";
SensorHandle.ReportCapture(dxRootPath, SensorSpatialData.FromGameObjects(

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py (311 changes)


import math
import numpy as np
from pyquaternion import Quaternion
def group_bbox2d_per_label(bboxes):
"""Group 2D bounding boxes with same label.
Args:
bboxes (list[BBox2D]): a list of 2D bounding boxes
Returns:
dict: a dictionary of 2D bounding box groups.
{label1: [bbox1, bboxes2, ...], label2: [bbox1, ...]}
"""
bboxes_per_label = {}
for box in bboxes:
if box.label not in bboxes_per_label:
bboxes_per_label[box.label] = []
bboxes_per_label[box.label].append(box)
return bboxes_per_label
class BBox2D:
"""Canonical Representation of a 2D bounding box.
Attributes:
label (str): string representation of the label.
x (float): x pixel coordinate of the upper left corner.
y (float): y pixel coordinate of the upper left corner.
w (float): width (number of pixels) of the bounding box.
h (float): height (number of pixels) of the bounding box.
score (float): detection confidence score. Default is set to score=1.
if this is a ground truth bounding box.
Examples:
Here is an example about how to use this class.
.. code-block::
>>> gt_bbox = BBox2D(label='car', x=2, y=6, w=2, h=4)
>>> gt_bbox
"label='car'|score=1.0|x=2.0|y=6.0|w=2.0|h=4.0"
>>> pred_bbox = BBox2D(label='car', x=2, y=5, w=2, h=4, score=0.79)
>>> pred_bbox.area
8
>>> pred_bbox.intersect_with(gt_bbox)
True
>>> pred_bbox.intersection(gt_bbox)
6
>>> pred_bbox.union(gt_bbox)
10
>>> pred_bbox.iou(gt_bbox)
0.6
"""
def __init__(self, label, x, y, w, h, score=1.0):
""" Initialize 2D bounding box object
Args:
label (str): string representation of the label
x (float): x pixel coordinate of the upper left corner
y (float): y pixel coordinate of the upper left corner
w (float): width (number of pixels) of the bounding box
h (float): height (number of pixels) of the bounding box
score (float): detection confidence score
"""
self.label = label
self.x = x
self.y = y
self.w = w
self.h = h
self.score = score
def __repr__(self):
return (
f"label={self.label}|score={self.score:.2f}|"
f"x={self.x:.2f}|y={self.y:.2f}|w={self.w:.2f}|h={self.h:.2f}"
)
def __eq__(self, other):
return (
self.x == other.x
and self.y == other.y
and self.w == other.w
and self.h == other.h
and self.label == other.label
and math.isclose(self.score, other.score, rel_tol=1e-07)
)
@property
def area(self):
"""Calculate area of this bounding box
Returns:
width x height of the bound box
"""
return self.w * self.h
def intersect_with(self, other):
"""Check whether this box intersects with other bounding box
Args:
other (BBox2D): other bounding box object to check intersection
Returns:
True if two bounding boxes intersect, False otherwise
"""
if self.x > other.x + other.w:
return False
if other.x > self.x + self.w:
return False
if self.y + self.h < other.y:
return False
if self.y > other.y + other.h:
return False
return True
def intersection(self, other):
"""Calculate the intersection area with other bounding box
Args:
other (BBox2D): other bounding box object to calculate intersection
Returns:
float of the intersection area for two bounding boxes
"""
x1 = max(self.x, other.x)
y1 = max(self.y, other.y)
x2 = min(self.x + self.w, other.x + other.w)
y2 = min(self.y + self.h, other.y + other.h)
return (x2 - x1) * (y2 - y1)
def union(self, other, intersection_area=None):
"""Calculate union area with other bounding box
Args:
other (BBox2D): other bounding box object to calculate union
intersection_area (float): pre-calculated area of intersection
Returns:
float of the union area for two bounding boxes
"""
area_a = self.area
area_b = other.area
if not intersection_area:
intersection_area = self.intersection(other)
return float(area_a + area_b - intersection_area)
def iou(self, other):
"""Calculate intersection over union area with other bounding box
.. math::
IOU = \\frac{intersection}{union}
Args:
other (BBox2D): other bounding box object to calculate iou
Returns:
float of the union area for two bounding boxes
"""
# if boxes don't intersect
if not self.intersect_with(other):
return 0
intersection_area = self.intersection(other)
union_area = self.union(other, intersection_area=intersection_area)
# intersection over union
iou = intersection_area / union_area
return iou
class BBox3D:
"""
Class for 3d bounding boxes which can either be predictions or
ground-truths. This class is the primary representation in this repo of 3d
bounding boxes and is based on the NuScenes-style dataset.
"""
def __init__(
self,
translation,
size,
label,
sample_token,
score=1,
rotation: Quaternion = Quaternion(),
velocity=(np.nan, np.nan, np.nan),
):
self.sample_token = sample_token
self.translation = translation
self.size = size
self.width, self.height, self.length = size
self.rotation = rotation
self.velocity = velocity
self.label = label
self.score = score
def _local2world_coordinate(self, x):
"""
Args:
x: vector describing point (x,y,z) in local coordinates (where the
center of the box is 0,0,0)
Returns: the x,y,z coordinates of the input point in global coordinates
"""
y = np.array(self.translation) + self.rotation.rotate(x)
return y
@property
def back_left_bottom_pt(self):
"""
Returns: :py:class:`float`: Back-left-bottom point.
"""
p = np.array([-self.width / 2, -self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_left_bottom_pt(self):
"""
:py:class:`float`: Front-left-bottom point.
"""
p = np.array([-self.width / 2, -self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_right_bottom_pt(self):
"""
:py:class:`float`: Front-right-bottom point.
"""
p = np.array([self.width / 2, -self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_right_bottom_pt(self):
"""
:py:class:`float`: Back-right-bottom point.
"""
p = np.array([self.width / 2, -self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_left_top_pt(self):
"""
:py:class:`float`: Back-left-top point.
"""
p = np.array([-self.width / 2, self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_left_top_pt(self):
"""
:py:class:`float`: Front-left-top point.
"""
p = np.array([-self.width / 2, self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_right_top_pt(self):
"""
:py:class:`float`: Front-right-top point.
"""
p = np.array([self.width / 2, self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_right_top_pt(self):
"""
:py:class:`float`: Back-right-top point.
"""
p = np.array([self.width / 2, self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def p(self) -> np.ndarray:
"""
Returns: list of all 8 corners of the box, beginning with the bottom
four corners and then the top
four corners, both in counterclockwise order (from birds eye view)
beginning with the back-left corner
"""
x = np.vstack(
[
self.back_left_bottom_pt,
self.front_left_bottom_pt,
self.front_right_bottom_pt,
self.back_right_bottom_pt,
self.back_left_top_pt,
self.front_left_top_pt,
self.front_right_top_pt,
self.back_right_top_pt,
]
)
return x
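As a quick illustration of the BBox3D API above (again, not part of this commit), a box can be constructed directly and its eight world-space corners read back through the p property; all numbers below are made up.

from pyquaternion import Quaternion

box = BBox3D(
    translation=(0.0, 0.0, 5.0),  # box centre, 5 m in front of the camera
    size=(2.0, 1.0, 4.0),         # width, height, length
    label=1,
    sample_token=0,
    rotation=Quaternion(axis=(0.0, 1.0, 0.0), degrees=90),
)
corners = box.p                   # (8, 3) array: the four bottom corners, then the four top corners
assert corners.shape == (8, 3)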

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py.meta (7 changes)


fileFormatVersion: 2
guid: 4122f2f81144716438e5281967ce7272
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py (179 changes)


""" Helper bounding box 3d library to plot pretty 3D boundign
boxes with a simple Python API.
"""
import cv2
import numpy
import streamlit as st
def _add_single_bbox3d_on_image(
image,
front_bottom_left,
front_upper_left,
front_upper_right,
front_bottom_right,
back_bottom_left,
back_upper_left,
back_upper_right,
back_bottom_right,
color=None,
box_line_width=2,
):
""" Add a single 3D bounding box to the passed in image.
For this version of the method, all of the passed in coordinates should be
integer tuples already projected in image pixel coordinate space.
Args:
image (numpy array): numpy array version of the image
front_bottom_left (int tuple): Front bottom left coordinate of the 3D
bounding box in pixel space
front_upper_left (int tuple): Front upper left coordinate of the 3D
bounding box in pixel space
front_upper_right (int tuple): Front upper right coordinate of the 3D
bounding box in pixel space
front_bottom_right (int tuple): Front bottom right coordinate of the 3D
bounding box in pixel space
back_bottom_left (int tuple): Back bottom left coordinate of the 3D
bounding box in pixel space
back_upper_left (int tuple): Back upper left coordinate of the 3D
bounding box in pixel space
back_upper_right (int tuple): Back upper right coordinate of the 3D
bounding box in pixel space
back_bottom_right (int tuple): Back bottom right coordinate of the 3D
bounding box in pixel space
color (tuple): RGBA color of the bounding box. Defaults to None. If
color is None, the tuple [0, 255, 0, 255] (green) will be used.
box_line_width: The width of the drawn box. Defaults to 2.
"""
try:
fbl = (front_bottom_left[0], front_bottom_left[1])
ful = (front_upper_left[0], front_upper_left[1])
fur = (front_upper_right[0], front_upper_right[1])
fbr = (front_bottom_right[0], front_bottom_right[1])
bbl = (back_bottom_left[0], back_bottom_left[1])
bul = (back_upper_left[0], back_upper_left[1])
bur = (back_upper_right[0], back_upper_right[1])
bbr = (back_bottom_right[0], back_bottom_right[1])
except ValueError:
raise TypeError("all box coordinates must be numbers")
if color is None:
color = [0, 255, 0, 255]
cv2.line(image, fbl, ful, color, box_line_width) # front left
cv2.line(image, ful, fur, color, box_line_width) # front top
cv2.line(image, fbr, fur, color, box_line_width) # front right
cv2.line(image, fbl, fbr, color, box_line_width) # front bottom
cv2.line(image, bbl, bul, color, box_line_width) # back left
cv2.line(image, bul, bur, color, box_line_width) # back top
cv2.line(image, bbr, bur, color, box_line_width) # back right
cv2.line(image, bbl, bbr, color, box_line_width) # back bottom
cv2.line(image, ful, bul, color, box_line_width) # top left
cv2.line(image, fur, bur, color, box_line_width) # top right
cv2.line(image, fbl, bbl, color, box_line_width) # bottom left
cv2.line(image, fbr, bbr, color, box_line_width) # bottom right
def add_single_bbox3d_on_image(
image, box, proj, color=None, orthographic=False, box_line_width=2,
):
"""" Add single 3D bounding box on a given image.
Args:
image (numpy array): a numpy array for an image
box (BBox3D): a 3D bounding box in camera's coordinate system
proj (numpy 2D array): camera's 3x3 projection matrix
color (tuple): RGBA color of the bounding box. Defaults to None. If
color is None, the tuple [0, 255, 0, 255] (green) will be used.
orthographic (bool): whether proj is an orthographic projection matrix. Defaults to False.
box_line_width (int): line width of the bounding boxes. Defaults to 2.
"""
img_height, img_width, _ = image.shape
fll = box.back_left_bottom_pt
ful = box.back_left_top_pt
fur = box.back_right_top_pt
flr = box.back_right_bottom_pt
bll = box.front_left_bottom_pt
bul = box.front_left_top_pt
bur = box.front_right_top_pt
blr = box.front_right_bottom_pt
pixel_location_fun = _project_pt_to_pixel_location_orthographic if orthographic else _project_pt_to_pixel_location
fll_raster = pixel_location_fun(fll, proj, img_height, img_width)
ful_raster = pixel_location_fun(ful, proj, img_height, img_width)
fur_raster = pixel_location_fun(fur, proj, img_height, img_width)
flr_raster = pixel_location_fun(flr, proj, img_height, img_width)
bll_raster = pixel_location_fun(bll, proj, img_height, img_width)
bul_raster = pixel_location_fun(bul, proj, img_height, img_width)
bur_raster = pixel_location_fun(bur, proj, img_height, img_width)
blr_raster = pixel_location_fun(blr, proj, img_height, img_width)
_add_single_bbox3d_on_image(
image,
fll_raster,
ful_raster,
fur_raster,
flr_raster,
bll_raster,
bul_raster,
bur_raster,
blr_raster,
color,
box_line_width,
)
def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
""" Projects a 3D coordinate into a pixel location.
Applies the passed in projection matrix to project a point from the camera's
coordinate space into pixel space.
For a description of the math used in this method, see:
https://www.scratchapixel.com/lessons/3d-basic-rendering/computing-pixel-coordinates-of-3d-point/
Args:
pt (numpy array): The 3D point to project.
projection (numpy 2D array): The camera's 3x3 projection matrix.
img_height (int): The height of the image in pixels.
img_width (int): The width of the image in pixels.
Returns:
numpy array: a one-dimensional array with two values (x and y)
representing a point's pixel coordinate in an image.
"""
_pt = projection.dot(pt)
# compute the perspective divide. Near clipping plane should take care of
# divide by zero cases, but we will check to be sure
if _pt[2] != 0:
_pt /= _pt[2]
return numpy.array(
[
int(-(_pt[0] * img_width) / 2.0 + (img_width * 0.5)),
int((_pt[1] * img_height) / 2.0 + (img_height * 0.5)),
]
)
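# Worked example (illustrative, not part of this file): with a hypothetical 3x3
# intrinsic matrix, a point on the camera axis projects to the image centre.
#
#   proj = numpy.array([[1.299, 0.0, 0.0], [0.0, 1.732, 0.0], [0.0, 0.0, -1.0006]])
#   _project_pt_to_pixel_location(numpy.array([0.0, 0.0, 5.0]), proj, 480, 640)
#   # -> array([320, 240]); after the perspective divide the point sits at NDC (0, 0),
#   #    which maps to the centre of a 640x480 image.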
def _project_pt_to_pixel_location_orthographic(pt, projection, img_height, img_width):
""" Projects a 3D coordinate into a pixel location for an orthographic camera.
Scales the point by the diagonal of the camera's projection matrix (with the
Y axis flipped), then maps the resulting [-1, 1] range onto pixel coordinates.
Args:
pt (numpy array): The 3D point to project.
projection (numpy 2D array): The camera's 3x3 projection matrix.
img_height (int): The height of the image in pixels.
img_width (int): The width of the image in pixels.
Returns:
list: the point's (x, y) pixel coordinate in the image.
"""
projection = numpy.array([
[projection[0][0], 0, 0],
[0, -projection[1][1], 0],
[0, 0, projection[2][2]]
])
temp = projection.dot(pt)
pixel = [
int((temp[0] + 1) * 0.5 * img_width),
int((temp[1] + 1) * 0.5 * img_height)
]
return pixel
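A companion sketch for the orthographic path (illustrative, with a made-up matrix): the diagonal of the projection scales the point into the [-1, 1] range, the Y axis is flipped, and the result is mapped onto pixel coordinates.

import numpy

ortho = numpy.array([[0.2, 0.0, 0.0], [0.0, 0.2, 0.0], [0.0, 0.0, -0.1]])  # hypothetical orthographic intrinsics
pixel = _project_pt_to_pixel_location_orthographic(
    numpy.array([2.5, 0.0, 5.0]), ortho, img_height=480, img_width=640
)
print(pixel)  # [480, 240]: 2.5 units right of centre lands three quarters of the way across a 640x480 frame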

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py.meta (7 changes)


fileFormatVersion: 2
guid: 6ebe9fac6325103488e689f91f4e486e
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant: