
3d bounding box visualization

/pyrception-integration
leopoldo-zugasti, 4 years ago
Current commit
30dc05b5
11 files changed, with 747 insertions and 105 deletions
  1. com.unity.perception/Editor/Pyrception/PyrceptionInstaller.cs (5 lines changed)
  2. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py (261 lines changed)
  3. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/pyrception.py (73 lines changed)
  4. com.unity.perception/Editor/Pyrception/pyrception-utils/requirements.txt (2 lines changed)
  5. com.unity.perception/Editor/Pyrception/pyrception-utils/setup.py (2 lines changed)
  6. com.unity.perception/Runtime/GroundTruth/Labelers/BoundingBox3DLabeler.cs (2 lines changed)
  7. com.unity.perception/Runtime/GroundTruth/PerceptionCamera.cs (3 lines changed)
  8. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py (311 lines changed)
  9. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py.meta (7 lines changed)
  10. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py (179 lines changed)
  11. com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py.meta (7 lines changed)

com.unity.perception/Editor/Pyrception/PyrceptionInstaller.cs (5 lines changed)


[MenuItem("Window/Pyrception/Run")]
static void RunPyrception()
{
UnityEngine.Debug.Log(PlayerPrefs.GetInt("currentProcessId"));
if (RestartBrowser())
return;

#endif
ProcessStartInfo info = new ProcessStartInfo(shell, argument);
info.CreateNoWindow = !displayWindow || true;
info.UseShellExecute = !waitForExit && false;
info.CreateNoWindow = true;
info.UseShellExecute = false;
info.RedirectStandardOutput = redirectOutput && waitForExit && false;
info.RedirectStandardError = waitForExit;
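The installer launches the pyrception-utils preview as a separate process with its window hidden and its error stream captured. For orientation only, a minimal Python sketch of an equivalent launch; the run_preview helper, the preview subcommand, and the --data flag are assumptions, not part of this diff.

import subprocess

def run_preview(data_dir: str) -> subprocess.Popen:
    # Hypothetical equivalent of the ProcessStartInfo setup above:
    # discard stdout, capture stderr, and do not wait for exit.
    return subprocess.Popen(
        ["pyrception-utils", "preview", f"--data={data_dir}"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )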

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py (261 lines changed)


from PIL.Image import Image
from PIL.ImageDraw import ImageDraw
from pyrception_utils import PyrceptionDataset
from pyquaternion import Quaternion
from bbox import BBox3D
from bbox3d_plot import add_single_bbox3d_on_image
- #--------------------------------Custom component-----------------------------------------------------------------------
+ # --------------------------------Custom component-----------------------------------------------------------------------
import streamlit.components.v1 as components

def item_selector_zoom(index, datasetSize, key=0):
return _item_selector_zoom(index=index, datasetSize=datasetSize, key=key, default=index)
- #-------------------------------------END-------------------------------------------------------------------------------
+ # -------------------------------------END-------------------------------------------------------------------------------
def list_datasets(path) -> List:
"""

image_draw.text(
(box[0], box[1]), class_name, font=font, fill=colors[class_name]
)
- #st.subheader(header)
- #st.markdown(description)
- #st.image(image, use_column_width=True)
+ # st.subheader(header)
+ # st.markdown(description)
+ # st.image(image, use_column_width=True)
- def draw_image_with_semantic_segmentation(
+ def draw_image_with_segmentation(
image: Image,
height: int,
width: int,

"""
# image_draw = ImageDraw(segmentation)
rgba = np.array(segmentation.copy().convert("RGBA"))
- r,g,b,a = rgba.T
+ r, g, b, a = rgba.T
- rgba[...,0:4][black_areas.T] = (0,0,0,0)
- rgba[...,-1][other_areas.T] = int(0.6 * 255)
+ rgba[..., 0:4][black_areas.T] = (0, 0, 0, 0)
+ rgba[..., -1][other_areas.T] = int(0.6 * 255)
- image.paste(foreground,(0,0),foreground)
+ image.paste(foreground, (0, 0), foreground)
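# The fragments above make pure-black (unlabeled) pixels transparent, give every
# other segmentation pixel roughly 60% alpha, and paste the result over the RGB
# capture. A self-contained sketch of that overlay, assuming the helper's full
# body follows this shape (the name overlay_segmentation is illustrative, not
# the function defined in this diff):
import numpy as np
from PIL import Image

def overlay_segmentation(image: Image.Image, segmentation: Image.Image, alpha: float = 0.6) -> Image.Image:
    rgba = np.array(segmentation.convert("RGBA"))
    r, g, b, _ = rgba.T
    background = (r == 0) & (g == 0) & (b == 0)      # pure black = no label
    rgba[..., 3][background.T] = 0                   # hide unlabeled pixels
    rgba[..., 3][~background.T] = int(alpha * 255)   # semi-transparent labels
    foreground = Image.fromarray(rgba)
    out = image.convert("RGBA")
    out.paste(foreground, (0, 0), foreground)
    return out.convert("RGB")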
def draw_image_stacked(
image: Image,
classes: Dict,
labels: List,
boxes: List[List],
colors: Dict,
header: str,
description: str,
height: int,
width: int,
segmentation: Image,
def draw_image_with_keypoints(
image: Image,
keypoints,
dataset,
color_intensity = st.sidebar.slider('color intensity 2 (%)', 0, 100, 65)
alpha = color_intensity / 100
image_draw = ImageDraw(image)
radius = int(dataset.metadata.image_size[0] * 5/500)
for i in range(len(keypoints)):
keypoint = keypoints[i]
if keypoint["state"] != 2:
continue
coordinates = (keypoint["x"]-radius, keypoint["y"]-radius, keypoint["x"]+radius, keypoint["y"]+radius)
color = dataset.metadata.annotations[dataset.metadata.available_annotations['keypoints']]["spec"][0]["key_points"][i]["color"]
image_draw.ellipse(coordinates, fill=(int(255*color["r"]), int(255*color["g"]), int(255*color["b"]), 255))
skeleton = dataset.metadata.annotations[dataset.metadata.available_annotations['keypoints']]["spec"][0]["skeleton"]
for bone in skeleton:
if keypoints[bone["joint1"]]["state"] != 2 or keypoints[bone["joint2"]]["state"] != 2:
continue
joint1 = (keypoints[bone["joint1"]]["x"], keypoints[bone["joint1"]]["y"])
joint2 = (keypoints[bone["joint2"]]["x"], keypoints[bone["joint2"]]["y"])
r = bone["color"]["r"]
g = bone["color"]["g"]
b = bone["color"]["b"]
image_draw.line([joint1, joint2], fill=(int(255*r), int(255*g), int(255*b), 255), width=int(dataset.metadata.image_size[0] * 3/500))
return image
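# For reference, the shape of the keypoint data the function above consumes,
# with made-up values. Field names come from the code; treating state == 2 as
# "labeled and visible" is an assumption.
example_keypoints = [
    {"x": 412.0, "y": 188.5, "state": 2},   # drawn (state == 2)
    {"x": 0.0, "y": 0.0, "state": 0},       # skipped by the state check
]
# The annotation spec is expected to provide per-keypoint colors in [0, 1] and a
# skeleton of {"joint1", "joint2", "color"} bones:
example_spec = {
    "key_points": [{"color": {"r": 0.0, "g": 1.0, "b": 0.0}},
                   {"color": {"r": 0.0, "g": 0.0, "b": 1.0}}],
    "skeleton": [{"joint1": 0, "joint2": 1, "color": {"r": 1.0, "g": 0.0, "b": 0.0}}],
}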
def plot_bboxes3d(image, bboxes, projection, color, orthographic):
""" Plot an image with 3D bounding boxes
Currently this method should only be used for ground truth images, and
doesn't support predictions. If a color is not provided as an
argument to this routine, the default color of green will be used.
Args:
image (PIL Image): a PIL image.
bboxes (list): a list of BBox3D objects
projection: The perspective projection of the camera which
captured the ground truth.
color (tuple): RGBA color for the boxes. Defaults to None. If
color is None, all boxes will be drawn in green.
Returns:
PIL image: a PIL image with bounding boxes drawn on it.
"""
np_image = np.array(image)
img_height, img_width, _ = np_image.shape
for x in range(0, width - 1):
for y in range(0, height - 1):
(seg_r, seg_g, seg_b) = segmentation.getpixel((x, y))
(r, g, b) = image.getpixel((x, y))
# if it isn't a black pixel in the segmentation image then highlight it with the segmentation color
if seg_r != 0 or seg_g != 0 or seg_b != 0:
image.putpixel((x, y),
(int((1 - alpha) * r + alpha * seg_r),
int((1 - alpha) * g + alpha * seg_g),
int((1 - alpha) * b + alpha * seg_b)))
for i, box in enumerate(bboxes):
add_single_bbox3d_on_image(np_image, box, projection, color, orthographic=orthographic)
return PIL.Image.fromarray(np_image)
def read_bounding_box_3d(bounding_boxes_metadata):
bboxes = []
image_draw = ImageDraw(image)
# draw bounding boxes
path_to_font = pathlib.Path(__file__).parent.absolute()
font = ImageFont.truetype(f"{path_to_font}/NairiNormal-m509.ttf", 15)
for b in bounding_boxes_metadata:
label_id = b['label_id']
translation = (b["translation"]["x"],b["translation"]["y"],b["translation"]["z"])
size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
rotation = b["rotation"]
rotation = Quaternion(
x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
)
for label, box in zip(labels, boxes):
label = label - 1
class_name = classes[label]
image_draw.rectangle(box, outline=colors[class_name], width=2)
image_draw.text(
(box[0], box[1]), class_name, font=font, fill=colors[class_name]
#if label_mappings and label_id not in label_mappings:
# continue
box = BBox3D(
translation=translation,
size=size,
label=label_id,
sample_token=0,
score=1,
rotation=rotation,
bboxes.append(box)
st.subheader(header)
st.markdown(description)
st.image(image, use_column_width=True)
return bboxes
def draw_image_with_box_3d(image, sensor, values, colors):
if 'camera_intrinsic' in sensor:
projection = np.array(sensor["camera_intrinsic"])
else:
projection = np.array([[1,0,0],[0,1,0],[0,0,1]])
boxes = read_bounding_box_3d(values)
img_with_boxes = plot_bboxes3d(image, boxes, projection, None, orthographic=(sensor["projection"]=="\"orthographic\""))
return img_with_boxes
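# Illustrative call of draw_image_with_box_3d with made-up values. The field
# names mirror what the functions above read from a capture record; the
# intrinsic matrix values are placeholders, not real Perception output.
from PIL import Image

example_sensor = {
    "camera_intrinsic": [[1.30, 0.0, 0.0], [0.0, 1.73, 0.0], [0.0, 0.0, -1.0]],
    "projection": "\"perspective\"",
}
example_values = [{
    "label_id": 1,
    "translation": {"x": 0.0, "y": 0.0, "z": 5.0},
    "size": {"x": 1.0, "y": 1.0, "z": 1.0},
    "rotation": {"x": 0.0, "y": 0.0, "z": 0.0, "w": 1.0},
}]
example_image = Image.new("RGB", (640, 480))
example_preview = draw_image_with_box_3d(example_image, example_sensor, example_values, colors=None)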
def display_count(
header: str,
description: str,
):
"""
:param header: Image header
:type str:
:param description: Image description
:type str:
"""
return
@st.cache(show_spinner=True, allow_output_mutation=True)
def load_perception_dataset(path: str) -> Tuple:
"""

:param base_dataset_dir: The directory that contains the perceptions datasets.
:type str:
"""
#st.markdown("# Synthetic Dataset Preview\n ## Unity Technologies ")
# st.markdown("# Synthetic Dataset Preview\n ## Unity Technologies ")
- labelers = {'semantic_segmentation': st.sidebar.checkbox("Semantic Segmentation", key="ss"),
- 'bounding_boxes_2d': st.sidebar.checkbox("Bounding Boxes", key="bb2d")}
available_labelers = [a["name"] for a in dataset.metadata.annotations]
labelers = {}
if 'bounding box' in available_labelers:
labelers['bounding box'] = st.sidebar.checkbox("Bounding Boxes 2D", key="bb2d")
if 'bounding box 3D' in available_labelers:
labelers['bounding box 3D'] = st.sidebar.checkbox("Bounding Boxes 3D", key="bb3d")
if 'keypoints' in available_labelers:
labelers['keypoints'] = st.sidebar.checkbox("Key Points", key="kp")
if 'instance segmentation' in available_labelers and 'semantic segmentation' in available_labelers:
if st.sidebar.checkbox('Segmentation'):
selected_segmentation = st.sidebar.radio("Select the segmentation type:", ['Semantic Segmentation', 'Instance Segmentation'], index=0)
if selected_segmentation == 'Semantic Segmentation':
labelers['semantic segmentation'] = True
elif selected_segmentation == 'Instance Segmentation':
labelers['instance segmentation'] = True
elif 'semantic segmentation' in available_labelers:
labelers['semantic segmentation'] = st.sidebar.checkbox("Semantic Segmentation", key="ss")
elif 'instance segmentation' in available_labelers:
labelers['instance segmentation'] = st.sidebar.checkbox("Instance Segmentation", key="is")
session_state = SessionState.get(image='-1', start_at='0', num_cols='3')
index = int(session_state.image)
if index >= 0:

grid_view(num_rows, colors, dataset, session_state, labelers)
def get_image_with_labelers(image_and_labelers, dataset, colors, labelers_to_use):
classes = dataset.classes
image = image_and_labelers['image']
if 'semantic segmentation' in labelers_to_use and labelers_to_use['semantic segmentation']:
semantic = image_and_labelers["semantic segmentation"]
image = draw_image_with_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], semantic,
"Semantic Segmentation Preview", ""
)
if 'instance segmentation' in labelers_to_use and labelers_to_use['instance segmentation']:
instance = image_and_labelers['instance segmentation']
image = draw_image_with_segmentation(
image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], instance,
"Semantic Segmentation Preview", ""
)
if 'bounding box' in labelers_to_use and labelers_to_use['bounding box']:
target = image_and_labelers["bounding box"]
labels = target["labels"]
boxes = target["boxes"]
image = draw_image_with_boxes(
image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
)
if 'keypoints' in labelers_to_use and labelers_to_use['keypoints']:
keypoints = image_and_labelers["keypoints"]
image = draw_image_with_keypoints(
image, keypoints, dataset
)
if 'bounding box 3D' in labelers_to_use and labelers_to_use['bounding box 3D']:
sensor, values = image_and_labelers['bounding box 3D']
image = draw_image_with_box_3d(image, sensor, values, colors)
return image
- header = st.beta_columns([2/3, 1/3])
- num_cols = header[1].slider(label="Image per row: ", min_value=1, max_value=5, step=1, value=int(session_state.num_cols))
- session_state.num_cols = num_cols
+ header = st.beta_columns([2 / 3, 1 / 3])
+ num_cols = header[1].slider(label="Image per row: ", min_value=1, max_value=5, step=1,
+ value=int(session_state.num_cols))
+ if not num_cols == session_state.num_cols:
+ session_state.num_cols = num_cols
+ st.experimental_rerun()
with header[0]:
start_at = item_selector(int(session_state.start_at), num_cols * num_rows, len(dataset))
session_state.start_at = start_at

for i in range(start_at, min(start_at + (num_cols * num_rows), len(dataset))):
- classes = dataset.classes
- image, segmentation, target = dataset[i]
- labels = target["labels"]
- boxes = target["boxes"]
+ image = get_image_with_labelers(dataset[i], dataset, colors, labelers)
- if labelers['semantic_segmentation']:
- image = draw_image_with_semantic_segmentation(
- image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], segmentation, "Semantic Segmentation Preview", ""
- )
- if labelers['bounding_boxes_2d']:
- image = draw_image_with_boxes(
- image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
- )
- expand_image = container.button(label="Expand image", key="exp"+str(i))
+ expand_image = container.button(label="Expand image", key="exp" + str(i))
container.image(image, caption=str(i), use_column_width=True)
if expand_image:
session_state.image = i

def zoom(index, colors, dataset, session_state, labelers, dataset_path):
- classes = dataset.classes
- image, segmentation, target = dataset[index]
- labels = target["labels"]
- boxes = target["boxes"]
header = st.beta_columns([0.2, 0.6, 0.2])
if header[0].button('< Back to Grid view'):

session_state.image = new_index
st.experimental_rerun()
- if labelers['semantic_segmentation']:
- image = draw_image_with_semantic_segmentation(
- image, dataset.metadata.image_size[0], dataset.metadata.image_size[1], segmentation, "Semantic Segmentation Preview", ""
- )
- if labelers['bounding_boxes_2d']:
- image = draw_image_with_boxes(
- image, classes, labels, boxes, colors, "Bounding Boxes Preview", ""
- )
+ image = get_image_with_labelers(dataset[index], dataset, colors, labelers)
layout = st.beta_columns([0.7, 0.3])
layout[0].image(image, use_column_width=True)

raise ValueError("Please specify the path to the main dataset directory!")
- #st.markdown('<script type="application/javascript"> function resizeIFrameToFitContent( iFrameme ) { iFrame.width = '
+ # st.markdown('<script type="application/javascript"> function resizeIFrameToFitContent( iFrameme ) { iFrame.width = '
# 'iFrame.contentWindow.document.body.scrollWidth;iFrame.height = '
# 'iFrame.contentWindow.document.body.scrollHeight;} window.addEventListener(\'DOMContentLoaded\', '
# 'function(e) { var iFrame = document.getElementById( \'iFrame1\' ); resizeIFrameToFitContent( iFrame '

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/pyrception.py (73 lines changed)


# Extract the class labels
self.classes = []
for label in self.annotations[0]["spec"]:
- self.classes.append(label["label_name"])
+ if "label_name" in label:
+ self.classes.append(label["label_name"])
# Set the number of classes
self.num_classes = len(self.classes)

)
self.last_file_index = None
- def __getitem__(self, index: int) -> Tuple:
+ def __getitem__(self, index: int) -> dict:
:return: Returns a tuple containing the image and target metadata as (image, target)
:rtype: Tuple
"""
if index > self.metadata.length - 1:

image_and_labelers = {}
# Image
segmentation = Image.open(
os.path.join(self.metadata.data_dir, self.data[sub_index]["annotations"][1]["filename"])
).convert("RGB")
image_and_labelers["image"] = image
# Assumes that the order is the same for the annotations in metadata as in the captures_***.json file
annotations = {}
for i in range(len(self.metadata.annotations)):
a = self.metadata.annotations[i]
for j in range(len(self.data[sub_index]["annotations"])):
if self.data[sub_index]["annotations"][j]["annotation_definition"] == a["id"]:
annotations[a["name"]] = j
break
self.metadata.available_annotations = annotations
# Bounding Boxes
if "bounding box" in annotations:
image_and_labelers["bounding box"] = self.get_bounding_boxes(sub_index, annotations["bounding box"])
# Bounding Boxes 3d
if "bounding box 3D" in annotations:
image_and_labelers["bounding box 3D"] = self.get_bounding_box_3d(sub_index, annotations["bounding box 3D"])
# Semantic Segmentation
if "semantic segmentation" in annotations:
image_and_labelers["semantic segmentation"] = self.get_segmentation(sub_index, annotations[
"semantic segmentation"])
# Instance Segmentation
if "instance segmentation" in annotations:
image_and_labelers["instance segmentation"] = self.get_segmentation(sub_index, annotations[
"instance segmentation"])
# Keypoints
if "keypoints" in annotations:
image_and_labelers["keypoints"] = self.get_keypoints(sub_index, annotations["keypoints"])
return image_and_labelers
def get_keypoints(self, sub_index, ann_index):
image_ann = self.data[sub_index]
keypoints = image_ann["annotations"][ann_index]["values"][0]["keypoints"]
return keypoints
def get_segmentation(self, sub_index, ann_index):
return Image.open(
os.path.join(self.metadata.data_dir, self.data[sub_index]["annotations"][ann_index]["filename"])
).convert("RGB")
def get_bounding_box_3d(self, sub_index, ann_index):
sensor = self.data[sub_index]["sensor"]
values = self.data[sub_index]["annotations"][ann_index]["values"]
return sensor, values
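# Illustrative capture record (one entry of self.data) matching the keys read by
# __getitem__ and the helpers above; ids and filenames are made up.
example_capture = {
    "filename": "RGB00000000-0000-0000-0000-000000000000/rgb_2.png",
    "sensor": {"camera_intrinsic": [[1.3, 0, 0], [0, 1.73, 0], [0, 0, -1]],
               "projection": "\"perspective\""},
    "annotations": [
        {"annotation_definition": "def-bbox2d", "values": []},
        {"annotation_definition": "def-semseg", "filename": "SemanticSegmentation/segmentation_2.png"},
        {"annotation_definition": "def-bbox3d", "values": []},
    ],
}
# Each entry of self.metadata.annotations is expected to carry a matching "id"
# and a "name" such as "bounding box 3D"; the loop in __getitem__ turns that
# into {name: index into example_capture["annotations"]}.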
def get_bounding_boxes(self, sub_index, ann_index):
- for value in image_ann["annotations"][0]["values"]:
+ for value in image_ann["annotations"][ann_index]["values"]:
box = [
value["x"],
value["y"],

# assumes that the image id naming convention is
# RGB<uuid>/rgb_<image_id>.png
image_id = self.data[sub_index]["filename"][44:-4]
- target = {"image_id": image_id, "labels": labels, "boxes": boxes}
- return image, segmentation, target
+ return {"image_id": image_id, "labels": labels, "boxes": boxes}
def __len__(self) -> int:
"""

com.unity.perception/Editor/Pyrception/pyrception-utils/requirements.txt (2 lines changed)


pytest-html==3.1.1
pytest-datadir==1.3.1
coverage==5.5
+ opencv-python>=4.5
+ pyquaternion>=0.9.9

com.unity.perception/Editor/Pyrception/pyrception-utils/setup.py (2 lines changed)


"streamlit==0.75.0",
"google-cloud-storage==1.19.0",
"gcsfs==0.7.1",
"pyquaternion>=0.9.9",
"opencv-python>=4.5"
],
entry_points={"console_scripts": ["pyrception-utils=pyrception_utils.cli:main"]},
)

com.unity.perception/Runtime/GroundTruth/Labelers/BoundingBox3DLabeler.cs (2 lines changed)


var forward = box.rotation * Vector3.forward;
var s = box.size * 0.5f;
- var bbl = CalculateRotatedPoint (cam, t,right, up, forward,-s.x,-s.y, -s.z);
+ var bbl = CalculateRotatedPoint(cam, t,right, up, forward,-s.x,-s.y, -s.z);
var btl = CalculateRotatedPoint(cam, t,right, up, forward,-s.x, s.y, -s.z);
var btr = CalculateRotatedPoint(cam, t,right, up, forward,s.x, s.y, -s.z);
var bbr = CalculateRotatedPoint(cam, t,right, up, forward,s.x, -s.y, -s.z);
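Each corner is the box centre offset by signed half-extents along the box's rotated right, up, and forward axes. The numpy sketch below mirrors what CalculateRotatedPoint appears to compute (ignoring the camera-space transform the labeler also applies); it is written in Python to match the rest of the tooling and is illustrative only.

import numpy as np
from pyquaternion import Quaternion

def rotated_corner(translation, rotation, sx, sy, sz):
    # Box centre plus signed half-extents along the rotated axes.
    right = rotation.rotate(np.array([1.0, 0.0, 0.0]))
    up = rotation.rotate(np.array([0.0, 1.0, 0.0]))
    forward = rotation.rotate(np.array([0.0, 0.0, 1.0]))
    return np.asarray(translation) + sx * right + sy * up + sz * forward

t, q = (0.0, 0.0, 5.0), Quaternion()
s = np.array([1.0, 1.0, 1.0]) * 0.5                 # half extents, as in size * 0.5f
bbl = rotated_corner(t, q, -s[0], -s[1], -s[2])     # bottom-back-left, like the -s.x, -s.y, -s.z case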

com.unity.perception/Runtime/GroundTruth/PerceptionCamera.cs (3 lines changed)


// Record the camera's projection matrix
SetPersistentSensorData("camera_intrinsic", ToProjectionMatrix3x3(cam.projectionMatrix));
// Record the camera's projection type (orthographic or perspective)
SetPersistentSensorData("projection", cam.orthographic ? "orthographic" : "perspective");
var captureFilename = $"{Manager.Instance.GetDirectoryFor(rgbDirectory)}/{k_RgbFilePrefix}{Time.frameCount}.png";
var dxRootPath = $"{rgbDirectory}/{k_RgbFilePrefix}{Time.frameCount}.png";
SensorHandle.ReportCapture(dxRootPath, SensorSpatialData.FromGameObjects(

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py (311 lines changed)


import math
import numpy as np
from pyquaternion import Quaternion
def group_bbox2d_per_label(bboxes):
"""Group 2D bounding boxes with same label.
Args:
bboxes (list[BBox2D]): a list of 2D bounding boxes
Returns:
dict: a dictionary of 2D bounding box groups.
{label1: [bbox1, bboxes2, ...], label2: [bbox1, ...]}
"""
bboxes_per_label = {}
for box in bboxes:
if box.label not in bboxes_per_label:
bboxes_per_label[box.label] = []
bboxes_per_label[box.label].append(box)
return bboxes_per_label
class BBox2D:
"""Canonical Representation of a 2D bounding box.
Attributes:
label (str): string representation of the label.
x (float): x pixel coordinate of the upper left corner.
y (float): y pixel coordinate of the upper left corner.
w (float): width (number of pixels) of the bounding box.
h (float): height (number of pixels) of the bounding box.
score (float): detection confidence score. Default is set to score=1.
if this is a ground truth bounding box.
Examples:
Here is an example about how to use this class.
.. code-block::
>>> gt_bbox = BBox2D(label='car', x=2, y=6, w=2, h=4)
>>> gt_bbox
"label='car'|score=1.0|x=2.0|y=6.0|w=2.0|h=4.0"
>>> pred_bbox = BBox2D(label='car', x=2, y=5, w=2, h=4, score=0.79)
>>> pred_bbox.area
8
>>> pred_bbox.intersect_with(gt_bbox)
True
>>> pred_bbox.intersection(gt_bbox)
6
>>> pred_bbox.union(gt_bbox)
10
>>> pred_bbox.iou(gt_bbox)
0.6
"""
def __init__(self, label, x, y, w, h, score=1.0):
""" Initialize 2D bounding box object
Args:
label (str): string representation of the label
x (float): x pixel coordinate of the upper left corner
y (float): y pixel coordinate of the upper left corner
w (float): width (number of pixels) of the bounding box
h (float): height (number of pixels) of the bounding box
score (float): detection confidence score
"""
self.label = label
self.x = x
self.y = y
self.w = w
self.h = h
self.score = score
def __repr__(self):
return (
f"label={self.label}|score={self.score:.2f}|"
f"x={self.x:.2f}|y={self.y:.2f}|w={self.w:.2f}|h={self.h:.2f}"
)
def __eq__(self, other):
return (
self.x == other.x
and self.y == other.y
and self.w == other.w
and self.h == other.h
and self.label == other.label
and math.isclose(self.score, other.score, rel_tol=1e-07)
)
@property
def area(self):
"""Calculate area of this bounding box
Returns:
width x height of the bound box
"""
return self.w * self.h
def intersect_with(self, other):
"""Check whether this box intersects with other bounding box
Args:
other (BBox2D): other bounding box object to check intersection
Returns:
True if two bounding boxes intersect, False otherwise
"""
if self.x > other.x + other.w:
return False
if other.x > self.x + self.w:
return False
if self.y + self.h < other.y:
return False
if self.y > other.y + other.h:
return False
return True
def intersection(self, other):
"""Calculate the intersection area with other bounding box
Args:
other (BBox2D): other bounding box object to calculate intersection
Returns:
float of the intersection area for two bounding boxes
"""
x1 = max(self.x, other.x)
y1 = max(self.y, other.y)
x2 = min(self.x + self.w, other.x + other.w)
y2 = min(self.y + self.h, other.y + other.h)
return (x2 - x1) * (y2 - y1)
def union(self, other, intersection_area=None):
"""Calculate union area with other bounding box
Args:
other (BBox2D): other bounding box object to calculate union
intersection_area (float): pre-calculated area of intersection
Returns:
float of the union area for two bounding boxes
"""
area_a = self.area
area_b = other.area
if not intersection_area:
intersection_area = self.intersection(other)
return float(area_a + area_b - intersection_area)
def iou(self, other):
"""Calculate intersection over union area with other bounding box
.. math::
IOU = \\frac{intersection}{union}
Args:
other (BBox2D): other bounding box object to calculate iou
Returns:
float of the union area for two bounding boxes
"""
# if boxes don't intersect
if not self.intersect_with(other):
return 0
intersection_area = self.intersection(other)
union_area = self.union(other, intersection_area=intersection_area)
# intersection over union
iou = intersection_area / union_area
return iou
class BBox3D:
"""
Class for 3d bounding boxes which can either be predictions or
ground-truths. This class is the primary representation in this repo of 3d
bounding boxes and is based off of the Nuscenes style dataset.
"""
def __init__(
self,
translation,
size,
label,
sample_token,
score=1,
rotation: Quaternion = Quaternion(),
velocity=(np.nan, np.nan, np.nan),
):
self.sample_token = sample_token
self.translation = translation
self.size = size
self.width, self.height, self.length = size
self.rotation = rotation
self.velocity = velocity
self.label = label
self.score = score
def _local2world_coordinate(self, x):
"""
Args:
x: vector describing point (x,y,z) in local coordinates (where the
center of the box is 0,0,0)
Returns: the x,y,z coordinates of the input point in global coordinates
"""
y = np.array(self.translation) + self.rotation.rotate(x)
return y
@property
def back_left_bottom_pt(self):
"""
Returns: :py:class:`float`: Back-left-bottom point.
"""
p = np.array([-self.width / 2, -self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_left_bottom_pt(self):
"""
:py:class:`float`: Front-left-bottom point.
"""
p = np.array([-self.width / 2, -self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_right_bottom_pt(self):
"""
:py:class:`float`: Front-right-bottom point.
"""
p = np.array([self.width / 2, -self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_right_bottom_pt(self):
"""
:py:class:`float`: Back-right-bottom point.
"""
p = np.array([self.width / 2, -self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_left_top_pt(self):
"""
:py:class:`float`: Back-left-top point.
"""
p = np.array([-self.width / 2, self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_left_top_pt(self):
"""
:py:class:`float`: Front-left-top point.
"""
p = np.array([-self.width / 2, self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def front_right_top_pt(self):
"""
:py:class:`float`: Front-right-top point.
"""
p = np.array([self.width / 2, self.height / 2, self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def back_right_top_pt(self):
"""
:py:class:`float`: Back-right-top point.
"""
p = np.array([self.width / 2, self.height / 2, -self.length / 2])
p = self._local2world_coordinate(p)
return p
@property
def p(self) -> np.ndarray:
"""
Returns: list of all 8 corners of the box beginning with the bottom
four corners and then the top
four corners, both in counterclockwise order (from birds eye view)
beginning with the back-left corner
"""
x = np.vstack(
[
self.back_left_bottom_pt,
self.front_left_bottom_pt,
self.front_right_bottom_pt,
self.back_right_bottom_pt,
self.back_left_top_pt,
self.front_left_top_pt,
self.front_right_top_pt,
self.back_right_top_pt,
]
)
return x
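# Usage sketch (made-up values): read_bounding_box_3d in preview.py constructs
# BBox3D instances like this from the capture metadata, and bbox3d_plot projects
# the corner properties above into pixel space.
from pyquaternion import Quaternion

example_box = BBox3D(
    translation=(0.0, 0.0, 5.0),
    size=(1.0, 1.0, 1.0),
    label=1,
    sample_token=0,
    rotation=Quaternion(axis=(0.0, 1.0, 0.0), degrees=90),
)
corners = example_box.p      # (8, 3): bottom four corners first, then the top four
assert corners.shape == (8, 3)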

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox.py.meta (7 lines changed)


fileFormatVersion: 2
guid: 4122f2f81144716438e5281967ce7272
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py (179 lines changed)


""" Helper bounding box 3d library to plot pretty 3D boundign
boxes with a simple Python API.
"""
import cv2
import numpy
import streamlit as st
def _add_single_bbox3d_on_image(
image,
front_bottom_left,
front_upper_left,
front_upper_right,
front_bottom_right,
back_bottom_left,
back_upper_left,
back_upper_right,
back_bottom_right,
color=None,
box_line_width=2,
):
""" Add a single 3D bounding box to the passed in image.
For this version of the method, all of the passed in coordinates should be
integer tuples already projected in image pixel coordinate space.
Args:
image (numpy array): numpy array version of the image
front_bottom_left (int tuple): Front bottom left coordinate of the 3D
bounding box in pixel space
front_upper_left (int tuple): Front upper left coordinate of the 3D
bounding box in pixel space
front_upper_right (int tuple): Front upper right coordinate of the 3D
bounding box in pixel space
front_bottom_right (int tuple): Front bottom right coordinate of the 3D
bounding box in pixel space
back_bottom_left (int tuple): Back bottom left coordinate of the 3D
bounding box in pixel space
back_upper_left (int tuple): Back upper left coordinate of the 3D
bounding box in pixel space
back_upper_right (int tuple): Back upper right coordinate of the 3D
bounding box in pixel space
back_bottom_right (int tuple): Back bottom right coordinate of the 3D
bounding box in pixel space
color (tuple): RGBA color of the bounding box. Defaults to None. If
color = None, the tuple [0, 255, 0, 255] (green) will be used.
box_line_width: The width of the drawn box. Defaults to 2.
"""
try:
fbl = (front_bottom_left[0], front_bottom_left[1])
ful = (front_upper_left[0], front_upper_left[1])
fur = (front_upper_right[0], front_upper_right[1])
fbr = (front_bottom_right[0], front_bottom_right[1])
bbl = (back_bottom_left[0], back_bottom_left[1])
bul = (back_upper_left[0], back_upper_left[1])
bur = (back_upper_right[0], back_upper_right[1])
bbr = (back_bottom_right[0], back_bottom_right[1])
except ValueError:
raise TypeError("all box coordinates must be numbers")
if color is None:
color = [0, 255, 0, 255]
cv2.line(image, fbl, ful, color, box_line_width) # front left
cv2.line(image, ful, fur, color, box_line_width) # front top
cv2.line(image, fbr, fur, color, box_line_width) # front right
cv2.line(image, fbl, fbr, color, box_line_width) # front bottom
cv2.line(image, bbl, bul, color, box_line_width) # back left
cv2.line(image, bul, bur, color, box_line_width) # back top
cv2.line(image, bbr, bur, color, box_line_width) # back right
cv2.line(image, bbl, bbr, color, box_line_width) # back bottom
cv2.line(image, ful, bul, color, box_line_width) # top left
cv2.line(image, fur, bur, color, box_line_width) # top right
cv2.line(image, fbl, bbl, color, box_line_width) # bottom left
cv2.line(image, fbr, bbr, color, box_line_width) # bottom right
def add_single_bbox3d_on_image(
image, box, proj, color=None, orthographic=False, box_line_width=2,
):
"""" Add single 3D bounding box on a given image.
Args:
image (numpy array): a numpy array for an image
box (BBox3D): a 3D bounding box in camera's coordinate system
proj (numpy 2D array): camera's 3x3 projection matrix
color(tuple): RGBA color of the bounding box. Defaults to None. If
color = None, the tuple [0, 255, 0, 255] (green) will be used.
box_line_width (int): line width of the bounding boxes. Defaults to 2.
"""
img_height, img_width, _ = image.shape
fll = box.back_left_bottom_pt
ful = box.back_left_top_pt
fur = box.back_right_top_pt
flr = box.back_right_bottom_pt
bll = box.front_left_bottom_pt
bul = box.front_left_top_pt
bur = box.front_right_top_pt
blr = box.front_right_bottom_pt
pixel_location_fun = _project_pt_to_pixel_location_orthographic if orthographic else _project_pt_to_pixel_location
fll_raster = pixel_location_fun(fll, proj, img_height, img_width)
ful_raster = pixel_location_fun(ful, proj, img_height, img_width)
fur_raster = pixel_location_fun(fur, proj, img_height, img_width)
flr_raster = pixel_location_fun(flr, proj, img_height, img_width)
bll_raster = pixel_location_fun(bll, proj, img_height, img_width)
bul_raster = pixel_location_fun(bul, proj, img_height, img_width)
bur_raster = pixel_location_fun(bur, proj, img_height, img_width)
blr_raster = pixel_location_fun(blr, proj, img_height, img_width)
_add_single_bbox3d_on_image(
image,
fll_raster,
ful_raster,
fur_raster,
flr_raster,
bll_raster,
bul_raster,
bur_raster,
blr_raster,
color,
box_line_width,
)
def _project_pt_to_pixel_location(pt, projection, img_height, img_width):
""" Projects a 3D coordinate into a pixel location.
Applies the passed in projection matrix to project a point from the camera's
coordinate space into pixel space.
For a description of the math used in this method, see:
https://www.scratchapixel.com/lessons/3d-basic-rendering/computing-pixel-coordinates-of-3d-point/
Args:
pt (numpy array): The 3D point to project.
projection (numpy 2D array): The camera's 3x3 projection matrix.
img_height (int): The height of the image in pixels.
img_width (int): The width of the image in pixels.
Returns:
numpy array: a one-dimensional array with two values (x and y)
representing a point's pixel coordinate in an image.
"""
_pt = projection.dot(pt)
# compute the perspective divide. Near clipping plane should take care of
# divide by zero cases, but we will check to be sure
if _pt[2] != 0:
_pt /= _pt[2]
return numpy.array(
[
int(-(_pt[0] * img_width) / 2.0 + (img_width * 0.5)),
int((_pt[1] * img_height) / 2.0 + (img_height * 0.5)),
]
)
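# Worked example (illustrative, not part of the module): with an identity
# projection and a 640x480 image, the camera-space point (0.5, 0.5, 1) keeps its
# values after the perspective divide and lands at pixel (160, 360): x is
# mirrored and shifted to the image centre, y is scaled and shifted.
import numpy as np
assert list(_project_pt_to_pixel_location(np.array([0.5, 0.5, 1.0]), np.eye(3), 480, 640)) == [160, 360]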
def _project_pt_to_pixel_location_orthographic(pt, projection, img_height, img_width):
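""" Projects a 3D camera-space point into a pixel location for an
orthographic camera. (Docstring added for clarity; behavior described
from the code below.)
The x scale is taken from projection[0][0], the y scale from -projection[1][1]
(flipping the vertical axis), and the resulting normalized [-1, 1] coordinates
are mapped to [0, img_width] and [0, img_height].
Args:
pt (numpy array): The 3D point to project.
projection (numpy 2D array): The camera's 3x3 projection matrix.
img_height (int): The height of the image in pixels.
img_width (int): The width of the image in pixels.
Returns:
list: the [x, y] pixel coordinates of the projected point.
"""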
projection = numpy.array([
[projection[0][0], 0, 0],
[0, -projection[1][1], 0],
[0, 0, projection[2][2]]
])
temp = projection.dot(pt)
pixel = [
int((temp[0] + 1)*0.5 * img_width),
int((temp[1] + 1)*0.5 * img_height)
]
return pixel

com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/bbox3d_plot.py.meta (7 lines changed)


fileFormatVersion: 2
guid: 6ebe9fac6325103488e689f91f4e486e
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant: