
Change from pyrception-utils to datasetinsights

/pyrception-integration
leopoldo-zugasti, 3 years ago
Current commit
88e3b24c
250 files changed, with 3891 insertions and 181 deletions
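At its core, this commit swaps the pyrception-utils dataset loader for a vendored copy of datasetinsights. A minimal sketch of the import-level change, using only names that appear in the diff below (surrounding code omitted):

```python
# Before: dataset loading went through pyrception-utils.
# from pyrception_utils import PyrceptionDataset
# dataset = PyrceptionDataset(data_dir=path)

# After: load through the vendored datasetinsights modules instead.
from datasetinsights_master.datasetinsights.datasets.unity_perception import (
    AnnotationDefinitions,
    MetricDefinitions,
)
from datasetinsights_master.datasetinsights.datasets.unity_perception.captures import Captures
```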
  1. 110 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py
  2. 211 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/visualization/visualizers.py
  3. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master.meta
  4. 85 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.dockerignore
  5. 13 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.flake8
  6. 129 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/CODE_OF_CONDUCT.md
  7. 30 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/bug_report.md
  8. 17 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/feature_request.md
  9. 9 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md
  10. 11 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/PULL_REQUEST_TEMPLATE.md
  11. 64 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/linting-and-unittests.yaml
  12. 19 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/publish-docker-hub.yaml
  13. 44 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/publish-pypi.yaml
  14. 116 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.gitignore
  15. 28 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.pre-commit-config.yaml
  16. 13 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.readthedocs.yaml
  17. 116 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/CONTRIBUTING.md
  18. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/CONTRIBUTING.md.meta
  19. 39 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Dockerfile
  20. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Dockerfile.meta
  21. 201 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/LICENCE
  22. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/LICENCE.meta
  23. 18 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Makefile
  24. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Makefile.meta
  25. 119 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/README.md
  26. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/README.md.meta
  27. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights.meta
  28. 0 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__init__.py
  29. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__init__.py.meta
  30. 36 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__main__.py
  31. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__main__.py.meta
  32. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands.meta
  33. 30 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/__init__.py
  34. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/__init__.py.meta
  35. 140 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/download.py
  36. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/download.py.meta
  37. 16 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/constants.py
  38. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/constants.py.meta
  39. 118 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/dashboard.py
  40. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/dashboard.py.meta
  41. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets.meta
  42. 0 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/__init__.py
  43. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/__init__.py.meta
  44. 2 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/exceptions.py
  45. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/exceptions.py.meta
  46. 79 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/synthetic.py
  47. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/synthetic.py.meta
  48. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception.meta
  49. 12 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py
  50. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py.meta
  51. 201 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py
  52. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py.meta
  53. 3 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py
  54. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py.meta
  55. 124 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py
  56. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py.meta
  57. 253 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/references.py
  58. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/references.py.meta
  59. 86 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py
  60. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py.meta
  61. 57 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py
  62. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py.meta
  63. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io.meta
  64. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/__init__.py
  65. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/__init__.py.meta
  66. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/bbox.py.meta
  67. 218 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/download.py
  68. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/download.py.meta
  69. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader.meta
  70. 11 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/__init__.py
  71. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/__init__.py.meta
  72. 83 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/base.py
  73. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/base.py.meta
  74. 26 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py
  75. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py.meta
  76. 51 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/http_downloader.py
  77. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/http_downloader.py.meta
  78. 392 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py
  79. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py.meta
  80. 13 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/exceptions.py
  81. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/exceptions.py.meta
  82. 246 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/gcs.py
  83. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/gcs.py.meta
  84. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats.meta
  85. 23 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/__init__.py
  86. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/__init__.py.meta
  87. 150 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/statistics.py
  88. 7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/statistics.py.meta
  89. 8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/visualization.meta

110 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/preview.py


import numpy as np
import streamlit as st
import SessionState
import visualization.visualizers as v
import streamlit.components.v1 as components
import datasetinsights
from datasetinsights.datasets.unity_perception import AnnotationDefinitions, MetricDefinitions
from datasetinsights.datasets.unity_perception.captures import Captures
#from pyrception_utils import PyrceptionDataset
import datasetinsights_master.datasetinsights
from datasetinsights_master.datasetinsights.datasets.unity_perception import AnnotationDefinitions, MetricDefinitions
from datasetinsights_master.datasetinsights.datasets.unity_perception.captures import Captures
import visualization.visualizers as v
st.set_page_config(layout="wide") # This needs to be the first streamlit command
import helpers.custom_components_setup as cc

@st.cache(show_spinner=True, allow_output_mutation=True)
def load_perception_dataset(data_root: str) -> Tuple:
"""
Loads the perception dataset into the Streamlit cache.
:param data_root: Dataset root path
:type data_root: str
:return: A tuple of annotation definitions, metric definitions, and captures as (ann_def, metric_def, cap)
:rtype: Tuple
"""
# --------------------------------CHANGE TO DATASETINSIGHTS LOADING---------------------------------------------
# dataset = PyrceptionDataset(data_dir=path)
# classes = dataset.classes
# colors = {name: tuple(np.random.randint(128, 255, size=3)) for name in classes}
# return colors, dataset
def preview_dataset(base_dataset_dir: str):
"""

base_dataset_dir = session_state.curr_dir
st.sidebar.markdown("# Select Project")
if st.sidebar.button("Change dataset folder"):
if st.sidebar.button("Change project folder"):
st.sidebar.markdown("# Dataset Selection")
st.sidebar.markdown("# Select Dataset")
datasets = list_datasets(base_dataset_dir)
datasets_names = [ctime + " " + item for ctime, item in datasets]

break
if dataset_name is not None:
ann_def, metric_def, cap = load_perception_dataset(
os.path.join(base_dataset_dir, dataset_name)
)
st.sidebar.markdown("# Labeler Visualization")
data_root = os.path.join(base_dataset_dir, dataset_name)
ann_def, metric_def, cap = load_perception_dataset(data_root)
st.write(ann_def)
st.sidebar.markdown("# Labeler Visualization")
available_labelers = [a[1] for a in ann_def.table.iterrows()]
available_labelers = [a["name"] for a in ann_def.table.to_dict('records')]
labelers = {}
if 'bounding box' in available_labelers:

index = int(session_state.image)
if index >= 0:
dataset_path = os.path.join(base_dataset_dir, dataset_name)
zoom(index, ann_def, metric_def, cap, base_dataset_dir, session_state, labelers, dataset_path)
zoom(index, ann_def, metric_def, cap, data_root, session_state, labelers, data_root)
grid_view(num_rows, ann_def, metric_def, cap, base_dataset_dir, session_state, labelers)
grid_view(num_rows, ann_def, metric_def, cap, data_root, session_state, labelers)
def get_image_with_labelers(index, ann_def, metric_def, cap, data_root, labelers_to_use):
def get_annotation_def(ann_def, name):
for idx, a in enumerate(ann_def.table.to_dict('records')):
if a["name"] == name:
return a["id"]
return -1
filename = os.path.join(data_root, cap.loc[index, "filename"])
def get_annotation_index(ann_def, name):
for idx, a in enumerate(ann_def.table.to_dict('records')):
if a["name"] == name:
return idx
return -1
def get_image_with_labelers(index, ann_def, metric_def, cap, data_root, labelers_to_use):
filename = os.path.join(data_root, cap.captures.loc[index, "filename"])
semantic_segmentation_definition_id = -1
for idx, a in enumerate(ann_def):
if a["name"] == "semantic segmentation":
semantic_segmentation_definition_id = idx
break
semantic_segmentation_definition_id = get_annotation_def(ann_def, 'semantic segmentation')
seg_captures = cap.filter(def_id=semantic_segmentation_definition_id)
seg_filename = os.path.join(data_root, seg_captures.loc[index, "annotation.filename"])

)
if 'instance segmentation' in labelers_to_use and labelers_to_use['instance segmentation']:
instance_segmentation_definition_id = -1
for idx, a in enumerate(ann_def):
if a["name"] == "semantic segmentation":
instance_segmentation_definition_id = idx
break
instance_segmentation_definition_id = get_annotation_def(ann_def, 'instance segmentation')
inst_captures = cap.filter(def_id=instance_segmentation_definition_id)
inst_filename = os.path.join(data_root, inst_captures.loc[index, "annotation.filename"])

)
if 'bounding box' in labelers_to_use and labelers_to_use['bounding box']:
target = image_and_labelers["bounding box"]
labels = target["labels"]
boxes = target["boxes"]
classes = dataset.classes
bounding_box_definition_id = get_annotation_def(ann_def, 'bounding box')
catalog = v.capture_df(bounding_box_definition_id, data_root)
label_mappings = v.label_mappings_dict(bounding_box_definition_id, data_root)
image, classes, labels, boxes, colors
image,
index,
catalog,
label_mappings,
keypoints = image_and_labelers["keypoints"]
image = v.draw_image_with_keypoints(
image, keypoints, dataset
)
keypoints_definition_id = get_annotation_def(ann_def, 'keypoints')
kp_captures = cap.filter(def_id=keypoints_definition_id)
annotations = kp_captures.loc[index, "annotation.values"]
templates = ann_def.table.to_dict('records')[get_annotation_index(ann_def, 'keypoints')]['spec']
v.draw_image_with_keypoints(image, annotations, templates)
sensor, values = image_and_labelers['bounding box 3D']
image = v.draw_image_with_box_3d(image, sensor, values, colors)
bounding_box_3d_definition_id = get_annotation_def(ann_def, 'bounding box 3D')
box_captures = cap.filter(def_id=bounding_box_3d_definition_id)
annotations = box_captures.loc[index, "annotation.values"]
sensor = box_captures.loc[index, "sensor"]
image = v.draw_image_with_box_3d(image, sensor, annotations, None)
return image

session_state.num_cols = num_cols
st.experimental_rerun()
start_at = cc.item_selector(int(session_state.start_at), num_cols * num_rows, len(cap))
start_at = cc.item_selector(int(session_state.start_at), num_cols * num_rows, len(cap.captures.to_dict('records')))
components.html("""<hr style="height:2px;border:none;border-bottom:-25px;color:#AAA;background-color:#AAA;" /> """, height=10)
for i in range(start_at, min(start_at + (num_cols * num_rows), len(cap))):
for i in range(start_at, min(start_at + (num_cols * num_rows), len(cap.captures.to_dict('records')))):
image = get_image_with_labelers(i, ann_def, metric_def, cap, data_root, labelers)
container = cols[(i - (start_at % num_cols)) % num_cols].beta_container()

st.experimental_rerun()
with header[1]:
new_index = cc.item_selector_zoom(index, len(cap))
new_index = cc.item_selector_zoom(index, len(cap.captures.to_dict('records')))
if not new_index == index:
session_state.image = new_index
st.experimental_rerun()
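Piecing the hunks above together, the rewritten loader returns three datasetinsights objects instead of a PyrceptionDataset. Below is a hedged sketch of what load_perception_dataset now amounts to; the AnnotationDefinitions and Captures constructors appear elsewhere in this diff, while constructing MetricDefinitions the same way is an assumption by analogy:

```python
from typing import Tuple

import streamlit as st
from datasetinsights_master.datasetinsights.datasets.unity_perception import (
    AnnotationDefinitions,
    MetricDefinitions,
)
from datasetinsights_master.datasetinsights.datasets.unity_perception.captures import Captures


@st.cache(show_spinner=True, allow_output_mutation=True)
def load_perception_dataset(data_root: str) -> Tuple:
    # Each object parses its table from the dataset root on construction.
    ann_def = AnnotationDefinitions(data_root)
    metric_def = MetricDefinitions(data_root)  # assumed to mirror AnnotationDefinitions
    cap = Captures(data_root)
    return ann_def, metric_def, cap
```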

211 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/visualization/visualizers.py


import pathlib
from typing import Dict, List
import os
from pathlib import Path
import streamlit as st
from PIL import ImageFont
from datasetinsights_master.datasetinsights.datasets.unity_perception import AnnotationDefinitions
from datasetinsights_master.datasetinsights.datasets.unity_perception.captures import Captures
from datasetinsights_master.datasetinsights.datasets.synthetic import read_bounding_box_2d, read_bounding_box_3d
from datasetinsights_master.datasetinsights.stats.visualization.plots import plot_bboxes, plot_bboxes3d, plot_keypoints
from pyquaternion import Quaternion
from visualization.bbox import BBox3D
from visualization.bbox3d_plot import add_single_bbox3d_on_image
def draw_image_with_boxes(
image: Image,
classes: Dict,
labels: List,
boxes: List[List],
colors: Dict,
):
"""
Draws an image in streamlit with labels and bounding boxes.
:param image: the PIL image
:type PIL:
:param classes: the class dictionary
:type Dict:
:param labels: list of integer object labels for the frame
:type List:
:param boxes: List of bounding boxes (as a List of coordinates) for the frame
:type List[List]:
:param colors: class colors
:type Dict:
:param header: Image header
:type str:
:param description: Image description
:type str:
"""
image = image.copy()
image_draw = ImageDraw(image)
# draw bounding boxes
path_to_font = pathlib.Path(__file__).parent.parent.absolute()
font = ImageFont.truetype(f"{path_to_font}/NairiNormal-m509.ttf", 15)
def cleanup(catalog, data_root):
catalog = remove_captures_with_missing_files(data_root, catalog)
catalog = remove_captures_without_bboxes(catalog)
return catalog
for label, box in zip(labels, boxes):
label = label - 1
class_name = classes[label]
image_draw.rectangle(box, outline=colors[class_name], width=2)
image_draw.text(
(box[0], box[1]), class_name, font=font, fill=colors[class_name]
)
def remove_captures_without_bboxes(catalog):
keep_mask = catalog["annotation.values"].apply(len) > 0
return catalog[keep_mask]
def remove_captures_with_missing_files(root, catalog):
def exists(capture_file):
path = Path(root) / capture_file
return path.exists()
keep_mask = catalog.filename.apply(exists)
return catalog[keep_mask]
def capture_df(def_id, data_root):
captures = Captures(data_root)
catalog = captures.filter(def_id)
catalog = cleanup(catalog, data_root)
return catalog
def label_mappings_dict(def_id, data_root):
annotation_def = AnnotationDefinitions(data_root)
init_definition = annotation_def.get_definition(def_id)
label_mappings = {
m["label_id"]: m["label_name"] for m in init_definition["spec"]
}
return label_mappings
return image
def draw_image_with_boxes(
image,
index,
catalog,
label_mappings,
):
cap = catalog.iloc[index]
ann = cap["annotation.values"]
capture = image
image = capture.convert("RGB") # Remove alpha channel
bboxes = read_bounding_box_2d(ann, label_mappings)
return plot_bboxes(image, bboxes, label_mappings)
def draw_image_with_segmentation(

image.paste(foreground, (0, 0), foreground)
return image
image: Image,
keypoints,
dataset,
image, annotations, templates
image = image.copy()
image_draw = ImageDraw(image)
# image = image.copy()
# image_draw = ImageDraw(image)
radius = int(dataset.metadata.image_size[0] * 5/500)
for i in range(len(keypoints)):
keypoint = keypoints[i]
if keypoint["state"] != 2:
continue
coordinates = (keypoint["x"]-radius, keypoint["y"]-radius, keypoint["x"]+radius, keypoint["y"]+radius)
color = dataset.metadata.annotations[find_metadata_annotation_index(dataset,"keypoints")]["spec"][0]["key_points"][i]["color"]
image_draw.ellipse(coordinates, fill=(int(255*color["r"]), int(255*color["g"]), int(255*color["b"]), 255))
# radius = int(dataset.metadata.image_size[0] * 5 / 500)
# for i in range(len(keypoints)):
# keypoint = keypoints[i]
# if keypoint["state"] != 2:
# continue
# coordinates = (keypoint["x"] - radius, keypoint["y"] - radius, keypoint["x"] + radius, keypoint["y"] + radius)
# color = \
# dataset.metadata.annotations[find_metadata_annotation_index(dataset, "keypoints")]["spec"][0]["key_points"][i][
# "color"]
# image_draw.ellipse(coordinates, fill=(int(255 * color["r"]), int(255 * color["g"]), int(255 * color["b"]), 255))
skeleton = dataset.metadata.annotations[find_metadata_annotation_index(dataset,"keypoints")]["spec"][0]["skeleton"]
for bone in skeleton:
if keypoints[bone["joint1"]]["state"] != 2 or keypoints[bone["joint2"]]["state"] != 2:
continue
joint1 = (keypoints[bone["joint1"]]["x"], keypoints[bone["joint1"]]["y"])
joint2 = (keypoints[bone["joint2"]]["x"], keypoints[bone["joint2"]]["y"])
r = bone["color"]["r"]
g = bone["color"]["g"]
b = bone["color"]["b"]
image_draw.line([joint1, joint2], fill=(int(255*r), int(255*g), int(255*b), 255), width=int(dataset.metadata.image_size[0] * 3/500))
return image
# skeleton = dataset.metadata.annotations[find_metadata_annotation_index(dataset, "keypoints")]["spec"][0]["skeleton"]
# for bone in skeleton:
# if keypoints[bone["joint1"]]["state"] != 2 or keypoints[bone["joint2"]]["state"] != 2:
# continue
# joint1 = (keypoints[bone["joint1"]]["x"], keypoints[bone["joint1"]]["y"])
# joint2 = (keypoints[bone["joint2"]]["x"], keypoints[bone["joint2"]]["y"])
# r = bone["color"]["r"]
# g = bone["color"]["g"]
# b = bone["color"]["b"]
# image_draw.line([joint1, joint2], fill=(int(255 * r), int(255 * g), int(255 * b), 255),
# width=int(dataset.metadata.image_size[0] * 3 / 500))
# return image
def plot_bboxes3d(image, bboxes, projection, color, orthographic):
""" Plot an image with 3D bounding boxes
return plot_keypoints(image, annotations, templates)
Currently this method should only be used for ground truth images, and
doesn't support predictions. If a list of colors is not provided as an
argument to this routine, the default color of green will be used.
Args:
image (PIL Image): a PIL image.
bboxes (list): a list of BBox3D objects
projection: The perspective projection of the camera which
captured the ground truth.
colors (list): a color list for boxes. Defaults to none. If
colors = None, it will default to coloring all boxes green.
Returns:
PIL image: a PIL image with bounding boxes drawn on it.
"""
np_image = np.array(image)
img_height, img_width, _ = np_image.shape
for i, box in enumerate(bboxes):
add_single_bbox3d_on_image(np_image, box, projection, color, orthographic=orthographic)
return PIL.Image.fromarray(np_image)
def read_bounding_box_3d(bounding_boxes_metadata):
bboxes = []
for b in bounding_boxes_metadata:
label_id = b['label_id']
translation = (b["translation"]["x"],b["translation"]["y"],b["translation"]["z"])
size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
rotation = b["rotation"]
rotation = Quaternion(
x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
)
#if label_mappings and label_id not in label_mappings:
# continue
box = BBox3D(
translation=translation,
size=size,
label=label_id,
sample_token=0,
score=1,
rotation=rotation,
)
bboxes.append(box)
return bboxes
#TODO Implement colors
#TODO: IMPLEMENT COLORS
projection = np.array([[1,0,0],[0,1,0],[0,0,1]])
projection = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
img_with_boxes = plot_bboxes3d(image, boxes, projection, None, orthographic=(sensor["projection"] == "orthographic"))
img_with_boxes = plot_bboxes3d(image, boxes, projection, None,
orthographic=(sensor["projection"] == "orthographic"))
return img_with_boxes
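For the 2D bounding-box path, the new visualizers.py composes capture_df, label_mappings_dict, and draw_image_with_boxes. A usage sketch limited to calls shown in this diff; the dataset path, definition id, and capture image are illustrative placeholders:

```python
from PIL import Image

import visualization.visualizers as v

data_root = "/path/to/perception/dataset"  # placeholder dataset root
def_id = "bounding-box-definition-id"      # placeholder: looked up via get_annotation_def in preview.py

# Captures filtered to this definition, with rows that have missing image
# files or empty annotation values removed by cleanup().
catalog = v.capture_df(def_id, data_root)

# label_id -> label_name mapping taken from the annotation definition spec.
label_mappings = v.label_mappings_dict(def_id, data_root)

# Draw the boxes for one capture; internally this calls read_bounding_box_2d()
# and plot_bboxes() from the vendored datasetinsights.
image = Image.open("capture.png")          # placeholder capture image
boxed = v.draw_image_with_boxes(image, 0, catalog, label_mappings)
```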

8 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master.meta


fileFormatVersion: 2
guid: 948e708fe62ad0142ba8ea72aeb3355d
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

85 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.dockerignore


# Git
.git
.gitignore
# CI
.codeclimate.yml
.travis.yml
.taskcluster.yml
# Docker
docker-compose.yml
.docker
# Byte-compiled / optimized / DLL files
**/__pycache__/
**/*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
.pytest_cache
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Virtual environment
.env/
.venv/
venv/
# PyCharm
.idea
# IDE
**/.ropeproject
**/.swp
.vscode
.ipynb_checkpoints
# Place project specific ignores here
runs

13 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.flake8


[flake8]
max-line-length = 80
ignore =
E133,
E203,
W503,
W504,
W605,
F541
exclude =
.git,
__pycache__,
datasetinsights/data/datasets/protos/

129 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/CODE_OF_CONDUCT.md


# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
<perception@unity3d.com>.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.

30 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/bug_report.md


---
name: Bug report
about: Report a bug with datasetinsights
labels: bug
---
**Describe the Bug:**
[A clear and concise description of what the bug is.]
**How to Reproduce?**
[What are the steps to reproduce the bug you encountered?]
**What did you expect to happen:**
**Console logs / stack traces**
Please wrap in [triple backticks (```)](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make it easier to read.
**Screenshots**
[If applicable, add screenshots to help explain your problem.]
**Anything else you would like to add:**
[Miscellaneous information that will assist in solving the issue.]
**Environment:**
- OS + version: [e.g. Ubuntu 20.04.1 LTS]
- datasetinsights version
- _Environment_: (which example environment you used to reproduce the error)
- Other environment settings

17 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/feature_request.md


---
name: Feature request
about: Suggest an idea for this project
labels: enhancement
---
**Why you need this feature:**
[Is your feature request related to a problem? Please describe in details]
**Describe the solution you'd like:**
[A clear and concise description of what you want to happen.]
**Anything else you would like to add:**
[Miscellaneous information that will assist in solving the issue.]

9 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md


---
name: Questions about datasetinsights
about: Ask your question or about any confusion that you have about this project
labels: question
---
**Question:**
[You can ask any question about this project.]

11 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/PULL_REQUEST_TEMPLATE.md


# Peer Review Information
Add information on any code, feature, or documentation changes here.
# Pull Request Check List
<!-- This is just a reminder about the most common mistakes. Please make sure that you tick all *appropriate* boxes. Please read our [contribution guide](https://github.com/Unity-Technologies/dataset-insights/blob/master/CONTRIBUTING.md)
at least once, it will save you unnecessary review cycles! -->
- [ ] Added **tests** for changed code.
- [ ] Updated **documentation** for changed code.

64 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/linting-and-unittests.yaml


name: Tests
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Linting
run: |
pip install pre-commit
pre-commit run --all-files
tests:
# reference from https://github.com/python-poetry/poetry/blob/master/.github/workflows/main.yml
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Get full Python version
id: full-python-version
shell: bash
run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
- name: Install poetry
shell: bash
run: |
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
echo "$HOME/.poetry/bin" >> $GITHUB_PATH
- name: Configure poetry
shell: bash
run: poetry config virtualenvs.in-project true
- name: Set up cache
uses: actions/cache@v2
id: cache
with:
path: .venv
key: venv-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
shell: bash
run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv
- name: Install dependencies
run: poetry install
shell: bash
- name: Run pytest
run: poetry run pytest

19 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/publish-docker-hub.yaml


name: Publish Docker image
on:
release:
types: [published]
jobs:
push_to_registry:
name: Push Docker image to Docker Hub
runs-on: ubuntu-latest
steps:
- name: Check out the repo
uses: actions/checkout@v2
- name: Push to Docker Hub
uses: docker/build-push-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}
repository: unitytechnologies/datasetinsights
tags: latest
tag_with_ref: true

44 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.github/workflows/publish-pypi.yaml


name: Publish to pypi
on:
release:
types: [published]
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Get full Python version
id: full-python-version
shell: bash
run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
- name: Install poetry
shell: bash
run: |
curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
echo "$HOME/.poetry/bin" >> $GITHUB_PATH
- name: Set env
run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
- name : Configure poetry
shell: bash
run: poetry config pypi-token.pypi $PYPI_TOKEN
- name: Set poetry version
shell: bash
run: poetry version $RELEASE_VERSION
- name: build
shell: bash
run: poetry build
- name: publish
shell: bash
run: poetry publish

116 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.gitignore


# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so
# Compressed files #
####################
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Logs and databases #
######################
*.log
*.sql
*.sqlite
# OS generated files #
######################
.DS_Store*
ehthumbs.db
Icon?
Thumbs.db
*.bak*
# IDE Project files #
######################
*.sublime-*
*.Rproj
.Rproj.user
.Rhistory
*.xcodeproj
*.idea
# Python #
###########
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Editor
.vscode
# For this Project #
######################
runs/
checkpoints/
metrics/

28 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.pre-commit-config.yaml


# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-merge-conflict
- repo: https://github.com/psf/black
rev: 19.10b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.1
hooks:
- id: flake8
- repo: https://github.com/timothycrosley/isort
rev: 5.1.0
hooks:
- id: isort

13 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/.readthedocs.yaml


version: 2
formats: all
build:
image: stable
python:
version: 3.7
install:
- requirements: docs/requirements.txt
- method: pip
path: .
sphinx:
builder: html
configuration: docs/source/conf.py

116 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/CONTRIBUTING.md


# Table of contents
- [Contributing to datasetinsights](#contributing-to-datasetinsights)
- [Developing datasetinsights](#developing-datasetinsights)
- [Add new dependencies](#add-new-dependencies)
- [Codebase structure](#codebase-structure)
- [Unit testing](#unit-testing)
- [Style Guide](#style-guide)
- [Writing documentation](#writing-documentation)
- [Building documentation](#building-documentation)
## Contributing to datasetinsights
We encourage contributions to the datasetinsights repo, including but not limited to the following categories:
1. You want to improve the documentation of an existing module.
2. You want to provide a bug fix for an outstanding issue.
3. You want to implement a new feature to support new types of perception package outputs.
## Developing datasetinsights
Here are the steps to set up a datasetinsights virtual environment on your machine:
1. Install [poetry](https://python-poetry.org/), [git](https://git-scm.com/) and [pre-commit](https://pre-commit.com/)
2. Create a virtual environment. We recommend using [miniconda](https://docs.conda.io/en/latest/miniconda.html)
```bash
conda create -n dins-dev python=3.7
conda activate dins-dev
```
3. Clone a copy of datasetinsights from source:
```bash
git clone https://github.com/Unity-Technologies/datasetinsights.git
cd datasetinsights
```
4. Install datasetinsights in `develop` mode:
```bash
poetry install
```
This will symlink the Python files from the current local source tree into the installed virtual environment.
The `develop` mode also includes Python packages such as [pytest](https://docs.pytest.org/en/latest/) and [black](https://black.readthedocs.io/en/stable/).
5. Install pre-commit [hook](https://pre-commit.com/#3-install-the-git-hook-scripts) to `.git` folder.
```bash
pre-commit install
# pre-commit installed at .git/hooks/pre-commit
```
### Add new dependencies
Add new Python dependencies to the datasetinsights environment using poetry, for example:
```bash
poetry add numpy@^1.18.4
```
Make sure you only add the packages you actually need instead of adding all transitive dependencies.
Let the package management system resolve the dependencies.
See [poetry add](https://python-poetry.org/docs/cli/#add) for detailed instructions.
## Codebase structure
The datasetinsights package contains the following modules:
- [commands](datasetinsights/commands) This module contains the cli commands.
- [datasets](datasetinsights/datasets) This module contains different datasets. The dataset classes contain knowledge on how the dataset should be loaded into memory.
- [io](datasetinsights/io) This module contains functionality that relates to writing/downloading/uploading to/from different sources.
- [stats](datasetinsights/stats) This module contains code for visualizing and gathering statistics on the dataset (see the sketch below).
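To make the layout concrete, here is a small illustrative sketch (class and function names are taken from the modules above; the dataset path is a placeholder):

```python
from datasetinsights.datasets.unity_perception import AnnotationDefinitions
from datasetinsights.datasets.unity_perception.captures import Captures

data_root = "/path/to/dataset"  # placeholder

# datasets: knows how to load Perception output into memory.
ann_def = AnnotationDefinitions(data_root)
cap = Captures(data_root)

# io: downloading/uploading such datasets (see datasetinsights/io/downloader).
# stats: visualization and statistics, e.g. plots such as
# plot_bboxes(image, bboxes, label_mappings) in datasetinsights/stats.
```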
## Unit testing
We use [pytest](https://docs.pytest.org/en/latest/) to run tests located under `tests/`. Run the entire test suite with
```bash
pytest
```
or run an individual test file for a single test suite, like:
```bash
pytest tests/test_visual.py
```
## Style Guide
We follow Black code [style](https://black.readthedocs.io/en/stable/the_black_code_style.html) for this repository.
The max line length is set at 80.
We enforce this code style using [Black](https://black.readthedocs.io/en/stable/) to format Python code.
In addition to Black, we use [isort](https://github.com/timothycrosley/isort) to sort Python imports.
Before submitting a pull request, run:
```bash
pre-commit run --all-files
```
Fix all issues highlighted by flake8. If you want to allow exceptions such as long URL lines in docstrings, add `# noqa: E501 <describe reason>` to the specific violating line. See [this](https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html) to learn more about how to ignore flake8 errors.
Some editors support automatically formatting on save; see, for example, [vscode](https://code.visualstudio.com/docs/python/editing#_formatting).
## Writing documentation
Datasetinsights uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for formatting docstrings.
Line length inside docstring blocks must be limited to 80 characters, with exceptions such as long URLs or tables.
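For reference, an illustrative Google-style docstring (the function itself is hypothetical):

```python
def filter_captures(data_root, def_id):
    """Filter captures for a single annotation definition.

    Args:
        data_root (str): Root directory of the Perception dataset.
        def_id (str): Annotation definition id to filter by.

    Returns:
        pandas.DataFrame: Captures that match the given definition.
    """
```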
### Building documentation
Follow instructions [here](docs/README.md).

7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/CONTRIBUTING.md.meta


fileFormatVersion: 2
guid: f1b92295dacba8144a75292f32648da4
TextScriptImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

39 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Dockerfile


FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04
RUN apt-get update \
&& apt-get install -y \
build-essential \
curl \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-dev \
python3.7-dev \
python3-pip \
&& ln -s /usr/bin/python3.7 /usr/local/bin/python
# Pin setuptools to 49.x.x until this [issue](https://github.com/pypa/setuptools/issues/2350) is fixed.
RUN python -m pip install --upgrade pip poetry==1.0.10 setuptools==49.6.0 -U pip cryptography==3.3.2
# pin cryptography to 3.3.2 until this (https://github.com/pyca/cryptography/issues/5753) is fixed.
# Add Tini
ENV TINI_VERSION v0.18.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/local/bin/tini
RUN chmod +x /usr/local/bin/tini
WORKDIR /datasetinsights
VOLUME /data /root/.config
COPY poetry.lock pyproject.toml ./
RUN poetry config virtualenvs.create false \
&& poetry install --no-root
COPY . ./
# Run poetry install again to install datasetinsights
RUN poetry config virtualenvs.create false \
&& poetry install
# Use -g to ensure all child process received SIGKILL
ENTRYPOINT ["tini", "-g", "--"]
CMD sh -c "jupyter notebook --notebook-dir=/ --ip=0.0.0.0 --no-browser --allow-root --port=8888 --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' --NotebookApp.base_url=${NB_PREFIX}"

7 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Dockerfile.meta


fileFormatVersion: 2
guid: de1ed20d981a4764d9ea407fdfaf90f1
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

201 com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/LICENCE


Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2020 Unity Technologies
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/LICENCE.meta


fileFormatVersion: 2
guid: f5121630b2ec60f48a2515492953d095
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

18
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Makefile


.PHONY: help
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.DEFAULT_GOAL := help
GCP_PROJECT_ID := unity-ai-thea-test
TAG ?= latest
build: ## Build datasetinsights docker image
@echo "Building docker image for datasetinsights with tag: $(TAG)"
@docker build -t datasetinsights:$(TAG) .
push: ## Push datasetinsights docker image to registry
@echo "Uploading docker image to GCS registry with tag: $(TAG)"
@docker tag datasetinsights:$(TAG) gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) && \
docker push gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG)
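For reference, a typical invocation of these targets might look like the following sketch (the tag value is illustrative):

```bash
# Build the image with a custom tag, then push it to the GCR registry
make build TAG=v0.1.0
make push TAG=v0.1.0
```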

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/Makefile.meta


fileFormatVersion: 2
guid: 350053f97e4e1434a9999c029bd2c83d
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

119
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/README.md


# Dataset Insights
[![PyPI python](https://img.shields.io/pypi/pyversions/datasetinsights)](https://pypi.org/project/datasetinsights)
[![PyPI version](https://badge.fury.io/py/datasetinsights.svg)](https://pypi.org/project/datasetinsights)
[![Downloads](https://pepy.tech/badge/datasetinsights)](https://pepy.tech/project/datasetinsights)
[![Tests](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml/badge.svg?branch=master&event=push)](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml?query=branch%3Amaster+event%3Apush)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
Unity Dataset Insights is a Python package for downloading, parsing, and analyzing synthetic datasets generated using the Unity [Perception package](https://github.com/Unity-Technologies/com.unity.perception).
## Installation
Dataset Insights is distributed as a pip package for easy installation. It works in any standard Python environment; install it with the `pip install datasetinsights` command.
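A minimal install into a fresh virtual environment might look like this sketch (the environment name is illustrative):

```bash
python -m venv venv
source venv/bin/activate
pip install datasetinsights
```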
## Getting Started
### Dataset Statistics
We provide a sample [notebook](notebooks/Perception_Statistics.ipynb) to help you load synthetic datasets generated using the [Perception package](https://github.com/Unity-Technologies/com.unity.perception) and visualize dataset statistics. We plan to support other sample Unity projects in the future.
### Dataset Download
You can download datasets from HTTP(S) URLs, GCS, and Unity Simulation projects using the `download` command from the CLI or the API.
[CLI](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.commands.html#datasetinsights-commands-download)
```bash
datasetinsights download \
--source-uri=<xxx> \
--output=$HOME/data
```
[Programmatically](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.io.downloader.html#module-datasetinsights.io.downloader.gcs_downloader)
UnitySimulationDownloader downloads a dataset from Unity Simulation.
```python3
from datasetinsights.io.downloader import UnitySimulationDownloader
source_uri = "usim://<project_id>/<run_execution_id>"
dest = "~/data"
access_token = "XXX"
downloader = UnitySimulationDownloader(access_token=access_token)
downloader.download(source_uri=source_uri, output=dest)
```
GCSDatasetDownloader downloads a dataset from a GCS location.
```python3
from datasetinsights.io.downloader import GCSDatasetDownloader
source_uri = "gs://url/to/file.zip"  # or "gs://url/to/folder"
dest = "~/data"
downloader = GCSDatasetDownloader()
downloader.download(source_uri=source_uri, output=dest)
```
HTTPDatasetDownloader downloads a dataset from any HTTP(S) location.
```python3
from datasetinsights.io.downloader import HTTPDatasetDownloader
source_uri = "http://url.to.file.zip"
dest = "~/data"
downloader = HTTPDatasetDownloader()
downloader.download(source_uri=source_uri, output=dest)
```
### Dataset Exploration
You can explore the dataset [schema](https://datasetinsights.readthedocs.io/en/latest/Synthetic_Dataset_Schema.html#synthetic-dataset-schema) by using the following APIs:
[Unity Perception](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.datasets.unity_perception.html#datasetinsights-datasets-unity-perception)
AnnotationDefinitions and MetricDefinitions load the synthetic dataset definition tables and return a dictionary containing the requested definition.
```python3
from datasetinsights.datasets.unity_perception import (
    AnnotationDefinitions,
    MetricDefinitions,
)
annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version")
definition_dict = annotation_def.get_definition(def_id="my_definition_id")
metric_def = MetricDefinitions(data_root=dest, version="my_schema_version")
definition_dict = metric_def.get_definition(def_id="my_definition_id")
```
Captures loads the synthetic dataset captures tables and returns a pandas dataframe with capture and annotation columns.
```python3
from datasetinsights.datasets.unity_perception import Captures
captures = Captures(data_root=dest, version="my_schema_version")
captures_df = captures.filter(def_id="my_definition_id")
```
Metrics loads the synthetic dataset metrics table, which holds extra metadata that can be used to describe a particular sequence, capture, or annotation, and returns a pandas dataframe with capture and metric columns.
```python3
from datasetinsights.datasets.unity_perception import Metrics
metrics = Metrics(data_root=dest, version="my_schema_version")
metrics_df = metrics.filter_metrics(def_id="my_definition_id")
```
## Docker
You can use the pre-built Docker image [unitytechnologies/datasetinsights](https://hub.docker.com/r/unitytechnologies/datasetinsights) to run similar commands.
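For example, a download inside the container might look like the following sketch (the source URI is a placeholder, and mounting to /data matches the package's default data root; this is not a verified command from the image's documentation):

```bash
docker run -v $HOME/data:/data unitytechnologies/datasetinsights \
  datasetinsights download --source-uri=http://url/to/file.zip --output=/data
```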
## Documentation
You can find the API documentation on [readthedocs](https://datasetinsights.readthedocs.io/en/latest/).
## Contributing
Please let us know if you encounter a bug by filing an issue. To learn more about making a contribution to Dataset Insights, please see our Contribution [page](CONTRIBUTING.md).
## License
Dataset Insights is licensed under the Apache License, Version 2.0. See [LICENSE](LICENCE) for the full license text.
## Citation
If you find this package useful, consider citing it using:
```
@misc{datasetinsights2020,
title={Unity {D}ataset {I}nsights Package},
author={{Unity Technologies}},
howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}},
year={2020}
}
```

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/README.md.meta


fileFormatVersion: 2
guid: 3b0dfe6bbcfa1ef44864b006d71f63f0
TextScriptImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights.meta


fileFormatVersion: 2
guid: bb75890755a687e4d89909469f8da056
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

0
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__init__.py

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__init__.py.meta


fileFormatVersion: 2
guid: 5890473f9d95fc44abcc1849da06c437
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

36
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__main__.py


import logging
import click
from datasetinsights.commands import Entrypoint
from datasetinsights.constants import CONTEXT_SETTINGS
logging.basicConfig(
level=logging.INFO,
format=(
"%(levelname)s | %(asctime)s | %(name)s | %(threadName)s | "
"%(message)s"
),
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
@click.command(
cls=Entrypoint, help="Dataset Insights.", context_settings=CONTEXT_SETTINGS,
)
@click.option(
"-v",
"--verbose",
is_flag=True,
default=False,
help="Enables verbose mode.",
)
def entrypoint(verbose):
if verbose:
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)
if __name__ == "__main__":
entrypoint()

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/__main__.py.meta


fileFormatVersion: 2
guid: 2f272c80a2e6e7c46a61577129759411
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands.meta


fileFormatVersion: 2
guid: da74e8a36d75ca1459cdf8eb860f9686
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

30
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/__init__.py


import os
import click
class Entrypoint(click.MultiCommand):
""" Click MultiCommand Entrypoint For Datasetinsights CLI
"""
def list_commands(self, ctx):
"""Dynamically get the list of commands."""
rv = []
for filename in os.listdir(os.path.dirname(__file__)):
if filename.endswith(".py") and not filename.startswith("__init__"):
rv.append(filename[:-3])
rv.sort()
return rv
def get_command(self, ctx, name):
"""Dynamically get the command."""
ns = {}
fn = os.path.join(os.path.dirname(__file__), name + ".py")
if not os.path.exists(fn):
return None
with open(fn) as f:
code = compile(f.read(), fn, "exec")
eval(code, ns, ns)
return ns["cli"]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/__init__.py.meta


fileFormatVersion: 2
guid: ab7da21cef59d8e4a8695c9ae8e072e7
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

140
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/download.py


import logging
import re
import click
import datasetinsights.constants as const
from datasetinsights.io.downloader.base import create_dataset_downloader
logger = logging.getLogger(__name__)
class SourceURI(click.ParamType):
"""Represents the Source URI Parameter type.
This extends click.ParamType so that the click framework can validate
supported source URIs according to the prefix pattern.
Raises:
click.BadParameter: if the validation failed.
"""
name = "source_uri"
PREFIX_PATTERN = r"^gs://|^http(s)?://|^usim://"
def convert(self, value, param, ctx):
""" Validate source URI and Converts the value.
"""
match = re.search(self.PREFIX_PATTERN, value)
if not match:
message = (
f"The source uri {value} is not supported. "
f"Pattern: {self.PREFIX_PATTERN}"
)
self.fail(message, param, ctx)
return value
@click.command(context_settings=const.CONTEXT_SETTINGS,)
@click.option(
"-s",
"--source-uri",
type=SourceURI(),
required=True,
help=(
"URI of where this data should be downloaded. "
f"Supported source uri patterns {SourceURI.PREFIX_PATTERN}"
),
)
@click.option(
"-o",
"--output",
type=click.Path(exists=True, file_okay=False, writable=True),
default=const.DEFAULT_DATA_ROOT,
help="Directory on localhost where datasets should be downloaded.",
)
@click.option(
"-b",
"--include-binary",
is_flag=True,
default=False,
help=(
"Whether to download binary files such as images or LIDAR point "
"clouds. This flag applies to Datasets where metadata "
"(e.g. annotation json, dataset catalog, ...) can be separated from "
"binary files."
),
)
@click.option(
"--access-token",
type=str,
default=None,
help="Unity Simulation access token. "
"This will override synthetic datasets source-uri for Unity Simulation",
)
@click.option(
"--checksum-file",
type=str,
default=None,
help="Dataset checksum text file path. "
"Path can be a HTTP(S) url or a local file path. This will help check the "
"integrity of the downloaded dataset.",
)
def cli(
source_uri, output, include_binary, access_token, checksum_file,
):
"""Download datasets to localhost from known locations.
The download command supports downloading from 3 types of sources:
1. Download from Unity Simulation:
You can specify project_id, run_execution_id, access_token in source-uri:
\b
datasetinsights download \\
--source-uri=usim://<access_token>@<project_id>/<run_execution_id> \\
--output=$HOME/data
Alternatively, you can also override access_token such as:
\b
datasetinsights download \\
--source-uri=usim://<project_id>/<run_execution_id> \\
--output=$HOME/data \\
--access-token=<access_token>
2. Downloading from a public http(s) url:
\b
datasetinsights download \\
--source-uri=http://url/to/file.zip \\
--output=$HOME/data
3. Downloading from a GCS url:
\b
datasetinsights download \\
--source-uri=gs://url/to/file.zip \\
--output=$HOME/data
or download all objects under the same directory:
\b
datasetinsights download \\
--source-uri=gs://url/to/directory \\
--output=$HOME/data
"""
ctx = click.get_current_context()
logger.debug(f"Called download command with parameters: {ctx.params}")
downloader = create_dataset_downloader(
source_uri=source_uri, access_token=access_token
)
downloader.download(
source_uri=source_uri,
output=output,
include_binary=include_binary,
checksum_file=checksum_file,
)

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/commands/download.py.meta


fileFormatVersion: 2
guid: 6e67465ce5402284ba070e1c908b78a0
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

16
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/constants.py


DEFAULT_DATA_ROOT = "/data"
# Default Unity Project ID where USim jobs were executed
DEFAULT_PROJECT_ID = "474ba200-4dcc-4976-818e-0efd28efed30"
USIM_API_ENDPOINT = "https://api.simulation.unity3d.com"
# Default Timing text for codetiming.Timer decorator
TIMING_TEXT = "[{name}] elapsed time: {:0.4f} seconds."
# Click CLI context settings
CONTEXT_SETTINGS = {
"help_option_names": ["-h", "--help"],
"show_default": True,
"ignore_unknown_options": True,
"allow_extra_args": True,
}

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/constants.py.meta


fileFormatVersion: 2
guid: 5ba9ab0cee77c5d4c9fc9b6f3190164b
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

118
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/dashboard.py


import argparse
import json
import os
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import datasetinsights.stats.visualization.overview as overview
from datasetinsights.stats.visualization.app import get_app
from datasetinsights.stats.visualization.object_detection import (
render_object_detection_layout,
)
app = get_app()
def main_layout():
""" Method for generating main app layout.
Returns:
html layout: main layout design with tabs for overview statistics
and object detection.
"""
app_layout = html.Div(
[
html.H1(
children="Dataset Insights",
style={
"textAlign": "center",
"padding": 20,
"background": "lightgrey",
},
),
html.Div(
[
dcc.Tabs(
id="page_tabs",
value="dataset_overview",
children=[
dcc.Tab(
label="Overview", value="dataset_overview",
),
dcc.Tab(
label="Object Detection",
value="object_detection",
),
],
),
html.Div(id="main_page_tabs"),
]
),
# Sharing data between callbacks using hidden division.
# These hidden dcc and html components are for storing data-root
# into the division. This is further used in callbacks made in the
object_detection module. This is a temporary hack, taken from example 1
of the Dash tutorial on sharing data between callbacks.
# ref: https://dash.plotly.com/sharing-data-between-callbacks
# TODO: Fix this using a better solution to share data.
dcc.Dropdown(id="dropdown", style={"display": "none"}),
html.Div(id="data_root_value", style={"display": "none"}),
]
)
return app_layout
@app.callback(
Output("data_root_value", "children"), [Input("dropdown", "value")]
)
def store_data_root(value):
""" Method for storing data-root value in a hidden division.
Returns:
json : data-root encoded in json to be stored in data_root_value div.
"""
json_data_root = json.dumps(data_root)
return json_data_root
@app.callback(
Output("main_page_tabs", "children"),
[Input("page_tabs", "value"), Input("data_root_value", "children")],
)
def render_content(value, json_data_root):
""" Method for rendering dashboard layout based
on the selected tab value.
Args:
value(str): selected tab value
json_data_root: data root stored in hidden div in json format.
Returns:
html layout: layout for the selected tab.
"""
# read data root value from the data_root_value division
data_root = json.loads(json_data_root)
if value == "dataset_overview":
return overview.html_overview(data_root)
elif value == "object_detection":
return render_object_detection_layout(data_root)
def check_path(path):
""" Method for checking if the given data-root path is valid or not."""
if os.path.isdir(path):
return path
else:
raise ValueError(f"Path {path} not found")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--data-root", help="Path to the data root")
args = parser.parse_args()
data_root = check_path(args.data_root)
app.layout = main_layout()
app.run_server(debug=True)

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/dashboard.py.meta


fileFormatVersion: 2
guid: 58ebaee02a1b55e43b6d59489b8612a7
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets.meta


fileFormatVersion: 2
guid: 108e7ec1fa06ea84eb5be362e4dea313
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

0
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/__init__.py

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/__init__.py.meta


fileFormatVersion: 2
guid: edbce5e9bac110a438499a740428ba9e
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

2
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/exceptions.py


class DatasetNotFoundError(Exception):
""" Raise when a dataset file can't be found."""

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/exceptions.py.meta


fileFormatVersion: 2
guid: 3c63f372326b3e74c867cb1ddde4fa58
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

79
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/synthetic.py


""" Simulation Dataset Catalog
"""
import logging
from pyquaternion import Quaternion
from datasetinsights.io.bbox import BBox2D, BBox3D
logger = logging.getLogger(__name__)
def read_bounding_box_3d(annotation, label_mappings=None):
""" Convert dictionary representations of 3d bounding boxes into objects
of the BBox3d class
Args:
annotation (List[dict]): 3D bounding box annotation
label_mappings (dict): a dict of {label_id: label_name} mapping
Returns:
A list of 3d bounding box objects
"""
bboxes = []
for b in annotation:
label_id = b["label_id"]
translation = (
b["translation"]["x"],
b["translation"]["y"],
b["translation"]["z"],
)
size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
rotation = b["rotation"]
rotation = Quaternion(
x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
)
if label_mappings and label_id not in label_mappings:
continue
box = BBox3D(
translation=translation,
size=size,
label=label_id,
sample_token=0,
score=1,
rotation=rotation,
)
bboxes.append(box)
return bboxes
def read_bounding_box_2d(annotation, label_mappings=None):
"""Convert dictionary representations of 2d bounding boxes into objects
of the BBox2D class
Args:
annotation (List[dict]): 2D bounding box annotation
label_mappings (dict): a dict of {label_id: label_name} mapping
Returns:
A list of 2D bounding box objects
"""
bboxes = []
for b in annotation:
label_id = b["label_id"]
x = b["x"]
y = b["y"]
w = b["width"]
h = b["height"]
if label_mappings and label_id not in label_mappings:
continue
box = BBox2D(label=label_id, x=x, y=y, w=w, h=h)
bboxes.append(box)
return bboxes
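To illustrate the conversion above, a minimal sketch (the annotation dictionary is hypothetical but follows the key layout `read_bounding_box_2d` expects):

```python3
from datasetinsights.datasets.synthetic import read_bounding_box_2d

# One 2D bounding box annotation, shaped like the Perception package output
annotation = [
    {"label_id": 34, "x": 10.0, "y": 20.0, "width": 118.0, "height": 91.0}
]
bboxes = read_bounding_box_2d(annotation, label_mappings={34: "snack_chips_pringles"})
print(bboxes[0])  # a BBox2D object with label 34
```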

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/synthetic.py.meta


fileFormatVersion: 2
guid: ef52ac9e1f365b94ea2981908c8b47b3
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception.meta


fileFormatVersion: 2
guid: eb590aab824eace43af805377245b87e
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

12
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py


from .captures import Captures
from .metrics import Metrics
from .references import AnnotationDefinitions, Egos, MetricDefinitions, Sensors
__all__ = [
"AnnotationDefinitions",
"Captures",
"Egos",
"Metrics",
"MetricDefinitions",
"Sensors",
]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py.meta


fileFormatVersion: 2
guid: 762895cdff237464ab31b646ccddb66e
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

201
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py


""" Load Synthetic dataset captures and annotations tables
"""
import pandas as pd
from datasetinsights.constants import DEFAULT_DATA_ROOT
from .exceptions import DefinitionIDError
from .tables import DATASET_TABLES, SCHEMA_VERSION, glob, load_table
class Captures:
"""Load captures table
A capture record stores the relationship between a captured file,
a collection of annotations, and extra metadata that describes this
capture. For more detail, see schema design here:
:ref:`captures`
Examples:
.. code-block:: python
>>> captures = Captures(data_root="/data")
#captures class automatically loads the captures (e.g. lidar scan,
image, depth map) and the annotations (e.g semantic segmentation
labels, bounding boxes, etc.)
>>> data = captures.filter(def_id="6716c783-1c0e-44ae-b1b5-7f068454b66e") # noqa E501 table command should not be broken down into multiple lines
#return the captures and annotations filtered by the annotation
definition id
Attributes:
captures (pd.DataFrame): a collection of captures without annotations
annotations (pd.DataFrame): a collection of annotations
"""
TABLE_NAME = "captures"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION):
""" Initialize Captures
Args:
data_root (str): the root directory of the dataset
version (str): desired schema version
"""
self.captures = self._load_captures(data_root, version)
self.annotations = self._load_annotations(data_root, version)
def _load_captures(self, data_root, version):
"""Load captures except annotations.
:ref:`captures`
Args:
data_root (str): the root directory of the dataset
version (str): desired schema version
Returns:
A pandas dataframe with combined capture records.
Columns: 'id' (UUID of the capture), 'sequence_id',
'step' (index of captures), 'timestamp' (Simulation timestamp in
milliseconds since the sequence started.), 'sensor'
(sensor attributes), 'ego' (ego pose of the simulation),
'filename' (single filename that stores captured data)
Example Captures DataFrame:
id(str) sequence_id(str) step(int) timestamp(float) \
cdc8bc5c... 2954c... 300 4.979996
sensor (dict) \
{'sensor_id': 'da873b...', 'ego_id': '44ca9...', 'modality': 'camera',
'translation': [0.0, 0.0, 0.0], 'rotation': [0.0, 0.0, 0.0, 1.0],
'scale': 0.344577253}
ego (dict) \
{'ego_id': '44ca9...', 'translation': [0.0, 0.0, -20.0],
'rotation': [0.0, 0.0, 0.0, 1.0], 'velocity': None,
'acceleration': None}
filename (str) format (str)
RGB3/rgb_30... PNG
"""
captures = []
for c_file in glob(data_root, self.FILE_PATTERN):
capture = load_table(c_file, self.TABLE_NAME, version, max_level=0)
if "annotations" in capture.columns:
capture = capture.drop(columns="annotations")
captures.append(capture)
# pd.concat might create a memory bottleneck
return pd.concat(captures, axis=0)
def _load_annotations(self, data_root, version):
"""Load annotations and capture IDs.
:ref:`capture-annotation`
Args:
data_root (str): the root directory of the dataset
version (str): desired schema version
Returns:
A pandas dataframe with combined annotation records
Columns: 'id' (annotation id), 'annotation_definition' (annotation
definition ID),
'values'
(list of objects that store annotation data, e.g. 2d bounding
box), 'capture.id'
Example Annotation Dataframe:
id(str) annotation_definition(str) \
ace0... 6716c...
values (dict) \
[{'label_id': 34, 'label_name': 'snack_chips_pringles',
...'height': 118.0}, {'label_id': 35, '... 'height': 91.0}...]
capture.id (str)
cdc8b...
"""
annotations = []
for c_file in glob(data_root, self.FILE_PATTERN):
try:
annotation = load_table(
c_file,
self.TABLE_NAME,
version,
record_path="annotations",
meta="id",
meta_prefix="capture.",
)
except KeyError:
annotation = pd.DataFrame(
{"annotation_definition": [], "capture.id": []}
)
annotations.append(annotation)
return pd.concat(annotations, axis=0)
def filter(self, def_id):
"""Get captures and annotations filtered by annotation definition id
:ref:`captures`
Args:
def_id (int): annotation definition id used to filter results
Returns:
A pandas dataframe with captures and annotations
Columns: 'id' (capture id), 'sequence_id', 'step', 'timestamp',
'sensor', 'ego',
'filename', 'format', 'annotation.id',
'annotation.annotation_definition','annotation.values'
Raises:
DefinitionIDError: Raised if none of the annotation records in the
combined annotation and captures dataframe match the def_id
specified as a parameter.
Example Returned Dataframe (first row):
+---------------+------------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+---------------+--------------+---------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------------------+
| label_id(int) | sequence_id(str) | step(int) | timestamp (float) | sensor (dict) | ego (dict) | filename(str) | format (str) | annotation.id (str) | annotation.annotation_definition(str) | annotation.values |
+===============+==================+===========+===================+===============================================================================================================================================================+============+===============+==============+=====================+=======================================+=======================================================================================================================+
| 2 | None | 50 | 4.9 | {'sensor_id': 'dDa873b...', 'ego_id': '44ca9...', 'modality': 'camera','translation': [0.0, 0.0, 0.0], 'rotation': [0.0, 0.0, 0.0, 1.0],'scale': 0.344577253} | ... | RGB3/asd.png | PNG | ace0 | 6716c | [{'label_id': 34, 'label_name': 'snack_chips_pringles',...'height': 118.0}, {'label_id': 35, '... 'height': 91.0}...] |
+---------------+------------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+---------------+--------------+---------------------+---------------------------------------+-----------------------------------------------------------------------------------------------------------------------+
""" # noqa: E501 table should not be broken down into multiple lines
if self.annotations.empty:
msg = (
f"Can't find annotations records associate with the given "
f"definition id {def_id}."
)
raise DefinitionIDError(msg)
mask = self.annotations.annotation_definition == def_id
annotations = (
self.annotations[mask]
.set_index("capture.id")
.add_prefix("annotation.")
)
captures = self.captures.set_index("id")
combined = (
captures.join(annotations, how="inner")
.reset_index()
.rename(columns={"index": "id"})
)
if combined.empty:
msg = (
f"Can't find annotations records associate with the given "
f"definition id {def_id}."
)
raise DefinitionIDError(msg)
return combined

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py.meta


fileFormatVersion: 2
guid: 414a16086f4a33f40a03ab87d72bf11c
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

3
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py


class DefinitionIDError(Exception):
""" Raise when a given definition id can't be found.
"""

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py.meta


fileFormatVersion: 2
guid: 5ad46965cdf3e4c45b627e8c92108b03
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

124
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py


"""Load Synthetic dataset Metrics
"""
import json
import dask.bag as db
from datasetinsights.constants import DEFAULT_DATA_ROOT
from .exceptions import DefinitionIDError
from .tables import DATASET_TABLES, SCHEMA_VERSION, glob
from .validation import verify_version
class Metrics:
"""Load metrics table
Metrics store extra metadata that can be used to describe a particular
sequence, capture or annotation. Metric records are stored as arbitrary
number (M) of key-value pairs.
For more detail, see schema design doc:
:ref:`metrics`
Attributes:
metrics (dask.bag.core.Bag): a collection of metrics records
Examples:
>>> metrics = Metrics(data_root="/data")
>>> metrics_df = metrics.filter_metrics(def_id="my_definition_id")
#metrics_df now contains all the metrics data corresponding to
"my_definition_id"
One example of metrics_df (first row shown below):
+---------------+------------------+---------------------+
| label_id(int) | instance_id(int) | visible_pixels(int) |
+===============+==================+=====================+
| 2 | 2 | 2231 |
+---------------+------------------+---------------------+
"""
TABLE_NAME = "metrics"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION):
""" Initialize Metrics
Args:
data_root (str): the root directory of the dataset containing
metrics
version (str): desired schema version
"""
self.metrics = self._load_metrics(data_root, version)
def _load_metrics(self, data_root, version):
"""
:ref:`metrics`
Args:
data_root: (str): the root directory of the dataset containing
metrics
version (str): desired schema version
Returns:
dask.bag.core.Bag
"""
metrics_files = db.from_sequence(glob(data_root, self.FILE_PATTERN))
metrics = metrics_files.map(
lambda path: Metrics._load_json(path, self.TABLE_NAME, version)
).flatten()
return metrics
@staticmethod
def _normalize_values(metric):
""" Filter unnecessary info from metric.
1-level flatten of the metrics.values column.
"""
values = metric["values"]
for value in values:
value["capture_id"] = metric["capture_id"]
value["annotation_id"] = metric["annotation_id"]
value["sequence_id"] = metric["sequence_id"]
value["step"] = metric["step"]
return values
def filter_metrics(self, def_id):
"""Get all metrics filtered by a given metric definition id
Args:
def_id (str): metric definition id used to filter results
Raises:
DefinitionIDError: raised if no metrics records match the given
def_id
Returns (pd.DataFrame):
Columns: "label_id", "capture_id", "annotation_id", "sequence_id",
"step"
"""
metrics = (
self.metrics.filter(
lambda metric: metric["metric_definition"] == def_id
)
.map(Metrics._normalize_values)
.flatten()
)
if metrics.count().compute() == 0:
msg = (
f"Can't find metrics records associated with the given "
f"definition id {def_id}."
)
raise DefinitionIDError(msg)
return metrics.to_dataframe().compute()
@staticmethod
def _load_json(filename, table_name, version):
"""Load records from json files into a dict
"""
with open(filename, "r") as file:
data = json.load(file)
verify_version(data, version)
return data[table_name]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py.meta


fileFormatVersion: 2
guid: c20dd3dca57f517448ffb0fec4e6a631
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

253
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/references.py


""" Load Synthetic dataset references tables
"""
import pandas as pd
from .tables import DATASET_TABLES, SCHEMA_VERSION, glob, load_table
from .validation import NoRecordError
class AnnotationDefinitions:
"""Load annotation_definitions table
For more detail, see schema design here:
:ref:`annotation_definitions.json`
Attributes:
table (pd.DataFrame): a collection of annotation_definitions records
"""
TABLE_NAME = "annotation_definitions"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root, version=SCHEMA_VERSION):
""" Initialize AnnotationDefinitions
Args:
data_root (str): the root directory of the dataset containing
tables
version (str): desired schema version
"""
self.table = self.load_annotation_definitions(data_root, version)
def load_annotation_definitions(self, data_root, version):
"""Load annotation definition files.
For more detail, see schema design here:
:ref:`annotation_definitions.json`
Args:
data_root (str): the root directory of the dataset containing
tables
version (str): desired schema version
Returns:
A Pandas dataframe with annotation definition records.
Columns: 'id' (annotation id), 'name' (annotation name),
'description' (string description), 'format'
(string describing format), 'spec' ( Format-specific specification
for the annotation values)
"""
definitions = []
for def_file in glob(data_root, self.FILE_PATTERN):
definition = load_table(def_file, self.TABLE_NAME, version)
definitions.append(definition)
if definitions:
combined = pd.concat(definitions, axis=0).drop_duplicates(
subset="id"
)
else:
combined = pd.DataFrame({})
return combined
def get_definition(self, def_id):
"""Get the annotation definition for a given definition id
Args:
def_id (int): annotation definition id used to filter results
Returns:
a dictionary containing the annotation definition
"""
mask = self.table.id == def_id
definition = self.table[mask]
if definition.empty:
raise NoRecordError(
f"No records are found in the annotation_definitions file "
f"that matches the specified definition id: {def_id}"
)
definition = definition.to_dict("records")[0]
return definition
class MetricDefinitions:
"""Load metric_definitions table
For more detail, see schema design here:
:ref:`metric_definitions.json`
Attributes:
table (pd.DataFrame): a collection of metric_definitions records with columns: id
(id for metric definition), name, description, spec (definition specific
spec)
"""
TABLE_NAME = "metric_definitions"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root, version=SCHEMA_VERSION):
""" Initialize MetricDefinitions
Args:
data_root (str): the root directory of the dataset containing
tables
version (str): desired schema version
"""
self.table = self.load_metric_definitions(data_root, version)
def load_metric_definitions(self, data_root, version):
"""Load metric definition files.
:ref:`metric_definitions.json`
Args:
data_root (str): the root directory of the dataset containing tables
version (str): desired schema version
Returns:
A Pandas dataframe with metric definition records.
a collection of metric_definitions records with columns: id
(id for metric definition), name, description, spec (definition specific
spec)
"""
definitions = []
for def_file in glob(data_root, self.FILE_PATTERN):
definition = load_table(def_file, self.TABLE_NAME, version)
definitions.append(definition)
combined = pd.concat(definitions, axis=0).drop_duplicates(subset="id")
return combined
def get_definition(self, def_id):
"""Get the metric definition for a given definition id
Args:
def_id (int): metric definition id used to filter results
Returns:
a dictionary containing metric definition
"""
mask = self.table.id == def_id
definition = self.table[mask]
if definition.empty:
raise NoRecordError(
f"No records are found in the metric_definitions file "
f"that matches the specified definition id: {def_id}"
)
definition = definition.to_dict("records")[0]
return definition
class Egos:
"""Load egos table
For more detail, see schema design here:
:ref:`egos.json`
Attributes:
table (pd.DataFrame): a collection of egos records
"""
TABLE_NAME = "egos"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root, version=SCHEMA_VERSION):
"""Initialize `:ref:Egos`
Args:
data_root (str): the root directory of the dataset containing
ego tables. Two columns: id (ego id) and description
version (str): desired schema version
"""
self.table = self.load_egos(data_root, version)
def load_egos(self, data_root, version):
"""Load egos files.
For more detail, see schema design here:
:ref:`egos.json`
Args:
data_root (str): the root directory of the dataset containing
ego tables
version (str): desired schema version
Returns:
A pandas dataframe with all ego records with two columns: id
(ego id) and description
"""
egos = []
for ego_file in glob(data_root, self.FILE_PATTERN):
ego = load_table(ego_file, self.TABLE_NAME, version)
egos.append(ego)
combined = pd.concat(egos, axis=0).drop_duplicates(subset="id")
return combined
class Sensors:
"""Load sensors table
For more detail, see schema design here:
:ref:`sensors.json`
Attributes:
table (pd.DataFrame): a collection of sensors records with columns:
'id' (sensor id), 'ego_id', 'modality'
({camera, lidar, radar, sonar,...} -- Sensor modality), 'description'
"""
TABLE_NAME = "sensors"
FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file
def __init__(self, data_root, version=SCHEMA_VERSION):
""" Initialize Sensors
Args:
data_root (str): the root directory of the dataset containing
tables
version (str): desired schema version
"""
self.table = self.load_sensors(data_root, version)
def load_sensors(self, data_root, version):
"""Load sensors files.
For more detail, see schema design here:
:ref:`sensors.json`
Args:
data_root (str): the root directory of the dataset containing
tables
version (str): desired schema version
Returns:
A pandas dataframe with all sensors records with columns:
'id' (sensor id), 'ego_id', 'modality'
({camera, lidar, radar, sonar,...} -- Sensor modality), 'description'
"""
sensors = []
for sensor_file in glob(data_root, self.FILE_PATTERN):
sensor = load_table(sensor_file, self.TABLE_NAME, version)
sensors.append(sensor)
combined = pd.concat(sensors, axis=0).drop_duplicates(subset="id")
return combined

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/references.py.meta


fileFormatVersion: 2
guid: 28399836ae95db949b8a439aeda34d9a
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

86
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py


import json
import logging
import pathlib
from collections import namedtuple
from enum import Enum
import pandas as pd
from .validation import verify_version
logger = logging.getLogger(__name__)
SCHEMA_VERSION = "0.0.1" # Synthetic dataset schema version
class FileType(Enum):
BINARY = "binary"
REFERENCE = "reference"
METRIC = "metric"
CAPTURE = "capture"
Table = namedtuple("Table", "file pattern filetype")
DATASET_TABLES = {
"annotation_definitions": Table(
"**/annotation_definitions.json",
r"(?:\w|-|/)*annotation_definitions.json",
FileType.REFERENCE,
),
"captures": Table(
"**/captures_*.json",
r"(?:\w|-|/)*captures_[0-9]+.json",
FileType.CAPTURE,
),
"egos": Table("**/egos.json", r"(?:\w|-|/)*egos.json", FileType.REFERENCE),
"metric_definitions": Table(
"**/metric_definitions.json",
r"(?:\w|-|/)*metric_definitions.json",
FileType.REFERENCE,
),
"metrics": Table(
"**/metrics_*.json", r"(?:\w|-|/)*metrics_[0-9]+.json", FileType.METRIC
),
"sensors": Table(
"**/sensors.json", r"(?:\w|-|/)*sensors.json", FileType.REFERENCE
),
}
def glob(data_root, pattern):
"""Find all matching files in a directory.
Args:
data_root (str): directory containing capture files
pattern (str): Unix file pattern
Yields:
str: matched filenames in a directory
"""
path = pathlib.Path(data_root)
for fp in path.glob(pattern):
yield fp
def load_table(json_file, table_name, version, **kwargs):
"""Load records from json files into a pandas table
Args:
json_file (str): filename to json.
table_name (str): table name in the json file to be loaded
version (str): requested version of this table
**kwargs: arbitrary keyword arguments to be passed to pandas'
json_normalize method.
Returns:
a pandas dataframe of the loaded table.
Raises:
VersionError: If the version in json file does not match the requested
version.
"""
logger.debug(f"Loading table {table_name} from {json_file}")
with open(json_file, "r") as file:
    data = json.load(file)
verify_version(data, version)
table = pd.json_normalize(data[table_name], **kwargs)
return table
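To tie `glob` and `load_table` together, a short sketch (it assumes an egos.json file exists somewhere under /data):

```python3
from datasetinsights.datasets.unity_perception.tables import (
    DATASET_TABLES,
    SCHEMA_VERSION,
    glob,
    load_table,
)

pattern = DATASET_TABLES["egos"].file  # "**/egos.json"
for json_file in glob("/data", pattern):
    egos = load_table(json_file, "egos", SCHEMA_VERSION)
    print(egos.head())
```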

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py.meta


fileFormatVersion: 2
guid: a463dc85bbc1a464e99103d91ede2bc9
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

57
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py


""" Validate Simulation Data
"""
class VersionError(Exception):
"""Raise when the data file version does not match"""
pass
class DuplicateRecordError(Exception):
""" Raise when the definition file has duplicate definition id
"""
pass
class NoRecordError(Exception):
""" Raise when no record is found matching a given definition id
"""
pass
def verify_version(json_data, version):
"""Verify json schema version
Args:
json_data (json): a json object loaded from file.
version (str): string of the requested version.
Raises:
VersionError: If the version in json file does not match the requested
version.
"""
loaded = json_data["version"]
if loaded != version:
raise VersionError(f"Version mismatch. Expected version: {version}")
def check_duplicate_records(table, column, table_name):
""" Check if table has duplicate records for a given column
Args:
table (pd.DataFrame): a pandas dataframe
column (str): the column where no duplication is allowed
table_name (str): table name
Raises:
DuplicateRecordError: If duplicate records are found in a column
"""
if table[column].nunique() != len(table):
raise DuplicateRecordError(
f"Duplicate record was found in {column} of table {table_name}. "
f"This column is expected to be unique. Violating this requirement "
f"might cause ambiguity when the records are loaded."
)
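A quick sketch of `verify_version` (the json payload is hypothetical):

```python3
from datasetinsights.datasets.unity_perception.validation import (
    VersionError,
    verify_version,
)

try:
    verify_version({"version": "0.0.1", "egos": []}, "0.0.2")
except VersionError as e:
    print(e)  # Version mismatch. Expected version: 0.0.2
```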

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py.meta


fileFormatVersion: 2
guid: 7f34c23e908e98d4e9dca0d7e8bd9bf1
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io.meta


fileFormatVersion: 2
guid: b7b7c71c5464e5d469afcd30440015e4
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/__init__.py


from .bbox import BBox2D
from .downloader import create_dataset_downloader
__all__ = [
"BBox2D",
"create_dataset_downloader",
]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/__init__.py.meta


fileFormatVersion: 2
guid: cd7b49678064cc140a42019e9ae81e3d
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/bbox.py.meta


fileFormatVersion: 2
guid: 1dd346d7eb846064883556c3b6a2a8bd
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

218
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/download.py


import hashlib
import logging
import os
import re
import tempfile
import zlib
from pathlib import Path
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from .exceptions import ChecksumError, DownloadError
logger = logging.getLogger(__name__)
# Timeout of requests (in seconds)
DEFAULT_TIMEOUT = 1800
# Retry after failed request
DEFAULT_MAX_RETRIES = 5
class TimeoutHTTPAdapter(HTTPAdapter):
def __init__(self, timeout, *args, **kwargs):
self.timeout = timeout
super().__init__(*args, **kwargs)
def send(self, request, **kwargs):
kwargs["timeout"] = self.timeout
return super().send(request, **kwargs)
def download_file(source_uri: str, dest_path: str, file_name: str = None):
"""Download a file specified from a source uri
Args:
source_uri (str): source url where the file should be downloaded
dest_path (str): destination path of the file
file_name (str): file name of the file to be downloaded
Returns:
String of destination path.
"""
logger.debug(f"Trying to download file from {source_uri} -> {dest_path}")
adapter = TimeoutHTTPAdapter(
timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES)
)
with requests.Session() as http:
http.mount("https://", adapter)
try:
response = http.get(source_uri)
response.raise_for_status()
except requests.exceptions.RequestException as ex:
logger.error(ex)
err_msg = (
f"The request download from {source_uri} -> {dest_path} can't "
f"be completed."
)
raise DownloadError(err_msg)
else:
dest_path = Path(dest_path)
if not file_name:
file_name = _parse_filename(response, source_uri)
dest_path = dest_path / file_name
dest_path.parent.mkdir(parents=True, exist_ok=True)
with open(dest_path, "wb") as f:
f.write(response.content)
return dest_path
def checksum_matches(filepath, expected_checksum, algorithm="CRC32"):
""" Check if the checksum matches
Args:
filepath (str): the downloaded file path
expected_checksum (int): expected checksum of the file
algorithm (str): checksum algorithm. Defaults to CRC32
Returns:
True if the file checksum matches.
"""
computed = compute_checksum(filepath, algorithm)
return computed == expected_checksum
def validate_checksum(filepath, expected_checksum, algorithm="CRC32"):
""" Validate checksum of the downloaded file.
Args:
filepath (str): the downloaded file path
expected_checksum (int): expected checksum of the file
algorithm (str): checksum algorithm. Defaults to CRC32
Raises:
ChecksumError if the file checksum does not match.
"""
if not checksum_matches(filepath, expected_checksum, algorithm):
raise ChecksumError
def compute_checksum(filepath, algorithm="CRC32"):
""" Compute the checksum of a file.
Args:
filepath (str): the downloaded file path
algorithm (str): checksum algorithm. Defaults to CRC32
Returns:
int: the checksum value
"""
if algorithm == "CRC32":
chs = _crc32_checksum(filepath)
elif algorithm == "MD5":
chs = _md5_checksum(filepath)
else:
raise ValueError("Unsupported checksum algorithm!")
return chs
def _crc32_checksum(filepath):
""" Calculate the checksum of a file using CRC32.
"""
with open(filepath, "rb") as f:
checksum = zlib.crc32(f.read())
return checksum
def _md5_checksum(filename):
""" Calculate the checksum of a file using MD5.
"""
md5 = hashlib.md5()
with open(filename, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
md5.update(chunk)
return md5.hexdigest()
def get_checksum_from_file(filepath):
""" This method return checksum of the file whose filepath is given.
Args:
filepath (str): Path of the checksum file.
Path can be an HTTP(S) URL or a local file path.
Raises:
ValueError: Raises this error if filepath is not local or not
HTTP or HTTPS url.
"""
if filepath.startswith(("http://", "https://")):
with tempfile.TemporaryDirectory() as tmp:
checksum_file_path = os.path.join(tmp, "checksum.txt")
file_path = download_file(
source_uri=filepath, dest_path=checksum_file_path
)
return _read_checksum_from_txt(file_path)
elif os.path.isfile(filepath):
return _read_checksum_from_txt(filepath)
else:
raise ValueError(f"Can not get checksum from path: {filepath}")
def _read_checksum_from_txt(filepath):
""" This method reads checksum from a txt file and returns it.
Args:
filepath (str): Local filepath of the checksum file.
Returns:
str: checksum value from the checksum file.
"""
with open(filepath) as file:
checksum = file.read()
return checksum
def _parse_filename(response, uri):
file_name = _get_filename_from_response(response)
if file_name is None:
file_name = _get_file_name_from_uri(uri)
return file_name
def _get_filename_from_response(response):
""" Gets filename from requests response object
Args:
response: requests.Response() object that contains the server's
response to the HTTP request.
Returns:
filename (str): Name of the file to be downloaded
"""
cd = response.headers.get("content-disposition")
if not cd:
return None
file_name = re.findall("filename=(.+)", cd)
if len(file_name) == 0:
return None
return file_name[0]
def _get_file_name_from_uri(uri):
""" Gets filename from URI
Args:
uri (str): URI
"""
return uri.split("/")[-1]
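A brief sketch of the checksum helpers above (the file path is hypothetical):

```python3
from datasetinsights.io.download import compute_checksum, validate_checksum

# Compute an MD5 checksum, then validate the same file against it;
# validate_checksum raises ChecksumError on mismatch (CRC32 is the default)
checksum = compute_checksum("/data/dataset.zip", algorithm="MD5")
validate_checksum("/data/dataset.zip", checksum, algorithm="MD5")
```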

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/download.py.meta


fileFormatVersion: 2
guid: c216948dd1001a74a8924e984e10fd07
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader.meta


fileFormatVersion: 2
guid: 95081547889c43a41b2e74a036c3f840
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

11
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/__init__.py


from .base import create_dataset_downloader
from .gcs_downloader import GCSDatasetDownloader
from .http_downloader import HTTPDatasetDownloader
from .unity_simulation import UnitySimulationDownloader
__all__ = [
"UnitySimulationDownloader",
"HTTPDatasetDownloader",
"create_dataset_downloader",
"GCSDatasetDownloader",
]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/__init__.py.meta


fileFormatVersion: 2
guid: ae10a9b1d099dc14b9d9b6cf8b340846
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

83
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/base.py


import re
from abc import ABC, abstractmethod
_registry = {}
def _find_downloader(source_uri):
"""
This function returns the correct DatasetDownloader
from a registry based on the source-uri provided
Args:
source_uri: URI of where this data should be downloaded.
Returns: The dataset downloader class that is registered with the
source-uri protocol.
"""
protocols = "|".join(_registry.keys())
pattern = re.compile(f"({protocols})")
protocol = pattern.findall(source_uri)
if source_uri.startswith(("https://", "http://")):
protocol = "http://"
elif protocol:
protocol = protocol[0]
else:
raise ValueError(f"Downloader not found for source-uri '{source_uri}'")
return _registry.get(protocol)
def create_dataset_downloader(source_uri, **kwargs):
"""
This function instantiates the dataset downloader
after finding it with the source-uri provided
Args:
source_uri: URI used to look up the correct dataset downloader
**kwargs:
Returns: The dataset downloader instance matching the source-uri.
"""
downloader_class = _find_downloader(source_uri=source_uri)
return downloader_class(**kwargs)
class DatasetDownloader(ABC):
"""This is the base class for all dataset downloaders
The DatasetDownloader can be subclassed in the following way:
class NewDatasetDownloader(DatasetDownloader, protocol="protocol://")
Here the 'protocol://' should match the prefix of the source_uri that the
download method supports, e.g. http:// or gs://
"""
def __init__(self, **kwargs):
pass
@classmethod
def __init_subclass__(cls, protocol=None, **kwargs):
if protocol:
_registry[protocol] = cls
else:
raise NotImplementedError(
f"Subclass needs to have class keyword argument named protocol."
)
super().__init_subclass__(**kwargs)
@abstractmethod
def download(self, source_uri, output, **kwargs):
""" This method downloads a dataset stored at the source_uri and stores it
in the output directory
Args:
source_uri: URI that points to the dataset that should be downloaded
output: path to local folder where the dataset should be stored
"""
raise NotImplementedError("Subclass needs to implement this method")

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/base.py.meta


fileFormatVersion: 2
guid: 7f5c18ade6b16754ca766b5a79f8dea7
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

26
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py


from datasetinsights.io.downloader.base import DatasetDownloader
from datasetinsights.io.gcs import GCSClient
class GCSDatasetDownloader(DatasetDownloader, protocol="gs://"):
""" This class is used to download data from GCS
"""
def __init__(self, **kwargs):
""" initiating GCSDownloader
"""
self.client = GCSClient()
def download(self, source_uri=None, output=None, **kwargs):
"""
Args:
source_uri: This is the downloader-uri that indicates where on
GCS the dataset should be downloaded from.
The expected source-uri follows these patterns
gs://bucket/folder or gs://bucket/folder/data.zip
output: This is the path to the directory
where the download will store the dataset.
"""
self.client.download(local_path=output, url=source_uri)
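A sketch of the intended call path for GCS downloads, assuming default Google Cloud credentials and a hypothetical bucket:

from datasetinsights.io.downloader import create_dataset_downloader

source_uri = "gs://my-bucket/perception-dataset"  # hypothetical bucket
downloader = create_dataset_downloader(source_uri)
downloader.download(source_uri=source_uri, output="/tmp/perception-dataset")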

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py.meta


fileFormatVersion: 2
guid: f6a00cd7d3adcf84fb7ddd1d8eb6ee75
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

51
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/http_downloader.py


import logging
import os
from datasetinsights.io.download import (
download_file,
get_checksum_from_file,
validate_checksum,
)
from datasetinsights.io.downloader.base import DatasetDownloader
from datasetinsights.io.exceptions import ChecksumError
logger = logging.getLogger(__name__)
class HTTPDatasetDownloader(DatasetDownloader, protocol="http://"):
""" This class is used to download data from any HTTP or HTTPS public url
and perform function such as downloading the dataset and checksum
validation if checksum file path is provided.
"""
def download(self, source_uri, output, checksum_file=None, **kwargs):
""" This method is used to download the dataset from HTTP or HTTPS url.
Args:
source_uri (str): This is the downloader-uri that indicates where
the dataset should be downloaded from.
output (str): This is the path to the directory where the download
will store the dataset.
checksum_file (str): This is path of the txt file that contains
checksum of the dataset to be downloaded. It
can be HTTP or HTTPS url or local path.
Raises:
ChecksumError: This will raise this error if checksum doesn't
matches
"""
dataset_path = download_file(source_uri, output)
if checksum_file:
logger.debug("Reading checksum from checksum file.")
checksum = get_checksum_from_file(checksum_file)
try:
logger.debug("Validating checksum!!")
validate_checksum(dataset_path, int(checksum))
except ChecksumError as e:
logger.info("Checksum mismatch. Deleting the downloaded file.")
os.remove(dataset_path)
raise e
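A sketch of an HTTP download with checksum validation, assuming hypothetical dataset and checksum URLs:

from datasetinsights.io.downloader import create_dataset_downloader
from datasetinsights.io.exceptions import ChecksumError

source_uri = "https://example.com/dataset.zip"  # hypothetical URL
downloader = create_dataset_downloader(source_uri)
try:
    downloader.download(
        source_uri=source_uri,
        output="/tmp/dataset",
        checksum_file="https://example.com/dataset.checksum.txt",
    )
except ChecksumError:
    # The partially downloaded file has already been deleted by the
    # downloader before the error is re-raised.
    raise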

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/http_downloader.py.meta


fileFormatVersion: 2
guid: 68dd19858eaf21948ab9099bd283a3ed
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

392
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py


"""UnitySimulationDownloader downloads a dataset from Unity Simulation"""
import concurrent.futures
import logging
import os
import re
from pathlib import Path
import numpy as np
import pandas as pd
import requests
from codetiming import Timer
from requests.packages.urllib3.util.retry import Retry
from tqdm import tqdm
import datasetinsights.constants as const
from datasetinsights.datasets.unity_perception.tables import (
DATASET_TABLES,
FileType,
)
from datasetinsights.io.download import TimeoutHTTPAdapter, download_file
from datasetinsights.io.downloader.base import DatasetDownloader
from datasetinsights.io.exceptions import DownloadError
# number of workers for ThreadPoolExecutor. This is the default value
# in python3.8
MAX_WORKER = min(32, os.cpu_count() + 4)
# Timeout of requests (in seconds)
DEFAULT_TIMEOUT = 1800
# Retry after failed request
DEFAULT_MAX_RETRIES = 5
logger = logging.getLogger(__name__)
class UnitySimulationDownloader(DatasetDownloader, protocol="usim://"):
""" This class is used to download data from Unity Simulation
For more on Unity Simulation please see these
`docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>`
Args:
access_token (str): Access token to be used to authenticate to
unity simulation for downloading the dataset
"""
SOURCE_URI_PATTERN = r"usim://([^@]*)?@?([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/(\w+)" # noqa: E501
def __init__(self, access_token=None, **kwargs):
super().__init__(**kwargs)
self.access_token = access_token
self.run_execution_id = None
self.project_id = None
def download(self, source_uri, output, include_binary=False, **kwargs):
""" Download from Unity Simulation
Args:
source_uri: This is the downloader-uri that indicates where on
unity simulation the dataset should be downloaded from.
The expected source-uri should follow these patterns:
usim://access-token@project-id/run-execution-id
or
usim://project-id/run-execution-id
output: This is the path to the directory where the download
method will store the dataset.
include_binary: Whether to download binary files such as images
or LIDAR point clouds. This flag applies to Datasets where
metadata (e.g. annotation json, dataset catalog, ...)
can be separated from binary files.
"""
self.parse_source_uri(source_uri)
manifest_file = os.path.join(output, f"{self.run_execution_id}.csv")
manifest_file = download_manifest(
self.run_execution_id,
manifest_file,
self.access_token,
project_id=self.project_id,
)
dl_worker = Downloader(manifest_file, output)
dl_worker.download_references()
dl_worker.download_metrics()
dl_worker.download_captures()
if include_binary:
dl_worker.download_binary_files()
def parse_source_uri(self, source_uri):
""" Parse unity simulation source uri
Args:
source_uri: Parses source-uri in the following format
usim://access-token@project-id/run-execution-id
or
usim://project-id/run-execution-id
"""
pattern = re.compile(self.SOURCE_URI_PATTERN)
result = pattern.findall(source_uri)
        if len(result) == 1:
            access_token, project_id, run_execution_id = result[0]
if not self.access_token:
if access_token:
self.access_token = access_token
else:
raise ValueError(f"Missing access token")
if project_id:
self.project_id = project_id
if run_execution_id:
self.run_execution_id = run_execution_id
else:
            raise ValueError(
                f"{source_uri} needs to be in the format "
                "usim://access_token@project_id/run_execution_id "
                "or usim://project_id/run_execution_id"
            )
def _filter_unsuccessful_attempts(manifest_df):
"""
remove all rows from a dataframe where a greater attempt_id exists for
the 'instance_id'. This is necessary so that we avoid using data from
a failed USim run and only get the most recent retry.
Args:
manifest_df (pandas df): must have columns 'attempt_id', 'app_param_id'
and 'instance_id'
Returns(pandas df): where all rows for earlier attempt ids have been
removed
"""
last_attempt_per_instance = manifest_df.groupby("instance_id")[
"attempt_id"
].agg(["max"])
merged = manifest_df.merge(
how="outer",
right=last_attempt_per_instance,
left_on="instance_id",
right_on="instance_id",
)
filtered = merged[merged["attempt_id"] == merged["max"]]
filtered = filtered.reset_index(drop=True)
filtered = filtered.drop(columns="max")
return filtered
class Downloader:
"""Parse a given manifest file to download simulation output
For more on Unity Simulation please see these
`docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>`_
Attributes:
manifest (DataFrame): the csv manifest file stored in a pandas dataframe
data_root (str): root directory where the simulation output should
be downloaded
"""
MANIFEST_FILE_COLUMNS = (
"run_execution_id",
"app_param_id",
"instance_id",
"attempt_id",
"file_name",
"download_uri",
)
def __init__(self, manifest_file: str, data_root: str):
""" Initialize Downloader
Args:
manifest_file (str): path to a manifest file
data_root (str): root directory where the simulation output should
be downloaded
"""
self.manifest = pd.read_csv(
manifest_file, header=0, names=self.MANIFEST_FILE_COLUMNS
)
self.manifest = _filter_unsuccessful_attempts(manifest_df=self.manifest)
self.manifest["filetype"] = self.match_filetypes(self.manifest)
self.data_root = data_root
@staticmethod
def match_filetypes(manifest):
""" Match filetypes for every rows in the manifest file.
Args:
manifest (pd.DataFrame): the manifest csv file
Returns:
a list of filetype strings
"""
filenames = manifest.file_name
filetypes = []
for name in filenames:
for _, table in DATASET_TABLES.items():
if re.match(table.pattern, name):
filetypes.append(table.filetype)
break
else:
filetypes.append(FileType.BINARY)
return filetypes
@Timer(name="download_all", text=const.TIMING_TEXT, logger=logging.info)
def download_all(self):
""" Download all files in the manifest file.
"""
matched_rows = np.ones(len(self.manifest), dtype=bool)
downloaded = self._download_rows(matched_rows)
logger.info(
f"Total {len(downloaded)} files in manifest are successfully "
f"downloaded."
)
@Timer(
name="download_references", text=const.TIMING_TEXT, logger=logging.info
)
def download_references(self):
""" Download all reference files.
        All reference tables are static during the simulation. They typically
        come from the definition of the simulation and are created before
        tasks are distributed across different instances.
        """
        logger.info("Downloading reference files...")
matched_rows = self.manifest.filetype == FileType.REFERENCE
downloaded = self._download_rows(matched_rows)
logger.info(
f"Total {len(downloaded)} reference files are successfully "
f"downloaded."
)
@Timer(name="download_metrics", text=const.TIMING_TEXT, logger=logging.info)
def download_metrics(self):
""" Download all metrics files.
"""
logger.info("Downloading metrics files...")
matched_rows = self.manifest.filetype == FileType.METRIC
downloaded = self._download_rows(matched_rows)
logger.info(
f"Total {len(downloaded)} metric files are successfully downloaded."
)
@Timer(
name="download_captures", text=const.TIMING_TEXT, logger=logging.info
)
def download_captures(self):
""" Download all captures files. See :ref:`captures`
"""
logger.info("Downloading captures files...")
matched_rows = self.manifest.filetype == FileType.CAPTURE
downloaded = self._download_rows(matched_rows)
logger.info(
f"Total {len(downloaded)} capture files are successfully "
f"downloaded."
)
@Timer(
name="download_binary_files",
text=const.TIMING_TEXT,
logger=logging.info,
)
def download_binary_files(self):
""" Download all binary files.
"""
logger.info("Downloading binary files...")
matched_rows = self.manifest.filetype == FileType.BINARY
downloaded = self._download_rows(matched_rows)
logger.info(
f"Total {len(downloaded)} binary files are successfully "
f"downloaded."
)
def _download_rows(self, matched_rows):
""" Download matched rows in a manifest file.
        Note:
            We might need to download 1M+ simulation output files; we don't
            want a single file-transfer failure to hold back the rest of the
            simulation data. Download exceptions are therefore caught here:
            we only log an error message and rely on users to pay attention
            to these errors.
Args:
matched_rows (pd.Series): boolean series indicator of the manifest
file that should be downloaded
Returns:
list of strings representing the downloaded destination path.
"""
n_expected = sum(matched_rows)
future_downloaded = []
downloaded = []
with concurrent.futures.ThreadPoolExecutor(MAX_WORKER) as executor:
for _, row in self.manifest[matched_rows].iterrows():
source_uri = row.download_uri
relative_path = Path(self.data_root, row.file_name)
dest_path = relative_path.parent
file_name = relative_path.name
future = executor.submit(
download_file, source_uri, dest_path, file_name
)
future_downloaded.append(future)
for future in tqdm(
concurrent.futures.as_completed(future_downloaded),
total=n_expected,
):
try:
downloaded.append(future.result())
except DownloadError as ex:
logger.error(ex)
n_downloaded = len(downloaded)
if n_downloaded != n_expected:
logger.warning(
f"Found {n_expected} matching records in the manifest file, "
f"but only {n_downloaded} are downloaded."
)
return downloaded
def download_manifest(
run_execution_id, manifest_file, access_token, project_id, use_cache=True
):
""" Download manifest file from a single run_execution_id
For more on Unity Simulation see these
`docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>`_
Args:
run_execution_id (str): Unity Simulation run execution id
manifest_file (str): path to the destination of the manifest_file
access_token (str): short lived authorization token
project_id (str): Unity project id that has Unity Simulation enabled
use_cache (bool, optional): indicator to skip download if manifest
file already exists. Default: True.
Returns:
str: Full path to the manifest_file
"""
api_endpoint = const.USIM_API_ENDPOINT
project_url = f"{api_endpoint}/v1/projects/{project_id}/"
data_url = f"{project_url}runs/{run_execution_id}/data"
if Path(manifest_file).exists() and use_cache:
logger.info(
f"Mainfest file {manifest_file} already exists. Skipping downloads."
)
return manifest_file
logger.info(
f"Trying to download manifest file for run-execution-id "
f"{run_execution_id}"
)
adapter = TimeoutHTTPAdapter(
timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES)
)
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/json",
}
with requests.Session() as http:
http.mount("https://", adapter)
try:
resp = http.get(data_url, headers=headers)
resp.raise_for_status()
except requests.exceptions.RequestException as ex:
logger.error(ex)
err_msg = (
f"Failed to download manifest file for run-execution-id: "
f"{run_execution_id}."
)
raise DownloadError(err_msg)
else:
Path(manifest_file).parent.mkdir(parents=True, exist_ok=True)
with open(manifest_file, "wb") as f:
for chunk in resp.iter_content(chunk_size=1024):
f.write(chunk)
logger.info(
f"Manifest file {manifest_file} downloaded for run-execution-id "
f"{run_execution_id}"
)
return manifest_file
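A sketch of the end-to-end Unity Simulation download, with hypothetical access token, project id, and run-execution id (the project id must be a UUID per SOURCE_URI_PATTERN):

from datasetinsights.io.downloader import UnitySimulationDownloader

source_uri = "usim://my-token@aaaaaaaa-1111-2222-3333-444444444444/run123"
downloader = UnitySimulationDownloader()
downloader.download(source_uri=source_uri, output="/tmp/usim")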

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py.meta


fileFormatVersion: 2
guid: 8c40e40e50b7fb24ca8d68b31022e159
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

13
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/exceptions.py


class DownloadError(Exception):
    """Raised when a file download fails."""
class ChecksumError(Exception):
    """Raised when the checksum of a downloaded file is incorrect."""
class InvalidTrackerError(Exception):
    """Raised when an unknown tracker is requested."""

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/exceptions.py.meta


fileFormatVersion: 2
guid: 7f42738b771e0cb42816c12788bdb76e
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

246
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/gcs.py


import base64
import logging
import os
import re
from os import makedirs
from os.path import basename, isdir
from pathlib import Path
from google.cloud.storage import Client
from datasetinsights.io.download import validate_checksum
from datasetinsights.io.exceptions import ChecksumError
logger = logging.getLogger(__name__)
class GCSClient:
""" This class is used to download data from GCS location
and perform function such as downloading the dataset and checksum
validation.
"""
GCS_PREFIX = "^gs://"
KEY_SEPARATOR = "/"
def __init__(self, **kwargs):
""" Initialize a client to google cloud storage (GCS).
"""
self.client = Client(**kwargs)
def download(self, *, url=None, local_path=None, bucket=None, key=None):
""" This method is used to download the dataset from GCS.
Args:
url (str): This is the downloader-uri that indicates where
the dataset should be downloaded from.
local_path (str): This is the path to the directory where the
download will store the dataset.
bucket (str): gcs bucket name
key (str): object key path
Examples:
>>> url = "gs://bucket/folder or gs://bucket/folder/data.zip"
>>> local_path = "/tmp/folder"
>>> bucket ="bucket"
>>> key ="folder/data.zip" or "folder"
"""
if not (bucket and key) and url:
bucket, key = self._parse(url)
bucket_obj = self.client.get_bucket(bucket)
if self._is_file(bucket_obj, key):
self._download_file(bucket_obj, key, local_path)
else:
self._download_folder(bucket_obj, key, local_path)
def _download_folder(self, bucket, key, local_path):
""" download all files from directory
"""
blobs = bucket.list_blobs(prefix=key)
for blob in blobs:
local_file_path = blob.name.replace(key, local_path)
self._download_validate(blob, local_file_path)
def _download_file(self, bucket, key, local_path):
""" download single file
"""
blob = bucket.get_blob(key)
key_suffix = key.replace("/" + basename(key), "")
local_file_path = blob.name.replace(key_suffix, local_path)
self._download_validate(blob, local_file_path)
def _download_validate(self, blob, local_file_path):
""" download file and validate checksum
"""
self._download_blob(blob, local_file_path)
self._checksum(blob, local_file_path)
def _download_blob(self, blob, local_file_path):
""" download blob from gcs
Raises:
NotFound: This will raise when object not found
"""
dst_dir = local_file_path.replace("/" + basename(local_file_path), "")
key = blob.name
if not isdir(dst_dir):
makedirs(dst_dir)
logger.info(f"Downloading from {key} to {local_file_path}.")
blob.download_to_filename(local_file_path)
def _checksum(self, blob, filename):
"""validate checksum and delete file if checksum does not match
Raises:
ChecksumError: This will raise this error if checksum doesn't
matches
"""
expected_checksum = blob.md5_hash
if expected_checksum:
expected_checksum_hex = self._md5_hex(expected_checksum)
try:
validate_checksum(
filename, expected_checksum_hex, algorithm="MD5"
)
except ChecksumError as e:
                logger.exception(
                    "Checksum mismatch. Deleting the downloaded file."
                )
os.remove(filename)
raise e
def _is_file(self, bucket, key):
"""Check if the key is a file or directory"""
blob = bucket.get_blob(key)
return blob and blob.name == key
def _md5_hex(self, checksum):
"""fix the missing padding if requires and converts into hex"""
missing_padding = len(checksum) % 4
if missing_padding != 0:
checksum += "=" * (4 - missing_padding)
return base64.b64decode(checksum).hex()
def _parse(self, url):
"""Split an GCS-prefixed URL into bucket and path."""
match = re.search(self.GCS_PREFIX, url)
if not match:
raise ValueError(
f"Specified destination prefix: {url} does not start "
f"with {self.GCS_PREFIX}"
)
url = url[len(self.GCS_PREFIX) - 1 :]
if self.KEY_SEPARATOR not in url:
            raise ValueError(
                f"Specified destination prefix: {self.GCS_PREFIX + url} does "
                f"not have an object key."
            )
idx = url.index(self.KEY_SEPARATOR)
bucket = url[:idx]
path = url[(idx + 1) :]
return bucket, path
def upload(
self, *, local_path=None, bucket=None, key=None, url=None, pattern="*"
):
""" Upload a file or list of files from directory to GCS
Args:
url (str): This is the gcs location that indicates where
the dataset should be uploaded.
local_path (str): This is the path to the directory or file
where the data is stored.
bucket (str): gcs bucket name
key (str): object key path
pattern: Unix glob patterns. Use **/* for recursive glob.
Examples:
For file upload:
>>> url = "gs://bucket/folder/data.zip"
>>> local_path = "/tmp/folder/data.zip"
>>> bucket ="bucket"
>>> key ="folder/data.zip"
For directory upload:
>>> url = "gs://bucket/folder"
>>> local_path = "/tmp/folder"
>>> bucket ="bucket"
>>> key ="folder"
>>> key ="**/*"
"""
if not (bucket and key) and url:
bucket, key = self._parse(url)
bucket_obj = self.client.get_bucket(bucket)
if isdir(local_path):
self._upload_folder(
local_path=local_path,
bucket=bucket_obj,
key=key,
pattern=pattern,
)
else:
self._upload_file(local_path=local_path, bucket=bucket_obj, key=key)
def _upload_file(self, local_path=None, bucket=None, key=None):
""" Upload a single object to GCS
"""
blob = bucket.blob(key)
logger.info(f"Uploading from {local_path} to {key}.")
blob.upload_from_filename(local_path)
def _upload_folder(
self, local_path=None, bucket=None, key=None, pattern="*"
):
"""Upload all files from a folder to GCS based on pattern
"""
for path in Path(local_path).glob(pattern):
if path.is_dir():
continue
full_path = str(path)
relative_path = str(path.relative_to(local_path))
object_key = os.path.join(key, relative_path)
self._upload_file(
local_path=full_path, bucket=bucket, key=object_key
)
def get_most_recent_blob(self, url=None, bucket_name=None, key=None):
""" Get the last updated blob in a given bucket under given prefix
Args:
bucket_name (str): gcs bucket name
key (str): object key path
"""
if not (bucket_name and key) and url:
bucket_name, key = self._parse(url)
bucket = self.client.get_bucket(bucket_name)
if self._is_file(bucket, key):
# Called on file, return file
return bucket.get_blob(key)
else:
logger.debug(
f"Cloud path not a file. Checking for most recent file in {url}"
)
# Return the blob with the max update time (most recent)
blobs = self._list_blobs(bucket, prefix=key)
return max(
blobs, key=lambda blob: bucket.get_blob(blob.name).updated
)
def _list_blobs(self, bucket_name=None, prefix=None):
"""List all blobs with given prefix
"""
blobs = self.client.list_blobs(bucket_name, prefix=prefix)
blob_list = list(blobs)
logger.debug(f"Blobs in {bucket_name} under prefix {prefix}:")
logger.debug(blob_list)
return blob_list
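A sketch of using GCSClient directly, assuming default Google Cloud credentials and a hypothetical bucket:

from datasetinsights.io.gcs import GCSClient

client = GCSClient()
# Download a single object or a whole folder addressed by a gs:// url.
client.download(url="gs://my-bucket/folder", local_path="/tmp/folder")
# Upload a local directory recursively.
client.upload(local_path="/tmp/folder", url="gs://my-bucket/folder", pattern="**/*")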

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/io/gcs.py.meta


fileFormatVersion: 2
guid: 2781a7a725105754c8a575f07f16b4c2
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats.meta


fileFormatVersion: 2
guid: 387629385a081e846bed63e41e33ea22
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

23
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/__init__.py


from .statistics import RenderedObjectInfo
from .visualization.plots import (
bar_plot,
grid_plot,
histogram_plot,
model_performance_box_plot,
model_performance_comparison_box_plot,
plot_bboxes,
plot_keypoints,
rotation_plot,
)
__all__ = [
"bar_plot",
"grid_plot",
"histogram_plot",
"plot_bboxes",
"model_performance_box_plot",
"model_performance_comparison_box_plot",
"rotation_plot",
"RenderedObjectInfo",
"plot_keypoints",
]

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/__init__.py.meta


fileFormatVersion: 2
guid: 60558960135c8064cbc9d241acc15d72
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

150
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/statistics.py


import logging
import datasetinsights.constants as const
from datasetinsights.datasets.unity_perception import MetricDefinitions, Metrics
from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION
logger = logging.getLogger(__name__)
class RenderedObjectInfo:
"""Rendered Object Info in Captures
This metric stores common object info captured by a sensor in the simulation
environment. It can be used to calculate object statistics such as
object count, object rotation and visible pixels.
Attributes:
        raw_table (pd.DataFrame): rendered object info stored in a tidy
            pandas dataframe. Columns: "label_id", "instance_id",
            "visible_pixels", "capture_id", "label_name".
Examples:
.. code-block:: python
>>> # set the data root path to where data was stored
>>> data_root = "$HOME/data"
>>> # use rendered object info definition id
>>> definition_id = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4"
>>> roinfo = RenderedObjectInfo(data_root, definition_id)
            >>> # total object count per label dataframe
>>> roinfo.total_counts()
label_id label_name count
1 object1 10
2 object2 21
            >>> # object count per capture dataframe
>>> roinfo.per_capture_counts()
capture_id count
qwerty 10
asdfgh 21
"""
LABEL = "label_id"
LABEL_READABLE = "label_name"
INDEX_COLUMN = "capture_id"
VALUE_COLUMN = "values"
COUNT_COLUMN = "count"
def __init__(
self,
data_root=const.DEFAULT_DATA_ROOT,
version=SCHEMA_VERSION,
def_id=None,
):
"""Initialize RenderedObjectInfo
Args:
data_root (str): root directory where the dataset was stored
version (str): synthetic dataset schema version
def_id (str): rendered object info definition id
"""
filtered_metrics = Metrics(data_root, version).filter_metrics(def_id)
label_mappings = self._read_label_mappings(data_root, version, def_id)
self.raw_table = self._read_filtered_metrics(
filtered_metrics, label_mappings
)
def num_captures(self):
"""Total number of captures
Returns:
integer: Total number of captures
"""
return self.raw_table[self.INDEX_COLUMN].nunique()
@staticmethod
def _read_label_mappings(data_root, version, def_id):
"""Read label_mappings from a metric_definition record.
Args:
data_root (str): root directory where the dataset was stored
version (str): synthetic dataset schema version
def_id (str): rendered object info definition id
Returns:
dict: The mappings of {label_id: label_name}
"""
definition = MetricDefinitions(data_root, version).get_definition(
def_id
)
name = RenderedObjectInfo.LABEL
readable_name = RenderedObjectInfo.LABEL_READABLE
return {d[name]: d[readable_name] for d in definition["spec"]}
@staticmethod
def _read_filtered_metrics(filtered_metrics, label_mappings):
"""Read label_mappings from a metric_definition record.
Args:
filtered_metrics (pd.DataFrame): A pandas dataframe for metrics
filtered by definition id.
label_mappings (dict): the mappings of {label_id: label_name}
Returns:
            pd.DataFrame: rendered object info stored in a tidy
                pandas dataframe. Columns: "label_id", "instance_id",
                "visible_pixels", "capture_id", "label_name".
"""
filtered_metrics[RenderedObjectInfo.LABEL_READABLE] = filtered_metrics[
RenderedObjectInfo.LABEL
].map(label_mappings)
# Remove metrics data not defined in label_mappings
filtered_metrics.dropna(
subset=[RenderedObjectInfo.LABEL_READABLE], inplace=True
)
return filtered_metrics
def total_counts(self):
"""Aggregate Total Object Counts Per Label
Returns:
pd.DataFrame: Total object counts table.
Columns "label_id", "label_name", "count"
"""
agg = (
self.raw_table.groupby([self.LABEL, self.LABEL_READABLE])
.size()
.to_frame(name=self.COUNT_COLUMN)
.reset_index()
)
return agg
def per_capture_counts(self):
""" Aggregate Object Counts Per Label
Returns:
pd.DataFrame: Total object counts table.
Columns "capture_id", "count"
"""
agg = (
self.raw_table.groupby(self.INDEX_COLUMN)
.size()
.to_frame(name=self.COUNT_COLUMN)
.reset_index()
)
return agg
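Both aggregations are plain pandas group-bys over raw_table; a toy illustration with made-up data:

import pandas as pd

raw_table = pd.DataFrame(
    {
        "capture_id": ["qwerty", "qwerty", "asdfgh"],
        "label_id": [1, 2, 1],
        "label_name": ["object1", "object2", "object1"],
    }
)
# Equivalent to RenderedObjectInfo.total_counts():
total = (
    raw_table.groupby(["label_id", "label_name"])
    .size()
    .to_frame(name="count")
    .reset_index()
)
# Equivalent to RenderedObjectInfo.per_capture_counts():
per_capture = (
    raw_table.groupby("capture_id").size().to_frame(name="count").reset_index()
)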

7
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/statistics.py.meta


fileFormatVersion: 2
guid: 6cf35b682544b9e4a9df8c371766c9a1
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/datasetinsights_master/datasetinsights/stats/visualization.meta


fileFormatVersion: 2
guid: 6835dc223ee79af449a720c046e342ff
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Some files are not shown because too many files changed in this diff.
