leopoldo-zugasti
3 years ago
Current commit
88e3b24c
Showing 250 changed files with 3,891 additions and 181 deletions

All changed files below live under com.unity.perception/Editor/Pyrception/pyrception-utils/pyrception_utils/; paths are shown relative to that directory, each preceded by its changed-line count.

110  preview.py
211  visualization/visualizers.py
8    datasetinsights_master.meta
85   datasetinsights_master/.dockerignore
13   datasetinsights_master/.flake8
129  datasetinsights_master/.github/CODE_OF_CONDUCT.md
30   datasetinsights_master/.github/ISSUE_TEMPLATE/bug_report.md
17   datasetinsights_master/.github/ISSUE_TEMPLATE/feature_request.md
9    datasetinsights_master/.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md
11   datasetinsights_master/.github/PULL_REQUEST_TEMPLATE.md
64   datasetinsights_master/.github/workflows/linting-and-unittests.yaml
19   datasetinsights_master/.github/workflows/publish-docker-hub.yaml
44   datasetinsights_master/.github/workflows/publish-pypi.yaml
116  datasetinsights_master/.gitignore
28   datasetinsights_master/.pre-commit-config.yaml
13   datasetinsights_master/.readthedocs.yaml
116  datasetinsights_master/CONTRIBUTING.md
7    datasetinsights_master/CONTRIBUTING.md.meta
39   datasetinsights_master/Dockerfile
7    datasetinsights_master/Dockerfile.meta
201  datasetinsights_master/LICENCE
7    datasetinsights_master/LICENCE.meta
18   datasetinsights_master/Makefile
7    datasetinsights_master/Makefile.meta
119  datasetinsights_master/README.md
7    datasetinsights_master/README.md.meta
8    datasetinsights_master/datasetinsights.meta
0    datasetinsights_master/datasetinsights/__init__.py
7    datasetinsights_master/datasetinsights/__init__.py.meta
36   datasetinsights_master/datasetinsights/__main__.py
7    datasetinsights_master/datasetinsights/__main__.py.meta
8    datasetinsights_master/datasetinsights/commands.meta
30   datasetinsights_master/datasetinsights/commands/__init__.py
7    datasetinsights_master/datasetinsights/commands/__init__.py.meta
140  datasetinsights_master/datasetinsights/commands/download.py
7    datasetinsights_master/datasetinsights/commands/download.py.meta
16   datasetinsights_master/datasetinsights/constants.py
7    datasetinsights_master/datasetinsights/constants.py.meta
118  datasetinsights_master/datasetinsights/dashboard.py
7    datasetinsights_master/datasetinsights/dashboard.py.meta
8    datasetinsights_master/datasetinsights/datasets.meta
0    datasetinsights_master/datasetinsights/datasets/__init__.py
7    datasetinsights_master/datasetinsights/datasets/__init__.py.meta
2    datasetinsights_master/datasetinsights/datasets/exceptions.py
7    datasetinsights_master/datasetinsights/datasets/exceptions.py.meta
79   datasetinsights_master/datasetinsights/datasets/synthetic.py
7    datasetinsights_master/datasetinsights/datasets/synthetic.py.meta
8    datasetinsights_master/datasetinsights/datasets/unity_perception.meta
12   datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/__init__.py.meta
201  datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/captures.py.meta
3    datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/exceptions.py.meta
124  datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/metrics.py.meta
253  datasetinsights_master/datasetinsights/datasets/unity_perception/references.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/references.py.meta
86   datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/tables.py.meta
57   datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py
7    datasetinsights_master/datasetinsights/datasets/unity_perception/validation.py.meta
8    datasetinsights_master/datasetinsights/io.meta
7    datasetinsights_master/datasetinsights/io/__init__.py
7    datasetinsights_master/datasetinsights/io/__init__.py.meta
7    datasetinsights_master/datasetinsights/io/bbox.py.meta
218  datasetinsights_master/datasetinsights/io/download.py
7    datasetinsights_master/datasetinsights/io/download.py.meta
8    datasetinsights_master/datasetinsights/io/downloader.meta
11   datasetinsights_master/datasetinsights/io/downloader/__init__.py
7    datasetinsights_master/datasetinsights/io/downloader/__init__.py.meta
83   datasetinsights_master/datasetinsights/io/downloader/base.py
7    datasetinsights_master/datasetinsights/io/downloader/base.py.meta
26   datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py
7    datasetinsights_master/datasetinsights/io/downloader/gcs_downloader.py.meta
51   datasetinsights_master/datasetinsights/io/downloader/http_downloader.py
7    datasetinsights_master/datasetinsights/io/downloader/http_downloader.py.meta
392  datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py
7    datasetinsights_master/datasetinsights/io/downloader/unity_simulation.py.meta
13   datasetinsights_master/datasetinsights/io/exceptions.py
7    datasetinsights_master/datasetinsights/io/exceptions.py.meta
246  datasetinsights_master/datasetinsights/io/gcs.py
7    datasetinsights_master/datasetinsights/io/gcs.py.meta
8    datasetinsights_master/datasetinsights/stats.meta
23   datasetinsights_master/datasetinsights/stats/__init__.py
7    datasetinsights_master/datasetinsights/stats/__init__.py.meta
150  datasetinsights_master/datasetinsights/stats/statistics.py
7    datasetinsights_master/datasetinsights/stats/statistics.py.meta
8    datasetinsights_master/datasetinsights/stats/visualization.meta

datasetinsights_master.meta:

fileFormatVersion: 2
guid: 948e708fe62ad0142ba8ea72aeb3355d
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/.dockerignore:

# Git
.git
.gitignore

# CI
.codeclimate.yml
.travis.yml
.taskcluster.yml

# Docker
docker-compose.yml
.docker

# Byte-compiled / optimized / DLL files
**/__pycache__/
**/*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
.pytest_cache

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Virtual environment
.env/
.venv/
venv/

# PyCharm
.idea

# IDE
**/.ropeproject
**/.swp
.vscode
.ipynb_checkpoints

# Place project specific ignores here
runs

datasetinsights_master/.flake8:

[flake8]
max-line-length = 80
ignore =
    E133,
    E203,
    W503,
    W504,
    W605,
    F541
exclude =
    .git,
    __pycache__,
    datasetinsights/data/datasets/protos/

datasetinsights_master/.github/CODE_OF_CONDUCT.md:

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.

Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at <perception@unity3d.com>. All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of actions.

**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.

datasetinsights_master/.github/ISSUE_TEMPLATE/bug_report.md:

---
name: Bug report
about: Report a bug with datasetinsights
labels: bug

---

**Describe the Bug:**
[A clear and concise description of what the bug is.]

**How to Reproduce?**
[What are the steps that would reproduce the bug you encountered?]

**What did you expect to happen:**

**Console logs / stack traces**
Please wrap in [triple backticks (```)](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make it easier to read.

**Screenshots**
[If applicable, add screenshots to help explain your problem.]

**Anything else you would like to add:**
[Miscellaneous information that will assist in solving the issue.]

**Environment:**

- OS + version: [e.g. Ubuntu 20.04.1 LTS]
- datasetinsights version
- _Environment_: (which example environment you used to reproduce the error)
- Other environment settings

datasetinsights_master/.github/ISSUE_TEMPLATE/feature_request.md:

---
name: Feature request
about: Suggest an idea for this project
labels: enhancement

---

**Why you need this feature:**
[Is your feature request related to a problem? Please describe in detail.]

**Describe the solution you'd like:**
[A clear and concise description of what you want to happen.]

**Anything else you would like to add:**
[Miscellaneous information that will assist in solving the issue.]

datasetinsights_master/.github/ISSUE_TEMPLATE/questions-about-datasetinsights.md:

---
name: Questions about datasetinsights
about: Ask a question or clear up any confusion you have about this project
labels: question

---

**Question:**
[You can ask any question about this project.]

datasetinsights_master/.github/PULL_REQUEST_TEMPLATE.md:

# Peer Review Information

Add information on any code, feature, or documentation changes here.

# Pull Request Check List

<!-- This is just a reminder about the most common mistakes. Please make sure that you tick all *appropriate* boxes. Please read our [contribution guide](https://github.com/Unity-Technologies/dataset-insights/blob/master/CONTRIBUTING.md) at least once; it will save you unnecessary review cycles! -->

- [ ] Added **tests** for changed code.
- [ ] Updated **documentation** for changed code.

datasetinsights_master/.github/workflows/linting-and-unittests.yaml:

name: Tests

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  linting:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Linting
        run: |
          pip install pre-commit
          pre-commit run --all-files
  tests:
    # reference from https://github.com/python-poetry/poetry/blob/master/.github/workflows/main.yml
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7, 3.8, 3.9]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get full Python version
        id: full-python-version
        shell: bash
        run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
      - name: Install poetry
        shell: bash
        run: |
          curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
          echo "$HOME/.poetry/bin" >> $GITHUB_PATH
      - name: Configure poetry
        shell: bash
        run: poetry config virtualenvs.in-project true
      - name: Set up cache
        uses: actions/cache@v2
        id: cache
        with:
          path: .venv
          key: venv-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
      - name: Ensure cache is healthy
        if: steps.cache.outputs.cache-hit == 'true'
        shell: bash
        run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv
      - name: Install dependencies
        run: poetry install
        shell: bash
      - name: Run pytest
        run: poetry run pytest

datasetinsights_master/.github/workflows/publish-docker-hub.yaml:

name: Publish Docker image
on:
  release:
    types: [published]
jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v2
      - name: Push to Docker Hub
        uses: docker/build-push-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_ACCESS_TOKEN }}
          repository: unitytechnologies/datasetinsights
          tags: latest
          tag_with_ref: true

datasetinsights_master/.github/workflows/publish-pypi.yaml:

name: Publish to pypi

on:
  release:
    types: [published]

env:
  PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}

jobs:

  build-and-publish:
    runs-on: ubuntu-latest

    steps:

      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Get full Python version
        id: full-python-version
        shell: bash
        run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
      - name: Install poetry
        shell: bash
        run: |
          curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
          echo "$HOME/.poetry/bin" >> $GITHUB_PATH
      - name: Set env
        run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
      - name: Configure poetry
        shell: bash
        run: poetry config pypi-token.pypi $PYPI_TOKEN
      - name: Set poetry version
        shell: bash
        run: poetry version $RELEASE_VERSION
      - name: build
        shell: bash
        run: poetry build
      - name: publish
        shell: bash
        run: poetry publish

datasetinsights_master/.gitignore:

# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so

# Compressed files #
####################
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite

# OS generated files #
######################
.DS_Store*
ehthumbs.db
Icon?
Thumbs.db
*.bak*

# IDE Project files #
######################
*.sublime-*
*.Rproj
.Rproj.user
.Rhistory
*.xcodeproj
*.idea

# Python #
###########
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Editor
.vscode

# For this Project #
######################
runs/
checkpoints/
metrics/

datasetinsights_master/.pre-commit-config.yaml:

# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
exclude: >
  (?x)^(
      .*_pb2.py|
      .*_pb2_grpc.py
  )$
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.4.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
      - id: check-merge-conflict
  - repo: https://github.com/psf/black
    rev: 19.10b0
    hooks:
      - id: black
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.8.1
    hooks:
      - id: flake8
  - repo: https://github.com/timothycrosley/isort
    rev: 5.1.0
    hooks:
      - id: isort

datasetinsights_master/.readthedocs.yaml:

version: 2
formats: all
build:
  image: stable
python:
  version: 3.7
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .
sphinx:
  builder: html
  configuration: docs/source/conf.py

datasetinsights_master/CONTRIBUTING.md:

# Table of contents

- [Contributing to datasetinsights](#contributing-to-datasetinsights)
- [Developing datasetinsights](#developing-datasetinsights)
  - [Add new dependencies](#add-new-dependencies)
- [Codebase structure](#codebase-structure)
- [Unit testing](#unit-testing)
- [Style Guide](#style-guide)
- [Writing documentation](#writing-documentation)
  - [Building documentation](#building-documentation)

## Contributing to datasetinsights

We encourage contributions to the datasetinsights repo, including but not limited to the following categories:

1. You want to improve the documentation of an existing module.
2. You want to provide a bug fix for an outstanding issue.
3. You want to implement a new feature to support new types of Perception package outputs.

## Developing datasetinsights

Here are the steps to set up a datasetinsights virtual environment on your machine:

1. Install [poetry](https://python-poetry.org/), [git](https://git-scm.com/) and [pre-commit](https://pre-commit.com/).
2. Create a virtual environment. We recommend using [miniconda](https://docs.conda.io/en/latest/miniconda.html):

```bash
conda create -n dins-dev python=3.7
conda activate dins-dev
```

3. Clone a copy of datasetinsights from source:

```bash
git clone https://github.com/Unity-Technologies/datasetinsights.git
cd datasetinsights
```

4. Install datasetinsights in `develop` mode:

```bash
poetry install
```

This will symlink the Python files from the current local source tree into the virtual environment. The `develop` mode also includes Python packages such as [pytest](https://docs.pytest.org/en/latest/) and [black](https://black.readthedocs.io/en/stable/).

5. Install the pre-commit [hook](https://pre-commit.com/#3-install-the-git-hook-scripts) into the `.git` folder:

```bash
pre-commit install
# pre-commit installed at .git/hooks/pre-commit
```

### Add new dependencies

Add new Python dependencies to the datasetinsights environment using poetry, for example:

```bash
poetry add numpy@^1.18.4
```

Make sure you only add the desired packages instead of adding all transitive dependencies; let the package management system resolve them. See [poetry add](https://python-poetry.org/docs/cli/#add) for detailed instructions.

## Codebase structure

The datasetinsights package contains the following modules:

- [commands](datasetinsights/commands): this module contains the CLI commands.
- [datasets](datasetinsights/datasets): this module contains different datasets. The dataset classes contain knowledge of how each dataset should be loaded into memory.
- [io](datasetinsights/io): this module contains functionality related to writing/downloading/uploading to/from different sources.
- [stats](datasetinsights/stats): this module contains code for visualizing and gathering statistics on a dataset.

## Unit testing

We use [pytest](https://docs.pytest.org/en/latest/) to run tests located under `tests/`. Run the entire test suite with:

```bash
pytest
```

or run individual test files for individual test suites, like:

```bash
pytest tests/test_visual.py
```

## Style Guide

We follow the Black code [style](https://black.readthedocs.io/en/stable/the_black_code_style.html) for this repository, with the max line length set at 80. We enforce this code style using [Black](https://black.readthedocs.io/en/stable/) to format Python code. In addition to Black, we use [isort](https://github.com/timothycrosley/isort) to sort Python imports.

Before submitting a pull request, run:

```bash
pre-commit run --all-files
```

Fix all issues highlighted by flake8. If you want to skip exceptions such as long URL lines in docstrings, add `# noqa: E501 <describe reason>` to the specific violating line; see [this](https://flake8.pycqa.org/en/3.1.1/user/ignoring-errors.html) to learn more about ignoring flake8 errors, and the example below.
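
For instance, a long URL that cannot be wrapped can be exempted from the E501 line-length check on just that line (the constant name here is hypothetical):

```python
SCHEMA_URL = "https://datasetinsights.readthedocs.io/en/latest/Synthetic_Dataset_Schema.html"  # noqa: E501 long url
```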

Some editors support automatically formatting on save; for example, [vscode](https://code.visualstudio.com/docs/python/editing#_formatting).

## Writing documentation

Datasetinsights uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for formatting docstrings. Lines inside docstring blocks must be limited to 80 characters, with exceptions such as long URLs or tables.

### Building documentation

Follow the instructions [here](docs/README.md).

datasetinsights_master/CONTRIBUTING.md.meta:

fileFormatVersion: 2
guid: f1b92295dacba8144a75292f32648da4
TextScriptImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/Dockerfile:

FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04

RUN apt-get update \
    && apt-get install -y \
    build-essential \
    curl \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgl1-mesa-dev \
    python3.7-dev \
    python3-pip \
    && ln -s /usr/bin/python3.7 /usr/local/bin/python

# Pin setuptools to 49.x.x until this [issue](https://github.com/pypa/setuptools/issues/2350) is fixed.
# Pin cryptography to 3.3.2 until this (https://github.com/pyca/cryptography/issues/5753) is fixed.
RUN python -m pip install --upgrade pip poetry==1.0.10 setuptools==49.6.0 cryptography==3.3.2

# Add Tini
ENV TINI_VERSION v0.18.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /usr/local/bin/tini
RUN chmod +x /usr/local/bin/tini

WORKDIR /datasetinsights
VOLUME /data /root/.config

COPY poetry.lock pyproject.toml ./
RUN poetry config virtualenvs.create false \
    && poetry install --no-root

COPY . ./
# Run poetry install again to install datasetinsights
RUN poetry config virtualenvs.create false \
    && poetry install

# Use -g to ensure all child processes receive SIGKILL
ENTRYPOINT ["tini", "-g", "--"]

CMD sh -c "jupyter notebook --notebook-dir=/ --ip=0.0.0.0 --no-browser --allow-root --port=8888 --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.allow_origin='*' --NotebookApp.base_url=${NB_PREFIX}"
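
As a sketch of how this image is typically used (the tag name is illustrative): building from the repository root and running the container starts the Jupyter server from the CMD above on port 8888, with the declared /data volume mounted from the host:

```bash
docker build -t datasetinsights:local .
docker run -p 8888:8888 -v "$HOME/data":/data datasetinsights:local
```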

datasetinsights_master/Dockerfile.meta:

fileFormatVersion: 2
guid: de1ed20d981a4764d9ea407fdfaf90f1
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/LICENCE:

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:

(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.

You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright 2020 Unity Technologies

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

datasetinsights_master/LICENCE.meta:

fileFormatVersion: 2
guid: f5121630b2ec60f48a2515492953d095
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/Makefile:

.PHONY: help

help:
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

.DEFAULT_GOAL := help

GCP_PROJECT_ID := unity-ai-thea-test
TAG ?= latest

build: ## Build datasetinsights docker image
	@echo "Building docker image for datasetinsights with tag: $(TAG)"
	@docker build -t datasetinsights:$(TAG) .

push: ## Push datasetinsights docker image to registry
	@echo "Uploading docker image to GCS registry with tag: $(TAG)"
	@docker tag datasetinsights:$(TAG) gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG) && \
	docker push gcr.io/$(GCP_PROJECT_ID)/datasetinsights:$(TAG)
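
A minimal usage sketch (the tag value is illustrative; `help` lists the targets annotated with `##` comments):

```bash
make help               # list documented targets
make build TAG=v0.1.0   # build the datasetinsights:v0.1.0 image
make push TAG=v0.1.0    # tag and push it to gcr.io/$GCP_PROJECT_ID
```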

datasetinsights_master/Makefile.meta:

fileFormatVersion: 2
guid: 350053f97e4e1434a9999c029bd2c83d
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/README.md:

# Dataset Insights

[![PyPI python](https://img.shields.io/pypi/pyversions/datasetinsights)](https://pypi.org/project/datasetinsights)
[![PyPI version](https://badge.fury.io/py/datasetinsights.svg)](https://pypi.org/project/datasetinsights)
[![Downloads](https://pepy.tech/badge/datasetinsights)](https://pepy.tech/project/datasetinsights)
[![Tests](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml/badge.svg?branch=master&event=push)](https://github.com/Unity-Technologies/datasetinsights/actions/workflows/linting-and-unittests.yaml?query=branch%3Amaster+event%3Apush)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)

Unity Dataset Insights is a Python package for downloading, parsing and analyzing synthetic datasets generated using the Unity [Perception package](https://github.com/Unity-Technologies/com.unity.perception).

## Installation

Dataset Insights maintains a pip package for easy installation. It works in any standard Python environment via the `pip install datasetinsights` command, as shown below.
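
For example, in a fresh virtual environment:

```bash
pip install datasetinsights
```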

## Getting Started

### Dataset Statistics

We provide a sample [notebook](notebooks/Perception_Statistics.ipynb) to help you load synthetic datasets generated using the [Perception package](https://github.com/Unity-Technologies/com.unity.perception) and visualize dataset statistics. We plan to support other sample Unity projects in the future.

### Dataset Download

You can download datasets from HTTP(S), GCS, and Unity Simulation projects using the `download` command from the CLI, or programmatically via the API.

[CLI](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.commands.html#datasetinsights-commands-download)

```bash
datasetinsights download \
    --source-uri=<xxx> \
    --output=$HOME/data
```

[Programmatically](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.io.downloader.html#module-datasetinsights.io.downloader.gcs_downloader)

`UnitySimulationDownloader` downloads a dataset from Unity Simulation:

```python3
from datasetinsights.io.downloader import UnitySimulationDownloader

source_uri = "usim://<project_id>/<run_execution_id>"
dest = "~/data"
access_token = "XXX"
downloader = UnitySimulationDownloader(access_token=access_token)
downloader.download(source_uri=source_uri, output=dest)
```

`GCSDatasetDownloader` downloads a dataset from a GCS location:

```python3
from datasetinsights.io.downloader import GCSDatasetDownloader

source_uri = "gs://url/to/file.zip"  # or "gs://url/to/folder"
dest = "~/data"
downloader = GCSDatasetDownloader()
downloader.download(source_uri=source_uri, output=dest)
```

`HTTPDatasetDownloader` downloads a dataset from any HTTP(S) location:

```python3
from datasetinsights.io.downloader import HTTPDatasetDownloader

source_uri = "http://url.to.file.zip"
dest = "~/data"
downloader = HTTPDatasetDownloader()
downloader.download(source_uri=source_uri, output=dest)
```

### Dataset Explore

You can explore the dataset [schema](https://datasetinsights.readthedocs.io/en/latest/Synthetic_Dataset_Schema.html#synthetic-dataset-schema) using the following APIs:

[Unity Perception](https://datasetinsights.readthedocs.io/en/latest/datasetinsights.datasets.unity_perception.html#datasetinsights-datasets-unity-perception)

`AnnotationDefinitions` and `MetricDefinitions` load the synthetic dataset definition tables and return a dictionary containing the definitions:

```python3
from datasetinsights.datasets.unity_perception import (
    AnnotationDefinitions,
    MetricDefinitions,
)

annotation_def = AnnotationDefinitions(data_root=dest, version="my_schema_version")
definition_dict = annotation_def.get_definition(def_id="my_definition_id")

metric_def = MetricDefinitions(data_root=dest, version="my_schema_version")
definition_dict = metric_def.get_definition(def_id="my_definition_id")
```

`Captures` loads the synthetic dataset captures table and returns a pandas DataFrame with capture and annotation columns:

```python3
from datasetinsights.datasets.unity_perception import Captures

captures = Captures(data_root=dest, version="my_schema_version")
captures_df = captures.filter(def_id="my_definition_id")
```

`Metrics` loads the synthetic dataset metrics table, which holds extra metadata describing a particular sequence, capture, or annotation, and returns a pandas DataFrame with capture and metric columns:

```python3
from datasetinsights.datasets.unity_perception import Metrics

metrics = Metrics(data_root=dest, version="my_schema_version")
metrics_df = metrics.filter_metrics(def_id="my_definition_id")
```

## Docker

You can use the pre-built Docker image [unitytechnologies/datasetinsights](https://hub.docker.com/r/unitytechnologies/datasetinsights) to run similar commands.
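
For example (a sketch; the source URI is a placeholder, and `/data` matches the volume declared in the project's Dockerfile):

```bash
docker run -it -v "$HOME/data":/data unitytechnologies/datasetinsights \
    datasetinsights download --source-uri=<xxx> --output=/data
```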

## Documentation

You can find the API documentation on [readthedocs](https://datasetinsights.readthedocs.io/en/latest/).

## Contributing

Please let us know if you encounter a bug by filing an issue. To learn more about making a contribution to Dataset Insights, please see our Contribution [page](CONTRIBUTING.md).

## License

Dataset Insights is licensed under the Apache License, Version 2.0. See [LICENSE](LICENCE) for the full license text.

## Citation

If you find this package useful, consider citing it using:

```
@misc{datasetinsights2020,
    title={Unity {D}ataset {I}nsights Package},
    author={{Unity Technologies}},
    howpublished={\url{https://github.com/Unity-Technologies/datasetinsights}},
    year={2020}
}
```

datasetinsights_master/README.md.meta:

fileFormatVersion: 2
guid: 3b0dfe6bbcfa1ef44864b006d71f63f0
TextScriptImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/datasetinsights.meta:

fileFormatVersion: 2
guid: bb75890755a687e4d89909469f8da056
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/datasetinsights/__init__.py.meta:

fileFormatVersion: 2
guid: 5890473f9d95fc44abcc1849da06c437
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/datasetinsights/__main__.py:

import logging

import click

from datasetinsights.commands import Entrypoint
from datasetinsights.constants import CONTEXT_SETTINGS

logging.basicConfig(
    level=logging.INFO,
    format=(
        "%(levelname)s | %(asctime)s | %(name)s | %(threadName)s | "
        "%(message)s"
    ),
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


@click.command(
    cls=Entrypoint, help="Dataset Insights.", context_settings=CONTEXT_SETTINGS,
)
@click.option(
    "-v",
    "--verbose",
    is_flag=True,
    default=False,
    help="Enables verbose mode.",
)
def entrypoint(verbose):
    if verbose:
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)


if __name__ == "__main__":
    entrypoint()
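
With this module in place, the CLI can be invoked as a Python module (or through the installed `datasetinsights` entry point); a usage sketch, with the source URI as a placeholder:

```bash
python -m datasetinsights --help
python -m datasetinsights -v download --source-uri=<xxx> --output=$HOME/data
```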

datasetinsights_master/datasetinsights/__main__.py.meta:

fileFormatVersion: 2
guid: 2f272c80a2e6e7c46a61577129759411
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

datasetinsights_master/datasetinsights/commands.meta:

fileFormatVersion: 2
guid: da74e8a36d75ca1459cdf8eb860f9686
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
|
import os

import click


class Entrypoint(click.MultiCommand):
    """Click MultiCommand entrypoint for the Datasetinsights CLI."""

    def list_commands(self, ctx):
        """Dynamically get the list of commands."""
        rv = []
        for filename in os.listdir(os.path.dirname(__file__)):
            if filename.endswith(".py") and not filename.startswith("__init__"):
                rv.append(filename[:-3])
        rv.sort()

        return rv

    def get_command(self, ctx, name):
        """Dynamically get the command."""
        ns = {}
        fn = os.path.join(os.path.dirname(__file__), name + ".py")
        if not os.path.exists(fn):
            return None
        with open(fn) as f:
            code = compile(f.read(), fn, "exec")
            eval(code, ns, ns)

        return ns["cli"]
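
# Hedged sketch (editor's addition, not in the original module): any module
# dropped next to this file that exposes a top-level click command named
# ``cli`` is picked up automatically -- ``list_commands`` lists the ``*.py``
# files and ``get_command`` executes one to fetch its ``cli`` attribute.
if __name__ == "__main__":
    # Instantiate the MultiCommand and list whatever command modules happen
    # to sit in this package (e.g. ``download``).
    ep = Entrypoint(help="Dataset Insights.")
    print(ep.list_commands(ctx=None))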

fileFormatVersion: 2
guid: ab7da21cef59d8e4a8695c9ae8e072e7
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import logging
import re

import click

import datasetinsights.constants as const
from datasetinsights.io.downloader.base import create_dataset_downloader

logger = logging.getLogger(__name__)


class SourceURI(click.ParamType):
    """Represents the Source URI parameter type.

    This extends click.ParamType so that the click framework can validate
    supported source URIs according to the prefix pattern.

    Raises:
        click.BadParameter: if the validation failed.
    """

    name = "source_uri"
    PREFIX_PATTERN = r"^gs://|^http(s)?://|^usim://"

    def convert(self, value, param, ctx):
        """Validate the source URI and convert the value."""
        match = re.search(self.PREFIX_PATTERN, value)
        if not match:
            message = (
                f"The source uri {value} is not supported. "
                f"Pattern: {self.PREFIX_PATTERN}"
            )
            self.fail(message, param, ctx)

        return value


@click.command(context_settings=const.CONTEXT_SETTINGS,)
@click.option(
    "-s",
    "--source-uri",
    type=SourceURI(),
    required=True,
    help=(
        "URI of where this data should be downloaded. "
        f"Supported source uri patterns {SourceURI.PREFIX_PATTERN}"
    ),
)
@click.option(
    "-o",
    "--output",
    type=click.Path(exists=True, file_okay=False, writable=True),
    default=const.DEFAULT_DATA_ROOT,
    help="Directory on localhost where datasets should be downloaded.",
)
@click.option(
    "-b",
    "--include-binary",
    is_flag=True,
    default=False,
    help=(
        "Whether to download binary files such as images or LIDAR point "
        "clouds. This flag applies to datasets where metadata "
        "(e.g. annotation json, dataset catalog, ...) can be separated from "
        "binary files."
    ),
)
@click.option(
    "--access-token",
    type=str,
    default=None,
    help="Unity Simulation access token. "
    "This will override the access token in the source-uri for Unity "
    "Simulation.",
)
@click.option(
    "--checksum-file",
    type=str,
    default=None,
    help="Dataset checksum text file path. "
    "Path can be an HTTP(S) url or a local file path. This helps check the "
    "integrity of the downloaded dataset.",
)
def cli(
    source_uri, output, include_binary, access_token, checksum_file,
):
    """Download datasets to localhost from known locations.

    The download command supports downloading from 3 types of sources:

    1. Download from Unity Simulation:

    You can specify project_id, run_execution_id, access_token in source-uri:

    \b
    datasetinsights download \\
      --source-uri=usim://<access_token>@<project_id>/<run_execution_id> \\
      --output=$HOME/data

    Alternatively, you can also override access_token such as:

    \b
    datasetinsights download \\
      --source-uri=usim://<project_id>/<run_execution_id> \\
      --output=$HOME/data \\
      --access-token=<access_token>

    2. Download from a public http(s) url:

    \b
    datasetinsights download \\
      --source-uri=http://url/to/file.zip \\
      --output=$HOME/data

    3. Download from a GCS url:

    \b
    datasetinsights download \\
      --source-uri=gs://url/to/file.zip \\
      --output=$HOME/data

    or download all objects under the same directory:

    \b
    datasetinsights download \\
      --source-uri=gs://url/to/directory \\
      --output=$HOME/data
    """
    ctx = click.get_current_context()
    logger.debug(f"Called download command with parameters: {ctx.params}")

    downloader = create_dataset_downloader(
        source_uri=source_uri, access_token=access_token
    )
    downloader.download(
        source_uri=source_uri,
        output=output,
        include_binary=include_binary,
        checksum_file=checksum_file,
    )
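
# Hedged sketch (editor's addition): the programmatic equivalent of the CLI
# call above, using only names imported in this module. The url and output
# directory below are placeholders, not real endpoints.
def _example_programmatic_download():
    downloader = create_dataset_downloader(
        source_uri="http://example.com/dataset.zip"
    )
    downloader.download(
        source_uri="http://example.com/dataset.zip",
        output="/tmp/data",
        include_binary=False,
        checksum_file=None,
    )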

fileFormatVersion: 2
guid: 6e67465ce5402284ba070e1c908b78a0
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

DEFAULT_DATA_ROOT = "/data"

# Default Unity Project ID where USim jobs were executed
DEFAULT_PROJECT_ID = "474ba200-4dcc-4976-818e-0efd28efed30"
USIM_API_ENDPOINT = "https://api.simulation.unity3d.com"

# Default timing text for the codetiming.Timer decorator
TIMING_TEXT = "[{name}] elapsed time: {:0.4f} seconds."

# Click CLI context settings
CONTEXT_SETTINGS = {
    "help_option_names": ["-h", "--help"],
    "show_default": True,
    "ignore_unknown_options": True,
    "allow_extra_args": True,
}

fileFormatVersion: 2
guid: 5ba9ab0cee77c5d4c9fc9b6f3190164b
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import argparse
import json
import os

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

import datasetinsights.stats.visualization.overview as overview
from datasetinsights.stats.visualization.app import get_app
from datasetinsights.stats.visualization.object_detection import (
    render_object_detection_layout,
)

app = get_app()


def main_layout():
    """Method for generating the main app layout.

    Returns:
        html layout: main layout design with tabs for overview statistics
            and object detection.
    """
    app_layout = html.Div(
        [
            html.H1(
                children="Dataset Insights",
                style={
                    "textAlign": "center",
                    "padding": 20,
                    "background": "lightgrey",
                },
            ),
            html.Div(
                [
                    dcc.Tabs(
                        id="page_tabs",
                        value="dataset_overview",
                        children=[
                            dcc.Tab(
                                label="Overview", value="dataset_overview",
                            ),
                            dcc.Tab(
                                label="Object Detection",
                                value="object_detection",
                            ),
                        ],
                    ),
                    html.Div(id="main_page_tabs"),
                ]
            ),
            # Sharing data between callbacks using a hidden division.
            # These hidden dcc and html components store the data-root
            # in the division. This is further used in callbacks made in the
            # object_detection module. This is a temporary hack and can be
            # found in example 1 of the sharing-data-between-callbacks Dash
            # tutorial.
            # ref: https://dash.plotly.com/sharing-data-between-callbacks
            # TODO: Fix this using a better solution to share data.
            dcc.Dropdown(id="dropdown", style={"display": "none"}),
            html.Div(id="data_root_value", style={"display": "none"}),
        ]
    )
    return app_layout


@app.callback(
    Output("data_root_value", "children"), [Input("dropdown", "value")]
)
def store_data_root(value):
    """Method for storing the data-root value in a hidden division.

    Returns:
        json: data-root encoded in json to be stored in the data_root_value
            div.
    """
    json_data_root = json.dumps(data_root)

    return json_data_root


@app.callback(
    Output("main_page_tabs", "children"),
    [Input("page_tabs", "value"), Input("data_root_value", "children")],
)
def render_content(value, json_data_root):
    """Method for rendering the dashboard layout based on the selected tab
    value.

    Args:
        value (str): selected tab value
        json_data_root: data root stored in a hidden div in json format.

    Returns:
        html layout: layout for the selected tab.
    """
    # read the data root value from the data_root_value division
    data_root = json.loads(json_data_root)
    if value == "dataset_overview":
        return overview.html_overview(data_root)
    elif value == "object_detection":
        return render_object_detection_layout(data_root)


def check_path(path):
    """Method for checking whether the given data-root path is valid."""
    if os.path.isdir(path):
        return path
    else:
        raise ValueError(f"Path {path} not found")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-root", help="Path to the data root")
    args = parser.parse_args()
    data_root = check_path(args.data_root)
    app.layout = main_layout()
    app.run_server(debug=True)

fileFormatVersion: 2
guid: 58ebaee02a1b55e43b6d59489b8612a7
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: 108e7ec1fa06ea84eb5be362e4dea313
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: edbce5e9bac110a438499a740428ba9e
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

class DatasetNotFoundError(Exception):
    """Raise when a dataset file can't be found."""

fileFormatVersion: 2
guid: 3c63f372326b3e74c867cb1ddde4fa58
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

""" Simulation Dataset Catalog
"""
import logging

from pyquaternion import Quaternion

from datasetinsights.io.bbox import BBox2D, BBox3D

logger = logging.getLogger(__name__)


def read_bounding_box_3d(annotation, label_mappings=None):
    """Convert dictionary representations of 3d bounding boxes into objects
    of the BBox3D class.

    Args:
        annotation (List[dict]): 3D bounding box annotation
        label_mappings (dict): a dict of {label_id: label_name} mapping

    Returns:
        A list of 3d bounding box objects
    """
    bboxes = []

    for b in annotation:
        label_id = b["label_id"]
        translation = (
            b["translation"]["x"],
            b["translation"]["y"],
            b["translation"]["z"],
        )
        size = (b["size"]["x"], b["size"]["y"], b["size"]["z"])
        rotation = b["rotation"]
        rotation = Quaternion(
            x=rotation["x"], y=rotation["y"], z=rotation["z"], w=rotation["w"]
        )

        if label_mappings and label_id not in label_mappings:
            continue
        box = BBox3D(
            translation=translation,
            size=size,
            label=label_id,
            sample_token=0,
            score=1,
            rotation=rotation,
        )
        bboxes.append(box)

    return bboxes


def read_bounding_box_2d(annotation, label_mappings=None):
    """Convert dictionary representations of 2d bounding boxes into objects
    of the BBox2D class.

    Args:
        annotation (List[dict]): 2D bounding box annotation
        label_mappings (dict): a dict of {label_id: label_name} mapping

    Returns:
        A list of 2D bounding box objects
    """
    bboxes = []
    for b in annotation:
        label_id = b["label_id"]
        x = b["x"]
        y = b["y"]
        w = b["width"]
        h = b["height"]
        if label_mappings and label_id not in label_mappings:
            continue
        box = BBox2D(label=label_id, x=x, y=y, w=w, h=h)
        bboxes.append(box)

    return bboxes
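
# Hedged usage sketch (editor's addition): feeding a hand-written annotation
# record through the 2d reader above. All field values below are made up.
if __name__ == "__main__":
    sample_annotation = [
        {"label_id": 34, "x": 10.0, "y": 20.0, "width": 118.0, "height": 91.0}
    ]
    boxes = read_bounding_box_2d(
        sample_annotation, label_mappings={34: "snack_chips_pringles"}
    )
    print(boxes)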

fileFormatVersion: 2
guid: ef52ac9e1f365b94ea2981908c8b47b3
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: eb590aab824eace43af805377245b87e
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

from .captures import Captures
from .metrics import Metrics
from .references import AnnotationDefinitions, Egos, MetricDefinitions, Sensors

__all__ = [
    "AnnotationDefinitions",
    "Captures",
    "Egos",
    "Metrics",
    "MetricDefinitions",
    "Sensors",
]

fileFormatVersion: 2
guid: 762895cdff237464ab31b646ccddb66e
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

""" Load Synthetic dataset captures and annotations tables
"""
import pandas as pd

from datasetinsights.constants import DEFAULT_DATA_ROOT

from .exceptions import DefinitionIDError
from .tables import DATASET_TABLES, SCHEMA_VERSION, glob, load_table


class Captures:
    """Load captures table

    A capture record stores the relationship between a captured file,
    a collection of annotations, and extra metadata that describes this
    capture. For more detail, see the schema design here:

    :ref:`captures`

    Examples:

    .. code-block:: python

        >>> captures = Captures(data_root="/data")
        #captures class automatically loads the captures (e.g. lidar scan,
        image, depth map) and the annotations (e.g. semantic segmentation
        labels, bounding boxes, etc.)
        >>> data = captures.filter(def_id="6716c783-1c0e-44ae-b1b5-7f068454b66e")  # noqa: E501
        #return the captures and annotations filtered by the annotation
        definition id

    Attributes:
        captures (pd.DataFrame): a collection of captures without annotations
        annotations (pd.DataFrame): a collection of annotations
    """

    TABLE_NAME = "captures"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION):
        """Initialize Captures

        Args:
            data_root (str): the root directory of the dataset
            version (str): desired schema version
        """
        self.captures = self._load_captures(data_root, version)
        self.annotations = self._load_annotations(data_root, version)

    def _load_captures(self, data_root, version):
        """Load captures except annotations.
        :ref:`captures`

        Args:
            data_root (str): the root directory of the dataset
            version (str): desired schema version

        Returns:
            A pandas dataframe with combined capture records.
            Columns: 'id' (UUID of the capture), 'sequence_id',
            'step' (index of captures), 'timestamp' (simulation timestamp in
            milliseconds since the sequence started), 'sensor'
            (sensor attributes), 'ego' (ego pose of the simulation),
            'filename' (single filename that stores captured data)

            Example capture record (one row):

            id: 'cdc8bc5c...', sequence_id: '2954c...', step: 300,
            timestamp: 4.979996,
            sensor: {'sensor_id': 'da873b...', 'ego_id': '44ca9...',
            'modality': 'camera', 'translation': [0.0, 0.0, 0.0],
            'rotation': [0.0, 0.0, 0.0, 1.0], 'scale': 0.344577253},
            ego: {'ego_id': '44ca9...', 'translation': [0.0, 0.0, -20.0],
            'rotation': [0.0, 0.0, 0.0, 1.0], 'velocity': None,
            'acceleration': None},
            filename: 'RGB3/rgb_30...', format: 'PNG'
        """
        captures = []
        for c_file in glob(data_root, self.FILE_PATTERN):
            capture = load_table(c_file, self.TABLE_NAME, version, max_level=0)
            if "annotations" in capture.columns:
                capture = capture.drop(columns="annotations")

            captures.append(capture)

        # pd.concat might create a memory bottleneck
        return pd.concat(captures, axis=0)

    def _load_annotations(self, data_root, version):
        """Load annotations and capture IDs.
        :ref:`capture-annotation`

        Args:
            data_root (str): the root directory of the dataset
            version (str): desired schema version

        Returns:
            A pandas dataframe with combined annotation records.
            Columns: 'id' (annotation id), 'annotation_definition'
            (annotation definition id), 'values' (list of objects that store
            annotation data, e.g. 2d bounding box), 'capture.id'

            Example annotation record (one row):

            id: 'ace0...', annotation_definition: '6716c...',
            values: [{'label_id': 34, 'label_name': 'snack_chips_pringles',
            ... 'height': 118.0}, {'label_id': 35, ... 'height': 91.0}, ...],
            capture.id: 'cdc8b...'
        """
        annotations = []
        for c_file in glob(data_root, self.FILE_PATTERN):
            try:
                annotation = load_table(
                    c_file,
                    self.TABLE_NAME,
                    version,
                    record_path="annotations",
                    meta="id",
                    meta_prefix="capture.",
                )
            except KeyError:
                annotation = pd.DataFrame(
                    {"annotation_definition": [], "capture.id": []}
                )

            annotations.append(annotation)

        return pd.concat(annotations, axis=0)

    def filter(self, def_id):
        """Get captures and annotations filtered by annotation definition id.
        :ref:`captures`

        Args:
            def_id (int): annotation definition id used to filter results

        Returns:
            A pandas dataframe with captures and annotations.
            Columns: 'id' (capture id), 'sequence_id', 'step', 'timestamp',
            'sensor', 'ego', 'filename', 'format', 'annotation.id',
            'annotation.annotation_definition', 'annotation.values'

        Raises:
            DefinitionIDError: Raised if none of the annotation records in
                the combined annotation and captures dataframe match the
                def_id specified as a parameter.

        Example returned record (one row):

        label_id: 2, sequence_id: None, step: 50, timestamp: 4.9,
        sensor: {'sensor_id': 'da873b...', 'ego_id': '44ca9...',
        'modality': 'camera', 'translation': [0.0, 0.0, 0.0],
        'rotation': [0.0, 0.0, 0.0, 1.0], 'scale': 0.344577253},
        ego: ..., filename: 'RGB3/asd.png', format: 'PNG',
        annotation.id: 'ace0...',
        annotation.annotation_definition: '6716c...',
        annotation.values: [{'label_id': 34, 'label_name':
        'snack_chips_pringles', ... 'height': 118.0}, ...]
        """
        if self.annotations.empty:
            msg = (
                f"Can't find annotation records associated with the given "
                f"definition id {def_id}."
            )
            raise DefinitionIDError(msg)

        mask = self.annotations.annotation_definition == def_id
        annotations = (
            self.annotations[mask]
            .set_index("capture.id")
            .add_prefix("annotation.")
        )
        captures = self.captures.set_index("id")

        combined = (
            captures.join(annotations, how="inner")
            .reset_index()
            .rename(columns={"index": "id"})
        )

        if combined.empty:
            msg = (
                f"Can't find annotation records associated with the given "
                f"definition id {def_id}."
            )
            raise DefinitionIDError(msg)

        return combined
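
# Hedged usage sketch (editor's addition): load the captures tables from a
# dataset root and filter them by an annotation definition id (the id below
# is the placeholder used in the class docstring), handling the case where
# no record matches.
if __name__ == "__main__":
    captures = Captures(data_root="/data")
    try:
        df = captures.filter(def_id="6716c783-1c0e-44ae-b1b5-7f068454b66e")
        print(df.head())
    except DefinitionIDError as err:
        print(err)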

fileFormatVersion: 2
guid: 414a16086f4a33f40a03ab87d72bf11c
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

class DefinitionIDError(Exception):
    """Raise when a given definition id can't be found."""

fileFormatVersion: 2
guid: 5ad46965cdf3e4c45b627e8c92108b03
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

"""Load Synthetic dataset Metrics
"""
import json

import dask.bag as db

from datasetinsights.constants import DEFAULT_DATA_ROOT

from .exceptions import DefinitionIDError
from .tables import DATASET_TABLES, SCHEMA_VERSION, glob
from .validation import verify_version


class Metrics:
    """Load metrics table

    Metrics store extra metadata that can be used to describe a particular
    sequence, capture or annotation. Metric records are stored as an
    arbitrary number (M) of key-value pairs.
    For more detail, see the schema design doc:
    :ref:`metrics`

    Attributes:
        metrics (dask.bag.core.Bag): a collection of metrics records

    Examples:
        >>> metrics = Metrics(data_root="/data")
        >>> metrics_df = metrics.filter_metrics(def_id="my_definition_id")
        #metrics_df now contains all the metrics data corresponding to
        "my_definition_id"

        One example of metrics_df (first row shown below):

        +---------------+------------------+---------------------+
        | label_id(int) | instance_id(int) | visible_pixels(int) |
        +===============+==================+=====================+
        | 2             | 2                | 2231                |
        +---------------+------------------+---------------------+
    """

    TABLE_NAME = "metrics"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root=DEFAULT_DATA_ROOT, version=SCHEMA_VERSION):
        """Initialize Metrics

        Args:
            data_root (str): the root directory of the dataset containing
                metrics
            version (str): desired schema version
        """
        self.metrics = self._load_metrics(data_root, version)

    def _load_metrics(self, data_root, version):
        """Load metrics files.
        :ref:`metrics`

        Args:
            data_root (str): the root directory of the dataset containing
                metrics
            version (str): desired schema version

        Returns:
            dask.bag.core.Bag
        """
        metrics_files = db.from_sequence(glob(data_root, self.FILE_PATTERN))
        metrics = metrics_files.map(
            lambda path: Metrics._load_json(path, self.TABLE_NAME, version)
        ).flatten()

        return metrics

    @staticmethod
    def _normalize_values(metric):
        """Filter unnecessary info from a metric record.
        One-level flatten of the metrics.values column.
        """
        values = metric["values"]
        for value in values:
            value["capture_id"] = metric["capture_id"]
            value["annotation_id"] = metric["annotation_id"]
            value["sequence_id"] = metric["sequence_id"]
            value["step"] = metric["step"]

        return values

    def filter_metrics(self, def_id):
        """Get all metrics filtered by a given metric definition id.

        Args:
            def_id (str): metric definition id used to filter results

        Raises:
            DefinitionIDError: raised if no metrics records match the given
                def_id

        Returns (pd.DataFrame):
            Columns: "label_id", "capture_id", "annotation_id",
            "sequence_id", "step"
        """
        metrics = (
            self.metrics.filter(
                lambda metric: metric["metric_definition"] == def_id
            )
            .map(Metrics._normalize_values)
            .flatten()
        )
        if metrics.count().compute() == 0:
            msg = (
                f"Can't find metrics records associated with the given "
                f"definition id {def_id}."
            )
            raise DefinitionIDError(msg)

        return metrics.to_dataframe().compute()

    @staticmethod
    def _load_json(filename, table_name, version):
        """Load records from json files into a dict."""
        with open(filename, "r") as file:
            data = json.load(file)
        verify_version(data, version)

        return data[table_name]
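
# Hedged sketch (editor's addition): what ``_normalize_values`` does to a
# single metric record. All ids and counts below are made up.
if __name__ == "__main__":
    record = {
        "capture_id": "cdc8b",
        "annotation_id": None,
        "sequence_id": "2954c",
        "step": 50,
        "values": [{"label_id": 2, "instance_id": 2, "visible_pixels": 2231}],
    }
    # Each value row is stamped with its parent record's ids.
    print(Metrics._normalize_values(record))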

fileFormatVersion: 2
guid: c20dd3dca57f517448ffb0fec4e6a631
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

""" Load Synthetic dataset references tables
"""
import pandas as pd

from .tables import DATASET_TABLES, SCHEMA_VERSION, glob, load_table
from .validation import NoRecordError


class AnnotationDefinitions:
    """Load annotation_definitions table

    For more detail, see the schema design here:
    :ref:`annotation_definitions.json`

    Attributes:
        table (pd.DataFrame): a collection of annotation_definitions records
    """

    TABLE_NAME = "annotation_definitions"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root, version=SCHEMA_VERSION):
        """Initialize AnnotationDefinitions

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version
        """
        self.table = self.load_annotation_definitions(data_root, version)

    def load_annotation_definitions(self, data_root, version):
        """Load annotation definition files.

        For more detail, see the schema design here:
        :ref:`annotation_definitions.json`

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version

        Returns:
            A pandas dataframe with annotation definition records.
            Columns: 'id' (annotation id), 'name' (annotation name),
            'description' (string description), 'format'
            (string describing the format), 'spec' (format-specific
            specification for the annotation values)
        """
        definitions = []
        for def_file in glob(data_root, self.FILE_PATTERN):
            definition = load_table(def_file, self.TABLE_NAME, version)
            definitions.append(definition)

        if definitions:
            combined = pd.concat(definitions, axis=0).drop_duplicates(
                subset="id"
            )
        else:
            combined = pd.DataFrame({})

        return combined

    def get_definition(self, def_id):
        """Get the annotation definition for a given definition id.

        Args:
            def_id (int): annotation definition id used to filter results

        Returns:
            a dictionary containing the annotation definition
        """
        mask = self.table.id == def_id
        definition = self.table[mask]
        if definition.empty:
            raise NoRecordError(
                f"No records are found in the annotation_definitions file "
                f"that match the specified definition id: {def_id}"
            )
        definition = definition.to_dict("records")[0]

        return definition


class MetricDefinitions:
    """Load metric_definitions table

    For more detail, see the schema design here:

    :ref:`metric_definitions.json`

    Attributes:
        table (pd.DataFrame): a collection of metric_definitions records
            with columns: id (id for metric definition), name, description,
            spec (definition-specific spec)
    """

    TABLE_NAME = "metric_definitions"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root, version=SCHEMA_VERSION):
        """Initialize MetricDefinitions

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version
        """
        self.table = self.load_metric_definitions(data_root, version)

    def load_metric_definitions(self, data_root, version):
        """Load metric definition files.

        :ref:`metric_definitions.json`

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version

        Returns:
            A pandas dataframe with metric definition records, with columns:
            id (id for metric definition), name, description, spec
            (definition-specific spec)
        """
        definitions = []
        for def_file in glob(data_root, self.FILE_PATTERN):
            definition = load_table(def_file, self.TABLE_NAME, version)
            definitions.append(definition)

        combined = pd.concat(definitions, axis=0).drop_duplicates(subset="id")

        return combined

    def get_definition(self, def_id):
        """Get the metric definition for a given definition id.

        Args:
            def_id (int): metric definition id used to filter results

        Returns:
            a dictionary containing the metric definition
        """
        mask = self.table.id == def_id
        definition = self.table[mask]
        if definition.empty:
            raise NoRecordError(
                f"No records are found in the metric_definitions file "
                f"that match the specified definition id: {def_id}"
            )
        definition = definition.to_dict("records")[0]

        return definition


class Egos:
    """Load egos table

    For more detail, see the schema design here:
    :ref:`egos.json`

    Attributes:
        table (pd.DataFrame): a collection of egos records
    """

    TABLE_NAME = "egos"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root, version=SCHEMA_VERSION):
        """Initialize Egos

        Args:
            data_root (str): the root directory of the dataset containing
                ego tables. Two columns: id (ego id) and description
            version (str): desired schema version
        """
        self.table = self.load_egos(data_root, version)

    def load_egos(self, data_root, version):
        """Load egos files.

        For more detail, see the schema design here:

        :ref:`egos.json`

        Args:
            data_root (str): the root directory of the dataset containing
                ego tables
            version (str): desired schema version

        Returns:
            A pandas dataframe with all ego records with two columns: id
            (ego id) and description
        """
        egos = []
        for ego_file in glob(data_root, self.FILE_PATTERN):
            ego = load_table(ego_file, self.TABLE_NAME, version)
            egos.append(ego)
        combined = pd.concat(egos, axis=0).drop_duplicates(subset="id")

        return combined


class Sensors:
    """Load sensors table

    For more detail, see the schema design here:

    :ref:`sensors.json`

    Attributes:
        table (pd.DataFrame): a collection of sensors records with columns:
            'id' (sensor id), 'ego_id', 'modality'
            ({camera, lidar, radar, sonar, ...} -- sensor modality),
            'description'
    """

    TABLE_NAME = "sensors"
    FILE_PATTERN = DATASET_TABLES[TABLE_NAME].file

    def __init__(self, data_root, version=SCHEMA_VERSION):
        """Initialize Sensors

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version
        """
        self.table = self.load_sensors(data_root, version)

    def load_sensors(self, data_root, version):
        """Load sensors files.

        For more detail, see the schema design here:

        :ref:`sensors.json`

        Args:
            data_root (str): the root directory of the dataset containing
                tables
            version (str): desired schema version

        Returns:
            A pandas dataframe with all sensors records with columns:
            'id' (sensor id), 'ego_id', 'modality'
            ({camera, lidar, radar, sonar, ...} -- sensor modality),
            'description'
        """
        sensors = []
        for sensor_file in glob(data_root, self.FILE_PATTERN):
            sensor = load_table(sensor_file, self.TABLE_NAME, version)
            sensors.append(sensor)
        combined = pd.concat(sensors, axis=0).drop_duplicates(subset="id")

        return combined
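
# Hedged usage sketch (editor's addition): look up one annotation definition
# from a dataset root; the definition id below is a placeholder.
if __name__ == "__main__":
    defs = AnnotationDefinitions(data_root="/data")
    try:
        definition = defs.get_definition(
            def_id="6716c783-1c0e-44ae-b1b5-7f068454b66e"
        )
        print(definition["name"], definition["format"])
    except NoRecordError as err:
        print(err)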

fileFormatVersion: 2
guid: 28399836ae95db949b8a439aeda34d9a
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import json
import logging
import pathlib
from collections import namedtuple
from enum import Enum

import pandas as pd

from .validation import verify_version

logger = logging.getLogger(__name__)
SCHEMA_VERSION = "0.0.1"  # Synthetic dataset schema version


class FileType(Enum):
    BINARY = "binary"
    REFERENCE = "reference"
    METRIC = "metric"
    CAPTURE = "capture"


Table = namedtuple("Table", "file pattern filetype")
DATASET_TABLES = {
    "annotation_definitions": Table(
        "**/annotation_definitions.json",
        r"(?:\w|-|/)*annotation_definitions.json",
        FileType.REFERENCE,
    ),
    "captures": Table(
        "**/captures_*.json",
        r"(?:\w|-|/)*captures_[0-9]+.json",
        FileType.CAPTURE,
    ),
    "egos": Table("**/egos.json", r"(?:\w|-|/)*egos.json", FileType.REFERENCE),
    "metric_definitions": Table(
        "**/metric_definitions.json",
        r"(?:\w|-|/)*metric_definitions.json",
        FileType.REFERENCE,
    ),
    "metrics": Table(
        "**/metrics_*.json", r"(?:\w|-|/)*metrics_[0-9]+.json", FileType.METRIC
    ),
    "sensors": Table(
        "**/sensors.json", r"(?:\w|-|/)*sensors.json", FileType.REFERENCE
    ),
}


def glob(data_root, pattern):
    """Find all matching files in a directory.

    Args:
        data_root (str): directory containing capture files
        pattern (str): Unix file pattern

    Yields:
        str: matched filenames in a directory
    """
    path = pathlib.Path(data_root)
    for fp in path.glob(pattern):
        yield fp


def load_table(json_file, table_name, version, **kwargs):
    """Load records from json files into a pandas table.

    Args:
        json_file (str): filename of the json file.
        table_name (str): name of the table in the json file to be loaded
        version (str): requested version of this table
        **kwargs: arbitrary keyword arguments to be passed to pandas'
            json_normalize method.

    Returns:
        a pandas dataframe of the loaded table.

    Raises:
        VersionError: If the version in the json file does not match the
            requested version.
    """
    logger.debug(f"Loading table {table_name} from {json_file}")
    with open(json_file, "r") as file:
        data = json.load(file)
    verify_version(data, version)
    table = pd.json_normalize(data[table_name], **kwargs)

    return table
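
# Hedged usage sketch (editor's addition): glob all captures files under a
# dataset root and load the first one into a dataframe. "/data" is the
# default data root used elsewhere in this package.
if __name__ == "__main__":
    pattern = DATASET_TABLES["captures"].file
    for json_file in glob("/data", pattern):
        df = load_table(json_file, "captures", SCHEMA_VERSION, max_level=0)
        print(json_file, len(df))
        break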

fileFormatVersion: 2
guid: a463dc85bbc1a464e99103d91ede2bc9
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

""" Validate Simulation Data
"""


class VersionError(Exception):
    """Raise when the data file version does not match."""

    pass


class DuplicateRecordError(Exception):
    """Raise when the definition file has a duplicate definition id."""

    pass


class NoRecordError(Exception):
    """Raise when no record is found matching a given definition id."""

    pass


def verify_version(json_data, version):
    """Verify the json schema version.

    Args:
        json_data (json): a json object loaded from file.
        version (str): string of the requested version.

    Raises:
        VersionError: If the version in the json file does not match the
            requested version.
    """
    loaded = json_data["version"]
    if loaded != version:
        raise VersionError(f"Version mismatch. Expected version: {version}")


def check_duplicate_records(table, column, table_name):
    """Check if a table has duplicate records for a given column.

    Args:
        table (pd.DataFrame): a pandas dataframe
        column (str): the column where no duplication is allowed
        table_name (str): table name

    Raises:
        DuplicateRecordError: If duplicate records are found in the column
    """
    if table[column].nunique() != len(table):
        raise DuplicateRecordError(
            f"Duplicate record was found in {column} of table {table_name}. "
            f"This column is expected to be unique. Violating this "
            f"requirement might cause ambiguity when the records are loaded."
        )
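
# Hedged sketch (editor's addition): both validators exercised on tiny
# in-memory data, showing the two failure modes.
if __name__ == "__main__":
    import pandas as pd

    try:
        verify_version({"version": "0.0.2"}, version="0.0.1")
    except VersionError as err:
        print(err)

    table = pd.DataFrame({"id": [1, 1]})
    try:
        check_duplicate_records(table, column="id", table_name="demo")
    except DuplicateRecordError as err:
        print(err)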

fileFormatVersion: 2
guid: 7f34c23e908e98d4e9dca0d7e8bd9bf1
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: b7b7c71c5464e5d469afcd30440015e4
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

from .bbox import BBox2D
from .downloader import create_dataset_downloader

__all__ = [
    "BBox2D",
    "create_dataset_downloader",
]

fileFormatVersion: 2
guid: cd7b49678064cc140a42019e9ae81e3d
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: 1dd346d7eb846064883556c3b6a2a8bd
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import hashlib
import logging
import os
import re
import tempfile
import zlib
from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from .exceptions import ChecksumError, DownloadError

logger = logging.getLogger(__name__)

# Timeout of requests (in seconds)
DEFAULT_TIMEOUT = 1800
# Retry after failed request
DEFAULT_MAX_RETRIES = 5


class TimeoutHTTPAdapter(HTTPAdapter):
    def __init__(self, timeout, *args, **kwargs):
        self.timeout = timeout
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)


def download_file(source_uri: str, dest_path: str, file_name: str = None):
    """Download a file specified from a source uri.

    Args:
        source_uri (str): source url where the file should be downloaded
        dest_path (str): destination path of the file
        file_name (str): file name of the file to be downloaded

    Returns:
        String of the destination path.
    """
    logger.debug(f"Trying to download file from {source_uri} -> {dest_path}")
    adapter = TimeoutHTTPAdapter(
        timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES)
    )
    with requests.Session() as http:
        http.mount("https://", adapter)
        try:
            response = http.get(source_uri)
            response.raise_for_status()
        except requests.exceptions.RequestException as ex:
            logger.error(ex)
            err_msg = (
                f"The request download from {source_uri} -> {dest_path} can't "
                f"be completed."
            )
            raise DownloadError(err_msg)
        else:
            dest_path = Path(dest_path)
            if not file_name:
                file_name = _parse_filename(response, source_uri)
            dest_path = dest_path / file_name
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with open(dest_path, "wb") as f:
                f.write(response.content)

    return dest_path


def checksum_matches(filepath, expected_checksum, algorithm="CRC32"):
    """Check if the checksum matches.

    Args:
        filepath (str): the downloaded file path
        expected_checksum (int): expected checksum of the file
        algorithm (str): checksum algorithm. Defaults to CRC32

    Returns:
        True if the file checksum matches.
    """
    computed = compute_checksum(filepath, algorithm)
    return computed == expected_checksum


def validate_checksum(filepath, expected_checksum, algorithm="CRC32"):
    """Validate the checksum of the downloaded file.

    Args:
        filepath (str): the downloaded file path
        expected_checksum (int): expected checksum of the file
        algorithm (str): checksum algorithm. Defaults to CRC32

    Raises:
        ChecksumError: if the file checksum does not match.
    """
    if not checksum_matches(filepath, expected_checksum, algorithm):
        raise ChecksumError


def compute_checksum(filepath, algorithm="CRC32"):
    """Compute the checksum of a file.

    Args:
        filepath (str): the downloaded file path
        algorithm (str): checksum algorithm. Defaults to CRC32

    Returns:
        int: the checksum value
    """
    if algorithm == "CRC32":
        chs = _crc32_checksum(filepath)
    elif algorithm == "MD5":
        chs = _md5_checksum(filepath)
    else:
        raise ValueError("Unsupported checksum algorithm!")

    return chs


def _crc32_checksum(filepath):
    """Calculate the checksum of a file using CRC32."""
    with open(filepath, "rb") as f:
        checksum = zlib.crc32(f.read())

    return checksum


def _md5_checksum(filename):
    """Calculate the checksum of a file using MD5."""
    md5 = hashlib.md5()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            md5.update(chunk)
    return md5.hexdigest()


def get_checksum_from_file(filepath):
    """Return the checksum from the checksum file at the given path.

    Args:
        filepath (str): Path of the checksum file.
            The path can be an HTTP(S) url or a local path.

    Raises:
        ValueError: Raised if the filepath is neither a local path nor an
            HTTP or HTTPS url.
    """
    if filepath.startswith(("http://", "https://")):
        with tempfile.TemporaryDirectory() as tmp:
            checksum_file_path = os.path.join(tmp, "checksum.txt")
            file_path = download_file(
                source_uri=filepath, dest_path=checksum_file_path
            )
            return _read_checksum_from_txt(file_path)

    elif os.path.isfile(filepath):
        return _read_checksum_from_txt(filepath)

    else:
        raise ValueError(f"Can not get checksum from path: {filepath}")


def _read_checksum_from_txt(filepath):
    """Read the checksum from a txt file and return it.

    Args:
        filepath (str): Local filepath of the checksum file.

    Returns:
        str: checksum value from the checksum file.
    """
    with open(filepath) as file:
        checksum = file.read()
    return checksum


def _parse_filename(response, uri):
    file_name = _get_filename_from_response(response)
    if file_name is None:
        file_name = _get_file_name_from_uri(uri)
    return file_name


def _get_filename_from_response(response):
    """Get the filename from a requests response object.

    Args:
        response: requests.Response() object that contains the server's
            response to the HTTP request.

    Returns:
        filename (str): Name of the file to be downloaded
    """
    cd = response.headers.get("content-disposition")
    if not cd:
        return None
    file_name = re.findall("filename=(.+)", cd)
    if len(file_name) == 0:
        return None
    return file_name[0]


def _get_file_name_from_uri(uri):
    """Get the filename from a URI.

    Args:
        uri (str): URI
    """
    return uri.split("/")[-1]
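
# Hedged usage sketch (editor's addition): download a file and validate its
# CRC32 checksum. The url, local paths, and checksum file are placeholders.
if __name__ == "__main__":
    path = download_file("http://example.com/data.zip", "/tmp/downloads")
    expected = int(get_checksum_from_file("/tmp/downloads/checksum.txt"))
    try:
        validate_checksum(path, expected)
    except ChecksumError:
        print("checksum mismatch")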

fileFormatVersion: 2
guid: c216948dd1001a74a8924e984e10fd07
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: 95081547889c43a41b2e74a036c3f840
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

from .base import create_dataset_downloader
from .gcs_downloader import GCSDatasetDownloader
from .http_downloader import HTTPDatasetDownloader
from .unity_simulation import UnitySimulationDownloader

__all__ = [
    "UnitySimulationDownloader",
    "HTTPDatasetDownloader",
    "create_dataset_downloader",
    "GCSDatasetDownloader",
]

fileFormatVersion: 2
guid: ae10a9b1d099dc14b9d9b6cf8b340846
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import re
from abc import ABC, abstractmethod

_registry = {}


def _find_downloader(source_uri):
    """Return the correct DatasetDownloader class from the registry,
    based on the source-uri provided.

    Args:
        source_uri: URI of where this data should be downloaded.

    Returns:
        The dataset downloader class that is registered with the
        source-uri protocol.
    """
    protocols = "|".join(_registry.keys())
    pattern = re.compile(f"({protocols})")

    protocol = pattern.findall(source_uri)

    if source_uri.startswith(("https://", "http://")):
        protocol = "http://"
    elif protocol:
        protocol = protocol[0]
    else:
        raise ValueError(f"Downloader not found for source-uri '{source_uri}'")

    return _registry.get(protocol)


def create_dataset_downloader(source_uri, **kwargs):
    """Find the dataset downloader matching the source-uri provided, and
    instantiate it.

    Args:
        source_uri: URI used to look up the correct dataset downloader
        **kwargs: keyword arguments forwarded to the downloader constructor

    Returns:
        The dataset downloader instance matching the source-uri.
    """
    downloader_class = _find_downloader(source_uri=source_uri)
    return downloader_class(**kwargs)


class DatasetDownloader(ABC):
    """This is the base class for all dataset downloaders.
    The DatasetDownloader can be subclassed in the following way:

    class NewDatasetDownloader(DatasetDownloader, protocol="protocol://")

    Here 'protocol://' should match the prefix that the download method's
    source_uri supports, e.g. http:// or gs://.
    """

    def __init__(self, **kwargs):
        pass

    @classmethod
    def __init_subclass__(cls, protocol=None, **kwargs):
        if protocol:
            _registry[protocol] = cls
        else:
            raise NotImplementedError(
                "Subclass needs to have a class keyword argument named "
                "protocol."
            )
        super().__init_subclass__(**kwargs)

    @abstractmethod
    def download(self, source_uri, output, **kwargs):
        """Download the dataset stored at the source_uri and store it in
        the output directory.

        Args:
            source_uri: URI that points to the dataset that should be
                downloaded
            output: path to the local folder where the dataset should be
                stored
        """
        raise NotImplementedError("Subclass needs to implement this method")
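
# Hedged sketch (editor's addition): registering a downloader for a made-up
# "ftp://" protocol. Subclassing with a ``protocol`` class keyword is all it
# takes for ``create_dataset_downloader`` to find the new class.
if __name__ == "__main__":

    class FTPDatasetDownloader(DatasetDownloader, protocol="ftp://"):
        def download(self, source_uri, output, **kwargs):
            print(f"would fetch {source_uri} into {output}")

    downloader = create_dataset_downloader("ftp://host/dataset.zip")
    downloader.download("ftp://host/dataset.zip", "/tmp/data")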

fileFormatVersion: 2
guid: 7f5c18ade6b16754ca766b5a79f8dea7
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

from datasetinsights.io.downloader.base import DatasetDownloader
from datasetinsights.io.gcs import GCSClient


class GCSDatasetDownloader(DatasetDownloader, protocol="gs://"):
    """This class is used to download data from GCS."""

    def __init__(self, **kwargs):
        """Initialize GCSDatasetDownloader."""
        self.client = GCSClient()

    def download(self, source_uri=None, output=None, **kwargs):
        """Download a dataset from GCS.

        Args:
            source_uri: This is the downloader-uri that indicates where on
                GCS the dataset should be downloaded from.
                The expected source-uri follows these patterns:
                gs://bucket/folder or gs://bucket/folder/data.zip

            output: This is the path to the directory where the download
                will store the dataset.
        """
        self.client.download(local_path=output, url=source_uri)

fileFormatVersion: 2
guid: f6a00cd7d3adcf84fb7ddd1d8eb6ee75
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import logging
import os

from datasetinsights.io.download import (
    download_file,
    get_checksum_from_file,
    validate_checksum,
)
from datasetinsights.io.downloader.base import DatasetDownloader
from datasetinsights.io.exceptions import ChecksumError

logger = logging.getLogger(__name__)


class HTTPDatasetDownloader(DatasetDownloader, protocol="http://"):
    """This class is used to download data from any HTTP or HTTPS public
    url. It downloads the dataset and performs checksum validation if a
    checksum file path is provided.
    """

    def download(self, source_uri, output, checksum_file=None, **kwargs):
        """Download the dataset from an HTTP or HTTPS url.

        Args:
            source_uri (str): This is the downloader-uri that indicates
                where the dataset should be downloaded from.

            output (str): This is the path to the directory where the
                download will store the dataset.

            checksum_file (str): This is the path of the txt file that
                contains the checksum of the dataset to be downloaded. It
                can be an HTTP or HTTPS url or a local path.

        Raises:
            ChecksumError: Raised if the checksum doesn't match.
        """
        dataset_path = download_file(source_uri, output)

        if checksum_file:
            logger.debug("Reading checksum from checksum file.")
            checksum = get_checksum_from_file(checksum_file)
            try:
                logger.debug("Validating checksum!!")
                validate_checksum(dataset_path, int(checksum))
            except ChecksumError as e:
                logger.info("Checksum mismatch. Deleting the downloaded file.")
                os.remove(dataset_path)
                raise e
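
# Hedged usage sketch (editor's addition): the url and checksum path below
# are placeholders, not real endpoints.
if __name__ == "__main__":
    downloader = HTTPDatasetDownloader()
    downloader.download(
        source_uri="http://example.com/dataset.zip",
        output="/tmp/data",
        checksum_file="http://example.com/checksum.txt",
    )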

fileFormatVersion: 2
guid: 68dd19858eaf21948ab9099bd283a3ed
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

|||
"""UnitySimulationDownloader downloads a dataset from Unity Simulation""" |
|||
import concurrent.futures |
|||
import logging |
|||
import os |
|||
import re |
|||
from pathlib import Path |
|||
|
|||
import numpy as np |
|||
import pandas as pd |
|||
import requests |
|||
from codetiming import Timer |
|||
from requests.packages.urllib3.util.retry import Retry |
|||
from tqdm import tqdm |
|||
|
|||
import datasetinsights.constants as const |
|||
from datasetinsights.datasets.unity_perception.tables import ( |
|||
DATASET_TABLES, |
|||
FileType, |
|||
) |
|||
from datasetinsights.io.download import TimeoutHTTPAdapter, download_file |
|||
from datasetinsights.io.downloader.base import DatasetDownloader |
|||
from datasetinsights.io.exceptions import DownloadError |
|||
|
|||
# number of workers for ThreadPoolExecutor. This is the default value |
|||
# in python3.8 |
|||
MAX_WORKER = min(32, os.cpu_count() + 4) |
|||
# Timeout of requests (in seconds) |
|||
DEFAULT_TIMEOUT = 1800 |
|||
# Retry after failed request |
|||
DEFAULT_MAX_RETRIES = 5 |
|||
|
|||
|
|||
logger = logging.getLogger(__name__) |
|||
|
|||
|
|||
class UnitySimulationDownloader(DatasetDownloader, protocol="usim://"): |
|||
""" This class is used to download data from Unity Simulation |
|||
|
|||
For more on Unity Simulation please see these |
|||
`docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>` |
|||
|
|||
Args: |
|||
access_token (str): Access token to be used to authenticate to |
|||
unity simulation for downloading the dataset |
|||
|
|||
""" |
|||
|
|||
SOURCE_URI_PATTERN = r"usim://([^@]*)?@?([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/(\w+)" # noqa: E501 |
|||
|
|||
def __init__(self, access_token=None, **kwargs): |
|||
super().__init__(**kwargs) |
|||
self.access_token = access_token |
|||
self.run_execution_id = None |
|||
self.project_id = None |
|||
|
|||
def download(self, source_uri, output, include_binary=False, **kwargs): |
|||
""" Download from Unity Simulation |
|||
|
|||
Args: |
|||
source_uri: This is the downloader-uri that indicates where on |
|||
unity simulation the dataset should be downloaded from. |
|||
The expected source-uri should follow these patterns: |
|||
usim://access-token@project-id/run-execution-id |
|||
or |
|||
usim://project-id/run-execution-id |
|||
output: This is the path to the directory where the download |
|||
method will store the dataset. |
|||
include_binary: Whether to download binary files such as images |
|||
or LIDAR point clouds. This flag applies to Datasets where |
|||
metadata (e.g. annotation json, dataset catalog, ...) |
|||
can be separated from binary files. |
|||
|
|||
""" |
|||
self.parse_source_uri(source_uri) |
|||
manifest_file = os.path.join(output, f"{self.run_execution_id}.csv") |
|||
manifest_file = download_manifest( |
|||
self.run_execution_id, |
|||
manifest_file, |
|||
self.access_token, |
|||
project_id=self.project_id, |
|||
) |
|||
|
|||
dl_worker = Downloader(manifest_file, output) |
|||
dl_worker.download_references() |
|||
dl_worker.download_metrics() |
|||
dl_worker.download_captures() |
|||
if include_binary: |
|||
dl_worker.download_binary_files() |
|||
|
|||
def parse_source_uri(self, source_uri): |
|||
""" Parse unity simulation source uri |
|||
|
|||
Args: |
|||
source_uri: Parses source-uri in the following format |
|||
usim://access-token@project-id/run-execution-id |
|||
or |
|||
usim://project-id/run-execution-id |
|||
|
|||
""" |
|||
pattern = re.compile(self.SOURCE_URI_PATTERN) |
|||
result = pattern.findall(source_uri) |
|||
if len(result) == 1: |
|||
(access_token, project_id, run_execution_id,) = pattern.findall( |
|||
source_uri |
|||
)[0] |
|||
if not self.access_token: |
|||
if access_token: |
|||
self.access_token = access_token |
|||
else: |
|||
raise ValueError(f"Missing access token") |
|||
if project_id: |
|||
self.project_id = project_id |
|||
if run_execution_id: |
|||
self.run_execution_id = run_execution_id |
|||
|
|||
else: |
|||
raise ValueError( |
|||
f"{source_uri} needs to be in format" |
|||
f" usim://access_token@project_id/run_execution_id " |
|||
f"or usim://project_id/run_execution_id " |
|||
) |


def _filter_unsuccessful_attempts(manifest_df):
    """Remove all rows from a dataframe where a greater attempt_id exists
    for the same instance_id. This is necessary so that we avoid using data
    from a failed USim run and only keep the most recent retry.

    Args:
        manifest_df (pandas df): must have columns 'attempt_id',
            'app_param_id' and 'instance_id'

    Returns (pandas df): dataframe where all rows for earlier attempt ids
        have been removed
    """
    last_attempt_per_instance = manifest_df.groupby("instance_id")[
        "attempt_id"
    ].agg(["max"])
    merged = manifest_df.merge(
        how="outer",
        right=last_attempt_per_instance,
        left_on="instance_id",
        right_on="instance_id",
    )
    filtered = merged[merged["attempt_id"] == merged["max"]]
    filtered = filtered.reset_index(drop=True)
    filtered = filtered.drop(columns="max")
    return filtered
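
# A quick illustration of the filtering on a toy manifest (values invented):
#
# >>> import pandas as pd
# >>> df = pd.DataFrame({
# ...     "instance_id": [1, 1, 2],
# ...     "attempt_id": [0, 1, 0],
# ...     "app_param_id": ["a", "a", "b"],
# ... })
# >>> _filter_unsuccessful_attempts(df)[["instance_id", "attempt_id"]]
#    instance_id  attempt_id
# 0            1           1
# 1            2           0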


class Downloader:
    """Parse a given manifest file to download simulation output.

    For more on Unity Simulation please see these
    `docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>`_

    Attributes:
        manifest (DataFrame): the csv manifest file stored in a pandas
            dataframe
        data_root (str): root directory where the simulation output should
            be downloaded
    """

    MANIFEST_FILE_COLUMNS = (
        "run_execution_id",
        "app_param_id",
        "instance_id",
        "attempt_id",
        "file_name",
        "download_uri",
    )

    def __init__(self, manifest_file: str, data_root: str):
        """Initialize Downloader.

        Args:
            manifest_file (str): path to a manifest file
            data_root (str): root directory where the simulation output
                should be downloaded
        """
        self.manifest = pd.read_csv(
            manifest_file, header=0, names=self.MANIFEST_FILE_COLUMNS
        )
        self.manifest = _filter_unsuccessful_attempts(manifest_df=self.manifest)
        self.manifest["filetype"] = self.match_filetypes(self.manifest)
        self.data_root = data_root

    @staticmethod
    def match_filetypes(manifest):
        """Match filetypes for every row in the manifest file.

        Args:
            manifest (pd.DataFrame): the manifest csv file

        Returns:
            a list of filetype strings
        """
        filenames = manifest.file_name
        filetypes = []
        for name in filenames:
            for _, table in DATASET_TABLES.items():
                if re.match(table.pattern, name):
                    filetypes.append(table.filetype)
                    break
            else:
                filetypes.append(FileType.BINARY)

        return filetypes
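
    # The for/else above falls through to FileType.BINARY only when no table
    # pattern matches a file name. A hedged sketch of the shape a
    # DATASET_TABLES entry is assumed to take here (the pattern is invented,
    # not the real table spec):
    #
    # >>> from collections import namedtuple
    # >>> Table = namedtuple("Table", ["pattern", "filetype"])
    # >>> table = Table(pattern=r".*captures_\d+\.json", filetype=FileType.CAPTURE)
    # >>> bool(re.match(table.pattern, "Dataset/captures_000.json"))
    # True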

    @Timer(name="download_all", text=const.TIMING_TEXT, logger=logging.info)
    def download_all(self):
        """Download all files in the manifest file."""
        matched_rows = np.ones(len(self.manifest), dtype=bool)
        downloaded = self._download_rows(matched_rows)
        logger.info(
            f"Total {len(downloaded)} files in the manifest were "
            f"successfully downloaded."
        )

    @Timer(
        name="download_references", text=const.TIMING_TEXT, logger=logging.info
    )
    def download_references(self):
        """Download all reference files.

        All reference tables are static tables during the simulation.
        They typically come from the definition of the simulation and should
        be created before tasks run distributed across different instances.
        """
        logger.info("Downloading reference files...")
        matched_rows = self.manifest.filetype == FileType.REFERENCE
        downloaded = self._download_rows(matched_rows)

        logger.info(
            f"Total {len(downloaded)} reference files were successfully "
            f"downloaded."
        )

    @Timer(name="download_metrics", text=const.TIMING_TEXT, logger=logging.info)
    def download_metrics(self):
        """Download all metrics files."""
        logger.info("Downloading metrics files...")
        matched_rows = self.manifest.filetype == FileType.METRIC
        downloaded = self._download_rows(matched_rows)
        logger.info(
            f"Total {len(downloaded)} metric files were successfully "
            f"downloaded."
        )

    @Timer(
        name="download_captures", text=const.TIMING_TEXT, logger=logging.info
    )
    def download_captures(self):
        """Download all captures files. See :ref:`captures`"""
        logger.info("Downloading captures files...")
        matched_rows = self.manifest.filetype == FileType.CAPTURE
        downloaded = self._download_rows(matched_rows)
        logger.info(
            f"Total {len(downloaded)} capture files were successfully "
            f"downloaded."
        )

    @Timer(
        name="download_binary_files",
        text=const.TIMING_TEXT,
        logger=logging.info,
    )
    def download_binary_files(self):
        """Download all binary files."""
        logger.info("Downloading binary files...")
        matched_rows = self.manifest.filetype == FileType.BINARY
        downloaded = self._download_rows(matched_rows)
        logger.info(
            f"Total {len(downloaded)} binary files were successfully "
            f"downloaded."
        )

    def _download_rows(self, matched_rows):
        """Download matched rows in a manifest file.

        Note:
            We might need to download 1M+ simulation output files; in that
            case we don't want a single file-transfer failure to hold back
            the whole simulation download. Download exceptions are therefore
            captured here: we only log an error message, and users are
            expected to pay attention to these errors.

        Args:
            matched_rows (pd.Series): boolean series indicating which rows
                of the manifest file should be downloaded

        Returns:
            list of strings representing the downloaded destination paths.
        """
        n_expected = sum(matched_rows)
        future_downloaded = []
        downloaded = []
        with concurrent.futures.ThreadPoolExecutor(MAX_WORKER) as executor:
            for _, row in self.manifest[matched_rows].iterrows():
                source_uri = row.download_uri
                relative_path = Path(self.data_root, row.file_name)
                dest_path = relative_path.parent
                file_name = relative_path.name
                future = executor.submit(
                    download_file, source_uri, dest_path, file_name
                )
                future_downloaded.append(future)

            for future in tqdm(
                concurrent.futures.as_completed(future_downloaded),
                total=n_expected,
            ):
                try:
                    downloaded.append(future.result())
                except DownloadError as ex:
                    logger.error(ex)

        n_downloaded = len(downloaded)
        if n_downloaded != n_expected:
            logger.warning(
                f"Found {n_expected} matching records in the manifest file, "
                f"but only {n_downloaded} were downloaded."
            )

        return downloaded
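
    # A minimal usage sketch of this class, assuming a manifest csv already
    # exists on disk (the paths are placeholders):
    #
    # >>> dl = Downloader("/tmp/abcdef.csv", "/tmp/dataset")
    # >>> dl.download_references()
    # >>> dl.download_metrics()
    # >>> dl.download_captures()
    # >>> dl.download_binary_files()  # optional: images, LIDAR, ...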


def download_manifest(
    run_execution_id, manifest_file, access_token, project_id, use_cache=True
):
    """Download the manifest file for a single run_execution_id.

    For more on Unity Simulation see these
    `docs <https://github.com/Unity-Technologies/Unity-Simulation-Docs>`_

    Args:
        run_execution_id (str): Unity Simulation run execution id
        manifest_file (str): path to the destination of the manifest_file
        access_token (str): short-lived authorization token
        project_id (str): Unity project id that has Unity Simulation enabled
        use_cache (bool, optional): indicator to skip download if the
            manifest file already exists. Default: True.

    Returns:
        str: Full path to the manifest_file
    """
    api_endpoint = const.USIM_API_ENDPOINT
    project_url = f"{api_endpoint}/v1/projects/{project_id}/"
    data_url = f"{project_url}runs/{run_execution_id}/data"
    if Path(manifest_file).exists() and use_cache:
        logger.info(
            f"Manifest file {manifest_file} already exists. Skipping downloads."
        )
        return manifest_file

    logger.info(
        f"Trying to download manifest file for run-execution-id "
        f"{run_execution_id}"
    )
    adapter = TimeoutHTTPAdapter(
        timeout=DEFAULT_TIMEOUT, max_retries=Retry(total=DEFAULT_MAX_RETRIES)
    )
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    with requests.Session() as http:
        http.mount("https://", adapter)
        try:
            resp = http.get(data_url, headers=headers)
            resp.raise_for_status()
        except requests.exceptions.RequestException as ex:
            logger.error(ex)
            err_msg = (
                f"Failed to download manifest file for run-execution-id: "
                f"{run_execution_id}."
            )
            raise DownloadError(err_msg)
        else:
            Path(manifest_file).parent.mkdir(parents=True, exist_ok=True)
            with open(manifest_file, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1024):
                    f.write(chunk)

    logger.info(
        f"Manifest file {manifest_file} downloaded for run-execution-id "
        f"{run_execution_id}"
    )

    return manifest_file
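
# A hedged invocation sketch; the ids and token below are placeholders, and
# the request goes to const.USIM_API_ENDPOINT as shown above:
#
# >>> manifest = download_manifest(
# ...     run_execution_id="abcdef",
# ...     manifest_file="/tmp/abcdef.csv",
# ...     access_token="<short-lived-token>",
# ...     project_id="12345678-1234-1234-1234-1234567890ab",
# ... )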

fileFormatVersion: 2
guid: 8c40e40e50b7fb24ca8d68b31022e159
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

class DownloadError(Exception):
    """Raised when a file download fails."""


class ChecksumError(Exception):
    """Raised when the downloaded file checksum is incorrect."""


class InvalidTrackerError(Exception):
    """Raised when an unknown tracker is requested."""

fileFormatVersion: 2
guid: 7f42738b771e0cb42816c12788bdb76e
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import base64
import logging
import os
import re
from os import makedirs
from os.path import basename, isdir
from pathlib import Path

from google.cloud.storage import Client

from datasetinsights.io.download import validate_checksum
from datasetinsights.io.exceptions import ChecksumError

logger = logging.getLogger(__name__)


class GCSClient:
    """This class is used to download data from a GCS location
    and perform functions such as downloading the dataset and checksum
    validation.
    """

    GCS_PREFIX = "^gs://"
    KEY_SEPARATOR = "/"

    def __init__(self, **kwargs):
        """Initialize a client to Google Cloud Storage (GCS)."""
        self.client = Client(**kwargs)

    def download(self, *, url=None, local_path=None, bucket=None, key=None):
        """Download the dataset from GCS.

        Args:
            url (str): This is the downloader-uri that indicates where
                the dataset should be downloaded from.

            local_path (str): This is the path to the directory where the
                download will store the dataset.

            bucket (str): gcs bucket name
            key (str): object key path

        Examples:
            >>> url = "gs://bucket/folder or gs://bucket/folder/data.zip"
            >>> local_path = "/tmp/folder"
            >>> bucket = "bucket"
            >>> key = "folder/data.zip" or "folder"
        """
        if not (bucket and key) and url:
            bucket, key = self._parse(url)

        bucket_obj = self.client.get_bucket(bucket)
        if self._is_file(bucket_obj, key):
            self._download_file(bucket_obj, key, local_path)
        else:
            self._download_folder(bucket_obj, key, local_path)
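
    # A minimal usage sketch, assuming default GCP credentials are available
    # (the bucket and keys are placeholders):
    #
    # >>> client = GCSClient()
    # >>> # single object...
    # >>> client.download(url="gs://bucket/folder/data.zip", local_path="/tmp/folder")
    # >>> # ...or everything under a prefix
    # >>> client.download(bucket="bucket", key="folder", local_path="/tmp/folder")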

    def _download_folder(self, bucket, key, local_path):
        """Download all files under a directory prefix."""
        blobs = bucket.list_blobs(prefix=key)
        for blob in blobs:
            local_file_path = blob.name.replace(key, local_path)
            self._download_validate(blob, local_file_path)

    def _download_file(self, bucket, key, local_path):
        """Download a single file."""
        blob = bucket.get_blob(key)
        key_suffix = key.replace("/" + basename(key), "")
        local_file_path = blob.name.replace(key_suffix, local_path)
        self._download_validate(blob, local_file_path)

    def _download_validate(self, blob, local_file_path):
        """Download a file and validate its checksum."""
        self._download_blob(blob, local_file_path)
        self._checksum(blob, local_file_path)

    def _download_blob(self, blob, local_file_path):
        """Download a blob from GCS.

        Raises:
            NotFound: raised when the object is not found
        """
        dst_dir = local_file_path.replace("/" + basename(local_file_path), "")
        key = blob.name
        if not isdir(dst_dir):
            makedirs(dst_dir)

        logger.info(f"Downloading from {key} to {local_file_path}.")
        blob.download_to_filename(local_file_path)

    def _checksum(self, blob, filename):
        """Validate the checksum and delete the file if it does not match.

        Raises:
            ChecksumError: raised if the checksum does not match
        """
        expected_checksum = blob.md5_hash
        if expected_checksum:
            expected_checksum_hex = self._md5_hex(expected_checksum)
            try:
                validate_checksum(
                    filename, expected_checksum_hex, algorithm="MD5"
                )
            except ChecksumError as e:
                logger.exception(
                    "Checksum mismatch. Deleting the downloaded files."
                )
                os.remove(filename)
                raise e

    def _is_file(self, bucket, key):
        """Check if the key is a file or a directory."""
        blob = bucket.get_blob(key)
        return blob and blob.name == key

    def _md5_hex(self, checksum):
        """Fix missing base64 padding if required and convert to hex."""
        missing_padding = len(checksum) % 4
        if missing_padding != 0:
            checksum += "=" * (4 - missing_padding)
        return base64.b64decode(checksum).hex()
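
    # GCS reports md5_hash as base64, occasionally without padding; a quick
    # check of the padding repair with a made-up digest:
    #
    # >>> import base64
    # >>> checksum = "AQEBAQEBAQEBAQEBAQEBAQ"  # unpadded base64 of 16 x 0x01
    # >>> checksum += "=" * (4 - len(checksum) % 4)
    # >>> base64.b64decode(checksum).hex() == "01" * 16
    # True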

    def _parse(self, url):
        """Split a GCS-prefixed URL into bucket and path."""
        match = re.search(self.GCS_PREFIX, url)
        if not match:
            raise ValueError(
                f"Specified destination prefix: {url} does not start "
                f"with {self.GCS_PREFIX}"
            )
        url = url[len(self.GCS_PREFIX) - 1 :]
        if self.KEY_SEPARATOR not in url:
            raise ValueError(
                f"Specified destination prefix: {self.GCS_PREFIX + url} does "
                f"not have an object key"
            )
        idx = url.index(self.KEY_SEPARATOR)
        bucket = url[:idx]
        path = url[(idx + 1) :]

        return bucket, path
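
    # Parsing requires no network access; a sketch with a placeholder bucket:
    #
    # >>> client._parse("gs://my-bucket/folder/data.zip")
    # ('my-bucket', 'folder/data.zip')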

    def upload(
        self, *, local_path=None, bucket=None, key=None, url=None, pattern="*"
    ):
        """Upload a file, or a list of files from a directory, to GCS.

        Args:
            url (str): This is the gcs location that indicates where
                the dataset should be uploaded.

            local_path (str): This is the path to the directory or file
                where the data is stored.

            bucket (str): gcs bucket name
            key (str): object key path
            pattern: Unix glob patterns. Use **/* for recursive glob.

        Examples:
            For file upload:
            >>> url = "gs://bucket/folder/data.zip"
            >>> local_path = "/tmp/folder/data.zip"
            >>> bucket = "bucket"
            >>> key = "folder/data.zip"
            For directory upload:
            >>> url = "gs://bucket/folder"
            >>> local_path = "/tmp/folder"
            >>> bucket = "bucket"
            >>> key = "folder"
            >>> pattern = "**/*"
        """
        if not (bucket and key) and url:
            bucket, key = self._parse(url)

        bucket_obj = self.client.get_bucket(bucket)
        if isdir(local_path):
            self._upload_folder(
                local_path=local_path,
                bucket=bucket_obj,
                key=key,
                pattern=pattern,
            )
        else:
            self._upload_file(local_path=local_path, bucket=bucket_obj, key=key)

    def _upload_file(self, local_path=None, bucket=None, key=None):
        """Upload a single object to GCS."""
        blob = bucket.blob(key)
        logger.info(f"Uploading from {local_path} to {key}.")
        blob.upload_from_filename(local_path)

    def _upload_folder(
        self, local_path=None, bucket=None, key=None, pattern="*"
    ):
        """Upload all files from a folder to GCS that match a glob pattern."""
        for path in Path(local_path).glob(pattern):
            if path.is_dir():
                continue
            full_path = str(path)
            relative_path = str(path.relative_to(local_path))
            object_key = os.path.join(key, relative_path)
            self._upload_file(
                local_path=full_path, bucket=bucket, key=object_key
            )

    def get_most_recent_blob(self, url=None, bucket_name=None, key=None):
        """Get the most recently updated blob in a given bucket under a
        given prefix.

        Args:
            bucket_name (str): gcs bucket name
            key (str): object key path
        """
        if not (bucket_name and key) and url:
            bucket_name, key = self._parse(url)

        bucket = self.client.get_bucket(bucket_name)

        if self._is_file(bucket, key):
            # Called on a file, return the file
            return bucket.get_blob(key)
        else:
            logger.debug(
                f"Cloud path is not a file. Checking for the most recent "
                f"file in {url}"
            )
            # Return the blob with the max update time (most recent)
            blobs = self._list_blobs(bucket, prefix=key)
            return max(
                blobs, key=lambda blob: bucket.get_blob(blob.name).updated
            )

    def _list_blobs(self, bucket_name=None, prefix=None):
        """List all blobs with a given prefix."""
        blobs = self.client.list_blobs(bucket_name, prefix=prefix)
        blob_list = list(blobs)
        logger.debug(f"Blobs in {bucket_name} under prefix {prefix}:")
        logger.debug(blob_list)
        return blob_list
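
    # A hedged sketch of picking up the newest object under a prefix
    # (the url is a placeholder):
    #
    # >>> blob = client.get_most_recent_blob(url="gs://bucket/checkpoints")
    # >>> blob.name, blob.updated  # most recently updated object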

fileFormatVersion: 2
guid: 2781a7a725105754c8a575f07f16b4c2
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: 387629385a081e846bed63e41e33ea22
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

from .statistics import RenderedObjectInfo
from .visualization.plots import (
    bar_plot,
    grid_plot,
    histogram_plot,
    model_performance_box_plot,
    model_performance_comparison_box_plot,
    plot_bboxes,
    plot_keypoints,
    rotation_plot,
)

__all__ = [
    "bar_plot",
    "grid_plot",
    "histogram_plot",
    "plot_bboxes",
    "model_performance_box_plot",
    "model_performance_comparison_box_plot",
    "rotation_plot",
    "RenderedObjectInfo",
    "plot_keypoints",
]

fileFormatVersion: 2
guid: 60558960135c8064cbc9d241acc15d72
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

import logging

import datasetinsights.constants as const
from datasetinsights.datasets.unity_perception import MetricDefinitions, Metrics
from datasetinsights.datasets.unity_perception.tables import SCHEMA_VERSION

logger = logging.getLogger(__name__)


class RenderedObjectInfo:
    """Rendered Object Info in Captures

    This metric stores common object info captured by a sensor in the
    simulation environment. It can be used to calculate object statistics
    such as object count, object rotation and visible pixels.

    Attributes:
        raw_table (pd.DataFrame): rendered object info stored in a tidy
            pandas dataframe. Columns: "label_id", "instance_id",
            "visible_pixels", "capture_id", "label_name".

    Examples:

    .. code-block:: python

        >>> # set the data root path to where data was stored
        >>> data_root = "$HOME/data"
        >>> # use rendered object info definition id
        >>> definition_id = "659c6e36-f9f8-4dd6-9651-4a80e51eabc4"
        >>> roinfo = RenderedObjectInfo(data_root=data_root, def_id=definition_id)
        # total object count per label dataframe
        >>> roinfo.total_counts()
        label_id label_name count
               1    object1    10
               2    object2    21
        # object count per capture dataframe
        >>> roinfo.per_capture_counts()
        capture_id count
            qwerty    10
            asdfgh    21
    """

    LABEL = "label_id"
    LABEL_READABLE = "label_name"
    INDEX_COLUMN = "capture_id"
    VALUE_COLUMN = "values"
    COUNT_COLUMN = "count"

    def __init__(
        self,
        data_root=const.DEFAULT_DATA_ROOT,
        version=SCHEMA_VERSION,
        def_id=None,
    ):
        """Initialize RenderedObjectInfo.

        Args:
            data_root (str): root directory where the dataset was stored
            version (str): synthetic dataset schema version
            def_id (str): rendered object info definition id
        """
        filtered_metrics = Metrics(data_root, version).filter_metrics(def_id)
        label_mappings = self._read_label_mappings(data_root, version, def_id)
        self.raw_table = self._read_filtered_metrics(
            filtered_metrics, label_mappings
        )

    def num_captures(self):
        """Total number of captures.

        Returns:
            integer: Total number of captures
        """
        return self.raw_table[self.INDEX_COLUMN].nunique()

    @staticmethod
    def _read_label_mappings(data_root, version, def_id):
        """Read label_mappings from a metric_definition record.

        Args:
            data_root (str): root directory where the dataset was stored
            version (str): synthetic dataset schema version
            def_id (str): rendered object info definition id

        Returns:
            dict: The mappings of {label_id: label_name}
        """
        definition = MetricDefinitions(data_root, version).get_definition(
            def_id
        )
        name = RenderedObjectInfo.LABEL
        readable_name = RenderedObjectInfo.LABEL_READABLE

        return {d[name]: d[readable_name] for d in definition["spec"]}

    @staticmethod
    def _read_filtered_metrics(filtered_metrics, label_mappings):
        """Attach readable label names to the filtered metrics and drop rows
        whose labels are not defined in label_mappings.

        Args:
            filtered_metrics (pd.DataFrame): A pandas dataframe of metrics
                filtered by definition id.
            label_mappings (dict): the mappings of {label_id: label_name}

        Returns:
            pd.DataFrame: rendered object info stored in a tidy
                pandas dataframe. Columns: "label_id", "instance_id",
                "visible_pixels", "capture_id", "label_name".
        """
        filtered_metrics[RenderedObjectInfo.LABEL_READABLE] = filtered_metrics[
            RenderedObjectInfo.LABEL
        ].map(label_mappings)
        # Remove metrics data not defined in label_mappings
        filtered_metrics.dropna(
            subset=[RenderedObjectInfo.LABEL_READABLE], inplace=True
        )

        return filtered_metrics

    def total_counts(self):
        """Aggregate Total Object Counts Per Label

        Returns:
            pd.DataFrame: Total object counts table.
                Columns: "label_id", "label_name", "count"
        """
        agg = (
            self.raw_table.groupby([self.LABEL, self.LABEL_READABLE])
            .size()
            .to_frame(name=self.COUNT_COLUMN)
            .reset_index()
        )

        return agg

    def per_capture_counts(self):
        """Aggregate Object Counts Per Capture

        Returns:
            pd.DataFrame: Object counts per capture table.
                Columns: "capture_id", "count"
        """
        agg = (
            self.raw_table.groupby(self.INDEX_COLUMN)
            .size()
            .to_frame(name=self.COUNT_COLUMN)
            .reset_index()
        )

        return agg
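
    # A minimal usage sketch, reusing the definition id from the class
    # docstring (data_root is a placeholder):
    #
    # >>> roinfo = RenderedObjectInfo(
    # ...     data_root="/data", def_id="659c6e36-f9f8-4dd6-9651-4a80e51eabc4"
    # ... )
    # >>> roinfo.num_captures()
    # >>> roinfo.total_counts().head()
    # >>> roinfo.per_capture_counts().head()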

fileFormatVersion: 2
guid: 6cf35b682544b9e4a9df8c371766c9a1
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

fileFormatVersion: 2
guid: 6835dc223ee79af449a720c046e342ff
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
Some files were not shown because too many files changed in this diff.