-
Notifications
You must be signed in to change notification settings - Fork 4
feat: add LeRobot v3 dataset import support #268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0615de9
d3303df
d928047
681bb56
6cddb65
8612de4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
"""
Example: import a LeRobot dataset into a FastLabel robotics project.

Install the optional dependencies first: pip install fastlabel[robotics]

Only the LeRobot v3 dataset layout is supported:
    data/chunk-*/file-*.parquet
    videos/.../chunk-*/file-*.mp4
"""

from fastlabel import Client

client = Client()

# Arguments shared by both calls below.
dataset_args = {
    "project": "your-project-slug",
    "lerobot_data_path": "/path/to/lerobot/dataset",
}

# Import all episodes
results = client.import_lerobot(**dataset_args)

# Import specific episodes by index
results = client.import_lerobot(**dataset_args, episode_indices=[0, 1, 2])
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| import logging | ||
| import os | ||
| import re | ||
| import shutil | ||
| import urllib.parse | ||
| from concurrent.futures import ThreadPoolExecutor, wait | ||
| from pathlib import Path | ||
|
|
@@ -14,7 +15,7 @@ | |
| import xmltodict | ||
| from PIL import Image, ImageColor, ImageDraw | ||
|
|
||
| from fastlabel import const, converters, utils | ||
| from fastlabel import const, converters, lerobot, utils | ||
| from fastlabel.const import ( | ||
| EXPORT_IMAGE_WITH_ANNOTATIONS_SUPPORTED_IMAGE_TYPES, | ||
| KEYPOINT_MIN_STROKE_WIDTH, | ||
|
|
@@ -28,7 +29,7 @@ | |
| ) | ||
|
|
||
| from .api import Api | ||
| from .exceptions import FastLabelInvalidException | ||
| from .exceptions import FastLabelException, FastLabelInvalidException | ||
| from .query import DatasetObjectGetQuery | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
@@ -2072,6 +2073,64 @@ def create_robotics_task( | |
|
|
||
| return self.api.post_request(endpoint, payload=payload) | ||
|
|
||
def import_lerobot(
    self,
    project: str,
    lerobot_data_path: str,
    episode_indices: list = None,
) -> list:
    """
    Import a LeRobot dataset into a FastLabel robotics project.

    Only LeRobot v3 datasets are supported; the dataset version is detected
    from the directory layout and v2 datasets are rejected.
    For each episode, an episode ZIP (video files and frame data converted
    from parquet to JSON) is built locally, a robotics task named after the
    episode is created, and the ZIP is uploaded to the project.

    Requires: pip install fastlabel[robotics]

    project is slug of your project (Required).
    lerobot_data_path is the path to the LeRobot dataset directory (Required).
    episode_indices is a list of episode indices to import.
    If None, all episodes are imported (Optional).

    Returns a list with one dict per episode:
    {"episode": <episode name>, "success": <bool>, "result": <upload result>}.
    When the upload raises FastLabelException, the episode is recorded with
    "success": False and "result": {"error": <message>} and the remaining
    episodes are still processed. Errors raised while building the episode
    map, building a ZIP or creating a task are not caught and propagate.
    """
    data_path = Path(lerobot_data_path)
    # Built once and shared by every episode so the dataset is not rescanned
    # on each loop iteration.
    episode_map = lerobot.build_episode_map(data_path)
    if episode_indices is None:
        episode_indices = sorted(episode_map.keys())

    results = []
    for episode_index in episode_indices:
        episode_name = lerobot.format_episode_name(episode_index)

        # Build the archive before touching the server: if the local
        # conversion fails, no empty task is left behind in the project.
        zip_path = lerobot.create_episode_zip(
            lerobot_data_path=data_path,
            episode_index=episode_index,
            episode_map=episode_map,
        )
        try:
            self.create_robotics_task(project=project, name=episode_name)
            try:
                result = self.import_robotics_contents_file(
                    project=project, file_path=zip_path
                )
            except FastLabelException as e:
                results.append(
                    {
                        "episode": episode_name,
                        "success": False,
                        "result": {"error": str(e)},
                    }
                )
            else:
                results.append(
                    {"episode": episode_name, "success": True, "result": result}
                )
        finally:
            # NOTE(review): removes the ZIP's whole parent directory, which
            # assumes create_episode_zip writes into a dedicated temporary
            # directory - confirm against lerobot.v3.create_episode_zip.
            # ignore_errors keeps a cleanup failure from masking the result
            # (or the exception) of the import itself.
            shutil.rmtree(Path(zip_path).parent, ignore_errors=True)

    return results
|
|
||
| def import_appendix_file( | ||
| self, | ||
| project: str, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from fastlabel.exceptions import FastLabelInvalidException | ||
| from fastlabel.lerobot import v3 | ||
| from fastlabel.lerobot.common import ( | ||
| check_dependencies, | ||
| detect_version, | ||
| format_episode_name, | ||
| get_camera_dirs, | ||
| ) | ||
|
|
||
| __all__ = [ | ||
| "build_episode_map", | ||
| "get_episode_indices", | ||
| "create_episode_zip", | ||
| "format_episode_name", | ||
| "get_camera_dirs", | ||
| ] | ||
|
|
||
|
|
||
def get_episode_indices(lerobot_data_path):
    """Return all episode indices found in a LeRobot v3 dataset.

    Verifies the optional dependencies, detects the dataset version and
    delegates to the v3 implementation.
    Raises FastLabelInvalidException when the dataset is detected as v2.
    """
    check_dependencies()
    # Only v3, the format in main use, is supported for now; v2 datasets are
    # rejected explicitly instead of being read incorrectly.
    if detect_version(lerobot_data_path) == "v2":
        raise FastLabelInvalidException(
            "LeRobot dataset v2 is not supported. Please convert to v3.",
            422,
        )
    return v3.get_episode_indices(lerobot_data_path)
|
|
||
|
|
||
def build_episode_map(lerobot_data_path):
    """Build the episode map of a dataset (a dict keyed by episode index).

    Verifies the optional dependencies, detects the dataset version and
    delegates to the v3 implementation.
    Raises FastLabelInvalidException when the dataset is detected as v2.
    """
    check_dependencies()
    detected = detect_version(lerobot_data_path)
    if detected != "v2":
        return v3._build_episode_map(lerobot_data_path)
    raise FastLabelInvalidException(
        "LeRobot dataset v2 is not supported. Please convert to v3.",
        422,
    )
|
|
||
|
|
||
def create_episode_zip(lerobot_data_path, episode_index, episode_map=None):
    """Create the ZIP file FastLabel expects for a single episode.

    Supports LeRobot dataset v3 only; a dataset detected as v2 raises
    FastLabelInvalidException.

    ZIP structure (files at root, ZIP name = episode name):
        {content_name}.mp4    (one per camera)
        {episode_name}.json   (frame data)

    episode_map is forwarded unchanged to the v3 implementation.

    Returns the path to the created ZIP file.
    The caller is responsible for cleaning up the returned ZIP file.
    """
    check_dependencies()
    is_v2 = detect_version(lerobot_data_path) == "v2"
    if is_v2:
        raise FastLabelInvalidException(
            "LeRobot dataset v2 is not supported. Please convert to v3.",
            422,
        )
    zip_path = v3.create_episode_zip(lerobot_data_path, episode_index, episode_map)
    return zip_path
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| from pathlib import Path | ||
|
|
||
| from fastlabel.exceptions import FastLabelInvalidException | ||
|
|
||
|
|
||
def check_dependencies():
    """Ensure the optional packages needed for LeRobot support are installed.

    Raises FastLabelInvalidException (code 422) when pandas or pyarrow cannot
    be imported. The original ImportError is chained as the cause so the
    traceback shows which package is missing.
    """
    try:
        import pandas  # noqa: F401
        import pyarrow  # noqa: F401
    except ImportError as exc:
        raise FastLabelInvalidException(
            "pandas and pyarrow are required for LeRobot support. "
            "Install them with: pip install fastlabel[robotics]",
            422,
        ) from exc
|
|
||
|
|
||
def detect_version(lerobot_data_path: Path) -> str:
    """Detect the LeRobot dataset version ("v2" or "v3") from its layout.

    Both versions keep their parquet files under data/chunk-XXX/:
        v2: data/chunk-XXX/episode_YYYYYY.parquet
        v3: data/chunk-XXX/file-YYY.parquet

    The first parquet file whose name matches one of the two patterns decides
    the result. Raises FastLabelInvalidException when the data directory is
    missing or when no matching parquet file is found.
    """
    data_dir = lerobot_data_path / "data"
    if not data_dir.exists():
        raise FastLabelInvalidException(f"Data directory not found: {data_dir}", 422)

    for entry in data_dir.iterdir():
        if entry.is_dir() and entry.name.startswith("chunk-"):
            for candidate in entry.iterdir():
                if candidate.suffix == ".parquet":
                    stem = candidate.stem
                    if stem.startswith("episode_"):
                        return "v2"
                    if stem.startswith("file-"):
                        return "v3"

    raise FastLabelInvalidException(
        "Could not detect LeRobot dataset version. "
        "Expected data/chunk-XXX/episode_*.parquet (v2) "
        "or data/chunk-XXX/file-*.parquet (v3).",
        422,
    )
|
|
||
|
|
||
def format_episode_name(episode_index: int) -> str:
    """Return the episode name: "episode_" plus the index zero-padded to 6 digits."""
    return "episode_{:06d}".format(episode_index)
|
|
||
|
|
||
def get_camera_dirs(lerobot_data_path: Path) -> list:
    """Get camera directories and their content names.

    Scans the sub-directories of <lerobot_data_path>/videos in sorted order;
    plain files are skipped. The content name is the directory name without
    its leading "observation." component, with the remaining dot-separated
    parts joined by underscores,
    e.g. observation.images.top -> content_name = "images_top".

    Returns [(camera_dir, content_name), ...], or an empty list when the
    videos directory does not exist.
    Raises FastLabelInvalidException (code 422) when a directory name does
    not start with the "observation" component.
    """
    videos_dir = lerobot_data_path / "videos"
    if not videos_dir.exists():
        return []

    results = []
    for obs_dir in sorted(videos_dir.iterdir()):
        if not obs_dir.is_dir():
            continue
        prefix, *name_parts = obs_dir.name.split(".")
        if prefix != "observation":
            # Pass the 422 code like every other FastLabelInvalidException
            # raised by this package; the original call omitted it.
            raise FastLabelInvalidException(
                f"Unexpected camera dir name: {obs_dir.name}",
                422,
            )
        results.append((obs_dir, "_".join(name_parts)))
    return results
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nits]
parquet読み込みが何度も走っているのが若干気にはなりました
create_episode_zipから内部的に呼び出している_build_episode_mapがループ毎に全 parquet をスキャンしてエピソードのインデックス情報を構築してそうで、
結果は呼び出し間で変わらなそうだなと
これがパフォーマンスにどれくらい影響があるのかちょっとわかってないのですが、ループ外で一度だけ構築して渡す設計でもいいかもと思ったりしました
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
確かにその通りですね!ループ外のメインのメソッドで作成して、引数として渡す構造に変更しました!
d3303df