Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
files: '^.*\.py'
types: [file]
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
rev: 7.3.0
hooks:
- id: flake8
files: '^.*\.py'
Expand Down
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- [Sequential PCD](#sequential-pcd)
- [DICOM](#dicom)
- [Robotics](#robotics)
- [Import LeRobot Dataset](#import-lerobot-dataset)
- [Common](#common)
- [Appendix](#appendix)
- [Annotation](#annotation)
Expand Down Expand Up @@ -2207,6 +2208,37 @@ history = client.import_robotics_contents_file(
)
```

#### Import LeRobot Dataset

Import a [LeRobot](https://github.com/huggingface/lerobot) dataset (v3) into a FastLabel robotics project.

Requires additional dependencies:

```bash
pip install fastlabel[robotics]
```

For each episode, this method creates a robotics task and uploads the video files and frame data (converted from parquet to JSON).

```python
results = client.import_lerobot(
project="YOUR_PROJECT_SLUG",
lerobot_data_path="/path/to/lerobot/dataset",
)
```

You can also specify which episodes to import:

```python
results = client.import_lerobot(
project="YOUR_PROJECT_SLUG",
lerobot_data_path="/path/to/lerobot/dataset",
episode_indices=[0, 1, 2],
)
```

> **Note:** Only LeRobot dataset v3 is supported. v2 datasets need to be converted to v3 before importing.

### Common

APIs for update, delete, and count are the same across all task types.
Expand Down
25 changes: 25 additions & 0 deletions examples/import_lerobot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Import a LeRobot dataset into a FastLabel robotics project.

Requires: pip install fastlabel[robotics]

Supports LeRobot v3 dataset format only.
v3: data/chunk-*/file-*.parquet, videos/.../chunk-*/file-*.mp4
"""

from fastlabel import Client

client = Client()

# Import all episodes
results = client.import_lerobot(
project="your-project-slug",
lerobot_data_path="/path/to/lerobot/dataset",
)

# Import specific episodes by index
results = client.import_lerobot(
project="your-project-slug",
lerobot_data_path="/path/to/lerobot/dataset",
episode_indices=[0, 1, 2],
)
63 changes: 61 additions & 2 deletions fastlabel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import re
import shutil
import urllib.parse
from concurrent.futures import ThreadPoolExecutor, wait
from pathlib import Path
Expand All @@ -14,7 +15,7 @@
import xmltodict
from PIL import Image, ImageColor, ImageDraw

from fastlabel import const, converters, utils
from fastlabel import const, converters, lerobot, utils
from fastlabel.const import (
EXPORT_IMAGE_WITH_ANNOTATIONS_SUPPORTED_IMAGE_TYPES,
KEYPOINT_MIN_STROKE_WIDTH,
Expand All @@ -28,7 +29,7 @@
)

from .api import Api
from .exceptions import FastLabelInvalidException
from .exceptions import FastLabelException, FastLabelInvalidException
from .query import DatasetObjectGetQuery

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -2072,6 +2073,64 @@ def create_robotics_task(

return self.api.post_request(endpoint, payload=payload)

def import_lerobot(
    self,
    project: str,
    lerobot_data_path: str,
    episode_indices: list = None,
) -> list:
    """
    Import a LeRobot dataset into a FastLabel robotics project.

    Automatically detects LeRobot dataset version (v3 only).
    For each episode, creates a robotics task and uploads the video files
    and frame data (converted from parquet to JSON).

    Requires: pip install fastlabel[robotics]

    project is slug of your project (Required).
    lerobot_data_path is the path to the LeRobot dataset directory (Required).
    episode_indices is a list of episode indices to import.
    If None, all episodes are imported (Optional).

    Returns a list of per-episode result dicts:
    {"episode": str, "success": bool, "result": dict}.
    A FastLabelException raised while processing one episode is recorded
    as a failed entry and the remaining episodes are still imported, so
    the returned list always covers every requested episode.
    """
    data_path = Path(lerobot_data_path)
    # Built once up front: create_episode_zip would otherwise re-scan
    # every parquet file on each loop iteration.
    episode_map = lerobot.build_episode_map(data_path)
    if episode_indices is None:
        episode_indices = sorted(episode_map.keys())

    results = []
    for episode_index in episode_indices:
        episode_name = lerobot.format_episode_name(episode_index)
        zip_path = None
        try:
            # Task creation and zip packaging are inside the try so a
            # failure on one episode is recorded instead of aborting the
            # whole batch (matching the per-episode result contract).
            self.create_robotics_task(project=project, name=episode_name)

            zip_path = lerobot.create_episode_zip(
                lerobot_data_path=data_path,
                episode_index=episode_index,
                episode_map=episode_map,
            )
            result = self.import_robotics_contents_file(
                project=project, file_path=zip_path
            )
            results.append(
                {"episode": episode_name, "success": True, "result": result}
            )
        except FastLabelException as e:
            results.append(
                {
                    "episode": episode_name,
                    "success": False,
                    "result": {"error": str(e)},
                }
            )
        finally:
            # create_episode_zip writes into a dedicated temp directory;
            # remove it whether or not the upload succeeded. Guarded so a
            # failure before zip creation cannot reference an unset path.
            if zip_path is not None:
                tmp_dir = Path(zip_path).parent
                if tmp_dir.exists():
                    shutil.rmtree(tmp_dir)

    return results

def import_appendix_file(
self,
project: str,
Expand Down
62 changes: 62 additions & 0 deletions fastlabel/lerobot/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from fastlabel.exceptions import FastLabelInvalidException
from fastlabel.lerobot import v3
from fastlabel.lerobot.common import (
check_dependencies,
detect_version,
format_episode_name,
get_camera_dirs,
)

__all__ = [
"build_episode_map",
"get_episode_indices",
"create_episode_zip",
"format_episode_name",
"get_camera_dirs",
]


def _ensure_v3(lerobot_data_path):
    """Check optional dependencies and reject any non-v3 dataset.

    Shared guard for all public entry points in this module; previously
    this dependency-check + version-detect + v2-rejection sequence was
    duplicated in each function.
    """
    check_dependencies()
    version = detect_version(lerobot_data_path)
    if version == "v2":
        raise FastLabelInvalidException(
            "LeRobot dataset v2 is not supported. Please convert to v3.",
            422,
        )


def get_episode_indices(lerobot_data_path):
    """Get all episode indices from a LeRobot v3 dataset."""
    _ensure_v3(lerobot_data_path)
    return v3.get_episode_indices(lerobot_data_path)


def build_episode_map(lerobot_data_path):
    """Build episode map from dataset. Returns a dict keyed by episode index."""
    _ensure_v3(lerobot_data_path)
    return v3._build_episode_map(lerobot_data_path)


def create_episode_zip(lerobot_data_path, episode_index, episode_map=None):
    """Create a ZIP file for a single episode in the format expected by FastLabel.

    Supports LeRobot dataset v3 only.

    ZIP structure (files at root, ZIP name = episode name):
        {content_name}.mp4 (one per camera)
        {episode_name}.json (frame data)

    Returns the path to the created ZIP file.
    The caller is responsible for cleaning up the returned ZIP file.
    """
    _ensure_v3(lerobot_data_path)
    return v3.create_episode_zip(lerobot_data_path, episode_index, episode_map)
73 changes: 73 additions & 0 deletions fastlabel/lerobot/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from pathlib import Path

from fastlabel.exceptions import FastLabelInvalidException


def check_dependencies():
    """Ensure the optional LeRobot dependencies (pandas, pyarrow) are installed.

    Raises:
        FastLabelInvalidException: if either package is missing.
    """
    try:
        import pandas  # noqa: F401
        import pyarrow  # noqa: F401
    except ImportError as err:
        # Chain the original ImportError so the traceback still shows
        # which of the two packages actually failed to import.
        raise FastLabelInvalidException(
            "pandas and pyarrow are required for LeRobot support. "
            "Install them with: pip install fastlabel[robotics]",
            422,
        ) from err


def detect_version(lerobot_data_path: Path) -> str:
    """Detect LeRobot dataset version (v2 or v3).

    Both versions use data/chunk-XXX/ directories.
    v2: data/chunk-XXX/episode_YYYYYY.parquet
    v3: data/chunk-XXX/file-YYY.parquet
    """
    data_dir = lerobot_data_path / "data"
    if not data_dir.exists():
        raise FastLabelInvalidException(f"Data directory not found: {data_dir}", 422)

    chunk_dirs = (
        entry
        for entry in data_dir.iterdir()
        if entry.is_dir() and entry.name.startswith("chunk-")
    )
    for chunk_dir in chunk_dirs:
        for candidate in chunk_dir.iterdir():
            if candidate.suffix != ".parquet":
                continue
            stem = candidate.stem
            # The parquet file-naming scheme is what distinguishes the versions.
            if stem.startswith("episode_"):
                return "v2"
            if stem.startswith("file-"):
                return "v3"

    raise FastLabelInvalidException(
        "Could not detect LeRobot dataset version. "
        "Expected data/chunk-XXX/episode_*.parquet (v2) "
        "or data/chunk-XXX/file-*.parquet (v3).",
        422,
    )


def format_episode_name(episode_index: int) -> str:
    """Return the canonical episode name, e.g. 3 -> ``episode_000003``."""
    return "episode_{:06d}".format(episode_index)


def get_camera_dirs(lerobot_data_path: Path) -> list:
    """Get camera directories and their content names.

    Scans ``videos/`` for per-camera directories named ``observation.<...>``
    and derives a content name from the dotted suffix,
    e.g. observation.images.top -> content_name = "images_top".

    Returns [(camera_dir, content_name), ...] sorted by directory name,
    or [] when the dataset has no ``videos`` directory.

    Raises:
        FastLabelInvalidException: if a camera directory does not follow
            the ``observation.*`` naming convention.
    """
    videos_dir = lerobot_data_path / "videos"
    if not videos_dir.exists():
        return []

    results = []
    for obs_dir in sorted(videos_dir.iterdir()):
        if not obs_dir.is_dir():
            continue
        parts = obs_dir.name.split(".")
        if parts[0] != "observation":
            # Pass the 422 status code for consistency with every other
            # FastLabelInvalidException raised in this module.
            raise FastLabelInvalidException(
                f"Unexpected camera dir name: {obs_dir.name}", 422
            )

        content_name = "_".join(parts[1:])
        results.append((obs_dir, content_name))
    return results
Loading
Loading