# Patch summary: text-query (open-vocabulary detection/segmentation) plumbing
# across three files under client/dive-common. Everything below the first
# "diff --git" header is the patch payload itself.
#
# 1. apispec.ts
#    Adds five exported interfaces:
#      - TextQueryDetection: one detection, with a box as [x1, y1, x2, y2],
#        an optional polygon of [x, y] pairs, a score, a label, and an
#        optional lowResMask.
#      - TextQueryRequest: imagePath plus the query text, with optional
#        boxThreshold, maxDetections, boxes, points, pointLabels and masks.
#      - TextQueryResponse: a success flag with optional error, detections,
#        query and fallback fields.
#      - RefineDetectionsRequest / RefineDetectionsResponse: the same pattern
#        for refining a list of TextQueryDetection values.
#
# 2. components/EditorMenu.vue
#    - Declares three new emitted events: 'text-query-init', 'text-query'
#      and 'text-query-all-frames'.
#    - Adds dialog state refs: textQueryDialogOpen, textQueryInput,
#      textQueryLoading, textQueryThreshold (initialised to 0.3),
#      textQueryInitializing, textQueryServiceError, textQueryAllFrames.
#    - openTextQueryDialog opens the dialog, clears the input and any
#      previous service error, resets the all-frames flag, marks the service
#      as initializing and emits 'text-query-init'.
#    - closeTextQueryDialog closes the dialog and resets the input, error,
#      initializing and all-frames state.
#    - onTextQueryServiceReady(success, error?) clears the initializing flag
#      and, on failure, stores the given error or the fallback message
#      'Text query service is not available'.
#    - submitTextQuery does nothing when the trimmed input is empty.
#      Otherwise it emits 'text-query-all-frames' or 'text-query' (chosen by
#      textQueryAllFrames) with { text, boxThreshold }, then closes the
#      dialog.
#    - Binds the 't' key (mousetrap entry) to openTextQueryDialog and returns
#      all of the above from setup().
#    - The template hunk (@@ -366,6 +438,16 @@) appears to have lost its
#      markup: only the text "T:" and "mdi-text-search" remain.
#
# 3. components/Viewer.vue
#    - Adds editorMenuRef and an onTextQueryServiceReady(success, error?)
#      function that forwards to editorMenuRef.value.onTextQueryServiceReady
#      when that method exists, and returns both from setup(). The in-code
#      comment says ViewerLoader is the caller; that caller is not part of
#      this patch.
#    - The last hunk header (@@ -1372,6 +1385,7 @@) has no visible body.
#
# NOTE(review): the line breaks of this patch are collapsed, so the hunk line
#   counts cannot be checked against the content in this form. Presumably the
#   original line structure must be restored before the patch can be applied.
# NOTE(review): submitTextQuery sets textQueryLoading to true and back to
#   false within the same synchronous call, with no await in between, so the
#   loading state looks like it can never be observed. Confirm whether the
#   flag was meant to be cleared by the parent once the query finishes.
# NOTE(review): closeTextQueryDialog does not reset textQueryLoading; only
#   submitTextQuery touches it. Confirm this is intended.
# NOTE(review): onTextQueryServiceReady does not check whether the dialog is
#   still open, so a late callback after close would still set
#   textQueryServiceError. openTextQueryDialog clears it on the next open, so
#   this may be harmless. Verify against the caller.
diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 93c2f7e89..dc8649663 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -355,6 +355,78 @@ export interface SegmentationStatusResponse { ready?: boolean; } +/** + * Text Query Types for open-vocabulary detection/segmentation + */ + +/** A single detection returned from a text query */ +export interface TextQueryDetection { + /** Bounding box [x1, y1, x2, y2] */ + box: [number, number, number, number]; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Confidence score */ + score: number; + /** Label/class name (often the query text) */ + label: string; + /** Low-res mask for refinement (optional) */ + lowResMask?: number[][]; +} + +export interface TextQueryRequest { + /** Path to the image file */ + imagePath: string; + /** Text query describing what to find (e.g., "fish", "person swimming") */ + text: string; + /** Confidence threshold for detections (default: 0.3) */ + boxThreshold?: number; + /** Maximum number of detections to return (default: 10) */ + maxDetections?: number; + /** Optional boxes to refine [x1, y1, x2, y2][] */ + boxes?: [number, number, number, number][]; + /** Optional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Optional masks to refine */ + masks?: number[][][]; +} + +export interface TextQueryResponse { + /** Whether the query succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** List of detections found */ + detections?: TextQueryDetection[]; + /** The original query text */ + query?: string; + /** Whether fallback method was used (no native text support) */ + fallback?: boolean; +} + +export interface RefineDetectionsRequest { + /** Path to the image file */ + imagePath: string; + /** Detections to refine */ + detections: TextQueryDetection[]; + 
/** Optional additional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for additional points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Whether to include refined masks in response */ + refineMasks?: boolean; +} + +export interface RefineDetectionsResponse { + /** Whether the refinement succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Refined detections */ + detections?: TextQueryDetection[]; +} + export { provideApi, useApi, diff --git a/client/dive-common/components/EditorMenu.vue b/client/dive-common/components/EditorMenu.vue index 551b3fbf6..f29e6a6f7 100644 --- a/client/dive-common/components/EditorMenu.vue +++ b/client/dive-common/components/EditorMenu.vue @@ -81,6 +81,9 @@ export default defineComponent({ 'set-annotation-state', 'update:tail-settings', 'update:show-user-created-icon', + 'text-query-init', + 'text-query', + 'text-query-all-frames', ], setup(props, { emit }) { const toolTimeTimeout = ref(null); @@ -99,6 +102,59 @@ export default defineComponent({ localStorage.setItem(STORAGE_KEY, String(value)); }); + // Text query state + const textQueryDialogOpen = ref(false); + const textQueryInput = ref(''); + const textQueryLoading = ref(false); + const textQueryThreshold = ref(0.3); + const textQueryInitializing = ref(false); + const textQueryServiceError = ref(''); + const textQueryAllFrames = ref(false); + + const openTextQueryDialog = () => { + textQueryDialogOpen.value = true; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryAllFrames.value = false; + textQueryInitializing.value = true; + emit('text-query-init'); + }; + + const closeTextQueryDialog = () => { + textQueryDialogOpen.value = false; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryInitializing.value = false; + textQueryAllFrames.value = false; + }; + + const onTextQueryServiceReady = (success: boolean, error?: string) => { + 
textQueryInitializing.value = false; + if (!success) { + textQueryServiceError.value = error || 'Text query service is not available'; + } + }; + + const submitTextQuery = () => { + if (!textQueryInput.value.trim()) { + return; + } + textQueryLoading.value = true; + if (textQueryAllFrames.value) { + emit('text-query-all-frames', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } else { + emit('text-query', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } + closeTextQueryDialog(); + textQueryLoading.value = false; + }; + const modeToolTips = { Creating: { rectangle: 'Drag to draw rectangle. Press ESC to exit.', @@ -151,6 +207,10 @@ export default defineComponent({ const mousetrap = computed((): Mousetrap[] => [ ...flatten(editButtons.value.map((b) => b.mousetrap || [])), + { + bind: 't', + handler: () => openTextQueryDialog(), + }, ]); const activeEditButton = computed(() => editButtons.value.find((b) => b.active) || editButtons.value[0]); @@ -225,6 +285,18 @@ export default defineComponent({ activeEditButton, editButtonsMenuKey, activeSegmentationRecipe, + // Text query + textQueryDialogOpen, + textQueryInput, + textQueryLoading, + textQueryThreshold, + textQueryInitializing, + textQueryServiceError, + textQueryAllFrames, + openTextQueryDialog, + closeTextQueryDialog, + onTextQueryServiceReady, + submitTextQuery, }; }, }); @@ -366,6 +438,16 @@ export default defineComponent({ + + +
T:
+ mdi-text-search
diff --git a/client/dive-common/components/Viewer.vue b/client/dive-common/components/Viewer.vue index d853601b1..8f6bacaf0 100644 --- a/client/dive-common/components/Viewer.vue +++ b/client/dive-common/components/Viewer.vue @@ -166,6 +166,17 @@ export default defineComponent({ const controlsRef = ref(); const controlsHeight = ref(0); const controlsCollapsed = ref(false); + const editorMenuRef = ref(); + + /** + * Forward text query service ready status to EditorMenu + * Called by ViewerLoader when text query service initialization completes + */ + function onTextQueryServiceReady(success: boolean, error?: string) { + if (editorMenuRef.value?.onTextQueryServiceReady) { + editorMenuRef.value.onTextQueryServiceReady(success, error); + } + } const sideBarCollapsed = ref(false); // Sidebar mode: 'left', 'bottom', or 'collapsed' @@ -1190,6 +1201,8 @@ export default defineComponent({ controlsHeight, controlsCollapsed, sideBarCollapsed, + editorMenuRef, + onTextQueryServiceReady, sidebarMode, cycleSidebarMode, sidebarModeIcon, @@ -1372,6 +1385,7 @@ export default defineComponent({