From b134432766507f92b7f9765083dea6e72297ee0b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 11:08:57 +0000 Subject: [PATCH 1/9] fix[fuzz]: correct artifact location Signed-off-by: Joe Isaacs --- .github/workflows/fuzzer-fix-automation.yml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/fuzzer-fix-automation.yml b/.github/workflows/fuzzer-fix-automation.yml index e794e94c114..fc94d99881f 100644 --- a/.github/workflows/fuzzer-fix-automation.yml +++ b/.github/workflows/fuzzer-fix-automation.yml @@ -132,19 +132,9 @@ jobs: RUN_ID=$(echo "$ARTIFACT_URL" | grep -oP 'runs/\K[0-9]+') ARTIFACT_ID=$(echo "$ARTIFACT_URL" | grep -oP 'artifacts/\K[0-9]+') - # Map target name to artifact name (hardcoded in fuzz.yml) + # Artifact name matches run-fuzzer.yml upload: ${{ inputs.fuzz_target }}-crash-artifacts TARGET="${{ steps.extract.outputs.target }}" - case "$TARGET" in - file_io) - ARTIFACT_NAME="io-fuzzing-crash-artifacts" - ;; - array_ops) - ARTIFACT_NAME="operations-fuzzing-crash-artifacts" - ;; - *) - ARTIFACT_NAME="${TARGET}-fuzzing-crash-artifacts" - ;; - esac + ARTIFACT_NAME="${TARGET}-crash-artifacts" echo "Downloading artifact $ARTIFACT_NAME (ID: $ARTIFACT_ID) from run $RUN_ID" From b3748db05182d8cd3277768714fb82267955bef6 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 14:12:22 +0000 Subject: [PATCH 2/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/extract.py | 55 ++++++++- .../scripts/fuzz_report/tests/test_extract.py | 110 ++++++++++++++++++ 2 files changed, 159 insertions(+), 6 deletions(-) diff --git a/.github/scripts/fuzz_report/extract.py b/.github/scripts/fuzz_report/extract.py index e7b569e2bcf..12d4d12b7ac 100644 --- a/.github/scripts/fuzz_report/extract.py +++ b/.github/scripts/fuzz_report/extract.py @@ -72,14 +72,15 @@ def extract_crash_location(log_content: str) -> str: func_name = match.group(1) # Try "N: function_name\n at ./path" format (Rust backtrace) - # Skip generic closures like {closure#0}; find the first real function + # The `at ./` regex excludes /rustc/ stdlib frames; _is_noise_frame() + # further excludes vortex-error boilerplate and closure wrappers. if not func_name: for m in re.finditer(r"\s+\d+:\s+(\S+)\n\s+at\s+\./([^\n]+)", log_content): name = m.group(1) - name_clean = re.sub(r"<.*", "", name) - if name_clean.startswith("{"): + path = m.group(2) + if _is_noise_frame(name, path): continue - func_name = name_clean + func_name = re.sub(r"<.*", "", name) break if func_name: @@ -162,6 +163,39 @@ def extract_error_variant(log_content: str) -> str: return "unknown" +# Paths that are error-handling / panic infrastructure, not real crash sites. +# Frames from /rustc/ stdlib are already excluded by the `at ./` regex (they +# have `at /rustc/...` paths). This list covers project-local paths that still +# match `at ./` but are boilerplate. Add new entries here as needed. +NOISE_FRAME_PATHS = [ + "vortex-error/src/lib.rs", +] + + +def _is_noise_frame(func_name: str, path: str) -> bool: + """Return True if this stack frame is panic/error-handling boilerplate. + + Two layers of noise are filtered: + + 1. Frames from /rustc/ stdlib (rust_begin_unwind, panic_fmt, etc.) are + already excluded by the `at ./` regex — they have `at /rustc/...` paths, + so the regex never matches them. + + 2. Frames whose path starts with an entry in NOISE_FRAME_PATHS. These are + project-local but are still infrastructure (e.g. vortex_expect, + vortex_unwrap in vortex-error/src/lib.rs). + + 3. Closure wrappers like {closure#0} that appear in generic unwrap/expect + call chains. + """ + clean = re.sub(r"<.*", "", func_name) + if clean.startswith("{"): + return True + if any(path.startswith(prefix) for prefix in NOISE_FRAME_PATHS): + return True + return False + + def extract_stack_frames(log_content: str) -> list[str]: """Extract stack trace frames (function names only). @@ -171,9 +205,18 @@ def extract_stack_frames(log_content: str) -> list[str]: frames = [] # Best: "N: function_name\n at ./path" (Rust backtrace, most informative) - # The `at ./` path already confirms it's project code (not /rustc/ stdlib). - for match in re.finditer(r"\s+\d+:\s+(\S+)\n\s+at\s+\./", log_content): + # + # The `at ./` pattern provides the first layer of filtering: it only matches + # project-local paths, so /rustc/ stdlib frames (rust_begin_unwind, panic_fmt, + # unwrap_or_else, etc.) are never captured. + # + # _is_noise_frame() provides the second layer: it filters out project-local + # frames that are still boilerplate (vortex-error/src/lib.rs, closures). + for match in re.finditer(r"\s+\d+:\s+(\S+)\n\s+at\s+\./([^\n]+)", log_content): func = match.group(1) + path = match.group(2) + if _is_noise_frame(func, path): + continue # Strip generic parameters like <...> func = re.sub(r"<.*", "", func) frames.append(func) diff --git a/.github/scripts/fuzz_report/tests/test_extract.py b/.github/scripts/fuzz_report/tests/test_extract.py index b3f023d50ef..27aa9dee1f2 100644 --- a/.github/scripts/fuzz_report/tests/test_extract.py +++ b/.github/scripts/fuzz_report/tests/test_extract.py @@ -6,7 +6,9 @@ import pytest from fuzz_report.extract import ( + NOISE_FRAME_PATHS, CrashInfo, + _is_noise_frame, extract_crash_info, extract_crash_location, extract_debug_output, @@ -78,6 +80,69 @@ ==12345== ERROR: libFuzzer: deadly signal """ +RUST_BACKTRACE_WITH_ERROR_BOILERPLATE = """ +thread 'main' panicked at vortex-scalar/src/constructor.rs:61:10: +called `Result::unwrap()` on an `Err` value: VortexError +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:80:14 + 2: panic_display + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:259:5 + 3: {closure#1} + at ./vortex-error/src/lib.rs:457:9 + 4: unwrap_or_else> + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/result.rs:1622:23 + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 + 7: sum + at ./vortex-array/src/arrays/decimal/compute/sum.rs:57:32 + 8: invoke + at ./vortex-array/src/vtable/compute.rs:120:9 + +==12345== ERROR: libFuzzer: deadly signal +""" + + +class TestIsNoiseFrame: + """Unit tests for the _is_noise_frame helper. + + Note: /rustc/ stdlib frames (rust_begin_unwind, panic_fmt, unwrap_or_else) + are never passed to _is_noise_frame because the `at ./` regex already + excludes them — they have `at /rustc/...` paths. + + _is_noise_frame handles the second layer: project-local frames that are + still error-handling boilerplate, driven by the NOISE_FRAME_PATHS deny list. + """ + + def test_deny_list_is_not_empty(self): + assert len(NOISE_FRAME_PATHS) > 0 + + @pytest.mark.parametrize("path", NOISE_FRAME_PATHS) + def test_all_deny_list_entries_are_noise(self, path: str): + assert _is_noise_frame("some_func", f"{path}:1:1") + + def test_closure_in_vortex_error_is_noise(self): + assert _is_noise_frame( + "{closure#1}", + "vortex-error/src/lib.rs:457:9", + ) + + def test_bare_closure_is_noise(self): + assert _is_noise_frame("{closure#0}", "some/other/path.rs:1:1") + + def test_real_frame_is_not_noise(self): + assert not _is_noise_frame("decimal", "vortex-scalar/src/constructor.rs:61:10") + + def test_real_frame_with_generics_is_not_noise(self): + assert not _is_noise_frame( + "invoke", + "vortex-array/src/vtable/compute.rs:120:9", + ) + class TestExtractPanicLocation: def test_standard_format(self): @@ -100,6 +165,27 @@ def test_fallback_to_panic_location(self): loc = extract_crash_location(log) assert "slice.rs:142" in loc + def test_skips_vortex_error_boilerplate(self): + """Two layers of noise filtering in the Rust backtrace format: + + Layer 1 (implicit via regex): Frames from /rustc/ stdlib paths like + rust_begin_unwind, panic_fmt, unwrap_or_else are never matched because + the regex requires `at ./` (project-local), not `at /rustc/`. + + Layer 2 (explicit via _is_noise_frame): Frames from ./vortex-error/src/lib.rs + (vortex_expect, closures) ARE project-local but are still error-handling + boilerplate, so they are explicitly skipped. + """ + loc = extract_crash_location(RUST_BACKTRACE_WITH_ERROR_BOILERPLATE) + # Layer 1: /rustc/ stdlib frames never matched + assert "rust_begin_unwind" not in loc + assert "panic_fmt" not in loc + assert "unwrap_or_else" not in loc + # Layer 2: ./vortex-error/src/lib.rs frames explicitly filtered + assert "vortex_expect" not in loc + # Result: the real crash site + assert "decimal" in loc + class TestExtractPanicMessage: def test_index_bounds(self): @@ -145,6 +231,30 @@ def test_no_frames(self): frames = extract_stack_frames("no stack trace here") assert frames == ["unknown"] + def test_skips_vortex_error_boilerplate(self): + """Two layers of noise filtering in the Rust backtrace format: + + Layer 1 (implicit via regex): Frames from /rustc/ stdlib paths like + rust_begin_unwind, panic_fmt, unwrap_or_else are never matched because + the regex requires `at ./` (project-local), not `at /rustc/`. + + Layer 2 (explicit via _is_noise_frame): Frames from ./vortex-error/src/lib.rs + (vortex_expect, closures) ARE project-local but are still error-handling + boilerplate, so they are explicitly skipped. + """ + frames = extract_stack_frames(RUST_BACKTRACE_WITH_ERROR_BOILERPLATE) + # Layer 1: /rustc/ stdlib frames never matched (at /rustc/... not at ./) + assert all("rust_begin_unwind" not in f for f in frames) + assert all("panic_fmt" not in f for f in frames) + assert all("panic_display" not in f for f in frames) + assert all("unwrap_or_else" not in f for f in frames) + # Layer 2: ./vortex-error/src/lib.rs frames explicitly filtered + assert all("vortex_expect" not in f for f in frames) + assert all("{closure" not in f for f in frames) + # Result: only the real crash frames remain + assert "decimal" in frames + assert "sum" in frames + class TestExtractStackTraceRaw: def test_extracts_backtrace(self): From de3bf94a71214320bfbb630cf2c8e9d4171b8044 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 14:32:37 +0000 Subject: [PATCH 3/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/cli.py | 3 + .github/scripts/fuzz_report/dedup.py | 69 +++++++++++++++++-- .github/scripts/fuzz_report/extract.py | 10 +-- .../scripts/fuzz_report/tests/test_extract.py | 19 ++++- 4 files changed, 92 insertions(+), 9 deletions(-) diff --git a/.github/scripts/fuzz_report/cli.py b/.github/scripts/fuzz_report/cli.py index 9e82b0e2852..586d2df45d6 100644 --- a/.github/scripts/fuzz_report/cli.py +++ b/.github/scripts/fuzz_report/cli.py @@ -245,6 +245,7 @@ def cmd_dry_run(args: argparse.Namespace) -> int: print(f" panic_message: {crash_info.panic_message}", file=sys.stderr) print(f" crash_type: {crash_info.crash_type}", file=sys.stderr) print(f" seed_hash: {crash_info.seed_hash}", file=sys.stderr) + print(f" stack_frames: {crash_info.stack_frames[:5]}", file=sys.stderr) print(file=sys.stderr) # Step 2: Dedup (if issues file provided) @@ -261,6 +262,8 @@ def cmd_dry_run(args: argparse.Namespace) -> int: print(f" confidence: {dedup_result.confidence}", file=sys.stderr) print(f" issue: #{dedup_result.issue_number}", file=sys.stderr) print(f" reason: {dedup_result.reason}", file=sys.stderr) + if dedup_result.debug: + print(f" debug: {json.dumps(dedup_result.debug, indent=4)}", file=sys.stderr) print(file=sys.stderr) # Write dedup to temp file so _determine_action can read it diff --git a/.github/scripts/fuzz_report/dedup.py b/.github/scripts/fuzz_report/dedup.py index 88c12a84a98..e2118d2b80d 100644 --- a/.github/scripts/fuzz_report/dedup.py +++ b/.github/scripts/fuzz_report/dedup.py @@ -21,6 +21,8 @@ class DedupResult: issue_title: str | None = None reason: str = "" check_order: int | None = None + # Debug details: what values were compared to produce this result + debug: dict | None = None def to_dict(self) -> dict: return {k: v for k, v in asdict(self).items() if v is not None} @@ -56,9 +58,15 @@ def check_seed_hash(seed_hash: str, issues: list[dict]) -> DedupResult: issue_url=issue.get("url"), issue_title=issue.get("title"), reason="Exact seed hash match - same crash input", + debug={"seed_hash": seed_hash}, ) - return DedupResult(duplicate=False, check="seed_hash", reason="No matching seed hash found") + return DedupResult( + duplicate=False, + check="seed_hash", + reason="No matching seed hash found", + debug={"seed_hash": seed_hash}, + ) def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult: @@ -68,6 +76,7 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult duplicate=False, check="panic_location", reason="No panic location provided", + debug={"panic_location": panic_location or ""}, ) # Extract file:line pattern @@ -85,19 +94,33 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult issue_url=issue.get("url"), issue_title=issue.get("title"), reason=f"Same panic location (file:line): {file_pattern}", + debug={ + "panic_location": panic_location, + "file_pattern": file_pattern, + "matched_issue": issue["number"], + }, ) return DedupResult( duplicate=False, check="panic_location", reason="No matching panic location found", + debug={ + "panic_location": panic_location, + "file_pattern": file_pattern, + }, ) def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult: """Check if stack trace hash exists in any issue body.""" if not stack_hash or stack_hash == "unknown": - return DedupResult(duplicate=False, check="stack_trace", reason="No stack hash provided") + return DedupResult( + duplicate=False, + check="stack_trace", + reason="No stack hash provided", + debug={"stack_hash": stack_hash or ""}, + ) for issue in issues: body = issue.get("body", "") @@ -110,12 +133,17 @@ def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult: issue_url=issue.get("url"), issue_title=issue.get("title"), reason="Same stack trace (top 5 frames match)", + debug={ + "stack_hash": stack_hash, + "matched_issue": issue["number"], + }, ) return DedupResult( duplicate=False, check="stack_trace", reason="No matching stack trace hash found", + debug={"stack_hash": stack_hash}, ) @@ -123,7 +151,10 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict """Check if error pattern exists in any issue body.""" if not message_hash: return DedupResult( - duplicate=False, check="error_pattern", reason="No message hash provided" + duplicate=False, + check="error_pattern", + reason="No message hash provided", + debug={"error_variant": error_variant or ""}, ) # First try: exact message hash match @@ -138,6 +169,11 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict issue_url=issue.get("url"), issue_title=issue.get("title"), reason="Same error pattern (normalized message match)", + debug={ + "message_hash": message_hash, + "error_variant": error_variant, + "matched_issue": issue["number"], + }, ) # Second try: same error variant (lower confidence) @@ -153,10 +189,21 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict issue_url=issue.get("url"), issue_title=issue.get("title"), reason=f"Same error variant type: {error_variant}", + debug={ + "message_hash": message_hash, + "error_variant": error_variant, + "matched_issue": issue["number"], + }, ) return DedupResult( - duplicate=False, check="error_pattern", reason="No matching error pattern found" + duplicate=False, + check="error_pattern", + reason="No matching error pattern found", + debug={ + "message_hash": message_hash, + "error_variant": error_variant, + }, ) @@ -164,32 +211,46 @@ def check_duplicate(crash_info: CrashInfo, issues_path: str | Path) -> DedupResu """Run all deduplication checks in order. First match wins.""" issues = load_issues(issues_path) + # Summary of extracted values for debugging (attached to every result) + extraction_summary = { + "panic_location": crash_info.panic_location, + "crash_location": crash_info.crash_location, + "error_variant": crash_info.error_variant, + "stack_frames_top5": crash_info.stack_frames[:5], + "normalized_message": crash_info.normalized_message, + } + # Check 1: Seed hash (exact duplicate) result = check_seed_hash(crash_info.seed_hash, issues) if result.duplicate: result.check_order = 1 + result.debug = {**(result.debug or {}), "extraction": extraction_summary} return result # Check 2: Panic location (same crash site) result = check_panic_location(crash_info.panic_location, issues) if result.duplicate: result.check_order = 2 + result.debug = {**(result.debug or {}), "extraction": extraction_summary} return result # Check 3: Stack trace hash (same call path) result = check_stack_trace(crash_info.stack_trace_hash, issues) if result.duplicate: result.check_order = 3 + result.debug = {**(result.debug or {}), "extraction": extraction_summary} return result # Check 4: Error pattern (normalized message) result = check_error_pattern(crash_info.message_hash, crash_info.error_variant, issues) if result.duplicate: result.check_order = 4 + result.debug = {**(result.debug or {}), "extraction": extraction_summary} return result # No matches found return DedupResult( duplicate=False, reason="No duplicate detected by any check", + debug={"extraction": extraction_summary}, ) diff --git a/.github/scripts/fuzz_report/extract.py b/.github/scripts/fuzz_report/extract.py index 12d4d12b7ac..a29e0f01271 100644 --- a/.github/scripts/fuzz_report/extract.py +++ b/.github/scripts/fuzz_report/extract.py @@ -43,10 +43,12 @@ def extract_panic_location(log_content: str) -> str: if match: return match.group(1) - # Extract from vortex path in log - match = re.search(r"(vortex[^/]+/src/[^:]+:\d+)", log_content) - if match: - return match.group(1) + # Extract from vortex path in log, skipping noise paths (NOISE_FRAME_PATHS) + for match in re.finditer(r"(vortex[^/]+/src/[^:]+:\d+)", log_content): + loc = match.group(1) + if any(loc.startswith(prefix) or prefix in loc for prefix in NOISE_FRAME_PATHS): + continue + return loc return "unknown" diff --git a/.github/scripts/fuzz_report/tests/test_extract.py b/.github/scripts/fuzz_report/tests/test_extract.py index 27aa9dee1f2..49129e920b0 100644 --- a/.github/scripts/fuzz_report/tests/test_extract.py +++ b/.github/scripts/fuzz_report/tests/test_extract.py @@ -92,7 +92,7 @@ at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:259:5 3: {closure#1} at ./vortex-error/src/lib.rs:457:9 - 4: unwrap_or_else> + 4: unwrap_or_else at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/result.rs:1622:23 5: vortex_expect at ./vortex-error/src/lib.rs:310:14 @@ -151,6 +151,23 @@ def test_standard_format(self): def test_unknown_when_missing(self): assert extract_panic_location("no panic here") == "unknown" + def test_fallback_skips_noise_paths(self): + """When the `panicked at` line is absent, the fallback regex scans for + vortex paths in the log. It must skip NOISE_FRAME_PATHS like + vortex-error/src/lib.rs and return the real crash site instead. + """ + # Log WITHOUT a `panicked at` line — only a stack trace + log = """\ +stack backtrace: + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 +""" + loc = extract_panic_location(log) + assert "lib.rs" not in loc + assert "constructor.rs:61" in loc + class TestExtractCrashLocation: def test_with_vortex_frame(self): From 1c201557bd65a6fd6b056caf3a2db809da28c616 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 14:43:18 +0000 Subject: [PATCH 4/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .../scripts/fuzz_report/tests/test_dedup.py | 206 +++++++++++++++++- 1 file changed, 205 insertions(+), 1 deletion(-) diff --git a/.github/scripts/fuzz_report/tests/test_dedup.py b/.github/scripts/fuzz_report/tests/test_dedup.py index 59328313dcf..8f5c5718577 100644 --- a/.github/scripts/fuzz_report/tests/test_dedup.py +++ b/.github/scripts/fuzz_report/tests/test_dedup.py @@ -13,7 +13,7 @@ check_seed_hash, check_stack_trace, ) -from fuzz_report.extract import CrashInfo +from fuzz_report.extract import CrashInfo, extract_crash_info EXISTING_ISSUES = [ { @@ -219,3 +219,207 @@ def test_missing_issues_file(self, temp_dir): crash_info = _make_crash_info() result = check_duplicate(crash_info, str(temp_dir / "nonexistent.json")) assert result.duplicate is False + + +# --------------------------------------------------------------------------- +# End-to-end tests using real crash logs from production fuzzer runs. +# +# These logs are taken from https://github.com/vortex-data/vortex/issues/6048 +# where two completely different bugs were incorrectly matched because the +# extractor pointed at vortex-error boilerplate instead of the real crash site. +# --------------------------------------------------------------------------- + +# Crash 1: mask/struct cast error — panics in run_fuzz_action because a +# vortex_expect call in the mask operation fails. +MASK_STRUCT_CAST_LOG = """\ +thread 'main' panicked at fuzz/src/array/mod.rs:645:22: +mask operation should succeed in fuzz test: + Cannot add non-nullable field during struct cast +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/18d13b5332916ffca8eadb9106d54b5b434e9978/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/18d13b5332916ffca8eadb9106d54b5b434e9978/library/core/src/panicking.rs:80:14 + 2: panic_display + at /rustc/18d13b5332916ffca8eadb9106d54b5b434e9978/library/core/src/panicking.rs:259:5 + 3: {closure#1}, vortex_error::VortexError> + at ./vortex-error/src/lib.rs:457:9 + 4: unwrap_or_else, vortex_error::VortexError> + at /rustc/18d13b5332916ffca8eadb9106d54b5b434e9978/library/core/src/result.rs:1622:23 + 5: vortex_expect, vortex_error::VortexError> + at ./vortex-error/src/lib.rs:310:14 + 6: run_fuzz_action + at ./fuzz/src/array/mod.rs:645:22 + 7: __libfuzzer_sys_run + at ./fuzz/fuzz_targets/array_ops.rs:14:11 + +==12345== ERROR: libFuzzer: deadly signal +""" + +# Crash 2: decimal sum overflow — panics constructing a decimal Scalar +# because the computed sum doesn't fit the declared precision. +DECIMAL_SUM_LOG = """\ +thread 'main' panicked at vortex-scalar/src/constructor.rs:61:10: +unable to construct a decimal Scalar: + Incompatible dtype decimal(76,75) with value decimal256(51612137) +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:80:14 + 2: panic_display + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:259:5 + 3: {closure#1} + at ./vortex-error/src/lib.rs:457:9 + 4: unwrap_or_else + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/result.rs:1622:23 + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 + 7: sum + at ./vortex-array/src/arrays/decimal/compute/sum.rs:57:32 + 8: invoke + at ./vortex-array/src/compute/sum.rs:226:17 + +==12345== ERROR: libFuzzer: deadly signal +""" + + +def _build_issue_body(crash_info: CrashInfo) -> str: + """Build a realistic issue body from extracted crash info. + + Mirrors the new_issue.md template: crash location, panic message, + raw stack trace, and the hidden hash comment. + """ + return ( + f"## Fuzzing Crash Report\n\n" + f"**Crash Location**: `{crash_info.crash_location}`\n\n" + f"**Error Message**:\n```\n{crash_info.panic_message}\n```\n\n" + f"**Stack Trace**:\n```\n{crash_info.stack_trace_raw}\n```\n\n" + f"- **Target**: `array_ops`\n" + f"- **Error Variant**: `{crash_info.error_variant}`\n\n" + f"" + ) + + +class TestEndToEndDedup: + """End-to-end tests: extract from real logs, build issue bodies, run dedup. + + Reproduces the false-match scenario from + https://github.com/vortex-data/vortex/issues/6048 where two unrelated + crashes (mask/struct-cast vs decimal/sum) were matched because the + extractor pointed at vortex-error boilerplate. + """ + + @pytest.fixture + def crash1_info(self, temp_dir): + log_path = temp_dir / "mask_crash.log" + log_path.write_text(MASK_STRUCT_CAST_LOG) + return extract_crash_info(str(log_path)) + + @pytest.fixture + def crash2_info(self, temp_dir): + log_path = temp_dir / "decimal_crash.log" + log_path.write_text(DECIMAL_SUM_LOG) + return extract_crash_info(str(log_path)) + + def test_extraction_skips_boilerplate(self, crash1_info, crash2_info): + """Both crashes should extract real locations, not vortex-error.""" + # Crash 1: mask/struct cast + assert "vortex-error" not in crash1_info.panic_location + assert "vortex-error" not in crash1_info.crash_location + assert "mod.rs:645" in crash1_info.panic_location + assert "run_fuzz_action" in crash1_info.crash_location + + # Crash 2: decimal/sum + assert "vortex-error" not in crash2_info.panic_location + assert "vortex-error" not in crash2_info.crash_location + assert "constructor.rs:61" in crash2_info.panic_location + assert "decimal" in crash2_info.crash_location + + def test_stack_frames_are_different(self, crash1_info, crash2_info): + """The two crashes should produce entirely different stack frames.""" + assert crash1_info.stack_frames != crash2_info.stack_frames + assert crash1_info.stack_trace_hash != crash2_info.stack_trace_hash + + def test_panic_locations_are_different(self, crash1_info, crash2_info): + """The two crashes should have different panic locations.""" + assert crash1_info.panic_location != crash2_info.panic_location + + def test_no_high_confidence_match(self, crash1_info, crash2_info, temp_dir): + """Crash 2 must NOT match an issue created from crash 1 at + high or exact confidence. The old bug would match on + 'lib.rs:310' (panic_location check, high confidence). + """ + issue_body = _build_issue_body(crash1_info) + issues_path = temp_dir / "issues.json" + issues_path.write_text( + json.dumps([ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ]) + ) + + result = check_duplicate(crash2_info, str(issues_path)) + + # Must not match on panic_location or stack_trace (the old false match) + if result.duplicate: + assert result.check != "panic_location", ( + f"False panic_location match! debug={result.debug}" + ) + assert result.check != "stack_trace", ( + f"False stack_trace match! debug={result.debug}" + ) + assert result.confidence != "exact", ( + f"False exact match! debug={result.debug}" + ) + + def test_same_crash_does_match(self, crash1_info, temp_dir): + """A second occurrence of the SAME crash should still be detected.""" + issue_body = _build_issue_body(crash1_info) + issues_path = temp_dir / "issues.json" + issues_path.write_text( + json.dumps([ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ]) + ) + + result = check_duplicate(crash1_info, str(issues_path)) + assert result.duplicate is True + # Should match on panic_location or stack_trace (high confidence) + assert result.check in ("panic_location", "stack_trace") + assert result.confidence == "high" + + def test_debug_info_is_present(self, crash1_info, crash2_info, temp_dir): + """Dedup results should include debug details for diagnosis.""" + issue_body = _build_issue_body(crash1_info) + issues_path = temp_dir / "issues.json" + issues_path.write_text( + json.dumps([ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ]) + ) + + result = check_duplicate(crash2_info, str(issues_path)) + assert result.debug is not None + assert "extraction" in result.debug + assert "panic_location" in result.debug["extraction"] + assert "crash_location" in result.debug["extraction"] + assert "stack_frames_top5" in result.debug["extraction"] From 84eccb94bcaa7f7eff776d33e5111f9b880fde91 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 16:01:41 +0000 Subject: [PATCH 5/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/extract.py | 94 +++++++-- .../scripts/fuzz_report/tests/test_dedup.py | 180 +++++++++++++++++- .../scripts/fuzz_report/tests/test_extract.py | 92 ++++++++- 3 files changed, 342 insertions(+), 24 deletions(-) diff --git a/.github/scripts/fuzz_report/extract.py b/.github/scripts/fuzz_report/extract.py index a29e0f01271..10ae3e5d12d 100644 --- a/.github/scripts/fuzz_report/extract.py +++ b/.github/scripts/fuzz_report/extract.py @@ -31,22 +31,39 @@ def to_json(self) -> str: return json.dumps(self.to_dict(), indent=2) +def _is_noise_path(path: str) -> bool: + """Return True if a file path is error-handling boilerplate. + + The `panicked at` line can point at vortex-error/src/lib.rs when + vortex_expect/vortex_unwrap panics — that's the macro location, not + the real crash site. This helper filters those out everywhere. + """ + return any(prefix in path for prefix in NOISE_FRAME_PATHS) + + def extract_panic_location(log_content: str) -> str: - """Extract panic location (file:line) from log.""" + """Extract panic location (file:line) from log. + + Skips noise paths (NOISE_FRAME_PATHS) even when they appear in the + `panicked at` line itself — e.g. vortex_expect panics report + vortex-error/src/lib.rs as the location, not the actual caller. + """ # Look for "panicked at file:line:" pattern (newer Rust format) match = re.search(r"panicked at ([^:]+\.rs:\d+)", log_content) - if match: + if match and not _is_noise_path(match.group(1)): return match.group(1) # Look for "panicked at 'msg', file:line" pattern (older Rust format) match = re.search(r"panicked at [^,]+, ([^:]+:\d+)", log_content) - if match: + if match and not _is_noise_path(match.group(1)): return match.group(1) - # Extract from vortex path in log, skipping noise paths (NOISE_FRAME_PATHS) - for match in re.finditer(r"(vortex[^/]+/src/[^:]+:\d+)", log_content): + # Fallback: scan "at ./path:line" from stack trace, skipping noise. + # The `at ./` prefix scopes to project-local paths; _is_noise_path() + # further excludes boilerplate like vortex-error/src/lib.rs. + for match in re.finditer(r"at \./([^:\s]+:\d+)", log_content): loc = match.group(1) - if any(loc.startswith(prefix) or prefix in loc for prefix in NOISE_FRAME_PATHS): + if _is_noise_path(loc): continue return loc @@ -62,16 +79,18 @@ def extract_crash_location(log_content: str) -> str: # Format 4: "N: function_name\n at ./path/file.rs:line" func_name = None - # Try "#N 0x... in vortex..." format - match = re.search(r"#\d+\s+0x[a-f0-9]+\s+in\s+(vortex[^\s<(]+)", log_content) - if match: - func_name = match.group(1) + # Try "#N 0x... in func" format (libfuzzer), skip noise prefixes + for m in re.finditer(r"#\d+\s+0x[a-f0-9]+\s+in\s+([^\s<(]+)", log_content): + if not _is_noise_func(m.group(1)): + func_name = m.group(1) + break - # Try "N: 0x... - vortex..." format + # Try "N: 0x... - func" format (dash), skip noise prefixes if not func_name: - match = re.search(r"\d+:\s+0x[a-f0-9]+\s+-\s+(vortex[^\s<(]+)", log_content) - if match: - func_name = match.group(1) + for m in re.finditer(r"\d+:\s+0x[a-f0-9]+\s+-\s+([^\s<(]+)", log_content): + if not _is_noise_func(m.group(1)): + func_name = m.group(1) + break # Try "N: function_name\n at ./path" format (Rust backtrace) # The `at ./` regex excludes /rustc/ stdlib frames; _is_noise_frame() @@ -173,6 +192,25 @@ def extract_error_variant(log_content: str) -> str: "vortex-error/src/lib.rs", ] +# Function-name prefixes that are never the real crash site. +# Used for stack formats that lack file paths (libfuzzer, dash format). +NOISE_FUNC_PREFIXES = ( + "std::", + "core::", + "alloc::", + "__", # sanitizer, fuzzer, and C runtime internals +) + +# Exact function names (after stripping generics) that are error-handling +# boilerplate. These supplement NOISE_FUNC_PREFIXES for cases where the +# function doesn't match a prefix but is still infrastructure. +NOISE_FUNC_NAMES = frozenset({ + "vortex_expect", + "vortex_unwrap", + "panic_display", + "rust_begin_unwind", +}) + def _is_noise_frame(func_name: str, path: str) -> bool: """Return True if this stack frame is panic/error-handling boilerplate. @@ -183,8 +221,8 @@ def _is_noise_frame(func_name: str, path: str) -> bool: already excluded by the `at ./` regex — they have `at /rustc/...` paths, so the regex never matches them. - 2. Frames whose path starts with an entry in NOISE_FRAME_PATHS. These are - project-local but are still infrastructure (e.g. vortex_expect, + 2. Frames whose path matches NOISE_FRAME_PATHS (via _is_noise_path). + These are project-local but are still infrastructure (e.g. vortex_expect, vortex_unwrap in vortex-error/src/lib.rs). 3. Closure wrappers like {closure#0} that appear in generic unwrap/expect @@ -193,11 +231,25 @@ def _is_noise_frame(func_name: str, path: str) -> bool: clean = re.sub(r"<.*", "", func_name) if clean.startswith("{"): return True - if any(path.startswith(prefix) for prefix in NOISE_FRAME_PATHS): + if _is_noise_path(path): return True return False +def _is_noise_func(func_name: str) -> bool: + """Return True if a function name is obviously infrastructure. + + Used for stack trace formats that lack file paths (libfuzzer ``#N 0x… + in func``, dash ``N: 0x… - func``). Checks both prefix-based rules + (NOISE_FUNC_PREFIXES) and exact-name rules (NOISE_FUNC_NAMES). + """ + if func_name.startswith(NOISE_FUNC_PREFIXES): + return True + # Strip generics for exact match (regex already strips them, but be safe) + clean = re.sub(r"<.*", "", func_name) + return clean in NOISE_FUNC_NAMES + + def extract_stack_frames(log_content: str) -> list[str]: """Extract stack trace frames (function names only). @@ -223,18 +275,18 @@ def extract_stack_frames(log_content: str) -> list[str]: func = re.sub(r"<.*", "", func) frames.append(func) - # Fallback: "#N 0x... in function_name" + # Fallback: "#N 0x... in function_name" (libfuzzer format, no paths) if not frames: for match in re.finditer(r"#\d+\s+0x[a-f0-9]+\s+in\s+([^\s<(]+)", log_content): func = match.group(1) - if func.startswith(("vortex", "std", "core", "alloc")): + if not _is_noise_func(func): frames.append(func) - # Fallback: "N: 0x... - function_name" + # Fallback: "N: 0x... - function_name" (dash format, no paths) if not frames: for match in re.finditer(r"\d+:\s+0x[a-f0-9]+\s+-\s+([^\s<(]+)", log_content): func = match.group(1) - if func.startswith(("vortex", "std", "core", "alloc")): + if not _is_noise_func(func): frames.append(func) return frames[:10] if frames else ["unknown"] diff --git a/.github/scripts/fuzz_report/tests/test_dedup.py b/.github/scripts/fuzz_report/tests/test_dedup.py index 8f5c5718577..e82aae29113 100644 --- a/.github/scripts/fuzz_report/tests/test_dedup.py +++ b/.github/scripts/fuzz_report/tests/test_dedup.py @@ -231,8 +231,11 @@ def test_missing_issues_file(self, temp_dir): # Crash 1: mask/struct cast error — panics in run_fuzz_action because a # vortex_expect call in the mask operation fails. +# NOTE: The `panicked at` line points at vortex-error/src/lib.rs (the +# vortex_expect macro site), NOT the real caller. This is the actual +# format from CI logs. MASK_STRUCT_CAST_LOG = """\ -thread 'main' panicked at fuzz/src/array/mod.rs:645:22: +thread '' panicked at vortex-error/src/lib.rs:310:33: mask operation should succeed in fuzz test: Cannot add non-nullable field during struct cast stack backtrace: @@ -258,8 +261,10 @@ def test_missing_issues_file(self, temp_dir): # Crash 2: decimal sum overflow — panics constructing a decimal Scalar # because the computed sum doesn't fit the declared precision. +# NOTE: Same as crash 1, the `panicked at` line points at vortex-error, +# not the real caller. This is the actual format from CI logs. DECIMAL_SUM_LOG = """\ -thread 'main' panicked at vortex-scalar/src/constructor.rs:61:10: +thread '' panicked at vortex-error/src/lib.rs:310:33: unable to construct a decimal Scalar: Incompatible dtype decimal(76,75) with value decimal256(51612137) stack backtrace: @@ -423,3 +428,174 @@ def test_debug_info_is_present(self, crash1_info, crash2_info, temp_dir): assert "panic_location" in result.debug["extraction"] assert "crash_location" in result.debug["extraction"] assert "stack_frames_top5" in result.debug["extraction"] + + +# ── Real-world test from GitHub issue #6429 ────────────────────────────── +# Two different fuzz targets (file_io and array_ops) hit the same +# decimal Scalar construction bug via the same call path. The dedup +# should match them correctly on the real crash site, not on the +# vortex-error/src/lib.rs boilerplate. + +ISSUE_6429_FILE_IO_LOG = """\ +thread '' panicked at vortex-error/src/lib.rs:310:33: +unable to construct a decimal Scalar: + Incompatible dtype decimal(76,-74)? with value decimal256(-1699999) +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:80:14 + 2: panic_display + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:259:5 + 3: {closure#1} + at ./vortex-error/src/lib.rs:457:9 + 4: unwrap_or_else + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/result.rs:1622:23 + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 + 7: sum + at ./vortex-array/src/arrays/decimal/compute/sum.rs:57:32 + 8: invoke + at ./vortex-array/src/compute/sum.rs:226:17 + 9: sum_impl + at ./vortex-array/src/compute/sum.rs:250:38 + 10: invoke + at ./vortex-array/src/compute/sum.rs:146:26 + 11: invoke + at ./vortex-array/src/compute/mod.rs:144:34 + 12: sum_with_accumulator + at ./vortex-array/src/compute/sum.rs:53:10 + 13: sum + at ./vortex-array/src/compute/sum.rs:70:5 + 14: compute_stat + at ./vortex-array/src/stats/array.rs:157:22 + 15: push_chunk + at ./vortex-layout/src/layouts/zoned/zone_map.rs:211:49 + 16: write + at ./vortex-file/src/writer.rs:385:22 + 17: __libfuzzer_sys_run + at ./fuzz/fuzz_targets/file_io.rs:73:10 + +==12345== ERROR: libFuzzer: deadly signal +""" + +ISSUE_6429_ARRAY_OPS_LOG = """\ +thread '' panicked at vortex-error/src/lib.rs:310:33: +unable to construct a decimal Scalar: + Incompatible dtype decimal(76,75)? with value decimal256(51612137) +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:80:14 + 2: panic_display + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/panicking.rs:259:5 + 3: {closure#1} + at ./vortex-error/src/lib.rs:457:9 + 4: unwrap_or_else + at /rustc/9e79395f92bff6a8f536430e42a4beae69f60ff8/library/core/src/result.rs:1622:23 + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 + 7: sum + at ./vortex-array/src/arrays/decimal/compute/sum.rs:57:32 + 8: invoke + at ./vortex-array/src/compute/sum.rs:226:17 + 9: sum_impl + at ./vortex-array/src/compute/sum.rs:250:38 + 10: invoke + at ./vortex-array/src/compute/sum.rs:146:26 + 11: invoke + at ./vortex-array/src/compute/mod.rs:144:34 + 12: sum_with_accumulator + at ./vortex-array/src/compute/sum.rs:53:10 + 13: sum + at ./vortex-array/src/compute/sum.rs:70:5 + 14: sum_canonical_array + at ./fuzz/src/array/sum.rs:12:5 + 15: arbitrary + at ./fuzz/src/array/mod.rs:313:38 + +==12345== ERROR: libFuzzer: deadly signal +""" + + +class TestIssue6429: + """End-to-end test for GitHub issue #6429. + + Two targets (file_io, array_ops) crash in the same decimal Scalar + constructor bug. The dedup should: + - Extract the real crash site (constructor.rs:61), not vortex-error + - Match them as duplicates on panic_location or stack_trace + - Report the match reason referencing constructor.rs, not lib.rs + """ + + @pytest.fixture + def file_io_info(self, temp_dir): + p = temp_dir / "file_io.log" + p.write_text(ISSUE_6429_FILE_IO_LOG) + return extract_crash_info(str(p)) + + @pytest.fixture + def array_ops_info(self, temp_dir): + p = temp_dir / "array_ops.log" + p.write_text(ISSUE_6429_ARRAY_OPS_LOG) + return extract_crash_info(str(p)) + + def test_extraction_skips_vortex_error(self, file_io_info, array_ops_info): + """Neither crash should reference vortex-error in extracted fields.""" + for info in (file_io_info, array_ops_info): + assert "vortex-error" not in info.panic_location + assert "vortex-error" not in info.crash_location + assert "vortex_expect" not in info.crash_location + assert "constructor.rs:61" in info.panic_location + assert "decimal" in info.crash_location + + def test_no_noise_in_stack_frames(self, file_io_info, array_ops_info): + """Stack frames should not contain any noise.""" + for info in (file_io_info, array_ops_info): + assert all("vortex_expect" not in f for f in info.stack_frames) + assert all("{closure" not in f for f in info.stack_frames) + assert "decimal" in info.stack_frames + + def test_same_bug_matches_correctly( + self, file_io_info, array_ops_info, temp_dir + ): + """array_ops crash should match the file_io issue — same bug.""" + issue_body = _build_issue_body(file_io_info) + issues_path = temp_dir / "issues.json" + issues_path.write_text( + json.dumps([{ + "number": 6429, + "title": "Fuzzing Crash: VortexError in file_io", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6429", + }]) + ) + + result = check_duplicate(array_ops_info, str(issues_path)) + assert result.duplicate is True + assert result.confidence == "high" + + def test_match_reason_references_real_site( + self, file_io_info, array_ops_info, temp_dir + ): + """The match reason must reference the real crash, not boilerplate.""" + issue_body = _build_issue_body(file_io_info) + issues_path = temp_dir / "issues.json" + issues_path.write_text( + json.dumps([{ + "number": 6429, + "title": "Fuzzing Crash: VortexError in file_io", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6429", + }]) + ) + + result = check_duplicate(array_ops_info, str(issues_path)) + assert "lib.rs:310" not in result.reason + if result.check == "panic_location": + assert "constructor.rs:61" in result.reason diff --git a/.github/scripts/fuzz_report/tests/test_extract.py b/.github/scripts/fuzz_report/tests/test_extract.py index 49129e920b0..c18b9d78b9b 100644 --- a/.github/scripts/fuzz_report/tests/test_extract.py +++ b/.github/scripts/fuzz_report/tests/test_extract.py @@ -7,8 +7,11 @@ from fuzz_report.extract import ( NOISE_FRAME_PATHS, + NOISE_FUNC_NAMES, + NOISE_FUNC_PREFIXES, CrashInfo, _is_noise_frame, + _is_noise_func, extract_crash_info, extract_crash_location, extract_debug_output, @@ -81,7 +84,7 @@ """ RUST_BACKTRACE_WITH_ERROR_BOILERPLATE = """ -thread 'main' panicked at vortex-scalar/src/constructor.rs:61:10: +thread '' panicked at vortex-error/src/lib.rs:310:33: called `Result::unwrap()` on an `Err` value: VortexError stack backtrace: 0: __rustc::rust_begin_unwind @@ -144,6 +147,55 @@ def test_real_frame_with_generics_is_not_noise(self): ) +class TestIsNoiseFunc: + """Unit tests for _is_noise_func — filters function names in stack formats + that lack file paths (libfuzzer ``#N 0x… in func``, dash ``N: 0x… - func``). + """ + + def test_prefix_list_is_not_empty(self): + assert len(NOISE_FUNC_PREFIXES) > 0 + + @pytest.mark.parametrize("prefix", NOISE_FUNC_PREFIXES) + def test_all_prefixes_are_noise(self, prefix: str): + assert _is_noise_func(f"{prefix}some_function") + + def test_std_panicking_is_noise(self): + assert _is_noise_func("std::panicking::begin_panic_handler") + + def test_core_panicking_is_noise(self): + assert _is_noise_func("core::panicking::panic_fmt") + + def test_dunder_sanitizer_is_noise(self): + assert _is_noise_func("__sanitizer_print_stack_trace") + + def test_name_list_is_not_empty(self): + assert len(NOISE_FUNC_NAMES) > 0 + + @pytest.mark.parametrize("name", sorted(NOISE_FUNC_NAMES)) + def test_all_exact_names_are_noise(self, name: str): + assert _is_noise_func(name) + + def test_vortex_expect_is_noise(self): + assert _is_noise_func("vortex_expect") + + def test_vortex_expect_with_generics_is_noise(self): + assert _is_noise_func( + "vortex_expect" + ) + + def test_vortex_unwrap_is_noise(self): + assert _is_noise_func("vortex_unwrap") + + def test_vortex_func_is_not_noise(self): + assert not _is_noise_func("vortex_array::compute::slice::slice_primitive") + + def test_fuzz_func_is_not_noise(self): + assert not _is_noise_func("fuzz::array::run_fuzz_action") + + def test_plain_func_is_not_noise(self): + assert not _is_noise_func("decimal") + + class TestExtractPanicLocation: def test_standard_format(self): assert extract_panic_location(INDEX_BOUNDS_LOG) == "vortex-array/src/compute/slice.rs:142" @@ -151,6 +203,27 @@ def test_standard_format(self): def test_unknown_when_missing(self): assert extract_panic_location("no panic here") == "unknown" + def test_panicked_at_noise_path_is_skipped(self): + """vortex_expect panics report vortex-error/src/lib.rs as the + `panicked at` location. This is the macro site, not the real crash. + The extractor must skip it and find the real location from the + stack trace instead. + """ + # This is the ACTUAL format from CI logs — panicked at points at + # vortex-error/src/lib.rs, not the real caller. + log = """\ +thread '' panicked at vortex-error/src/lib.rs:310:33: +unable to construct a decimal Scalar +stack backtrace: + 5: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 6: decimal + at ./vortex-scalar/src/constructor.rs:61:10 +""" + loc = extract_panic_location(log) + assert "lib.rs" not in loc + assert "constructor.rs:61" in loc + def test_fallback_skips_noise_paths(self): """When the `panicked at` line is absent, the fallback regex scans for vortex paths in the log. It must skip NOISE_FRAME_PATHS like @@ -243,6 +316,23 @@ def test_in_format(self): frames = extract_stack_frames(LIBFUZZER_FRAME_LOG) assert len(frames) > 0 assert any("vortex" in f for f in frames) + # std:: frames should be filtered out + assert all(not f.startswith("std::") for f in frames) + + def test_in_format_non_vortex_crash(self): + """Crashes in non-vortex code (e.g. fuzz/) should still be captured.""" + log = """\ +stack backtrace: + #0 0x7f1234567890 in std::panicking::begin_panic_handler + #1 0x7f1234567891 in fuzz::array::run_fuzz_action + #2 0x7f1234567892 in __libfuzzer_sys_run + +==12345== ERROR: libFuzzer: deadly signal +""" + frames = extract_stack_frames(log) + assert "fuzz::array::run_fuzz_action" in frames + assert all(not f.startswith("std::") for f in frames) + assert all(not f.startswith("__") for f in frames) def test_no_frames(self): frames = extract_stack_frames("no stack trace here") From 73a17e136af1061b23701a19823909bf7195c669 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 16:17:41 +0000 Subject: [PATCH 6/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/extract.py | 76 ++++++++++++------- .../scripts/fuzz_report/tests/test_extract.py | 34 +++++++++ 2 files changed, 83 insertions(+), 27 deletions(-) diff --git a/.github/scripts/fuzz_report/extract.py b/.github/scripts/fuzz_report/extract.py index 10ae3e5d12d..e1f748e326a 100644 --- a/.github/scripts/fuzz_report/extract.py +++ b/.github/scripts/fuzz_report/extract.py @@ -71,38 +71,38 @@ def extract_panic_location(log_content: str) -> str: def extract_crash_location(log_content: str) -> str: - """Extract crash location as file:function_name from stack frames.""" - # Look for first vortex frame in various stack trace formats - # Format 1: "#N 0x... in function_name" - # Format 2: "N: 0x... - function_name" - # Format 3: "#N 0x... in function_name /path/file.rs:line" - # Format 4: "N: function_name\n at ./path/file.rs:line" + """Extract crash location as file:function_name from stack frames. + + Prefers the Rust backtrace format (``N: func at ./path``) because it has + file paths that enable reliable noise filtering. Falls back to the + libfuzzer and dash formats which only have function names. + """ func_name = None - # Try "#N 0x... in func" format (libfuzzer), skip noise prefixes - for m in re.finditer(r"#\d+\s+0x[a-f0-9]+\s+in\s+([^\s<(]+)", log_content): - if not _is_noise_func(m.group(1)): - func_name = m.group(1) - break + # Best: "N: function_name\n at ./path" format (Rust backtrace) + # The `at ./` regex excludes /rustc/ stdlib frames; _is_noise_frame() + # further excludes vortex-error boilerplate and closure wrappers. + for m in re.finditer(r"\s+\d+:\s+(\S+)\n\s+at\s+\./([^\n]+)", log_content): + name = m.group(1) + path = m.group(2) + if _is_noise_frame(name, path): + continue + func_name = re.sub(r"<.*", "", name) + break - # Try "N: 0x... - func" format (dash), skip noise prefixes + # Fallback: "#N 0x... in func" format (libfuzzer), skip noise prefixes if not func_name: - for m in re.finditer(r"\d+:\s+0x[a-f0-9]+\s+-\s+([^\s<(]+)", log_content): + for m in re.finditer(r"#\d+\s+0x[a-f0-9]+\s+in\s+([^\s<(]+)", log_content): if not _is_noise_func(m.group(1)): func_name = m.group(1) break - # Try "N: function_name\n at ./path" format (Rust backtrace) - # The `at ./` regex excludes /rustc/ stdlib frames; _is_noise_frame() - # further excludes vortex-error boilerplate and closure wrappers. + # Fallback: "N: 0x... - func" format (dash), skip noise prefixes if not func_name: - for m in re.finditer(r"\s+\d+:\s+(\S+)\n\s+at\s+\./([^\n]+)", log_content): - name = m.group(1) - path = m.group(2) - if _is_noise_frame(name, path): - continue - func_name = re.sub(r"<.*", "", name) - break + for m in re.finditer(r"\d+:\s+0x[a-f0-9]+\s+-\s+([^\s<(]+)", log_content): + if not _is_noise_func(m.group(1)): + func_name = m.group(1) + break if func_name: panic_loc = extract_panic_location(log_content) @@ -198,6 +198,7 @@ def extract_error_variant(log_content: str) -> str: "std::", "core::", "alloc::", + "fuzzer::", # libfuzzer C++ internals (e.g. fuzzer::PrintStackTrace) "__", # sanitizer, fuzzer, and C runtime internals ) @@ -292,15 +293,36 @@ def extract_stack_frames(log_content: str) -> list[str]: return frames[:10] if frames else ["unknown"] +# Maximum number of lines to keep in raw stack traces. Deep async/futures +# call chains produce 100+ frames with huge generic signatures that blow past +# token limits in issue bodies and Claude analysis. The first ~40 lines +# always contain the crash site and immediate callers. +_MAX_RAW_TRACE_LINES = 40 + + +def _truncate_trace(raw: str) -> str: + """Truncate a raw stack trace to _MAX_RAW_TRACE_LINES.""" + lines = raw.splitlines() + if len(lines) <= _MAX_RAW_TRACE_LINES: + return raw + kept = lines[:_MAX_RAW_TRACE_LINES] + kept.append(f" ... ({len(lines) - _MAX_RAW_TRACE_LINES} more frames truncated)") + return "\n".join(kept) + + def extract_stack_trace_raw(log_content: str) -> str: - """Extract the raw stack trace section from the log.""" + """Extract the raw stack trace section from the log. + + Truncated to ~40 lines to avoid enormous issue bodies and token-limit + failures in downstream Claude analysis. + """ # Look for "stack backtrace:" section match = re.search( r"(stack backtrace:\n(?:.*\n)*?)(?:\n\n|==\d+==|note:)", log_content, ) if match: - return match.group(1).strip() + return _truncate_trace(match.group(1).strip()) # Look for "Backtrace:" section (vortex_error format) match = re.search( @@ -308,7 +330,7 @@ def extract_stack_trace_raw(log_content: str) -> str: log_content, ) if match: - return match.group(1).strip() + return _truncate_trace(match.group(1).strip()) # Look for numbered frame lines with addresses lines = [] @@ -316,7 +338,7 @@ def extract_stack_trace_raw(log_content: str) -> str: if re.match(r"\s*#?\d+[:\s]+0x[a-f0-9]+", line): lines.append(line) if lines: - return "\n".join(lines) + return _truncate_trace("\n".join(lines)) return "" diff --git a/.github/scripts/fuzz_report/tests/test_extract.py b/.github/scripts/fuzz_report/tests/test_extract.py index c18b9d78b9b..990fe4861ec 100644 --- a/.github/scripts/fuzz_report/tests/test_extract.py +++ b/.github/scripts/fuzz_report/tests/test_extract.py @@ -186,6 +186,12 @@ def test_vortex_expect_with_generics_is_noise(self): def test_vortex_unwrap_is_noise(self): assert _is_noise_func("vortex_unwrap") + def test_fuzzer_print_stack_trace_is_noise(self): + assert _is_noise_func("fuzzer::PrintStackTrace") + + def test_fuzzer_prefix_is_noise(self): + assert _is_noise_func("fuzzer::Fuzzer::ExecuteCallback") + def test_vortex_func_is_not_noise(self): assert not _is_noise_func("vortex_array::compute::slice::slice_primitive") @@ -276,6 +282,34 @@ def test_skips_vortex_error_boilerplate(self): # Result: the real crash site assert "decimal" in loc + def test_skips_fuzzer_print_stack_trace(self): + """libfuzzer inserts its own C++ frames like fuzzer::PrintStackTrace + early in the crash handler stack. These must be skipped. + """ + log = """\ +thread '' panicked at vortex-error/src/lib.rs:310:33: +unable to construct a decimal Scalar +stack backtrace: + 0: __rustc::rust_begin_unwind + at /rustc/abc123/library/std/src/panicking.rs:689:5 + 1: core::panicking::panic_fmt + at /rustc/abc123/library/core/src/panicking.rs:80:14 + 2: vortex_expect + at ./vortex-error/src/lib.rs:310:14 + 3: decimal + at ./vortex-scalar/src/constructor.rs:61:10 + +==12345== ERROR: libFuzzer: deadly signal + #0 0x55e0a0 in fuzzer::PrintStackTrace() + #1 0x55e0b0 in fuzzer::Fuzzer::CrashCallback() + #2 0x7f0000 in vortex_scalar::scalar::Scalar::from +""" + loc = extract_crash_location(log) + assert "fuzzer::PrintStackTrace" not in loc + assert "fuzzer::Fuzzer" not in loc + assert "decimal" in loc + assert "constructor.rs:61" in loc + class TestExtractPanicMessage: def test_index_bounds(self): From 920fcd89ecf97a981040b3ee90658c4e451b16cc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 17:08:19 +0000 Subject: [PATCH 7/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/cli.py | 108 ++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/.github/scripts/fuzz_report/cli.py b/.github/scripts/fuzz_report/cli.py index 586d2df45d6..ac1e2cca734 100644 --- a/.github/scripts/fuzz_report/cli.py +++ b/.github/scripts/fuzz_report/cli.py @@ -4,6 +4,7 @@ import argparse import json import os +import re import subprocess import sys from pathlib import Path @@ -14,6 +15,10 @@ TEMPLATES_DIR = Path(__file__).parent / "templates" +# Marker used to find/update the single recurrence-tracking comment. +_RECURRENCE_MARKER = "" +_RECURRENCE_COUNT_RE = r"" + def parse_var_arg(var_str: str) -> tuple[str, str]: """Parse a -v KEY=VALUE argument into (key, value).""" @@ -80,7 +85,14 @@ def _build_template_variables( def _determine_action( dedup_path: str | Path | None, ) -> tuple[str, dict | None]: - """Determine action from dedup result. Returns (action, dedup_dict).""" + """Determine action from dedup result. Returns (action, dedup_dict). + + Actions: + create – new issue + skip – exact duplicate, do nothing + update_count – high-confidence duplicate, bump recurrence counter + comment – medium-confidence duplicate, post full comment + """ if not dedup_path or not Path(dedup_path).exists(): return "create", None @@ -91,9 +103,85 @@ def _determine_action( if dedup.get("confidence") == "exact": return "skip", dedup + if dedup.get("confidence") == "high": + return "update_count", dedup + return "comment", dedup +def _render_recurrence_body(count: int) -> str: + """Render the minimal recurrence-tracking comment body.""" + return ( + f"Seen **{count}** time{'s' if count != 1 else ''}\n\n" + f"" + ) + + +def _update_recurrence_count(repo: str, issue_number: int | str) -> int: + """Find-or-create the recurrence comment, incrementing its count. + + Uses a compare-and-swap pattern: reads the current count from the + existing comment (if any), increments it, and writes back. + + Returns the new count. + """ + # List all comments on the issue + result = subprocess.run( + [ + "gh", "api", + f"repos/{repo}/issues/{issue_number}/comments", + "--paginate", + "--jq", + f'.[] | select(.body | contains("{_RECURRENCE_MARKER}"))' + f" | {{id: .id, body: .body}}", + ], + capture_output=True, + text=True, + check=True, + ) + + existing_id = None + current_count = 0 + + for line in result.stdout.strip().splitlines(): + if not line: + continue + comment = json.loads(line) + existing_id = comment["id"] + m = re.search(_RECURRENCE_COUNT_RE, comment["body"]) + if m: + current_count = int(m.group(1)) + break + + new_count = current_count + 1 + body = _render_recurrence_body(new_count) + + if existing_id: + # Update existing comment (not atomic — race is acceptable since + # fuzz CI jobs are serialized) + subprocess.run( + [ + "gh", "api", + f"repos/{repo}/issues/comments/{existing_id}", + "-X", "PATCH", + "-f", f"body={body}", + ], + check=True, + ) + else: + # Create new recurrence comment + subprocess.run( + [ + "gh", "api", + f"repos/{repo}/issues/{issue_number}/comments", + "-f", f"body={body}", + ], + check=True, + ) + + return new_count + + def cmd_extract(args: argparse.Namespace) -> int: """Extract crash info from log file.""" if not Path(args.log_file).exists(): @@ -168,6 +256,15 @@ def cmd_report(args: argparse.Namespace) -> int: _write_github_output("issue_number", str(existing_issue)) return 0 + if action == "update_count": + new_count = _update_recurrence_count(args.repo, existing_issue) + print( + f"Updated recurrence count on #{existing_issue} to {new_count}", + file=sys.stderr, + ) + _write_github_output("issue_number", str(existing_issue)) + return 0 + if action == "comment": variables.setdefault("DEDUP_REASON", dedup.get("reason", "")) variables.setdefault("DEDUP_CONFIDENCE", dedup.get("confidence", "")) @@ -291,6 +388,15 @@ def cmd_dry_run(args: argparse.Namespace) -> int: ) return 0 + if action == "update_count": + print( + f"(would update recurrence count on #{existing_issue})", + file=sys.stderr, + ) + print(file=sys.stderr) + print(_render_recurrence_body(1)) + return 0 + if action == "comment": template_path = TEMPLATES_DIR / "related_comment.md" variables.setdefault("DEDUP_REASON", dedup.get("reason", "")) From 4660fffa815dc7b3ff14075312581743ab627bf8 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 17:51:01 +0000 Subject: [PATCH 8/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/workflows/report-fuzz-crash.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/report-fuzz-crash.yml b/.github/workflows/report-fuzz-crash.yml index 07ef2406284..1505e6abb8a 100644 --- a/.github/workflows/report-fuzz-crash.yml +++ b/.github/workflows/report-fuzz-crash.yml @@ -99,7 +99,11 @@ jobs: github_token: ${{ secrets.gh_token }} show_full_output: true prompt: | - Read the file crash_info.json and the fuzzer log at logs/fuzz_output.log. + Read crash_info.json — it has all extracted fields (panic_location, crash_location, + panic_message, error_variant, stack_frames, stack_trace_raw, debug_output). + If you need more context, read the first 200 lines of logs/fuzz_output.log + (use offset=0, limit=200 — the file may be very large). + Provide a brief (2-4 sentence) root cause analysis of the crash. Focus on: - What the crash is (the error type and location) - Why it likely happens (the root cause) From 7f3e2e5f00b8f856c29121d622060ad0fdb755cc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 11 Feb 2026 18:03:28 +0000 Subject: [PATCH 9/9] fix[fuzz]: correct stack frame compare Signed-off-by: Joe Isaacs --- .github/scripts/fuzz_report/cli.py | 21 ++-- .github/scripts/fuzz_report/extract.py | 14 +-- .../scripts/fuzz_report/tests/test_dedup.py | 102 +++++++++--------- 3 files changed, 75 insertions(+), 62 deletions(-) diff --git a/.github/scripts/fuzz_report/cli.py b/.github/scripts/fuzz_report/cli.py index 0bc4192dd0b..979dfb2b9aa 100644 --- a/.github/scripts/fuzz_report/cli.py +++ b/.github/scripts/fuzz_report/cli.py @@ -130,12 +130,12 @@ def _update_recurrence_count(repo: str, issue_number: int | str) -> int: # List all comments on the issue result = subprocess.run( [ - "gh", "api", + "gh", + "api", f"repos/{repo}/issues/{issue_number}/comments", "--paginate", "--jq", - f'.[] | select(.body | contains("{_RECURRENCE_MARKER}"))' - f" | {{id: .id, body: .body}}", + f'.[] | select(.body | contains("{_RECURRENCE_MARKER}")) | {{id: .id, body: .body}}', ], capture_output=True, text=True, @@ -163,10 +163,13 @@ def _update_recurrence_count(repo: str, issue_number: int | str) -> int: # fuzz CI jobs are serialized) subprocess.run( [ - "gh", "api", + "gh", + "api", f"repos/{repo}/issues/comments/{existing_id}", - "-X", "PATCH", - "-f", f"body={body}", + "-X", + "PATCH", + "-f", + f"body={body}", ], check=True, ) @@ -174,9 +177,11 @@ def _update_recurrence_count(repo: str, issue_number: int | str) -> int: # Create new recurrence comment subprocess.run( [ - "gh", "api", + "gh", + "api", f"repos/{repo}/issues/{issue_number}/comments", - "-f", f"body={body}", + "-f", + f"body={body}", ], check=True, ) diff --git a/.github/scripts/fuzz_report/extract.py b/.github/scripts/fuzz_report/extract.py index e1f748e326a..d24a31030c3 100644 --- a/.github/scripts/fuzz_report/extract.py +++ b/.github/scripts/fuzz_report/extract.py @@ -205,12 +205,14 @@ def extract_error_variant(log_content: str) -> str: # Exact function names (after stripping generics) that are error-handling # boilerplate. These supplement NOISE_FUNC_PREFIXES for cases where the # function doesn't match a prefix but is still infrastructure. -NOISE_FUNC_NAMES = frozenset({ - "vortex_expect", - "vortex_unwrap", - "panic_display", - "rust_begin_unwind", -}) +NOISE_FUNC_NAMES = frozenset( + { + "vortex_expect", + "vortex_unwrap", + "panic_display", + "rust_begin_unwind", + } +) def _is_noise_frame(func_name: str, path: str) -> bool: diff --git a/.github/scripts/fuzz_report/tests/test_dedup.py b/.github/scripts/fuzz_report/tests/test_dedup.py index e82aae29113..0efd52d541d 100644 --- a/.github/scripts/fuzz_report/tests/test_dedup.py +++ b/.github/scripts/fuzz_report/tests/test_dedup.py @@ -362,14 +362,16 @@ def test_no_high_confidence_match(self, crash1_info, crash2_info, temp_dir): issue_body = _build_issue_body(crash1_info) issues_path = temp_dir / "issues.json" issues_path.write_text( - json.dumps([ - { - "number": 6048, - "title": "Fuzzing Crash: VortexError in array_ops", - "body": issue_body, - "url": "https://github.com/vortex-data/vortex/issues/6048", - }, - ]) + json.dumps( + [ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ] + ) ) result = check_duplicate(crash2_info, str(issues_path)) @@ -379,26 +381,24 @@ def test_no_high_confidence_match(self, crash1_info, crash2_info, temp_dir): assert result.check != "panic_location", ( f"False panic_location match! debug={result.debug}" ) - assert result.check != "stack_trace", ( - f"False stack_trace match! debug={result.debug}" - ) - assert result.confidence != "exact", ( - f"False exact match! debug={result.debug}" - ) + assert result.check != "stack_trace", f"False stack_trace match! debug={result.debug}" + assert result.confidence != "exact", f"False exact match! debug={result.debug}" def test_same_crash_does_match(self, crash1_info, temp_dir): """A second occurrence of the SAME crash should still be detected.""" issue_body = _build_issue_body(crash1_info) issues_path = temp_dir / "issues.json" issues_path.write_text( - json.dumps([ - { - "number": 6048, - "title": "Fuzzing Crash: VortexError in array_ops", - "body": issue_body, - "url": "https://github.com/vortex-data/vortex/issues/6048", - }, - ]) + json.dumps( + [ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ] + ) ) result = check_duplicate(crash1_info, str(issues_path)) @@ -412,14 +412,16 @@ def test_debug_info_is_present(self, crash1_info, crash2_info, temp_dir): issue_body = _build_issue_body(crash1_info) issues_path = temp_dir / "issues.json" issues_path.write_text( - json.dumps([ - { - "number": 6048, - "title": "Fuzzing Crash: VortexError in array_ops", - "body": issue_body, - "url": "https://github.com/vortex-data/vortex/issues/6048", - }, - ]) + json.dumps( + [ + { + "number": 6048, + "title": "Fuzzing Crash: VortexError in array_ops", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6048", + }, + ] + ) ) result = check_duplicate(crash2_info, str(issues_path)) @@ -561,38 +563,42 @@ def test_no_noise_in_stack_frames(self, file_io_info, array_ops_info): assert all("{closure" not in f for f in info.stack_frames) assert "decimal" in info.stack_frames - def test_same_bug_matches_correctly( - self, file_io_info, array_ops_info, temp_dir - ): + def test_same_bug_matches_correctly(self, file_io_info, array_ops_info, temp_dir): """array_ops crash should match the file_io issue — same bug.""" issue_body = _build_issue_body(file_io_info) issues_path = temp_dir / "issues.json" issues_path.write_text( - json.dumps([{ - "number": 6429, - "title": "Fuzzing Crash: VortexError in file_io", - "body": issue_body, - "url": "https://github.com/vortex-data/vortex/issues/6429", - }]) + json.dumps( + [ + { + "number": 6429, + "title": "Fuzzing Crash: VortexError in file_io", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6429", + } + ] + ) ) result = check_duplicate(array_ops_info, str(issues_path)) assert result.duplicate is True assert result.confidence == "high" - def test_match_reason_references_real_site( - self, file_io_info, array_ops_info, temp_dir - ): + def test_match_reason_references_real_site(self, file_io_info, array_ops_info, temp_dir): """The match reason must reference the real crash, not boilerplate.""" issue_body = _build_issue_body(file_io_info) issues_path = temp_dir / "issues.json" issues_path.write_text( - json.dumps([{ - "number": 6429, - "title": "Fuzzing Crash: VortexError in file_io", - "body": issue_body, - "url": "https://github.com/vortex-data/vortex/issues/6429", - }]) + json.dumps( + [ + { + "number": 6429, + "title": "Fuzzing Crash: VortexError in file_io", + "body": issue_body, + "url": "https://github.com/vortex-data/vortex/issues/6429", + } + ] + ) ) result = check_duplicate(array_ops_info, str(issues_path))