Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 114 additions & 1 deletion .github/scripts/fuzz_report/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
Expand All @@ -14,6 +15,9 @@

TEMPLATES_DIR = Path(__file__).parent / "templates"

# Marker used to find/update the single recurrence-tracking comment.
_RECURRENCE_MARKER = "<!-- fuzzer-recurrence-tracker -->"
_RECURRENCE_COUNT_RE = r"<!-- fuzzer-recurrence-tracker count:(\d+) -->"
# Variables that must be set (non-empty) before creating or commenting on an issue.
REQUIRED_REPORT_VARIABLES = ["FUZZ_TARGET", "CRASH_FILE", "ARTIFACT_URL"]

Expand Down Expand Up @@ -83,7 +87,14 @@ def _build_template_variables(
def _determine_action(
dedup_path: str | Path | None,
) -> tuple[str, dict | None]:
"""Determine action from dedup result. Returns (action, dedup_dict)."""
"""Determine action from dedup result. Returns (action, dedup_dict).

Actions:
create – new issue
skip – exact duplicate, do nothing
update_count – high-confidence duplicate, bump recurrence counter
comment – medium-confidence duplicate, post full comment
"""
if not dedup_path or not Path(dedup_path).exists():
return "create", None

Expand All @@ -94,9 +105,90 @@ def _determine_action(
if dedup.get("confidence") == "exact":
return "skip", dedup

if dedup.get("confidence") == "high":
return "update_count", dedup

return "comment", dedup


def _render_recurrence_body(count: int) -> str:
"""Render the minimal recurrence-tracking comment body."""
return (
f"Seen **{count}** time{'s' if count != 1 else ''}\n\n"
f"<!-- fuzzer-recurrence-tracker count:{count} -->"
)


def _update_recurrence_count(repo: str, issue_number: int | str) -> int:
    """Find-or-create the recurrence comment, incrementing its count.

    Reads the current count from the existing tracking comment (if any),
    increments it, and writes the new body back via the GitHub API.

    Args:
        repo: ``owner/name`` repository slug.
        issue_number: Issue whose tracking comment is updated.

    Returns:
        The new (post-increment) count.

    Raises:
        subprocess.CalledProcessError: If any ``gh api`` call fails.
    """
    # Search with the marker text minus its "-->" terminator so the jq
    # filter matches BOTH the bare marker and the count-bearing variant
    # ("<!-- fuzzer-recurrence-tracker count:N -->"); matching on the full
    # bare marker would miss bodies that only carry the count variant.
    marker_prefix = _RECURRENCE_MARKER.removesuffix(" -->")

    # List all comments on the issue, keeping only marker-bearing ones.
    result = subprocess.run(
        [
            "gh",
            "api",
            f"repos/{repo}/issues/{issue_number}/comments",
            "--paginate",
            "--jq",
            f'.[] | select(.body | contains("{marker_prefix}")) | {{id: .id, body: .body}}',
        ],
        capture_output=True,
        text=True,
        check=True,
    )

    existing_id = None
    current_count = 0

    for line in result.stdout.strip().splitlines():
        if not line:
            continue
        comment = json.loads(line)
        existing_id = comment["id"]
        m = re.search(_RECURRENCE_COUNT_RE, comment["body"])
        if m:
            current_count = int(m.group(1))
        # First marker comment wins unconditionally: there should be only
        # one tracking comment, and continuing the scan would silently
        # retarget existing_id at a later comment when the count regex
        # fails to match.
        break

    new_count = current_count + 1
    body = _render_recurrence_body(new_count)

    if existing_id:
        # Update existing comment (not atomic — race is acceptable since
        # fuzz CI jobs are serialized)
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/comments/{existing_id}",
                "-X",
                "PATCH",
                "-f",
                f"body={body}",
            ],
            check=True,
        )
    else:
        # Create new recurrence comment
        subprocess.run(
            [
                "gh",
                "api",
                f"repos/{repo}/issues/{issue_number}/comments",
                "-f",
                f"body={body}",
            ],
            check=True,
        )

    return new_count


def cmd_extract(args: argparse.Namespace) -> int:
"""Extract crash info from log file."""
if not Path(args.log_file).exists():
Expand Down Expand Up @@ -193,6 +285,15 @@ def cmd_report(args: argparse.Namespace) -> int:
_write_github_output("issue_number", str(existing_issue))
return 0

if action == "update_count":
new_count = _update_recurrence_count(args.repo, existing_issue)
print(
f"Updated recurrence count on #{existing_issue} to {new_count}",
file=sys.stderr,
)
_write_github_output("issue_number", str(existing_issue))
return 0

if action == "comment":
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))
variables.setdefault("DEDUP_CONFIDENCE", dedup.get("confidence", ""))
Expand Down Expand Up @@ -270,6 +371,7 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
print(f" panic_message: {crash_info.panic_message}", file=sys.stderr)
print(f" crash_type: {crash_info.crash_type}", file=sys.stderr)
print(f" seed_hash: {crash_info.seed_hash}", file=sys.stderr)
print(f" stack_frames: {crash_info.stack_frames[:5]}", file=sys.stderr)
print(file=sys.stderr)

# Step 2: Dedup (if issues file provided)
Expand All @@ -286,6 +388,8 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
print(f" confidence: {dedup_result.confidence}", file=sys.stderr)
print(f" issue: #{dedup_result.issue_number}", file=sys.stderr)
print(f" reason: {dedup_result.reason}", file=sys.stderr)
if dedup_result.debug:
print(f" debug: {json.dumps(dedup_result.debug, indent=4)}", file=sys.stderr)
print(file=sys.stderr)

# Write dedup to temp file so _determine_action can read it
Expand Down Expand Up @@ -322,6 +426,15 @@ def cmd_dry_run(args: argparse.Namespace) -> int:
)
return 0

if action == "update_count":
print(
f"(would update recurrence count on #{existing_issue})",
file=sys.stderr,
)
print(file=sys.stderr)
print(_render_recurrence_body(1))
return 0

if action == "comment":
template_path = TEMPLATES_DIR / "related_comment.md"
variables.setdefault("DEDUP_REASON", dedup.get("reason", ""))
Expand Down
69 changes: 65 additions & 4 deletions .github/scripts/fuzz_report/dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class DedupResult:
issue_title: str | None = None
reason: str = ""
check_order: int | None = None
# Debug details: what values were compared to produce this result
debug: dict | None = None

def to_dict(self) -> dict:
return {k: v for k, v in asdict(self).items() if v is not None}
Expand Down Expand Up @@ -56,9 +58,15 @@ def check_seed_hash(seed_hash: str, issues: list[dict]) -> DedupResult:
issue_url=issue.get("url"),
issue_title=issue.get("title"),
reason="Exact seed hash match - same crash input",
debug={"seed_hash": seed_hash},
)

return DedupResult(duplicate=False, check="seed_hash", reason="No matching seed hash found")
return DedupResult(
duplicate=False,
check="seed_hash",
reason="No matching seed hash found",
debug={"seed_hash": seed_hash},
)


def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult:
Expand All @@ -68,6 +76,7 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
duplicate=False,
check="panic_location",
reason="No panic location provided",
debug={"panic_location": panic_location or ""},
)

# Extract file:line pattern
Expand All @@ -85,19 +94,33 @@ def check_panic_location(panic_location: str, issues: list[dict]) -> DedupResult
issue_url=issue.get("url"),
issue_title=issue.get("title"),
reason=f"Same panic location (file:line): {file_pattern}",
debug={
"panic_location": panic_location,
"file_pattern": file_pattern,
"matched_issue": issue["number"],
},
)

return DedupResult(
duplicate=False,
check="panic_location",
reason="No matching panic location found",
debug={
"panic_location": panic_location,
"file_pattern": file_pattern,
},
)


def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
"""Check if stack trace hash exists in any issue body."""
if not stack_hash or stack_hash == "unknown":
return DedupResult(duplicate=False, check="stack_trace", reason="No stack hash provided")
return DedupResult(
duplicate=False,
check="stack_trace",
reason="No stack hash provided",
debug={"stack_hash": stack_hash or ""},
)

for issue in issues:
body = issue.get("body", "")
Expand All @@ -110,20 +133,28 @@ def check_stack_trace(stack_hash: str, issues: list[dict]) -> DedupResult:
issue_url=issue.get("url"),
issue_title=issue.get("title"),
reason="Same stack trace (top 5 frames match)",
debug={
"stack_hash": stack_hash,
"matched_issue": issue["number"],
},
)

return DedupResult(
duplicate=False,
check="stack_trace",
reason="No matching stack trace hash found",
debug={"stack_hash": stack_hash},
)


def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict]) -> DedupResult:
"""Check if error pattern exists in any issue body."""
if not message_hash:
return DedupResult(
duplicate=False, check="error_pattern", reason="No message hash provided"
duplicate=False,
check="error_pattern",
reason="No message hash provided",
debug={"error_variant": error_variant or ""},
)

# First try: exact message hash match
Expand All @@ -138,6 +169,11 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
issue_url=issue.get("url"),
issue_title=issue.get("title"),
reason="Same error pattern (normalized message match)",
debug={
"message_hash": message_hash,
"error_variant": error_variant,
"matched_issue": issue["number"],
},
)

# Second try: same error variant (lower confidence)
Expand All @@ -153,43 +189,68 @@ def check_error_pattern(message_hash: str, error_variant: str, issues: list[dict
issue_url=issue.get("url"),
issue_title=issue.get("title"),
reason=f"Same error variant type: {error_variant}",
debug={
"message_hash": message_hash,
"error_variant": error_variant,
"matched_issue": issue["number"],
},
)

return DedupResult(
duplicate=False, check="error_pattern", reason="No matching error pattern found"
duplicate=False,
check="error_pattern",
reason="No matching error pattern found",
debug={
"message_hash": message_hash,
"error_variant": error_variant,
},
)


def check_duplicate(crash_info: CrashInfo, issues_path: str | Path) -> DedupResult:
    """Run all deduplication checks in order. First match wins.

    Check order, most to least specific:
      1. seed hash (exact crash input)
      2. panic location (same crash site)
      3. stack trace hash (same call path)
      4. error pattern (normalized message / variant)

    Every returned result — match or not — carries an ``extraction`` debug
    summary of the values compared.

    Args:
        crash_info: Extracted crash details to compare against known issues.
        issues_path: Path to the JSON file of existing issues.

    Returns:
        The first matching DedupResult with ``check_order`` set, or a
        non-duplicate result if no check matched.
    """
    issues = load_issues(issues_path)

    # Summary of extracted values for debugging (attached to every result)
    extraction_summary = {
        "panic_location": crash_info.panic_location,
        "crash_location": crash_info.crash_location,
        "error_variant": crash_info.error_variant,
        "stack_frames_top5": crash_info.stack_frames[:5],
        "normalized_message": crash_info.normalized_message,
    }

    # Ordered checks, most specific first; deferred via lambdas so each
    # check only runs if every earlier one failed to match.
    checks = (
        lambda: check_seed_hash(crash_info.seed_hash, issues),
        lambda: check_panic_location(crash_info.panic_location, issues),
        lambda: check_stack_trace(crash_info.stack_trace_hash, issues),
        lambda: check_error_pattern(crash_info.message_hash, crash_info.error_variant, issues),
    )

    for order, run_check in enumerate(checks, start=1):
        result = run_check()
        if result.duplicate:
            result.check_order = order
            result.debug = {**(result.debug or {}), "extraction": extraction_summary}
            return result

    # No matches found
    return DedupResult(
        duplicate=False,
        reason="No duplicate detected by any check",
        debug={"extraction": extraction_summary},
    )
Loading
Loading