github workflows

2026-07-24 23:50:22 +00:00 · 2026-05-13 18:21:39 +02:00
parent afbccf971a
commit 611cb1079a
9 changed files with 1358 additions and 2 deletions
--- a/.github/scripts/doc-coverage-check.py
+++ b/.github/scripts/doc-coverage-check.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+"""
+Documentation coverage checker for xrpld.
+
+Parses coverxygen LCOV output, compares against per-module thresholds
+defined in .github/doc-coverage-thresholds.json, and generates a
+markdown report suitable for posting as a PR comment.
+
+Usage:
+    python3 doc-coverage-check.py \
+        --lcov-file doc-coverage.info \
+        --threshold-file .github/doc-coverage-thresholds.json \
+        --output doc-coverage-report.md \
+        [--base-lcov-file base-doc-coverage.info]
+"""
+
+import argparse
+import json
+import re
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+
+def parse_lcov(lcov_path: str) -> dict[str, dict[str, int]]:
+    """Parse an LCOV-format file into per-file coverage data.
+
+    Only ``SF:``, ``DA:`` and ``end_of_record`` lines are interpreted; all
+    other lines are ignored. Every ``DA:`` line with at least two
+    comma-separated fields counts as one entity, and counts as documented
+    when its second field is a positive integer.
+
+    A record is stored when ``end_of_record`` is seen, and also when a new
+    ``SF:`` line or the end of the file arrives first, so an unterminated
+    record is not silently dropped.
+
+    Args:
+        lcov_path: Path of the LCOV file to read.
+
+    Returns:
+        A dict mapping file paths to {"documented": N, "total": N}.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        ValueError: If the second field of a ``DA:`` line is not an integer.
+    """
+    coverage: dict[str, dict[str, int]] = {}
+    current_file = None
+    documented = 0
+    total = 0
+
+    def store_current() -> None:
+        # An empty or missing path means there is no open record to store.
+        if current_file:
+            coverage[current_file] = {"documented": documented, "total": total}
+
+    with open(lcov_path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if line.startswith("SF:"):
+                # Keep a previous record that never saw end_of_record.
+                store_current()
+                current_file = line[3:]
+                documented = 0
+                total = 0
+            elif line.startswith("DA:"):
+                parts = line[3:].split(",")
+                if len(parts) >= 2:
+                    total += 1
+                    if int(parts[1]) > 0:
+                        documented += 1
+            elif line == "end_of_record":
+                store_current()
+                current_file = None
+
+    # Flush a trailing record when the file ends without end_of_record.
+    store_current()
+    return coverage
+
+
+def compute_module_coverage(
+    coverage: dict[str, dict[str, int]],
+    module_prefixes: list[str],
+) -> dict[str, dict[str, int | float]]:
+    """Roll per-file coverage up into one entry per module prefix.
+
+    A file belongs to a module when its path starts with the prefix or
+    contains the prefix immediately after a "/". A file can therefore be
+    counted in more than one module.
+
+    Args:
+        coverage: Per-file stats with "documented" and "total" counts.
+        module_prefixes: Path prefixes that identify the modules.
+
+    Returns:
+        A dict keyed by prefix with "documented", "total" and "percent"
+        (rounded to one decimal; 0.0 when the module has no entities).
+    """
+    summary = {}
+    for prefix in module_prefixes:
+        nested_marker = f"/{prefix}"
+        matching = [
+            file_stats
+            for path, file_stats in coverage.items()
+            if path.startswith(prefix) or nested_marker in path
+        ]
+        documented = sum(entry["documented"] for entry in matching)
+        total = sum(entry["total"] for entry in matching)
+        if total > 0:
+            percent = documented / total * 100
+        else:
+            percent = 0.0
+        summary[prefix] = {
+            "documented": documented,
+            "total": total,
+            "percent": round(percent, 1),
+        }
+    return summary
+
+
+def compute_global_coverage(
+    coverage: dict[str, dict[str, int]],
+) -> dict[str, int | float]:
+    """Total up documented and overall entity counts across every file.
+
+    Returns:
+        A dict with "documented", "total" and "percent" (rounded to one
+        decimal; 0.0 when there are no entities at all).
+    """
+    documented = 0
+    total = 0
+    for file_stats in coverage.values():
+        documented += file_stats["documented"]
+        total += file_stats["total"]
+
+    percent = 0.0
+    if total > 0:
+        percent = documented / total * 100
+    return {"documented": documented, "total": total, "percent": round(percent, 1)}
+
+
+def check_ratchet(
+    current: dict[str, dict[str, int | float]],
+    base: dict[str, dict[str, int | float]] | None,
+    current_global: dict[str, int | float],
+    base_global: dict[str, int | float] | None,
+) -> list[str]:
+    """Report every coverage percentage that dropped relative to the base.
+
+    The global figure is compared first, then each module present in both
+    ``current`` and ``base``. Missing (falsy) base data disables the
+    corresponding comparison.
+
+    Returns:
+        Human-readable violation messages; empty when nothing decreased.
+    """
+    violations = []
+
+    if base_global:
+        now = current_global["percent"]
+        before = base_global["percent"]
+        if now < before:
+            violations.append(f"Global coverage decreased: {before}% -> {now}%")
+
+    if not base:
+        return violations
+
+    for module, stats in current.items():
+        if module not in base:
+            # Modules without a baseline cannot regress.
+            continue
+        before = base[module]["percent"]
+        if stats["percent"] < before:
+            violations.append(
+                f"`{module}` coverage decreased: {before}% -> {stats['percent']}%"
+            )
+
+    return violations
+
+
+def check_new_files(
+    coverage: dict[str, dict[str, int]],
+    new_files: list[str],
+    min_coverage: int,
+) -> list[str]:
+    """Check that new files meet minimum documentation coverage.
+
+    Each new file is matched against the coverage paths on a path-component
+    boundary: the coverage path must equal the new file's path or end with
+    "/" followed by it. Plain substring matching would let ``Foo.h`` match
+    ``BarFoo.h`` (or ``x.h`` match ``x.hpp``) and judge the wrong file.
+
+    Only the first matching coverage entry is evaluated. Files that have no
+    matching entry, or whose entry has no entities, produce no violation.
+
+    Args:
+        coverage: Per-file stats with "documented" and "total" counts.
+        new_files: Paths of the files added in the change under review.
+        min_coverage: Minimum required percentage for each new file.
+
+    Returns:
+        One message per new file whose coverage is below ``min_coverage``.
+    """
+    violations = []
+    for filepath in new_files:
+        path_suffix = f"/{filepath}"
+        for covered_path, stats in coverage.items():
+            if covered_path != filepath and not covered_path.endswith(path_suffix):
+                continue
+            if stats["total"] > 0:
+                pct = stats["documented"] / stats["total"] * 100
+                if pct < min_coverage:
+                    violations.append(
+                        f"`{filepath}` has {pct:.0f}% doc coverage "
+                        f"(minimum {min_coverage}%)"
+                    )
+            break
+    return violations
+
+
+def coverage_emoji(pct: float) -> str:
+    """Map a coverage percentage to a one-character indicator.
+
+    Returns "+" at 80 or above, "~" at 50 or above, and "-" otherwise.
+    """
+    # Ordered from the highest floor down; the first floor reached wins.
+    for floor, marker in ((80, "+"), (50, "~")):
+        if pct >= floor:
+            return marker
+    return "-"
+
+
+def generate_report(
+    global_stats: dict[str, int | float],
+    module_stats: dict[str, dict[str, int | float]],
+    thresholds: dict,
+    violations: list[str],
+    new_file_violations: list[str],
+) -> str:
+    """Generate a markdown report for the PR comment.
+
+    Args:
+        global_stats: Overall "percent", "documented" and "total" figures.
+        module_stats: The same figures keyed by module prefix.
+        thresholds: Parsed thresholds config; "global_minimum" and
+            "module_thresholds" are read, both defaulting to 0 / empty.
+        violations: Threshold and ratchet violation messages.
+        new_file_violations: Violation messages for newly added files.
+
+    Returns:
+        The report as a single newline-joined markdown string. The status
+        is PASSED only when both violation lists are empty.
+    """
+    lines = []
+    lines.append("## Documentation Coverage Report")
+    lines.append("")
+
+    passed = not violations and not new_file_violations
+    status = "PASSED" if passed else "FAILED"
+    lines.append(f"**Status:** {status}")
+    lines.append(
+        f"**Global Coverage:** {global_stats['percent']}% "
+        f"({global_stats['documented']}/{global_stats['total']} entities documented)"
+    )
+    lines.append(
+        f"**Minimum Threshold:** {thresholds.get('global_minimum', 0)}%"
+    )
+    lines.append("")
+
+    if not passed:
+        lines.append("### Violations")
+        lines.append("")
+        for v in violations + new_file_violations:
+            lines.append(f"- {v}")
+        lines.append("")
+
+    lines.append("### Module Coverage")
+    lines.append("")
+    lines.append("| Module | Coverage | Documented | Total | Threshold |")
+    lines.append("|--------|----------|------------|-------|-----------|")
+
+    module_thresholds = thresholds.get("module_thresholds", {})
+    # Sorted so the table order is stable regardless of config order.
+    for module in sorted(module_stats):
+        stats = module_stats[module]
+        threshold = module_thresholds.get(module, 0)
+        lines.append(
+            f"| `{module}` | {stats['percent']}% | "
+            f"{stats['documented']} | {stats['total']} | {threshold}% |"
+        )
+
+    lines.append("")
+    lines.append(
+        "*Coverage measured by [coverxygen](https://github.com/psycofdj/coverxygen). "
+        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
+        "for documentation guidelines.*"
+    )
+
+    return "\n".join(lines)
+
+
+def main():
+    """Command-line entry point for the documentation coverage check.
+
+    Loads the thresholds JSON and the LCOV file, then evaluates the global
+    minimum, the per-module thresholds, the optional "no_decrease" ratchet
+    against a base LCOV file, and the minimum for newly added files. The
+    markdown report is written to --output and printed to stdout. The
+    process exits with status 1 when any violation was found, 0 otherwise.
+    """
+    cli = argparse.ArgumentParser(description="Check documentation coverage")
+    cli.add_argument("--lcov-file", required=True, help="Path to LCOV coverage file")
+    cli.add_argument(
+        "--threshold-file", required=True, help="Path to thresholds JSON"
+    )
+    cli.add_argument("--output", required=True, help="Path to write markdown report")
+    cli.add_argument(
+        "--base-lcov-file", default=None, help="Path to base branch LCOV file"
+    )
+    cli.add_argument(
+        "--new-files",
+        default="",
+        help="Comma-separated list of new C++ files in this PR",
+    )
+    args = cli.parse_args()
+
+    with open(args.threshold_file) as threshold_fh:
+        thresholds = json.load(threshold_fh)
+
+    coverage = parse_lcov(args.lcov_file)
+    module_thresholds = thresholds.get("module_thresholds", {})
+    module_prefixes = list(module_thresholds)
+    module_stats = compute_module_coverage(coverage, module_prefixes)
+    global_stats = compute_global_coverage(coverage)
+
+    # Base-branch figures stay None when no usable base file was supplied,
+    # which makes the ratchet check a no-op.
+    base_module_stats = None
+    base_global_stats = None
+    base_path = args.base_lcov_file
+    if base_path and Path(base_path).exists():
+        base_coverage = parse_lcov(base_path)
+        base_module_stats = compute_module_coverage(base_coverage, module_prefixes)
+        base_global_stats = compute_global_coverage(base_coverage)
+
+    violations = []
+
+    global_minimum = thresholds.get("global_minimum", 0)
+    if global_stats["percent"] < global_minimum:
+        violations.append(
+            f"Global coverage {global_stats['percent']}% is below minimum "
+            f"{global_minimum}%"
+        )
+
+    for module, threshold in module_thresholds.items():
+        if module not in module_stats:
+            continue
+        module_percent = module_stats[module]["percent"]
+        if module_percent < threshold:
+            violations.append(
+                f"`{module}` coverage {module_percent}% is below "
+                f"threshold {threshold}%"
+            )
+
+    if thresholds.get("ratchet_mode") == "no_decrease":
+        violations += check_ratchet(
+            module_stats, base_module_stats, global_stats, base_global_stats
+        )
+
+    new_file_violations = []
+    if args.new_files:
+        stripped = (entry.strip() for entry in args.new_files.split(","))
+        new_files = [entry for entry in stripped if entry]
+        new_file_min = thresholds.get("new_file_minimum", 80)
+        new_file_violations = check_new_files(coverage, new_files, new_file_min)
+
+    report = generate_report(
+        global_stats, module_stats, thresholds, violations, new_file_violations
+    )
+
+    with open(args.output, "w") as report_fh:
+        report_fh.write(report)
+
+    print(report)
+
+    violation_count = len(violations) + len(new_file_violations)
+    if violation_count:
+        print(f"\nFAILED: {violation_count} violation(s)")
+        sys.exit(1)
+
+    print("\nPASSED: All coverage thresholds met")
+    sys.exit(0)
+
+
+# Run the checker only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/.github/scripts/doc-review.py
+++ b/.github/scripts/doc-review.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+"""
+Diff-aware documentation review for xrpld PRs.
+
+For each changed C++ file, extracts the diff hunks and existing doc
+comments, then asks the Anthropic API whether documentation needs
+updating. Produces:
+  - doc-review-report.md: summary comment for the PR
+  - doc-review-comments.json: inline review comments with file/line info
+"""
+
+import json
+import os
+import re
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+# The Anthropic SDK is a third-party dependency; when it is missing, print an
+# install hint and exit with status 1 instead of raising a traceback.
+try:
+    import anthropic
+except ImportError:
+    print("ERROR: anthropic package not installed. Run: pip install anthropic")
+    sys.exit(1)
+
+# Model identifier passed to the Anthropic Messages API.
+MODEL = "claude-sonnet-4-6"
+# Upper bound on the number of tokens generated for each file's review.
+MAX_TOKENS = 2048
+
+# System prompt for the reviewer. The response format described here must stay
+# in sync with the parsing code: a single JSON object with an "issues" list
+# and a "summary" string.
+SYSTEM_PROMPT = """You are a documentation reviewer for the xrpld (XRP Ledger daemon) C++ codebase.
+
+Your job is to review code changes and determine whether existing documentation
+comments need updating, or whether new documentation is needed.
+
+Documentation style: Javadoc-style Doxygen comments (/** ... */).
+See the project's docs/DOCUMENTATION_STANDARDS.md for full guidelines.
+
+Rules:
+- Only flag REAL semantic drift: changed behavior, new parameters, removed
+  functionality, changed return values, new error conditions.
+- Do NOT flag cosmetic changes (whitespace, formatting, variable renames that
+  don't change semantics).
+- Do NOT suggest docs for private implementation details unless the logic is
+  genuinely non-obvious.
+- Do NOT paraphrase function signatures. Good docs explain WHY and WHAT
+  BEHAVIOR, not WHAT THE CODE LITERALLY DOES.
+- Be terse. Each finding should be 1-3 sentences.
+
+Respond with a single JSON object of the following form, listing each issue found:
+{
+  "issues": [
+    {
+      "file": "path/to/file.h",
+      "line": 42,
+      "severity": "warning" | "suggestion",
+      "message": "Brief description of the doc issue",
+      "suggested_doc": "Optional: suggested doc comment text"
+    }
+  ],
+  "summary": "One-paragraph summary of documentation state for this file"
+}
+
+If no issues are found, return: {"issues": [], "summary": "Documentation is up to date."}
+Respond ONLY with valid JSON. No markdown fences, no explanation outside JSON."""
+
+
+@dataclass
+class FileAnalysis:
+    """Inputs gathered for the documentation review of one changed file."""
+
+    # Path of the changed file, as taken from the CHANGED_FILES list.
+    path: str
+    # Unified diff text for this file between the base and head commits.
+    diff: str
+    # The file's /** ... */ comments joined together, or a placeholder string
+    # when none were found.
+    existing_docs: str
+    # Text of the file as read from the working tree ("" if it could not be read).
+    file_content: str
+
+
+def get_diff(base_sha: str, head_sha: str, filepath: str) -> str:
+    """Return the unified diff of one file between two commits.
+
+    Runs ``git diff <base_sha>...<head_sha> -- <filepath>`` and returns its
+    standard output. An empty string is returned when git exits with a
+    non-zero status.
+    """
+    command = ["git", "diff", f"{base_sha}...{head_sha}", "--", filepath]
+    try:
+        completed = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except subprocess.CalledProcessError:
+        return ""
+    return completed.stdout
+
+
+def extract_doc_comments(content: str) -> str:
+    """Collect every /** ... */ comment in ``content``.
+
+    Returns the comments separated by blank lines, or the placeholder
+    "(no documentation comments found)" when there are none.
+    """
+    doc_blocks = [m.group(0) for m in re.finditer(r'/\*\*[\s\S]*?\*/', content)]
+    if not doc_blocks:
+        return "(no documentation comments found)"
+    return "\n\n".join(doc_blocks)
+
+
+def read_file_safe(filepath: str) -> str:
+    """Return the text of ``filepath``, or "" if it is missing or unreadable.
+
+    The file is decoded as UTF-8 with undecodable bytes replaced. Only
+    FileNotFoundError and PermissionError are suppressed.
+    """
+    target = Path(filepath)
+    try:
+        text = target.read_text(encoding="utf-8", errors="replace")
+    except (FileNotFoundError, PermissionError):
+        text = ""
+    return text
+
+
+def analyze_file(client: anthropic.Anthropic, analysis: FileAnalysis) -> dict:
+    """Send a file's diff and docs to the API for review.
+
+    The prompt contains at most 8000 characters of the diff, 4000 characters
+    of the existing doc comments and the first 200 lines of the file.
+
+    Args:
+        client: Anthropic client used to issue the request.
+        analysis: Diff, doc comments and content of the file under review.
+
+    Returns:
+        A dict that always has an "issues" list whose entries are dicts and
+        a "summary" string. Any failure (API error, empty response, invalid
+        JSON, unexpected JSON shape) yields an empty "issues" list, with the
+        reason in "summary" where one is known.
+    """
+    user_prompt = f"""Review the following code change for documentation accuracy.
+
+## File: {analysis.path}
+
+## Git Diff:
+```
+{analysis.diff[:8000]}
+```
+
+## Existing Documentation Comments:
+```
+{analysis.existing_docs[:4000]}
+```
+
+## Current File Content (first 200 lines for context):
+```cpp
+{chr(10).join(analysis.file_content.split(chr(10))[:200])}
+```
+
+Analyze whether the diff introduces changes that make existing docs inaccurate,
+or adds new public API surface that lacks documentation."""
+
+    try:
+        response = client.messages.create(
+            model=MODEL,
+            max_tokens=MAX_TOKENS,
+            system=SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": user_prompt}],
+        )
+        text = response.content[0].text.strip()
+        # Strip a surrounding markdown fence if the model added one anyway.
+        if text.startswith("```"):
+            text = re.sub(r'^```\w*\n?', '', text)
+            text = re.sub(r'\n?```$', '', text)
+        parsed = json.loads(text)
+    except Exception as e:
+        # Best-effort boundary: one failed file must not abort the whole review.
+        return {
+            "issues": [],
+            "summary": f"Analysis failed: {str(e)[:200]}",
+        }
+
+    # Nothing forces the model to honour the requested schema, and callers
+    # use .get() on the result and on every issue, so normalise the shape.
+    if isinstance(parsed, list):
+        parsed = {"issues": parsed}
+    if not isinstance(parsed, dict):
+        return {
+            "issues": [],
+            "summary": "Analysis failed: response was not a JSON object",
+        }
+
+    issues = parsed.get("issues")
+    if isinstance(issues, list):
+        parsed["issues"] = [issue for issue in issues if isinstance(issue, dict)]
+    else:
+        parsed["issues"] = []
+    if not isinstance(parsed.get("summary"), str):
+        parsed["summary"] = ""
+    return parsed
+
+
+def generate_report(
+    results: dict[str, dict],
+    changed_files: list[str],
+) -> str:
+    """Build the markdown summary of the documentation review.
+
+    Args:
+        results: Per-file analysis results, each with optional "issues" and
+            "summary" entries.
+        changed_files: Files considered for review; only their count is used.
+
+    Returns:
+        The report as one newline-joined string. Files are listed in sorted
+        path order, and only files with at least one issue get a section.
+    """
+    every_issue = [
+        issue for result in results.values() for issue in result.get("issues", [])
+    ]
+    total_issues = len(every_issue)
+    warnings = sum(issue.get("severity") == "warning" for issue in every_issue)
+    suggestions = total_issues - warnings
+    reviewed = len(changed_files)
+
+    if total_issues == 0:
+        headline = "No documentation issues found."
+    else:
+        headline = (
+            f"Found **{total_issues}** documentation issue(s) "
+            f"across **{reviewed}** changed file(s): "
+            f"{warnings} warning(s), {suggestions} suggestion(s)."
+        )
+
+    lines = [
+        "## Documentation Review Report",
+        "",
+        headline,
+        "",
+        f"Files reviewed: {reviewed}",
+        "",
+    ]
+
+    for filepath in sorted(results):
+        result = results[filepath]
+        issues = result.get("issues", [])
+        if not issues:
+            continue
+        lines += [f"### `{filepath}`", "", result.get("summary", ""), ""]
+        for issue in issues:
+            if issue.get("severity", "suggestion") == "warning":
+                label = "**Warning:**"
+            else:
+                label = "**Suggestion:**"
+            where = issue.get("line", "?")
+            text = issue.get("message", "")
+            lines.append(f"- {label} Line {where}: {text}")
+        lines.append("")
+
+    lines += [
+        "---",
+        "*Automated documentation review. "
+        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
+        "for guidelines.*",
+    ]
+
+    return "\n".join(lines)
+
+
+def generate_inline_comments(results: dict[str, dict]) -> list[dict]:
+    """Generate inline PR review comments from analysis results.
+
+    Issues are skipped unless "line" is a positive integer. Booleans are
+    rejected explicitly because ``bool`` is a subclass of ``int``, so
+    ``True`` would otherwise be emitted as a line number.
+
+    Args:
+        results: Per-file analysis results, each with an optional "issues"
+            list of dicts.
+
+    Returns:
+        A list of {"path", "line", "body"} dicts, one per usable issue, in
+        the iteration order of ``results`` and of each file's issues.
+    """
+    comments = []
+    for filepath, result in results.items():
+        for issue in result.get("issues", []):
+            line = issue.get("line")
+            if isinstance(line, bool) or not isinstance(line, int) or line < 1:
+                continue
+
+            # A JSON null message becomes "" so the concatenation below works.
+            body = issue.get("message") or ""
+            suggested = issue.get("suggested_doc")
+            if suggested:
+                body += f"\n\n**Suggested documentation:**\n```cpp\n{suggested}\n```"
+
+            severity = issue.get("severity", "suggestion")
+            prefix = "Doc Warning" if severity == "warning" else "Doc Suggestion"
+            body = f"**{prefix}:** {body}"
+
+            comments.append({"path": filepath, "line": line, "body": body})
+
+    return comments
+
+
+def main():
+    """Entry point for the documentation review script.
+
+    Configuration comes from environment variables: ANTHROPIC_API_KEY
+    (required; exit status 1 when unset), CHANGED_FILES (whitespace-separated
+    paths; exit status 0 when empty), and BASE_SHA / HEAD_SHA (defaulting to
+    "HEAD~1" and "HEAD"). Every changed .h/.hpp/.cpp file with a non-empty
+    diff is analyzed, then doc-review-report.md and doc-review-comments.json
+    are written to the current directory.
+    """
+    env = os.environ
+
+    api_key = env.get("ANTHROPIC_API_KEY")
+    if not api_key:
+        print("ERROR: ANTHROPIC_API_KEY not set")
+        sys.exit(1)
+
+    changed_files_str = env.get("CHANGED_FILES", "")
+    if not changed_files_str:
+        print("No changed files to review")
+        sys.exit(0)
+
+    base_sha = env.get("BASE_SHA", "HEAD~1")
+    head_sha = env.get("HEAD_SHA", "HEAD")
+
+    # split() without arguments already drops empty and whitespace-only tokens.
+    cpp_suffixes = (".h", ".hpp", ".cpp")
+    cpp_files = [
+        name for name in changed_files_str.split() if name.endswith(cpp_suffixes)
+    ]
+
+    if not cpp_files:
+        print("No C++ files changed")
+        sys.exit(0)
+
+    print(f"Reviewing {len(cpp_files)} file(s) for documentation accuracy...")
+
+    client = anthropic.Anthropic(api_key=api_key)
+    results = {}
+
+    for filepath in cpp_files:
+        print(f"  Analyzing: {filepath}")
+        diff = get_diff(base_sha, head_sha, filepath)
+        if diff:
+            content = read_file_safe(filepath)
+            analysis = FileAnalysis(
+                path=filepath,
+                diff=diff,
+                existing_docs=extract_doc_comments(content),
+                file_content=content,
+            )
+            results[filepath] = analyze_file(client, analysis)
+
+    report = generate_report(results, cpp_files)
+    Path("doc-review-report.md").write_text(report)
+    print("\nReport written to doc-review-report.md")
+
+    comments = generate_inline_comments(results)
+    Path("doc-review-comments.json").write_text(json.dumps(comments, indent=2))
+    print(f"Generated {len(comments)} inline comment(s)")
+
+
+# Run the review only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()