mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-03 08:46:46 +00:00
github workflows
This commit is contained in:
277
.github/scripts/doc-coverage-check.py
vendored
Normal file
277
.github/scripts/doc-coverage-check.py
vendored
Normal file
@@ -0,0 +1,277 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Documentation coverage checker for xrpld.
|
||||
|
||||
Parses coverxygen LCOV output, compares against per-module thresholds
|
||||
defined in .github/doc-coverage-thresholds.json, and generates a
|
||||
markdown report suitable for posting as a PR comment.
|
||||
|
||||
Usage:
|
||||
python3 doc-coverage-check.py \
|
||||
--lcov-file doc-coverage.info \
|
||||
--threshold-file .github/doc-coverage-thresholds.json \
|
||||
--output doc-coverage-report.md \
|
||||
[--base-lcov-file base-doc-coverage.info] \
[--new-files src/new_a.h,src/new_b.cpp]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def parse_lcov(lcov_path: str) -> dict[str, dict[str, int]]:
    """Read an LCOV-format file and tally documentation coverage per file.

    Every ``DA:`` entry with at least two comma-separated fields counts as
    one entity; it counts as documented when its second field is an integer
    greater than zero.

    Args:
        lcov_path: Path of the LCOV file to read.

    Returns:
        Mapping of each ``SF:`` path to ``{"documented": N, "total": N}``.
        A record is stored only when its ``end_of_record`` line is reached.
    """
    per_file = {}
    active_path = None
    hits = 0
    entries = 0

    with open(lcov_path) as handle:
        for raw in handle:
            record = raw.strip()

            if record == "end_of_record":
                # Records that never named a file via SF: are discarded.
                if active_path:
                    per_file[active_path] = {"documented": hits, "total": entries}
                active_path = None
                continue

            if record.startswith("SF:"):
                # A new source file starts a fresh tally.
                active_path = record[3:]
                hits = 0
                entries = 0
                continue

            if not record.startswith("DA:"):
                continue

            fields = record[3:].split(",")
            if len(fields) < 2:
                continue
            entries += 1
            if int(fields[1]) > 0:
                hits += 1

    return per_file
|
||||
|
||||
|
||||
def compute_module_coverage(
|
||||
coverage: dict[str, dict[str, int]],
|
||||
module_prefixes: list[str],
|
||||
) -> dict[str, dict[str, int | float]]:
|
||||
"""Aggregate file-level coverage into module-level stats."""
|
||||
modules = {}
|
||||
for prefix in module_prefixes:
|
||||
doc = 0
|
||||
tot = 0
|
||||
for filepath, stats in coverage.items():
|
||||
if filepath.startswith(prefix) or f"/{prefix}" in filepath:
|
||||
doc += stats["documented"]
|
||||
tot += stats["total"]
|
||||
pct = (doc / tot * 100) if tot > 0 else 0.0
|
||||
modules[prefix] = {"documented": doc, "total": tot, "percent": round(pct, 1)}
|
||||
return modules
|
||||
|
||||
|
||||
def compute_global_coverage(
|
||||
coverage: dict[str, dict[str, int]],
|
||||
) -> dict[str, int | float]:
|
||||
"""Compute overall coverage across all files."""
|
||||
doc = sum(s["documented"] for s in coverage.values())
|
||||
tot = sum(s["total"] for s in coverage.values())
|
||||
pct = (doc / tot * 100) if tot > 0 else 0.0
|
||||
return {"documented": doc, "total": tot, "percent": round(pct, 1)}
|
||||
|
||||
|
||||
def check_ratchet(
|
||||
current: dict[str, dict[str, int | float]],
|
||||
base: dict[str, dict[str, int | float]] | None,
|
||||
current_global: dict[str, int | float],
|
||||
base_global: dict[str, int | float] | None,
|
||||
) -> list[str]:
|
||||
"""Check that no module or global coverage decreased vs base branch."""
|
||||
violations = []
|
||||
|
||||
if base_global and current_global["percent"] < base_global["percent"]:
|
||||
violations.append(
|
||||
f"Global coverage decreased: {base_global['percent']}% -> "
|
||||
f"{current_global['percent']}%"
|
||||
)
|
||||
|
||||
if base:
|
||||
for module, stats in current.items():
|
||||
if module in base and stats["percent"] < base[module]["percent"]:
|
||||
violations.append(
|
||||
f"`{module}` coverage decreased: "
|
||||
f"{base[module]['percent']}% -> {stats['percent']}%"
|
||||
)
|
||||
|
||||
return violations
|
||||
|
||||
|
||||
def check_new_files(
    coverage: dict[str, dict[str, int]],
    new_files: list[str],
    min_coverage: int,
) -> list[str]:
    """Check that new files meet minimum documentation coverage.

    A new file is paired with a coverage record when the record's path is
    identical to the file path or ends with ``"/" + filepath``. Matching on
    a path-component boundary keeps ``src/Foo.h`` from being paired with
    ``src/Foo.hpp`` or ``src/BarFoo.h``, which a plain substring test allows.

    Args:
        coverage: Per-file ``{"documented": N, "total": N}`` tallies.
        new_files: Paths of the files added in this change.
        min_coverage: Minimum acceptable coverage, in percent.

    Returns:
        One message per new file whose coverage is below ``min_coverage``.
        Files with no coverage record, or with zero entities, are skipped.
    """
    violations = []
    for filepath in new_files:
        # An exact key hit is the cheapest and least ambiguous match.
        stats = coverage.get(filepath)
        if stats is None:
            suffix = "/" + filepath
            stats = next(
                (
                    candidate
                    for covered_path, candidate in coverage.items()
                    if covered_path.endswith(suffix)
                ),
                None,
            )
        if stats is None or stats["total"] <= 0:
            continue

        pct = stats["documented"] / stats["total"] * 100
        if pct < min_coverage:
            violations.append(
                f"`{filepath}` has {pct:.0f}% doc coverage "
                f"(minimum {min_coverage}%)"
            )
    return violations
|
||||
|
||||
|
||||
def coverage_emoji(pct: float) -> str:
    """Map a coverage percentage to a one-character status marker.

    Returns ``"+"`` at 80 or above, ``"~"`` at 50 or above, otherwise ``"-"``.
    """
    # Bands are listed from highest floor to lowest; the first hit wins.
    for floor, marker in ((80, "+"), (50, "~")):
        if pct >= floor:
            return marker
    return "-"
|
||||
|
||||
|
||||
def generate_report(
|
||||
global_stats: dict[str, int | float],
|
||||
module_stats: dict[str, dict[str, int | float]],
|
||||
thresholds: dict,
|
||||
violations: list[str],
|
||||
new_file_violations: list[str],
|
||||
) -> str:
|
||||
"""Generate a markdown report for the PR comment."""
|
||||
lines = []
|
||||
lines.append("## Documentation Coverage Report")
|
||||
lines.append("")
|
||||
|
||||
passed = not violations and not new_file_violations
|
||||
status = "PASSED" if passed else "FAILED"
|
||||
lines.append(f"**Status:** {status}")
|
||||
lines.append(
|
||||
f"**Global Coverage:** {global_stats['percent']}% "
|
||||
f"({global_stats['documented']}/{global_stats['total']} entities documented)"
|
||||
)
|
||||
lines.append(
|
||||
f"**Minimum Threshold:** {thresholds.get('global_minimum', 0)}%"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
if violations or new_file_violations:
|
||||
lines.append("### Violations")
|
||||
lines.append("")
|
||||
for v in violations + new_file_violations:
|
||||
lines.append(f"- {v}")
|
||||
lines.append("")
|
||||
|
||||
lines.append("### Module Coverage")
|
||||
lines.append("")
|
||||
lines.append("| Module | Coverage | Documented | Total | Threshold |")
|
||||
lines.append("|--------|----------|------------|-------|-----------|")
|
||||
|
||||
module_thresholds = thresholds.get("module_thresholds", {})
|
||||
for module in sorted(module_stats.keys()):
|
||||
stats = module_stats[module]
|
||||
threshold = module_thresholds.get(module, 0)
|
||||
emoji = coverage_emoji(stats["percent"])
|
||||
lines.append(
|
||||
f"| `{module}` | {stats['percent']}% | "
|
||||
f"{stats['documented']} | {stats['total']} | {threshold}% |"
|
||||
)
|
||||
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"*Coverage measured by [coverxygen](https://github.com/psycofdj/coverxygen). "
|
||||
"See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
|
||||
"for documentation guidelines.*"
|
||||
)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """Run the documentation coverage check from the command line.

    Loads the thresholds JSON and the LCOV file, evaluates the global
    minimum, the per-module thresholds, the optional ``no_decrease`` ratchet
    against a base LCOV file, and the minimum for new files. The markdown
    report is written to ``--output`` and printed. The process exits with
    status 1 when any violation was found and 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="Check documentation coverage")
    for flag, options in (
        ("--lcov-file", {"required": True, "help": "Path to LCOV coverage file"}),
        ("--threshold-file", {"required": True, "help": "Path to thresholds JSON"}),
        ("--output", {"required": True, "help": "Path to write markdown report"}),
        (
            "--base-lcov-file",
            {"default": None, "help": "Path to base branch LCOV file"},
        ),
        (
            "--new-files",
            {
                "default": "",
                "help": "Comma-separated list of new C++ files in this PR",
            },
        ),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    with open(args.threshold_file) as f:
        thresholds = json.load(f)

    coverage = parse_lcov(args.lcov_file)
    module_thresholds = thresholds.get("module_thresholds", {})
    module_prefixes = list(module_thresholds.keys())
    module_stats = compute_module_coverage(coverage, module_prefixes)
    global_stats = compute_global_coverage(coverage)

    # Base-branch figures stay None when no usable base LCOV file was given.
    base_module_stats = None
    base_global_stats = None
    base_path = args.base_lcov_file
    if base_path and Path(base_path).exists():
        base_coverage = parse_lcov(base_path)
        base_module_stats = compute_module_coverage(base_coverage, module_prefixes)
        base_global_stats = compute_global_coverage(base_coverage)

    violations = []

    global_minimum = thresholds.get("global_minimum", 0)
    if global_stats["percent"] < global_minimum:
        violations.append(
            f"Global coverage {global_stats['percent']}% is below minimum "
            f"{global_minimum}%"
        )

    for module, threshold in module_thresholds.items():
        if module not in module_stats:
            continue
        percent = module_stats[module]["percent"]
        if percent < threshold:
            violations.append(
                f"`{module}` coverage {percent}% is below "
                f"threshold {threshold}%"
            )

    if thresholds.get("ratchet_mode") == "no_decrease":
        violations.extend(
            check_ratchet(
                module_stats, base_module_stats, global_stats, base_global_stats
            )
        )

    new_file_violations = []
    if args.new_files:
        new_files = [
            name for name in map(str.strip, args.new_files.split(",")) if name
        ]
        new_file_min = thresholds.get("new_file_minimum", 80)
        new_file_violations = check_new_files(coverage, new_files, new_file_min)

    report = generate_report(
        global_stats, module_stats, thresholds, violations, new_file_violations
    )

    with open(args.output, "w") as f:
        f.write(report)

    print(report)

    failure_count = len(violations) + len(new_file_violations)
    if failure_count:
        print(f"\nFAILED: {failure_count} violation(s)")
        sys.exit(1)

    print("\nPASSED: All coverage thresholds met")
    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
279
.github/scripts/doc-review.py
vendored
Normal file
279
.github/scripts/doc-review.py
vendored
Normal file
@@ -0,0 +1,279 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Diff-aware documentation review for xrpld PRs.
|
||||
|
||||
For each changed C++ file, extracts the diff hunks and existing doc
|
||||
comments, then asks the Anthropic API whether documentation needs
|
||||
updating. Produces:
|
||||
- doc-review-report.md: summary comment for the PR
|
||||
- doc-review-comments.json: inline review comments with file/line info
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import anthropic
|
||||
except ImportError:
|
||||
print("ERROR: anthropic package not installed. Run: pip install anthropic")
|
||||
sys.exit(1)
|
||||
|
||||
# Model identifier passed to the Anthropic messages API by analyze_file().
MODEL = "claude-sonnet-4-6"
# Upper bound on the number of tokens generated for one file review.
MAX_TOKENS = 2048

# System prompt sent with every review request. The response schema described
# here is a JSON *object* because the report generators read the "issues" and
# "summary" keys from the parsed reply.
SYSTEM_PROMPT = """You are a documentation reviewer for the xrpld (XRP Ledger daemon) C++ codebase.

Your job is to review code changes and determine whether existing documentation
comments need updating, or whether new documentation is needed.

Documentation style: Javadoc-style Doxygen comments (/** ... */).
See the project's docs/DOCUMENTATION_STANDARDS.md for full guidelines.

Rules:
- Only flag REAL semantic drift: changed behavior, new parameters, removed
  functionality, changed return values, new error conditions.
- Do NOT flag cosmetic changes (whitespace, formatting, variable renames that
  don't change semantics).
- Do NOT suggest docs for private implementation details unless the logic is
  genuinely non-obvious.
- Do NOT paraphrase function signatures. Good docs explain WHY and WHAT
  BEHAVIOR, not WHAT THE CODE LITERALLY DOES.
- Be terse. Each finding should be 1-3 sentences.

Respond with a single JSON object of this form, with one entry in "issues"
for each issue found:
{
  "issues": [
    {
      "file": "path/to/file.h",
      "line": 42,
      "severity": "warning" | "suggestion",
      "message": "Brief description of the doc issue",
      "suggested_doc": "Optional: suggested doc comment text"
    }
  ],
  "summary": "One-paragraph summary of documentation state for this file"
}

If no issues are found, return: {"issues": [], "summary": "Documentation is up to date."}
Respond ONLY with valid JSON. No markdown fences, no explanation outside JSON."""
|
||||
|
||||
|
||||
@dataclass
class FileAnalysis:
    """Everything gathered about one changed file before it is sent for review."""

    # Path of the changed file, as listed in CHANGED_FILES.
    path: str
    # Unified diff text for the file, as returned by get_diff().
    diff: str
    # The file's doc comments joined into one string by extract_doc_comments().
    existing_docs: str
    # Full current text of the file as returned by read_file_safe().
    file_content: str
|
||||
|
||||
|
||||
def get_diff(base_sha: str, head_sha: str, filepath: str) -> str:
    """Return the ``git diff base...head`` output restricted to one file.

    Args:
        base_sha: Left-hand revision of the three-dot range.
        head_sha: Right-hand revision of the three-dot range.
        filepath: Path the diff is limited to.

    Returns:
        The diff text from git's stdout, or ``""`` when git exits non-zero.
    """
    command = ["git", "diff", f"{base_sha}...{head_sha}", "--", filepath]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return ""
    return completed.stdout
|
||||
|
||||
|
||||
def extract_doc_comments(content: str) -> str:
    """Extract all /** ... */ doc comments from file content.

    Args:
        content: Source text to scan.

    Returns:
        The matched comments joined by a blank line, in order of appearance,
        or the placeholder ``"(no documentation comments found)"`` when there
        are none.
    """
    # The lookahead rejects the empty comment "/**/": without it that token
    # is taken as an opener and the match runs on to the next "*/" in the
    # file, swallowing the code in between.
    pattern = r'/\*\*(?!/)[\s\S]*?\*/'
    matches = re.findall(pattern, content)
    return "\n\n".join(matches) if matches else "(no documentation comments found)"
|
||||
|
||||
|
||||
def read_file_safe(filepath: str) -> str:
    """Return a file's text, or ``""`` when it is missing or unreadable.

    The file is decoded as UTF-8 with undecodable bytes replaced. Only
    ``FileNotFoundError`` and ``PermissionError`` are turned into an empty
    string; other errors propagate.
    """
    source = Path(filepath)
    try:
        text = source.read_text(encoding="utf-8", errors="replace")
    except (FileNotFoundError, PermissionError):
        text = ""
    return text
|
||||
|
||||
|
||||
def analyze_file(client: anthropic.Anthropic, analysis: FileAnalysis) -> dict:
    """Send a file's diff and docs to the API for review.

    The prompt carries at most the first 8000 characters of the diff, the
    first 4000 characters of the existing doc comments, and the first 200
    lines of the file.

    Args:
        client: Anthropic client used to issue the request.
        analysis: Diff, doc comments and content of the file under review.

    Returns:
        A dict with an ``"issues"`` list of dicts and a ``"summary"`` string.
        This function is best-effort: any request or parsing failure, and any
        reply that is not a JSON object with a list of issues, yields an
        empty ``"issues"`` list and a summary starting ``"Analysis failed:"``.
    """
    # Joined ahead of the f-string so the placeholder stays a plain name.
    content_head = "\n".join(analysis.file_content.split("\n")[:200])

    user_prompt = f"""Review the following code change for documentation accuracy.

## File: {analysis.path}

## Git Diff:
```
{analysis.diff[:8000]}
```

## Existing Documentation Comments:
```
{analysis.existing_docs[:4000]}
```

## Current File Content (first 200 lines for context):
```cpp
{content_head}
```

Analyze whether the diff introduces changes that make existing docs inaccurate,
or adds new public API surface that lacks documentation."""

    try:
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_prompt}],
        )
        text = response.content[0].text.strip()
        # Tolerate a reply wrapped in a markdown fence despite the prompt.
        if text.startswith("```"):
            text = re.sub(r'^```\w*\n?', '', text)
            text = re.sub(r'\n?```$', '', text)
        result = json.loads(text)
    except Exception as e:
        # Deliberately broad: one failed file must not abort the whole review.
        return {
            "issues": [],
            "summary": f"Analysis failed: {str(e)[:200]}",
        }

    # Valid JSON is not necessarily the expected shape; the report generators
    # call .get() on the result and on every issue.
    issues = result.get("issues", []) if isinstance(result, dict) else None
    if not isinstance(issues, list):
        return {
            "issues": [],
            "summary": "Analysis failed: response was not the expected JSON object",
        }
    result["issues"] = [issue for issue in issues if isinstance(issue, dict)]
    if not isinstance(result.get("summary", ""), str):
        result["summary"] = ""
    return result
|
||||
|
||||
|
||||
def generate_report(
    results: dict[str, dict],
    changed_files: list[str],
) -> str:
    """Build the markdown summary comment from the per-file review results.

    Args:
        results: Review result per file path; ``"issues"`` and ``"summary"``
            are read from each.
        changed_files: Files considered for review; only its length is used.

    Returns:
        Newline-joined markdown: a headline count, the number of files
        reviewed, one section per file that has issues (sorted by path),
        and a footer. Issues whose severity is not ``"warning"`` are counted
        and labelled as suggestions.
    """
    all_issues = [
        issue for result in results.values() for issue in result.get("issues", [])
    ]
    total_issues = len(all_issues)
    warnings = sum(1 for issue in all_issues if issue.get("severity") == "warning")
    suggestions = total_issues - warnings

    if total_issues == 0:
        headline = "No documentation issues found."
    else:
        headline = (
            f"Found **{total_issues}** documentation issue(s) "
            f"across **{len(changed_files)}** changed file(s): "
            f"{warnings} warning(s), {suggestions} suggestion(s)."
        )

    lines = [
        "## Documentation Review Report",
        "",
        headline,
        "",
        f"Files reviewed: {len(changed_files)}",
        "",
    ]

    for filepath in sorted(results):
        result = results[filepath]
        issues = result.get("issues", [])
        if not issues:
            continue

        lines.extend([f"### `{filepath}`", "", result.get("summary", ""), ""])
        for issue in issues:
            if issue.get("severity", "suggestion") == "warning":
                icon = "**Warning:**"
            else:
                icon = "**Suggestion:**"
            line_num = issue.get("line", "?")
            msg = issue.get("message", "")
            lines.append(f"- {icon} Line {line_num}: {msg}")
        lines.append("")

    lines.append("---")
    lines.append(
        "*Automated documentation review. "
        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
        "for guidelines.*"
    )

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_inline_comments(results: dict[str, dict]) -> list[dict]:
    """Turn review issues into inline PR comment payloads.

    Issues whose ``"line"`` is missing, falsy, or not an ``int`` are skipped
    because they cannot be anchored to a line.

    Args:
        results: Review result per file path; ``"issues"`` is read from each.

    Returns:
        A list of ``{"path", "line", "body"}`` dicts. The body starts with a
        bold severity label and, when the issue has a ``"suggested_doc"``,
        ends with that text in a fenced ``cpp`` block.
    """
    comments = []
    for filepath, result in results.items():
        for issue in result.get("issues", []):
            line = issue.get("line")
            anchored = line and isinstance(line, int)
            if not anchored:
                continue

            text = issue.get("message", "")
            suggested = issue.get("suggested_doc")
            if suggested:
                text = f"{text}\n\n**Suggested documentation:**\n```cpp\n{suggested}\n```"

            if issue.get("severity", "suggestion") == "warning":
                label = "Doc Warning"
            else:
                label = "Doc Suggestion"

            comments.append(
                {"path": filepath, "line": line, "body": f"**{label}:** {text}"}
            )

    return comments
|
||||
|
||||
|
||||
def main():
    """Review the changed C++ files named in the environment and write the reports.

    Environment variables read:
        ANTHROPIC_API_KEY: Required; the process exits with status 1 when unset.
        CHANGED_FILES: Whitespace-separated paths; exits 0 when empty or when
            none end in ``.h``, ``.hpp`` or ``.cpp``.
        BASE_SHA, HEAD_SHA: Diff endpoints, defaulting to ``HEAD~1`` and ``HEAD``.

    Files with an empty diff are skipped. Results are written to
    ``doc-review-report.md`` and ``doc-review-comments.json`` in the current
    directory.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("ERROR: ANTHROPIC_API_KEY not set")
        sys.exit(1)

    changed_files_str = os.environ.get("CHANGED_FILES", "")
    if not changed_files_str:
        print("No changed files to review")
        sys.exit(0)

    base_sha = os.environ.get("BASE_SHA", "HEAD~1")
    head_sha = os.environ.get("HEAD_SHA", "HEAD")

    changed_files = [f.strip() for f in changed_files_str.split() if f.strip()]
    cpp_files = [
        f for f in changed_files if f.endswith((".h", ".hpp", ".cpp"))
    ]

    if not cpp_files:
        print("No C++ files changed")
        sys.exit(0)

    print(f"Reviewing {len(cpp_files)} file(s) for documentation accuracy...")

    client = anthropic.Anthropic(api_key=api_key)
    results = {}

    for filepath in cpp_files:
        print(f"  Analyzing: {filepath}")
        diff = get_diff(base_sha, head_sha, filepath)
        if not diff:
            continue

        content = read_file_safe(filepath)
        existing_docs = extract_doc_comments(content)

        analysis = FileAnalysis(
            path=filepath,
            diff=diff,
            existing_docs=existing_docs,
            file_content=content,
        )
        results[filepath] = analyze_file(client, analysis)

    # The report embeds model-written text, which may contain non-ASCII
    # characters. Writing as UTF-8 explicitly (the encoding read_file_safe
    # already uses) keeps the output independent of the runner's locale.
    report = generate_report(results, cpp_files)
    Path("doc-review-report.md").write_text(report, encoding="utf-8")
    print("\nReport written to doc-review-report.md")

    comments = generate_inline_comments(results)
    Path("doc-review-comments.json").write_text(
        json.dumps(comments, indent=2), encoding="utf-8"
    )
    print(f"Generated {len(comments)} inline comment(s)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user