mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-03 00:36:48 +00:00
280 lines
8.6 KiB
Python
280 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Diff-aware documentation review for xrpld PRs.
|
|
|
|
For each changed C++ file, extracts the diff hunks and existing doc
|
|
comments, then asks the Anthropic API whether documentation needs
|
|
updating. Produces:
|
|
- doc-review-report.md: summary comment for the PR
|
|
- doc-review-comments.json: inline review comments with file/line info
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import anthropic
|
|
except ImportError:
|
|
print("ERROR: anthropic package not installed. Run: pip install anthropic")
|
|
sys.exit(1)
|
|
|
|
# Model identifier passed as `model=` to client.messages.create().
MODEL = "claude-sonnet-4-6"
# Passed as `max_tokens=` on every review request (caps the length of one reply).
MAX_TOKENS = 2048
|
|
|
|
# System prompt sent with every review request. It fixes the reviewer role, the
# filtering rules, and the reply format: ONE JSON object with an "issues" list
# and a "summary" string, which are the two keys the rest of this script reads.
SYSTEM_PROMPT = """You are a documentation reviewer for the xrpld (XRP Ledger daemon) C++ codebase.

Your job is to review code changes and determine whether existing documentation
comments need updating, or whether new documentation is needed.

Documentation style: Javadoc-style Doxygen comments (/** ... */).
See the project's docs/DOCUMENTATION_STANDARDS.md for full guidelines.

Rules:
- Only flag REAL semantic drift: changed behavior, new parameters, removed
  functionality, changed return values, new error conditions.
- Do NOT flag cosmetic changes (whitespace, formatting, variable renames that
  don't change semantics).
- Do NOT suggest docs for private implementation details unless the logic is
  genuinely non-obvious.
- Do NOT paraphrase function signatures. Good docs explain WHY and WHAT
  BEHAVIOR, not WHAT THE CODE LITERALLY DOES.
- Be terse. Each finding should be 1-3 sentences.

Respond with a single JSON object of the following form:
{
  "issues": [
    {
      "file": "path/to/file.h",
      "line": 42,
      "severity": "warning" | "suggestion",
      "message": "Brief description of the doc issue",
      "suggested_doc": "Optional: suggested doc comment text"
    }
  ],
  "summary": "One-paragraph summary of documentation state for this file"
}

If no issues are found, return: {"issues": [], "summary": "Documentation is up to date."}
Respond ONLY with valid JSON. No markdown fences, no explanation outside JSON."""
|
|
|
|
|
|
@dataclass
class FileAnalysis:
    """Inputs gathered for reviewing one changed file."""

    # Path of the changed file, taken from the CHANGED_FILES environment variable.
    path: str
    # Diff text for this file, as returned by get_diff().
    diff: str
    # Doc comments found in the file, as returned by extract_doc_comments().
    existing_docs: str
    # Text of the file as read from disk by read_file_safe() ("" if unreadable).
    file_content: str
|
|
|
|
|
|
def get_diff(base_sha: str, head_sha: str, filepath: str) -> str:
    """Return the unified diff of one file between two commits.

    Runs ``git diff <base_sha>...<head_sha> -- <filepath>`` in the current
    working directory. With the three-dot form git compares ``head_sha``
    against its merge base with ``base_sha``.

    Args:
        base_sha: Revision the change is based on.
        head_sha: Revision containing the change.
        filepath: Path of the file to diff; passed after ``--`` so it is never
            interpreted as an option or revision.

    Returns:
        git's stdout, or "" when git exits with a non-zero status. The caller
        in main() skips files whose diff is empty.
    """
    command = ["git", "diff", f"{base_sha}...{head_sha}", "--", filepath]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as err:
        # Still best-effort, but no longer silent: an unknown revision (e.g. a
        # base commit missing from a shallow clone) would otherwise make the
        # file vanish from the review without any trace in the logs.
        detail = (err.stderr or "").strip()[:200]
        print(
            f"WARNING: git diff failed for {filepath} "
            f"(exit {err.returncode}): {detail}",
            file=sys.stderr,
        )
        return ""
    return completed.stdout
|
|
|
|
|
|
def extract_doc_comments(content: str) -> str:
    """Collect every ``/** ... */`` doc comment found in ``content``.

    Args:
        content: Source text to scan.

    Returns:
        The matched comments, in file order, separated by one blank line, or
        the placeholder "(no documentation comments found)" when there are
        none.
    """
    # `(?!/)` keeps the empty block comment `/**/` from being taken as the
    # opening of a doc comment; without it the lazy match would run on to the
    # next `*/` anywhere later in the file and swallow ordinary code.
    pattern = r'/\*\*(?!/)[\s\S]*?\*/'
    matches = re.findall(pattern, content)
    return "\n\n".join(matches) if matches else "(no documentation comments found)"
|
|
|
|
|
|
def read_file_safe(filepath: str) -> str:
    """Return the text of ``filepath``, or "" if it is missing or unreadable.

    The file is decoded as UTF-8; undecodable bytes are replaced rather than
    raising. Only "file not found" and "permission denied" are tolerated; any
    other error propagates to the caller.
    """
    target = Path(filepath)
    try:
        text = target.read_text(encoding="utf-8", errors="replace")
    except (FileNotFoundError, PermissionError):
        text = ""
    return text
|
|
|
|
|
|
def analyze_file(client: anthropic.Anthropic, analysis: FileAnalysis) -> dict:
    """Ask the model to review one file's diff against its documentation.

    The prompt contains at most the first 8000 characters of the diff, the
    first 4000 characters of the extracted doc comments, and the first 200
    lines of the file.

    Args:
        client: Anthropic client used to send the request.
        analysis: Diff, doc comments and file text for the file under review.

    Returns:
        A dict with an "issues" list and a "summary" string. API errors,
        unparsable replies and replies of the wrong shape never raise; they
        yield ``{"issues": [], "summary": "Analysis failed: ..."}`` instead.
    """
    file_head = "\n".join(analysis.file_content.split("\n")[:200])
    user_prompt = f"""Review the following code change for documentation accuracy.

## File: {analysis.path}

## Git Diff:
```
{analysis.diff[:8000]}
```

## Existing Documentation Comments:
```
{analysis.existing_docs[:4000]}
```

## Current File Content (first 200 lines for context):
```cpp
{file_head}
```

Analyze whether the diff introduces changes that make existing docs inaccurate,
or adds new public API surface that lacks documentation."""

    try:
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_prompt}],
        )
        text = response.content[0].text.strip()
        # The model is told not to use markdown fences; strip them if it does.
        if text.startswith("```"):
            text = re.sub(r'^```\w*\n?', '', text)
            text = re.sub(r'\n?```$', '', text)
        parsed = json.loads(text)
    except Exception as e:
        # Deliberate catch-all at the API boundary: one failing file must not
        # abort the whole review. (json.JSONDecodeError is an Exception
        # subclass, so it needs no separate entry.)
        return {
            "issues": [],
            "summary": f"Analysis failed: {str(e)[:200]}",
        }

    # Valid JSON is not necessarily the expected object: a bare list or scalar
    # would crash later code that calls .get() on the result.
    if not isinstance(parsed, dict) or not isinstance(parsed.get("issues", []), list):
        return {
            "issues": [],
            "summary": (
                "Analysis failed: expected a JSON object with an 'issues' "
                f"list, got {type(parsed).__name__}"
            ),
        }
    return parsed
|
|
|
|
|
|
def _report_issues(result) -> list[dict]:
    """Return the dict entries of ``result["issues"]``; [] if absent or malformed."""
    if not isinstance(result, dict):
        return []
    raw = result.get("issues")
    if not isinstance(raw, list):
        return []
    return [entry for entry in raw if isinstance(entry, dict)]


def _report_summary(result) -> str:
    """Return ``result["summary"]`` as a string; "" if absent or null."""
    if not isinstance(result, dict):
        return ""
    return str(result.get("summary") or "")


def generate_report(
    results: dict[str, dict],
    changed_files: list[str],
) -> str:
    """Build the markdown summary report for the PR.

    Args:
        results: Per-file analysis results keyed by file path. Each value is
            expected to hold an "issues" list and a "summary" string; missing,
            null or malformed parts are treated as empty.
        changed_files: Files considered for review; only its length is used,
            for the counts in the report.

    Returns:
        The report as a single markdown string. Files with issues get their
        own section. Files whose summary starts with "Analysis failed:" (the
        marker analyze_file puts on failed analyses) are listed in a separate
        "Analysis failures" section, so a failed review cannot pass for a
        clean one.
    """
    lines = ["## Documentation Review Report", ""]

    per_file = [
        (path, _report_issues(result), _report_summary(result))
        for path, result in sorted(results.items())
    ]

    all_issues = [issue for _, issues, _ in per_file for issue in issues]
    total_issues = len(all_issues)
    warnings = sum(1 for issue in all_issues if issue.get("severity") == "warning")
    suggestions = total_issues - warnings

    if total_issues == 0:
        lines.append("No documentation issues found.")
    else:
        lines.append(
            f"Found **{total_issues}** documentation issue(s) "
            f"across **{len(changed_files)}** changed file(s): "
            f"{warnings} warning(s), {suggestions} suggestion(s)."
        )

    lines.append("")
    lines.append(f"Files reviewed: {len(changed_files)}")
    lines.append("")

    for filepath, issues, summary in per_file:
        if not issues:
            continue
        lines.append(f"### `{filepath}`")
        lines.append("")
        lines.append(summary)
        lines.append("")
        for issue in issues:
            severity = issue.get("severity", "suggestion")
            icon = "**Warning:**" if severity == "warning" else "**Suggestion:**"
            line_num = issue.get("line", "?")
            msg = issue.get("message", "")
            lines.append(f"- {icon} Line {line_num}: {msg}")
        lines.append("")

    # Failed analyses carry no issues, so without this section they would be
    # indistinguishable from files whose documentation is fine.
    failures = [
        (filepath, summary)
        for filepath, _, summary in per_file
        if summary.startswith("Analysis failed:")
    ]
    if failures:
        lines.append("### Analysis failures")
        lines.append("")
        for filepath, summary in failures:
            lines.append(f"- `{filepath}`: {summary}")
        lines.append("")

    lines.append("---")
    lines.append(
        "*Automated documentation review. "
        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
        "for guidelines.*"
    )

    return "\n".join(lines)
|
|
|
|
|
|
def generate_inline_comments(results: dict[str, dict]) -> list[dict]:
    """Turn analysis results into inline PR review comment records.

    Args:
        results: Per-file analysis results keyed by file path; each value is
            expected to hold an "issues" list of dicts.

    Returns:
        One ``{"path", "line", "body"}`` dict per usable issue, in input
        order. An issue is skipped unless its "line" is a positive integer.
        Malformed results (non-list "issues", non-dict entries) are skipped
        rather than raising.
    """
    comments = []
    for filepath, result in results.items():
        issues = result.get("issues") if isinstance(result, dict) else None
        if not isinstance(issues, list):
            continue

        for issue in issues:
            if not isinstance(issue, dict):
                continue

            line = issue.get("line")
            # bool is a subclass of int, so True would pass a bare isinstance
            # check; zero and negative numbers are not real line numbers.
            # NOTE(review): presumably a comment with such a line could not be
            # anchored when posted to the PR; confirm against the workflow.
            if isinstance(line, bool) or not isinstance(line, int) or line < 1:
                continue

            # Coerce so a null or non-string message cannot break the
            # concatenation below.
            body = str(issue.get("message") or "")
            suggested = issue.get("suggested_doc")
            if suggested:
                body += f"\n\n**Suggested documentation:**\n```cpp\n{suggested}\n```"

            severity = issue.get("severity", "suggestion")
            prefix = "Doc Warning" if severity == "warning" else "Doc Suggestion"
            body = f"**{prefix}:** {body}"

            comments.append({"path": filepath, "line": line, "body": body})

    return comments
|
|
|
|
|
|
def main():
    """Review the changed C++ files of a PR and write the two output files.

    Reads its inputs from environment variables:
        ANTHROPIC_API_KEY: required; the script exits with status 1 without it.
        CHANGED_FILES: whitespace-separated file paths; exits 0 if empty.
        BASE_SHA / HEAD_SHA: revisions to diff, defaulting to HEAD~1 / HEAD.

    Only paths ending in .h, .hpp or .cpp are reviewed; the script exits 0 if
    there are none. Files whose diff comes back empty are skipped. Writes
    doc-review-report.md and doc-review-comments.json to the current
    directory.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("ERROR: ANTHROPIC_API_KEY not set")
        sys.exit(1)

    changed_files_str = os.environ.get("CHANGED_FILES", "")
    if not changed_files_str:
        print("No changed files to review")
        sys.exit(0)

    base_sha = os.environ.get("BASE_SHA", "HEAD~1")
    head_sha = os.environ.get("HEAD_SHA", "HEAD")

    # str.split() with no argument already trims and drops empty fields.
    changed_files = changed_files_str.split()
    cpp_files = [
        f for f in changed_files if f.endswith((".h", ".hpp", ".cpp"))
    ]

    if not cpp_files:
        print("No C++ files changed")
        sys.exit(0)

    print(f"Reviewing {len(cpp_files)} file(s) for documentation accuracy...")

    client = anthropic.Anthropic(api_key=api_key)
    results = {}

    for filepath in cpp_files:
        print(f"  Analyzing: {filepath}")
        diff = get_diff(base_sha, head_sha, filepath)
        if not diff:
            continue

        content = read_file_safe(filepath)
        existing_docs = extract_doc_comments(content)

        analysis = FileAnalysis(
            path=filepath,
            diff=diff,
            existing_docs=existing_docs,
            file_content=content,
        )
        results[filepath] = analyze_file(client, analysis)

    # The report embeds model-generated text, which may contain non-ASCII
    # characters; write UTF-8 explicitly instead of relying on the locale
    # default encoding.
    report = generate_report(results, cpp_files)
    Path("doc-review-report.md").write_text(report, encoding="utf-8")
    print("\nReport written to doc-review-report.md")

    comments = generate_inline_comments(results)
    Path("doc-review-comments.json").write_text(
        json.dumps(comments, indent=2), encoding="utf-8"
    )
    print(f"Generated {len(comments)} inline comment(s)")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|