From 5e193157a68d7c60c00b6e3160d84ca45f529928 Mon Sep 17 00:00:00 2001 From: Denis Angell Date: Thu, 14 May 2026 10:00:59 +0200 Subject: [PATCH] remove the py script --- .github/scripts/doc-review.py | 279 ------------------------------- .github/workflows/doc-review.yml | 43 ++--- SCOPE_OF_WORK.md | 31 ++-- 3 files changed, 34 insertions(+), 319 deletions(-) delete mode 100644 .github/scripts/doc-review.py diff --git a/.github/scripts/doc-review.py b/.github/scripts/doc-review.py deleted file mode 100644 index 87609917d3..0000000000 --- a/.github/scripts/doc-review.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python3 -""" -Diff-aware documentation review for xrpld PRs. - -For each changed C++ file, extracts the diff hunks and existing doc -comments, then asks the Anthropic API whether documentation needs -updating. Produces: - - doc-review-report.md: summary comment for the PR - - doc-review-comments.json: inline review comments with file/line info -""" - -import json -import os -import re -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path - -try: - import anthropic -except ImportError: - print("ERROR: anthropic package not installed. Run: pip install anthropic") - sys.exit(1) - -MODEL = "claude-sonnet-4-6" -MAX_TOKENS = 2048 - -SYSTEM_PROMPT = """You are a documentation reviewer for the xrpld (XRP Ledger daemon) C++ codebase. - -Your job is to review code changes and determine whether existing documentation -comments need updating, or whether new documentation is needed. - -Documentation style: Javadoc-style Doxygen comments (/** ... */). -See the project's docs/DOCUMENTATION_STANDARDS.md for full guidelines. - -Rules: -- Only flag REAL semantic drift: changed behavior, new parameters, removed - functionality, changed return values, new error conditions. -- Do NOT flag cosmetic changes (whitespace, formatting, variable renames that - don't change semantics). -- Do NOT suggest docs for private implementation details unless the logic is - genuinely non-obvious. -- Do NOT paraphrase function signatures. Good docs explain WHY and WHAT - BEHAVIOR, not WHAT THE CODE LITERALLY DOES. -- Be terse. Each finding should be 1-3 sentences. - -For each issue found, respond with a JSON array of objects: -{ - "issues": [ - { - "file": "path/to/file.h", - "line": 42, - "severity": "warning" | "suggestion", - "message": "Brief description of the doc issue", - "suggested_doc": "Optional: suggested doc comment text" - } - ], - "summary": "One-paragraph summary of documentation state for this file" -} - -If no issues are found, return: {"issues": [], "summary": "Documentation is up to date."} -Respond ONLY with valid JSON. No markdown fences, no explanation outside JSON.""" - - -@dataclass -class FileAnalysis: - path: str - diff: str - existing_docs: str - file_content: str - - -def get_diff(base_sha: str, head_sha: str, filepath: str) -> str: - """Get the unified diff for a specific file between two commits.""" - try: - result = subprocess.run( - ["git", "diff", f"{base_sha}...{head_sha}", "--", filepath], - capture_output=True, - text=True, - check=True, - ) - return result.stdout - except subprocess.CalledProcessError: - return "" - - -def extract_doc_comments(content: str) -> str: - """Extract all /** ... */ doc comments from file content.""" - pattern = r'/\*\*[\s\S]*?\*/' - matches = re.findall(pattern, content) - return "\n\n".join(matches) if matches else "(no documentation comments found)" - - -def read_file_safe(filepath: str) -> str: - """Read a file, returning empty string if it doesn't exist.""" - try: - return Path(filepath).read_text(encoding="utf-8", errors="replace") - except (FileNotFoundError, PermissionError): - return "" - - -def analyze_file(client: anthropic.Anthropic, analysis: FileAnalysis) -> dict: - """Send a file's diff and docs to the API for review.""" - user_prompt = f"""Review the following code change for documentation accuracy. - -## File: {analysis.path} - -## Git Diff: -``` -{analysis.diff[:8000]} -``` - -## Existing Documentation Comments: -``` -{analysis.existing_docs[:4000]} -``` - -## Current File Content (first 200 lines for context): -```cpp -{chr(10).join(analysis.file_content.split(chr(10))[:200])} -``` - -Analyze whether the diff introduces changes that make existing docs inaccurate, -or adds new public API surface that lacks documentation.""" - - try: - response = client.messages.create( - model=MODEL, - max_tokens=MAX_TOKENS, - system=SYSTEM_PROMPT, - messages=[{"role": "user", "content": user_prompt}], - ) - text = response.content[0].text.strip() - if text.startswith("```"): - text = re.sub(r'^```\w*\n?', '', text) - text = re.sub(r'\n?```$', '', text) - return json.loads(text) - except (json.JSONDecodeError, Exception) as e: - return { - "issues": [], - "summary": f"Analysis failed: {str(e)[:200]}", - } - - -def generate_report( - results: dict[str, dict], - changed_files: list[str], -) -> str: - """Generate the markdown summary report.""" - lines = ["## Documentation Review Report", ""] - - total_issues = sum(len(r.get("issues", [])) for r in results.values()) - warnings = sum( - 1 - for r in results.values() - for i in r.get("issues", []) - if i.get("severity") == "warning" - ) - suggestions = total_issues - warnings - - if total_issues == 0: - lines.append("No documentation issues found.") - else: - lines.append( - f"Found **{total_issues}** documentation issue(s) " - f"across **{len(changed_files)}** changed file(s): " - f"{warnings} warning(s), {suggestions} suggestion(s)." - ) - - lines.append("") - lines.append(f"Files reviewed: {len(changed_files)}") - lines.append("") - - for filepath, result in sorted(results.items()): - issues = result.get("issues", []) - summary = result.get("summary", "") - if issues: - lines.append(f"### `{filepath}`") - lines.append("") - lines.append(summary) - lines.append("") - for issue in issues: - severity = issue.get("severity", "suggestion") - icon = "**Warning:**" if severity == "warning" else "**Suggestion:**" - line_num = issue.get("line", "?") - msg = issue.get("message", "") - lines.append(f"- {icon} Line {line_num}: {msg}") - lines.append("") - - lines.append("---") - lines.append( - "*Automated documentation review. " - "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) " - "for guidelines.*" - ) - - return "\n".join(lines) - - -def generate_inline_comments(results: dict[str, dict]) -> list[dict]: - """Generate inline PR review comments from analysis results.""" - comments = [] - for filepath, result in results.items(): - for issue in result.get("issues", []): - line = issue.get("line") - if not line or not isinstance(line, int): - continue - - body = issue.get("message", "") - suggested = issue.get("suggested_doc") - if suggested: - body += f"\n\n**Suggested documentation:**\n```cpp\n{suggested}\n```" - - severity = issue.get("severity", "suggestion") - prefix = "Doc Warning" if severity == "warning" else "Doc Suggestion" - body = f"**{prefix}:** {body}" - - comments.append({"path": filepath, "line": line, "body": body}) - - return comments - - -def main(): - api_key = os.environ.get("ANTHROPIC_API_KEY") - if not api_key: - print("ERROR: ANTHROPIC_API_KEY not set") - sys.exit(1) - - changed_files_str = os.environ.get("CHANGED_FILES", "") - if not changed_files_str: - print("No changed files to review") - sys.exit(0) - - base_sha = os.environ.get("BASE_SHA", "HEAD~1") - head_sha = os.environ.get("HEAD_SHA", "HEAD") - - changed_files = [f.strip() for f in changed_files_str.split() if f.strip()] - cpp_files = [ - f for f in changed_files if f.endswith((".h", ".hpp", ".cpp")) - ] - - if not cpp_files: - print("No C++ files changed") - sys.exit(0) - - print(f"Reviewing {len(cpp_files)} file(s) for documentation accuracy...") - - client = anthropic.Anthropic(api_key=api_key) - results = {} - - for filepath in cpp_files: - print(f" Analyzing: {filepath}") - diff = get_diff(base_sha, head_sha, filepath) - if not diff: - continue - - content = read_file_safe(filepath) - existing_docs = extract_doc_comments(content) - - analysis = FileAnalysis( - path=filepath, - diff=diff, - existing_docs=existing_docs, - file_content=content, - ) - results[filepath] = analyze_file(client, analysis) - - report = generate_report(results, cpp_files) - Path("doc-review-report.md").write_text(report) - print("\nReport written to doc-review-report.md") - - comments = generate_inline_comments(results) - Path("doc-review-comments.json").write_text(json.dumps(comments, indent=2)) - print(f"Generated {len(comments)} inline comment(s)") - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/doc-review.yml b/.github/workflows/doc-review.yml index 3cf62207a3..a447eef796 100644 --- a/.github/workflows/doc-review.yml +++ b/.github/workflows/doc-review.yml @@ -31,50 +31,41 @@ jobs: with: fetch-depth: 0 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + - name: Set up Node.js + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 with: - python-version: '3.12' + node-version: '20' + cache: 'npm' + cache-dependency-path: .github/scripts/doc-agent/package-lock.json - - name: Install dependencies - run: pip install anthropic - - - name: Determine changed C++ files - id: changes - uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6 - with: - files: | - include/**/*.h - src/libxrpl/**/*.h - src/libxrpl/**/*.cpp - src/xrpld/**/*.h - src/xrpld/**/*.cpp + - name: Install doc-agent dependencies + working-directory: .github/scripts/doc-agent + run: npm ci - name: Run documentation review - if: steps.changes.outputs.any_changed == 'true' env: - CHANGED_FILES: ${{ steps.changes.outputs.all_changed_files }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: python3 .github/scripts/doc-review.py + run: | + cd .github/scripts/doc-agent + npm run review -- "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}" - name: Post review summary - if: steps.changes.outputs.any_changed == 'true' && always() + if: always() uses: marocchino/sticky-pull-request-comment@67d0dec7b07ed060a405f9b2a64b8ab319fdd7db # v2.9.2 with: header: doc-review - path: doc-review-report.md + path: .github/scripts/doc-agent/doc-review-report.md - name: Post inline review comments - if: steps.changes.outputs.any_changed == 'true' && always() + if: always() uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: script: | const fs = require('fs'); - if (!fs.existsSync('doc-review-comments.json')) return; + const path = '.github/scripts/doc-agent/doc-review-comments.json'; + if (!fs.existsSync(path)) return; - const comments = JSON.parse(fs.readFileSync('doc-review-comments.json', 'utf8')); + const comments = JSON.parse(fs.readFileSync(path, 'utf8')); if (comments.length === 0) return; const pull_number = context.payload.pull_request.number; diff --git a/SCOPE_OF_WORK.md b/SCOPE_OF_WORK.md index 714c5d8058..135895dfeb 100644 --- a/SCOPE_OF_WORK.md +++ b/SCOPE_OF_WORK.md @@ -174,21 +174,25 @@ Flow: | File | Purpose | |------|---------| -| [.github/scripts/doc-review.py](.github/scripts/doc-review.py) | Diff-aware LLM analysis script (Python, uses `anthropic` SDK) | | [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) | CI workflow: runs on PR, posts review | -Flow: -1. On every PR, determine which C++ files changed. -2. For each changed file, extract git diff hunks and existing doc comments. -3. Send both to the Anthropic API with a prompt tuned for xrpld: - "Given this diff, are existing docs still accurate?" -4. Post results as **inline review comments** on specific lines (via - `actions/github-script`) AND a **sticky summary comment** on the PR. -5. Runs in **warning-only mode** — does not block merge. +The workflow invokes the doc-agent `review` mode (Section 3.4) directly — +there is no separate CI script. The same code path serves CI and local use, +so prompt and logic changes are tested in one place. -The doc-agent `review` mode is the same logic exposed as a local CLI -(`npm run review develop..HEAD` or `npm run review -- --pr 1234`), useful -for testing prompt changes before they ship to CI. +Flow: +1. On every PR, the workflow runs `npm run review -- "$BASE..$HEAD"` in the + doc-agent directory. +2. doc-agent enumerates C++ files changed in the range, extracts diff + hunks plus existing doc comments, and asks Claude per file whether the + docs are still accurate. +3. Outputs `doc-review-report.md` (sticky PR comment) and + `doc-review-comments.json` (inline review comments via + `actions/github-script`). +4. Runs in **warning-only mode** — does not block merge. + +Local invocation uses the same command: +`npm run review develop..HEAD` or `npm run review -- --pr 1234`. Cost: only changed files and changed hunks within those files are processed. Estimated ~$0.05–0.15 per PR. @@ -261,8 +265,7 @@ Tooling shipped as the foundation PR: - [x] [.github/scripts/doc-coverage-check.py](.github/scripts/doc-coverage-check.py) - [x] [.github/workflows/doc-coverage.yml](.github/workflows/doc-coverage.yml) - [x] [cmake/XrplDocs.cmake](cmake/XrplDocs.cmake) -- [x] [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) -- [x] [.github/scripts/doc-review.py](.github/scripts/doc-review.py) +- [x] [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) — invokes doc-agent `review` mode directly - [x] [.claude/commands/](.claude/commands/) — 4 developer slash commands **Exit criteria met:** All workflows pass on a test PR. Coverage report