From 5e193157a68d7c60c00b6e3160d84ca45f529928 Mon Sep 17 00:00:00 2001
From: Denis Angell <dangell@transia.co>
Date: Thu, 14 May 2026 10:00:59 +0200
Subject: [PATCH] remove the py script

---
 .github/scripts/doc-review.py    | 279 -------------------------------
 .github/workflows/doc-review.yml |  43 ++---
 SCOPE_OF_WORK.md                 |  31 ++--
 3 files changed, 34 insertions(+), 319 deletions(-)
 delete mode 100644 .github/scripts/doc-review.py

diff --git a/.github/scripts/doc-review.py b/.github/scripts/doc-review.py
deleted file mode 100644
index 87609917d3..0000000000
--- a/.github/scripts/doc-review.py
+++ /dev/null
@@ -1,279 +0,0 @@
-#!/usr/bin/env python3
-"""
-Diff-aware documentation review for xrpld PRs.
-
-For each changed C++ file, extracts the diff hunks and existing doc
-comments, then asks the Anthropic API whether documentation needs
-updating. Produces:
-  - doc-review-report.md: summary comment for the PR
-  - doc-review-comments.json: inline review comments with file/line info
-"""
-
-import json
-import os
-import re
-import subprocess
-import sys
-from dataclasses import dataclass
-from pathlib import Path
-
-try:
-    import anthropic
-except ImportError:
-    print("ERROR: anthropic package not installed. Run: pip install anthropic")
-    sys.exit(1)
-
-MODEL = "claude-sonnet-4-6"
-MAX_TOKENS = 2048
-
-SYSTEM_PROMPT = """You are a documentation reviewer for the xrpld (XRP Ledger daemon) C++ codebase.
-
-Your job is to review code changes and determine whether existing documentation
-comments need updating, or whether new documentation is needed.
-
-Documentation style: Javadoc-style Doxygen comments (/** ... */).
-See the project's docs/DOCUMENTATION_STANDARDS.md for full guidelines.
-
-Rules:
-- Only flag REAL semantic drift: changed behavior, new parameters, removed
-  functionality, changed return values, new error conditions.
-- Do NOT flag cosmetic changes (whitespace, formatting, variable renames that
-  don't change semantics).
-- Do NOT suggest docs for private implementation details unless the logic is
-  genuinely non-obvious.
-- Do NOT paraphrase function signatures. Good docs explain WHY and WHAT
-  BEHAVIOR, not WHAT THE CODE LITERALLY DOES.
-- Be terse. Each finding should be 1-3 sentences.
-
-For each issue found, respond with a JSON array of objects:
-{
-  "issues": [
-    {
-      "file": "path/to/file.h",
-      "line": 42,
-      "severity": "warning" | "suggestion",
-      "message": "Brief description of the doc issue",
-      "suggested_doc": "Optional: suggested doc comment text"
-    }
-  ],
-  "summary": "One-paragraph summary of documentation state for this file"
-}
-
-If no issues are found, return: {"issues": [], "summary": "Documentation is up to date."}
-Respond ONLY with valid JSON. No markdown fences, no explanation outside JSON."""
-
-
-@dataclass
-class FileAnalysis:
-    path: str
-    diff: str
-    existing_docs: str
-    file_content: str
-
-
-def get_diff(base_sha: str, head_sha: str, filepath: str) -> str:
-    """Get the unified diff for a specific file between two commits."""
-    try:
-        result = subprocess.run(
-            ["git", "diff", f"{base_sha}...{head_sha}", "--", filepath],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        return result.stdout
-    except subprocess.CalledProcessError:
-        return ""
-
-
-def extract_doc_comments(content: str) -> str:
-    """Extract all /** ... */ doc comments from file content."""
-    pattern = r'/\*\*[\s\S]*?\*/'
-    matches = re.findall(pattern, content)
-    return "\n\n".join(matches) if matches else "(no documentation comments found)"
-
-
-def read_file_safe(filepath: str) -> str:
-    """Read a file, returning empty string if it doesn't exist."""
-    try:
-        return Path(filepath).read_text(encoding="utf-8", errors="replace")
-    except (FileNotFoundError, PermissionError):
-        return ""
-
-
-def analyze_file(client: anthropic.Anthropic, analysis: FileAnalysis) -> dict:
-    """Send a file's diff and docs to the API for review."""
-    user_prompt = f"""Review the following code change for documentation accuracy.
-
-## File: {analysis.path}
-
-## Git Diff:
-```
-{analysis.diff[:8000]}
-```
-
-## Existing Documentation Comments:
-```
-{analysis.existing_docs[:4000]}
-```
-
-## Current File Content (first 200 lines for context):
-```cpp
-{chr(10).join(analysis.file_content.split(chr(10))[:200])}
-```
-
-Analyze whether the diff introduces changes that make existing docs inaccurate,
-or adds new public API surface that lacks documentation."""
-
-    try:
-        response = client.messages.create(
-            model=MODEL,
-            max_tokens=MAX_TOKENS,
-            system=SYSTEM_PROMPT,
-            messages=[{"role": "user", "content": user_prompt}],
-        )
-        text = response.content[0].text.strip()
-        if text.startswith("```"):
-            text = re.sub(r'^```\w*\n?', '', text)
-            text = re.sub(r'\n?```$', '', text)
-        return json.loads(text)
-    except (json.JSONDecodeError, Exception) as e:
-        return {
-            "issues": [],
-            "summary": f"Analysis failed: {str(e)[:200]}",
-        }
-
-
-def generate_report(
-    results: dict[str, dict],
-    changed_files: list[str],
-) -> str:
-    """Generate the markdown summary report."""
-    lines = ["## Documentation Review Report", ""]
-
-    total_issues = sum(len(r.get("issues", [])) for r in results.values())
-    warnings = sum(
-        1
-        for r in results.values()
-        for i in r.get("issues", [])
-        if i.get("severity") == "warning"
-    )
-    suggestions = total_issues - warnings
-
-    if total_issues == 0:
-        lines.append("No documentation issues found.")
-    else:
-        lines.append(
-            f"Found **{total_issues}** documentation issue(s) "
-            f"across **{len(changed_files)}** changed file(s): "
-            f"{warnings} warning(s), {suggestions} suggestion(s)."
-        )
-
-    lines.append("")
-    lines.append(f"Files reviewed: {len(changed_files)}")
-    lines.append("")
-
-    for filepath, result in sorted(results.items()):
-        issues = result.get("issues", [])
-        summary = result.get("summary", "")
-        if issues:
-            lines.append(f"### `{filepath}`")
-            lines.append("")
-            lines.append(summary)
-            lines.append("")
-            for issue in issues:
-                severity = issue.get("severity", "suggestion")
-                icon = "**Warning:**" if severity == "warning" else "**Suggestion:**"
-                line_num = issue.get("line", "?")
-                msg = issue.get("message", "")
-                lines.append(f"- {icon} Line {line_num}: {msg}")
-            lines.append("")
-
-    lines.append("---")
-    lines.append(
-        "*Automated documentation review. "
-        "See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
-        "for guidelines.*"
-    )
-
-    return "\n".join(lines)
-
-
-def generate_inline_comments(results: dict[str, dict]) -> list[dict]:
-    """Generate inline PR review comments from analysis results."""
-    comments = []
-    for filepath, result in results.items():
-        for issue in result.get("issues", []):
-            line = issue.get("line")
-            if not line or not isinstance(line, int):
-                continue
-
-            body = issue.get("message", "")
-            suggested = issue.get("suggested_doc")
-            if suggested:
-                body += f"\n\n**Suggested documentation:**\n```cpp\n{suggested}\n```"
-
-            severity = issue.get("severity", "suggestion")
-            prefix = "Doc Warning" if severity == "warning" else "Doc Suggestion"
-            body = f"**{prefix}:** {body}"
-
-            comments.append({"path": filepath, "line": line, "body": body})
-
-    return comments
-
-
-def main():
-    api_key = os.environ.get("ANTHROPIC_API_KEY")
-    if not api_key:
-        print("ERROR: ANTHROPIC_API_KEY not set")
-        sys.exit(1)
-
-    changed_files_str = os.environ.get("CHANGED_FILES", "")
-    if not changed_files_str:
-        print("No changed files to review")
-        sys.exit(0)
-
-    base_sha = os.environ.get("BASE_SHA", "HEAD~1")
-    head_sha = os.environ.get("HEAD_SHA", "HEAD")
-
-    changed_files = [f.strip() for f in changed_files_str.split() if f.strip()]
-    cpp_files = [
-        f for f in changed_files if f.endswith((".h", ".hpp", ".cpp"))
-    ]
-
-    if not cpp_files:
-        print("No C++ files changed")
-        sys.exit(0)
-
-    print(f"Reviewing {len(cpp_files)} file(s) for documentation accuracy...")
-
-    client = anthropic.Anthropic(api_key=api_key)
-    results = {}
-
-    for filepath in cpp_files:
-        print(f"  Analyzing: {filepath}")
-        diff = get_diff(base_sha, head_sha, filepath)
-        if not diff:
-            continue
-
-        content = read_file_safe(filepath)
-        existing_docs = extract_doc_comments(content)
-
-        analysis = FileAnalysis(
-            path=filepath,
-            diff=diff,
-            existing_docs=existing_docs,
-            file_content=content,
-        )
-        results[filepath] = analyze_file(client, analysis)
-
-    report = generate_report(results, cpp_files)
-    Path("doc-review-report.md").write_text(report)
-    print("\nReport written to doc-review-report.md")
-
-    comments = generate_inline_comments(results)
-    Path("doc-review-comments.json").write_text(json.dumps(comments, indent=2))
-    print(f"Generated {len(comments)} inline comment(s)")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/.github/workflows/doc-review.yml b/.github/workflows/doc-review.yml
index 3cf62207a3..a447eef796 100644
--- a/.github/workflows/doc-review.yml
+++ b/.github/workflows/doc-review.yml
@@ -31,50 +31,41 @@ jobs:
         with:
           fetch-depth: 0
 
-      - name: Set up Python
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+      - name: Set up Node.js
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
         with:
-          python-version: '3.12'
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: .github/scripts/doc-agent/package-lock.json
 
-      - name: Install dependencies
-        run: pip install anthropic
-
-      - name: Determine changed C++ files
-        id: changes
-        uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
-        with:
-          files: |
-            include/**/*.h
-            src/libxrpl/**/*.h
-            src/libxrpl/**/*.cpp
-            src/xrpld/**/*.h
-            src/xrpld/**/*.cpp
+      - name: Install doc-agent dependencies
+        working-directory: .github/scripts/doc-agent
+        run: npm ci
 
       - name: Run documentation review
-        if: steps.changes.outputs.any_changed == 'true'
         env:
-          CHANGED_FILES: ${{ steps.changes.outputs.all_changed_files }}
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-          BASE_SHA: ${{ github.event.pull_request.base.sha }}
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
-        run: python3 .github/scripts/doc-review.py
+        run: |
+          cd .github/scripts/doc-agent
+          npm run review -- "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
 
       - name: Post review summary
-        if: steps.changes.outputs.any_changed == 'true' && always()
+        if: always()
         uses: marocchino/sticky-pull-request-comment@67d0dec7b07ed060a405f9b2a64b8ab319fdd7db # v2.9.2
         with:
           header: doc-review
-          path: doc-review-report.md
+          path: .github/scripts/doc-agent/doc-review-report.md
 
       - name: Post inline review comments
-        if: steps.changes.outputs.any_changed == 'true' && always()
+        if: always()
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
         with:
           script: |
             const fs = require('fs');
-            if (!fs.existsSync('doc-review-comments.json')) return;
+            const path = '.github/scripts/doc-agent/doc-review-comments.json';
+            if (!fs.existsSync(path)) return;
 
-            const comments = JSON.parse(fs.readFileSync('doc-review-comments.json', 'utf8'));
+            const comments = JSON.parse(fs.readFileSync(path, 'utf8'));
             if (comments.length === 0) return;
 
             const pull_number = context.payload.pull_request.number;
diff --git a/SCOPE_OF_WORK.md b/SCOPE_OF_WORK.md
index 714c5d8058..135895dfeb 100644
--- a/SCOPE_OF_WORK.md
+++ b/SCOPE_OF_WORK.md
@@ -174,21 +174,25 @@ Flow:
 
 | File | Purpose |
 |------|---------|
-| [.github/scripts/doc-review.py](.github/scripts/doc-review.py) | Diff-aware LLM analysis script (Python, uses `anthropic` SDK) |
 | [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) | CI workflow: runs on PR, posts review |
 
-Flow:
-1. On every PR, determine which C++ files changed.
-2. For each changed file, extract git diff hunks and existing doc comments.
-3. Send both to the Anthropic API with a prompt tuned for xrpld:
-   "Given this diff, are existing docs still accurate?"
-4. Post results as **inline review comments** on specific lines (via
-   `actions/github-script`) AND a **sticky summary comment** on the PR.
-5. Runs in **warning-only mode** — does not block merge.
+The workflow invokes the doc-agent `review` mode (Section 3.4) directly —
+there is no separate CI script. The same code path serves CI and local use,
+so prompt and logic changes are tested in one place.
 
-The doc-agent `review` mode is the same logic exposed as a local CLI
-(`npm run review develop..HEAD` or `npm run review -- --pr 1234`), useful
-for testing prompt changes before they ship to CI.
+Flow:
+1. On every PR, the workflow runs `npm run review -- "$BASE..$HEAD"` in the
+   doc-agent directory.
+2. doc-agent enumerates C++ files changed in the range, extracts diff
+   hunks plus existing doc comments, and asks Claude per file whether the
+   docs are still accurate.
+3. Outputs `doc-review-report.md` (sticky PR comment) and
+   `doc-review-comments.json` (inline review comments via
+   `actions/github-script`).
+4. Runs in **warning-only mode** — does not block merge.
+
+Local invocation uses the same command:
+`npm run review develop..HEAD` or `npm run review -- --pr 1234`.
 
 Cost: only changed files and changed hunks within those files are
 processed. Estimated ~$0.05–0.15 per PR.
@@ -261,8 +265,7 @@ Tooling shipped as the foundation PR:
 - [x] [.github/scripts/doc-coverage-check.py](.github/scripts/doc-coverage-check.py)
 - [x] [.github/workflows/doc-coverage.yml](.github/workflows/doc-coverage.yml)
 - [x] [cmake/XrplDocs.cmake](cmake/XrplDocs.cmake)
-- [x] [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml)
-- [x] [.github/scripts/doc-review.py](.github/scripts/doc-review.py)
+- [x] [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) — invokes doc-agent `review` mode directly
 - [x] [.claude/commands/](.claude/commands/) — 4 developer slash commands
 
 **Exit criteria met:** All workflows pass on a test PR. Coverage report