fix workflow

2026-07-25 08:00:22 +00:00 · 2026-05-14 08:44:53 +02:00
parent a05f951a0c
commit 1159ee32d8
11 changed files with 647 additions and 142 deletions
--- a/.github/scripts/doc-agent/src/audit.ts
+++ b/.github/scripts/doc-agent/src/audit.ts
@@ -0,0 +1,295 @@
+/**
+ * Audit mode: measure how completely each file's Doxygen documentation
+ * reflects the authoritative design intent in its sibling .ai.md.
+ *
+ * For each C++ file under the target that has a .ai.md sibling:
+ *   - Locate its header/source partner (if any) and the partner's .ai.md.
+ *   - Send primary + partner files and both .ai.md files to the agent.
+ *   - Parse a structured JSON verdict per file.
+ *
+ * Writes:
+ *   - doc-audit-report.json  Aggregated per-file results.
+ *   - doc-audit-report.md    Human-readable summary.
+ */
+
+import { existsSync, readdirSync, statSync } from 'node:fs';
+import { readFile, writeFile } from 'node:fs/promises';
+import { join, relative, resolve } from 'node:path';
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import { MODEL, XRPLD_ROOT } from './config.js';
+import { findPartner } from './pairing.js';
+import { loadSystemPrompt } from './prompt-loader.js';
+
+/** File extensions a path must end with to be considered an audit target. */
+const SOURCE_EXTS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
+/** Character cap applied to primary and partner file contents embedded in the prompt. */
+const MAX_FILE_CHARS = 24_000;
+/** Character cap applied to each .ai.md embedded in the prompt. */
+const MAX_AI_MD_CHARS = 16_000;
+/** Concurrency used when AUDIT_CONCURRENCY is unset or does not parse to a usable number. */
+const DEFAULT_CONCURRENCY = 5;
+
+/**
+ * One entry of an audit result's `missed` list, as returned by the model.
+ * The shape follows the JSON schema in the audit system prompt, which is not
+ * visible in this module.
+ */
+interface AuditMissed {
+  /** Rendered in bold as the bullet heading in the markdown report. */
+  function: string;
+  /** Short label rendered after the function name in the markdown report. */
+  topic: string;
+  // NOTE(review): presumably where the documentation should live — confirm against the prompt schema.
+  home: 'header' | 'source' | 'either';
+  // NOTE(review): presumably how the concept is currently documented — confirm against the prompt schema.
+  current_state: 'absent' | 'wrong-home' | 'thin';
+  /**
+   * Rendered as a blockquote in the report with newlines flattened and at
+   * most 200 characters shown; presumably an excerpt of the .ai.md.
+   */
+  ai_md_quote: string;
+}
+
+/** Per-file audit verdict parsed from the model's JSON response. */
+interface AuditResult {
+  /** Path of the audited file; auditFile() overwrites it with the XRPLD_ROOT-relative path. */
+  file: string;
+  /** Denominator of the translation rate shown in the report. */
+  ai_md_concepts: number;
+  /** Numerator of the translation rate shown in the report. */
+  translated: number;
+  /** Entries listed (first five per file) under "Top missed concepts" in the report. */
+  missed: AuditMissed[];
+  /** 'rerun' puts the file in the report's "flagged for re-run" table. */
+  verdict: 'rerun' | 'leave';
+}
+
+/**
+ * Collect the files to audit beneath `target` (resolved against XRPLD_ROOT).
+ *
+ * A path qualifies when its extension is in SOURCE_EXTS and a file named
+ * `<path>.ai.md` exists next to it. A file target yields zero or one entry;
+ * a directory target is walked depth-first in readdir order.
+ *
+ * @param target File or directory path, relative to XRPLD_ROOT or absolute.
+ * @returns Absolute paths of every qualifying file.
+ * @throws Error when the resolved target does not exist.
+ */
+function findAuditTargets(target: string): string[] {
+  const root = resolve(XRPLD_ROOT, target);
+  if (!existsSync(root)) {
+    throw new Error(`Target does not exist: ${root}`);
+  }
+
+  // True for paths with a recognised extension AND a sibling .ai.md on disk.
+  const isAuditable = (path: string): boolean => {
+    const lastDot = path.lastIndexOf('.');
+    return lastDot !== -1 && SOURCE_EXTS.has(path.slice(lastDot)) && existsSync(`${path}.ai.md`);
+  };
+
+  if (statSync(root).isFile()) {
+    return isAuditable(root) ? [root] : [];
+  }
+
+  // Depth-first: a subdirectory's matches are emitted at the position where
+  // the subdirectory itself appears in its parent's listing.
+  const collect = (dir: string): string[] =>
+    readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
+      const path = join(dir, entry.name);
+      if (entry.isDirectory()) return collect(path);
+      return entry.isFile() && isAuditable(path) ? [path] : [];
+    });
+
+  return collect(root);
+}
+
+/**
+ * Read a UTF-8 file, truncating it to keep prompts within budget.
+ *
+ * The cap is measured in JavaScript string units (UTF-16 code units), not
+ * bytes. When the text is longer than the cap, a marker stating how many
+ * units were dropped is appended.
+ *
+ * @param absPath Path of the file to read.
+ * @param maxChars Maximum number of string units to keep.
+ * @returns The full text, or the truncated text followed by the marker.
+ */
+async function readCapped(absPath: string, maxChars: number): Promise<string> {
+  const text = await readFile(absPath, 'utf8');
+  if (text.length <= maxChars) return text;
+
+  // Do not cut between the two halves of a surrogate pair: a lone high
+  // surrogate at the end would reach the model as a malformed character.
+  let cut = maxChars;
+  if (cut > 0) {
+    const lastKept = text.charCodeAt(cut - 1);
+    if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
+  }
+  return `${text.slice(0, cut)}\n\n... [truncated, ${text.length - cut} chars elided] ...`;
+}
+
+/**
+ * Extract and validate the audit verdict from a possibly-fenced model response.
+ *
+ * The first ```json fence is preferred; otherwise the span from the first `{`
+ * to the last `}` is tried. The parsed value is accepted only if it is an
+ * object with non-negative finite `ai_md_concepts` and `translated` and a
+ * `verdict` of 'rerun' or 'leave'. `missed` defaults to an empty list, and
+ * entries lacking the string fields the report dereferences are dropped, so a
+ * malformed response cannot crash report generation later.
+ *
+ * @param response Concatenated assistant text.
+ * @returns The validated result, or null when nothing usable was found.
+ */
+function extractJson(response: string): AuditResult | null {
+  const fenced = response.match(/```json\s*([\s\S]*?)```/);
+  const raw = fenced?.[1] ?? response.match(/(\{[\s\S]*\})/)?.[1];
+  if (raw === undefined) return null;
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return null;
+  }
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null;
+  const candidate = parsed as Record<string, unknown>;
+
+  const isCount = (v: unknown): v is number =>
+    typeof v === 'number' && Number.isFinite(v) && v >= 0;
+  const concepts = candidate['ai_md_concepts'];
+  const translated = candidate['translated'];
+  if (!isCount(concepts) || !isCount(translated)) return null;
+
+  const rawVerdict = candidate['verdict'];
+  const verdict: AuditResult['verdict'] | null =
+    rawVerdict === 'rerun' ? 'rerun' : rawVerdict === 'leave' ? 'leave' : null;
+  if (verdict === null) return null;
+
+  const missed: AuditMissed[] = [];
+  const rawMissed = candidate['missed'];
+  if (Array.isArray(rawMissed)) {
+    for (const entry of rawMissed as unknown[]) {
+      if (typeof entry !== 'object' || entry === null) continue;
+      const fields = entry as Record<string, unknown>;
+      if (
+        typeof fields['function'] === 'string' &&
+        typeof fields['topic'] === 'string' &&
+        typeof fields['ai_md_quote'] === 'string'
+      ) {
+        // home / current_state are passed through unverified; nothing visible
+        // in this module reads them.
+        missed.push(entry as AuditMissed);
+      }
+    }
+  }
+
+  const rawFile = candidate['file'];
+  const file = typeof rawFile === 'string' ? rawFile : '';
+  // Spread first so any extra fields the model emitted still reach the JSON report.
+  return { ...candidate, file, ai_md_concepts: concepts, translated, missed, verdict };
+}
+
+/**
+ * Audit one primary file against its sibling .ai.md, with partner context.
+ *
+ * The prompt embeds the capped primary file and its .ai.md and, when
+ * findPartner() locates a partner, the capped partner file plus the partner's
+ * .ai.md if that exists. All assistant text is concatenated and handed to
+ * extractJson().
+ *
+ * @param absPrimary Absolute path of the file to audit; `<absPrimary>.ai.md`
+ *   is read unconditionally, so it must exist.
+ * @returns The parsed verdict with `file` forced to the XRPLD_ROOT-relative
+ *   path, or null when no JSON could be extracted from the response.
+ */
+async function auditFile(absPrimary: string): Promise<AuditResult | null> {
+  const relPrimary = relative(XRPLD_ROOT, absPrimary);
+  console.log(`\n=== Auditing: ${relPrimary} ===`);
+
+  const primary = await readCapped(absPrimary, MAX_FILE_CHARS);
+  const primaryAiMd = await readCapped(`${absPrimary}.ai.md`, MAX_AI_MD_CHARS);
+
+  const absPartner = findPartner(absPrimary);
+  const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
+  const partner = absPartner === null ? null : await readCapped(absPartner, MAX_FILE_CHARS);
+  const partnerAiMdPath = absPartner === null ? null : `${absPartner}.ai.md`;
+  // The partner's .ai.md is optional, unlike the primary's.
+  const partnerAiMd =
+    partnerAiMdPath !== null && existsSync(partnerAiMdPath)
+      ? await readCapped(partnerAiMdPath, MAX_AI_MD_CHARS)
+      : null;
+
+  const partnerBlock =
+    relPartner === null || partner === null
+      ? ''
+      : `
+
+## Partner File (${relPartner})
+\`\`\`
+${partner}
+\`\`\`${
+          partnerAiMd === null
+            ? ''
+            : `
+
+## Partner's .ai.md (${relPartner}.ai.md)
+${partnerAiMd}`
+        }`;
+
+  const userPrompt = `Audit the documentation coverage of this file against its authoritative .ai.md.
+
+## Primary File (${relPrimary})
+\`\`\`
+${primary}
+\`\`\`
+
+## Primary's .ai.md (${relPrimary}.ai.md)
+${primaryAiMd}${partnerBlock}
+
+Output JSON per the schema in the system prompt. The "file" field MUST be
+"${relPrimary}".`;
+
+  const systemPrompt = await loadSystemPrompt('audit-file', relPrimary);
+
+  let response = '';
+  const result = query({
+    prompt: userPrompt,
+    options: {
+      model: MODEL,
+      systemPrompt,
+      cwd: XRPLD_ROOT,
+      allowedTools: ['Read', 'Glob', 'Grep'],
+      // The audit must never modify the tree. Per the SDK documentation,
+      // allowedTools only pre-approves the listed tools; under 'acceptEdits'
+      // the write tools would still be auto-approved, so deny them explicitly.
+      disallowedTools: ['Edit', 'Write', 'NotebookEdit', 'Bash'],
+      permissionMode: 'acceptEdits',
+    },
+  });
+
+  for await (const message of result) {
+    if (message.type === 'assistant') {
+      // Accumulate text from every assistant turn; tool-use blocks are ignored.
+      const content = message.message?.content;
+      if (Array.isArray(content)) {
+        for (const block of content) {
+          if (block.type === 'text') response += block.text;
+        }
+      }
+    }
+    if (message.type === 'result') {
+      const cost = message.total_cost_usd?.toFixed(4) ?? '?';
+      const inTok = message.usage?.['input_tokens'] ?? 0;
+      const outTok = message.usage?.['output_tokens'] ?? 0;
+      console.log(`  [Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
+    }
+  }
+
+  const parsed = extractJson(response);
+  if (parsed === null) {
+    console.warn(`  No JSON output for ${relPrimary}, skipping`);
+    return null;
+  }
+  // Do not trust the model's echo of the path; pin it to the file actually audited.
+  parsed.file = relPrimary;
+  return parsed;
+}
+
+/**
+ * Produce the markdown summary for a set of audit results.
+ *
+ * The report has a totals header, then either a "none" line or a table of
+ * files whose verdict is 'rerun' (lowest translation ratio first), followed
+ * by up to five missed concepts for each of the first ten flagged files.
+ *
+ * @param results Successfully parsed per-file results.
+ * @returns The report as a single newline-joined string.
+ */
+function buildReport(results: readonly AuditResult[]): string {
+  // Whole-number percentage; a zero denominator is reported as 0.
+  const percent = (part: number, whole: number): number =>
+    whole === 0 ? 0 : Math.round((part / whole) * 100);
+  // Sort key: fraction translated, with the divisor floored at 1.
+  const coverage = (r: AuditResult): number => r.translated / Math.max(r.ai_md_concepts, 1);
+
+  let conceptSum = 0;
+  let translatedSum = 0;
+  for (const r of results) {
+    conceptSum += r.ai_md_concepts;
+    translatedSum += r.translated;
+  }
+
+  // filter() returns a fresh array, so sorting it leaves `results` untouched.
+  const flagged = results
+    .filter((r) => r.verdict === 'rerun')
+    .sort((a, b) => coverage(a) - coverage(b));
+
+  const header = [
+    '# Documentation Audit Report',
+    '',
+    `**Files audited:** ${results.length}`,
+    `**Overall translation rate:** ${percent(translatedSum, conceptSum)}% (${translatedSum} of ${conceptSum} .ai.md concepts reflected in docstrings)`,
+    `**Files flagged for re-run:** ${flagged.length}`,
+    '',
+    '## Files flagged for re-run',
+    '',
+  ];
+
+  if (flagged.length === 0) {
+    return [...header, '_None — all audited files passed._', ''].join('\n');
+  }
+
+  const tableRows = flagged.map(
+    (r) =>
+      `| \`${r.file}\` | ${r.translated} | ${r.missed.length} | ${percent(r.translated, r.ai_md_concepts)}% |`,
+  );
+
+  // Sample from the ten worst files; files with nothing listed get no section.
+  const samples = flagged
+    .slice(0, 10)
+    .filter((r) => r.missed.length !== 0)
+    .flatMap((r) => [
+      `### \`${r.file}\``,
+      '',
+      ...r.missed
+        .slice(0, 5)
+        .flatMap((m) => [
+          `- **${m.function}** — ${m.topic}`,
+          `  > ${m.ai_md_quote.replace(/\n/g, ' ').slice(0, 200)}`,
+        ]),
+      '',
+    ]);
+
+  return [
+    ...header,
+    '| File | Translated | Missed | Rate |',
+    '|------|-----------:|-------:|-----:|',
+    ...tableRows,
+    '',
+    '## Top missed concepts (sampled)',
+    '',
+    ...samples,
+  ].join('\n');
+}
+
+/**
+ * Run async work over a list of items with bounded concurrency. Mirrors the
+ * minimal slice of p-limit we actually need; collects results in input order.
+ *
+ * `limit` is floored and clamped to at least 1. Previously a limit of NaN, 0,
+ * a negative number or a fraction below 1 started zero workers and returned
+ * an array of holes without ever calling `worker`.
+ *
+ * @param items Inputs to process.
+ * @param limit Maximum number of workers in flight at once.
+ * @param worker Async function applied to each item with its index.
+ * @returns Results positioned at the index of their input item.
+ *   Rejects with the first worker rejection; workers already started keep
+ *   draining the queue in the background.
+ */
+async function mapWithConcurrency<T, R>(
+  items: readonly T[],
+  limit: number,
+  worker: (item: T, index: number) => Promise<R>,
+): Promise<R[]> {
+  const results = new Array<R>(items.length);
+  const requested = Number.isNaN(limit) ? 1 : Math.floor(limit);
+  const width = Math.min(Math.max(requested, 1), items.length);
+  let next = 0;
+
+  // Each pump claims the next unclaimed index until the list is exhausted.
+  // The claim (next++) happens synchronously, so no two pumps share an index.
+  async function pump(): Promise<void> {
+    while (true) {
+      const index = next++;
+      if (index >= items.length) return;
+      // biome-ignore lint/style/noNonNullAssertion: index < items.length
+      results[index] = await worker(items[index]!, index);
+    }
+  }
+
+  await Promise.all(Array.from({ length: width }, () => pump()));
+  return results;
+}
+
+/**
+ * Audit every C++ file with a .ai.md sibling under the target path.
+ *
+ * Concurrency is read from the AUDIT_CONCURRENCY env var (default 5). Values
+ * that are missing, non-numeric, non-finite or below 1 fall back to the
+ * default; fractional values are floored.
+ *
+ * Writes doc-audit-report.md and doc-audit-report.json to the current working
+ * directory. A file whose audit throws or yields no JSON is logged and left
+ * out of both reports.
+ *
+ * @param target File or directory to audit, resolved against XRPLD_ROOT.
+ */
+export async function auditTarget(target: string): Promise<void> {
+  const files = findAuditTargets(target);
+
+  // A negative or sub-1 setting would otherwise start zero workers and
+  // silently produce an empty report.
+  const requested = Number(process.env['AUDIT_CONCURRENCY']);
+  const concurrency =
+    Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : DEFAULT_CONCURRENCY;
+  console.log(
+    `Found ${files.length} file(s) with .ai.md siblings to audit (concurrency=${concurrency}).`,
+  );
+
+  let completed = 0;
+  const raw = await mapWithConcurrency(files, concurrency, async (file) => {
+    try {
+      return await auditFile(file);
+    } catch (err) {
+      // One failing file must not abort the whole run.
+      const message = err instanceof Error ? err.message : String(err);
+      console.warn(`  Audit failed for ${file}: ${message}`);
+      return null;
+    } finally {
+      completed++;
+      console.log(`  Progress: ${completed}/${files.length}`);
+    }
+  });
+  const results = raw.filter((r): r is AuditResult => r !== null);
+
+  const report = buildReport(results);
+  await writeFile('doc-audit-report.md', report);
+  await writeFile('doc-audit-report.json', JSON.stringify(results, null, 2));
+
+  const reruns = results.filter((r) => r.verdict === 'rerun').length;
+  console.log(`\nAudited: ${results.length}/${files.length}`);
+  console.log(`Flagged for re-run: ${reruns}`);
+  console.log('Reports: doc-audit-report.md, doc-audit-report.json');
+}
--- a/.github/scripts/doc-agent/src/document.ts
+++ b/.github/scripts/doc-agent/src/document.ts
@@ -7,6 +7,7 @@ import { readFile } from 'node:fs/promises';
 import { join, relative, resolve } from 'node:path';
 import { query } from '@anthropic-ai/claude-agent-sdk';
 import { MODEL, XRPLD_ROOT } from './config.js';
+import { findPartner } from './pairing.js';
 import { loadSystemPrompt } from './prompt-loader.js';

 const CPP_EXTENSIONS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
@@ -65,6 +66,11 @@ async function readAiContext(absPath: string): Promise<string | null> {

 /**
 * Document a single file by running the documentation agent against it.
+ *
+ * Inject the partner file's path + its `.ai.md` (if any) into the prompt
+ * so the agent can apply the "contract on header, implementation on
+ * source" policy with full visibility into the other half. The agent
+ * Reads the partner only as reference; only the primary file is edited.
 */
 async function documentFile(absPath: string): Promise<void> {
  const relPath = relative(XRPLD_ROOT, absPath);
@@ -75,15 +81,33 @@ async function documentFile(absPath: string): Promise<void> {
  const aiContextBlock =
    aiContext === null
      ? ''
-      : `\n\n## Authoritative AI Context (${relPath}.ai.md)\n\nThe following is high-signal prose describing this file's purpose, design,\nand non-obvious behavior. Treat it as the source of truth for intent and\nbehavior. Your job is to translate this into structured Doxygen \`/** */\`\ncomments on the actual declarations.\n\n---\n\n${aiContext}\n---`;
+      : `\n\n## Primary's Authoritative AI Context (${relPath}.ai.md)\n\nThe following is high-signal prose describing this file's purpose, design,\nand non-obvious behavior. Treat it as the source of truth for intent and\nbehavior. Your job is to translate this into structured Doxygen \`/** */\`\ncomments on the actual declarations.\n\n---\n\n${aiContext}\n---`;
+
+  const absPartner = findPartner(absPath);
+  const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
+  const partnerAiContext = absPartner === null ? null : await readAiContext(absPartner);
+  const partnerBlock =
+    relPartner === null
+      ? ''
+      : `\n\n## Partner File\n\nThis file's partner is **${relPartner}**. Use the Read tool to see its\ncurrent docstrings before deciding what belongs on the primary. A concept\nalready documented on the partner does not need to be duplicated here.\nConversely, an implementation-depth concept currently on the partner that\nbelongs on the source (or vice versa) should be moved.${
+          partnerAiContext === null
+            ? ''
+            : `\n\n### Partner's Authoritative AI Context (${relPartner}.ai.md)\n\n---\n\n${partnerAiContext}\n---`
+        }`;

  const userPrompt = `Add Doxygen documentation to: ${relPath}

 The file is rooted at ${XRPLD_ROOT}. Use the Read tool to read it, the Edit
 tool to add documentation, and Glob/Grep to find related tests or callers
-when needed.
+when needed.${
+    relPartner === null
+      ? ''
+      : ` Use Read on the partner file (${relPartner}) to see what's already
+documented there.`
+  }

-Do not modify any code logic — only add documentation comments.${aiContextBlock}`;
+Do not modify any code logic — only add documentation comments to the
+primary file (${relPath}). Do NOT edit the partner file.${aiContextBlock}${partnerBlock}`;

  const result = query({
    prompt: userPrompt,
--- a/.github/scripts/doc-agent/src/index.ts
+++ b/.github/scripts/doc-agent/src/index.ts
@@ -10,6 +10,7 @@
 *   doc-agent regen-skills protocol
 */

+import { auditTarget } from './audit.js';
 import { documentTarget } from './document.js';
 import { regenSkills } from './regen-skills.js';
 import { reviewDiff } from './review.js';
@@ -21,6 +22,9 @@ Usage:
  doc-agent document <file-or-directory>   Add Doxygen documentation
  doc-agent review <base>..<head>          Detect doc drift in range
  doc-agent review --pr <number>           Detect doc drift for a PR
+  doc-agent audit <file-or-directory>      Measure how completely each file's
+                                           docstrings reflect its .ai.md intent;
+                                           outputs doc-audit-report.{md,json}
  doc-agent regen-skills <module>          Regenerate docs/skills/soul/<module>.md
                                           from sibling .ai.md files

@@ -62,6 +66,13 @@ async function main(): Promise<void> {
    return;
  }

+  if (mode === 'audit') {
+    const target = args[0];
+    if (target === undefined) printUsageAndExit(1);
+    await auditTarget(target);
+    return;
+  }
+
  if (mode === 'regen-skills') {
    const moduleName = args[0];
    if (moduleName === undefined) printUsageAndExit(1);
--- a/.github/scripts/doc-agent/src/pairing.ts
+++ b/.github/scripts/doc-agent/src/pairing.ts
@@ -0,0 +1,47 @@
+/**
+ * Header/source pairing for C++ files in the xrpld layout.
+ *
+ * libxrpl: src/libxrpl/<X>.cpp <-> include/xrpl/<X>.h
+ * xrpld:   src/xrpld/<X>.cpp   <-> src/xrpld/<X>.h (same directory)
+ *
+ * Inline-only headers may have no .cpp partner; standalone .cpp may have
+ * no .h partner.
+ */
+
+import { existsSync } from 'node:fs';
+import { relative, resolve } from 'node:path';
+import { XRPLD_ROOT } from './config.js';
+
+/**
+ * Locate the header/source counterpart of a C++ file.
+ *
+ * Candidates are built from the XRPLD_ROOT-relative path: for a `.cpp` under
+ * `src/libxrpl/`, the matching `.h`/`.hpp` under `include/xrpl/` is tried
+ * first, then the same-directory header; for a `.h`/`.hpp` under
+ * `include/xrpl/`, the matching `.cpp` under `src/libxrpl/` is tried first,
+ * then the same-directory source. Any other extension has no candidates.
+ *
+ * @param absPrimary Absolute path of the file whose partner is wanted.
+ * @returns Absolute path of the first candidate that exists on disk, or null.
+ */
+export function findPartner(absPrimary: string): string | null {
+  const LIB_SOURCE_PREFIX = 'src/libxrpl/';
+  const LIB_HEADER_PREFIX = 'include/xrpl/';
+
+  const relPath = relative(XRPLD_ROOT, absPrimary);
+  const lastDot = relPath.lastIndexOf('.');
+  if (lastDot === -1) return null;
+  const stem = relPath.slice(0, lastDot);
+  const ext = relPath.slice(lastDot);
+
+  let candidates: string[];
+  switch (ext) {
+    case '.cpp': {
+      const libHeaders = stem.startsWith(LIB_SOURCE_PREFIX)
+        ? [
+            `${LIB_HEADER_PREFIX}${stem.slice(LIB_SOURCE_PREFIX.length)}.h`,
+            `${LIB_HEADER_PREFIX}${stem.slice(LIB_SOURCE_PREFIX.length)}.hpp`,
+          ]
+        : [];
+      candidates = [...libHeaders, `${stem}.h`, `${stem}.hpp`];
+      break;
+    }
+    case '.h':
+    case '.hpp': {
+      const libSources = stem.startsWith(LIB_HEADER_PREFIX)
+        ? [`${LIB_SOURCE_PREFIX}${stem.slice(LIB_HEADER_PREFIX.length)}.cpp`]
+        : [];
+      candidates = [...libSources, `${stem}.cpp`];
+      break;
+    }
+    default:
+      candidates = [];
+  }
+
+  // First existing candidate wins; the self-comparison guards against a
+  // candidate resolving back to the primary itself.
+  const match = candidates
+    .map((candidate) => resolve(XRPLD_ROOT, candidate))
+    .find((abs) => existsSync(abs) && abs !== absPrimary);
+  return match ?? null;
+}