Files
rippled/.github/scripts/doc-agent/src/audit.ts
Denis Angell 1159ee32d8 fix workflow
2026-05-14 08:44:53 +02:00

296 lines
9.5 KiB
TypeScript

/**
* Audit mode: measure how completely each file's Doxygen documentation
* reflects the authoritative design intent in its sibling .ai.md.
*
* For each C++ file under the target that has a .ai.md sibling:
* - Locate its header/source partner (if any) and the partner's .ai.md.
* - Send primary + partner files and both .ai.md files to the agent.
* - Parse a structured JSON verdict per file.
*
* Writes:
* - doc-audit-report.json Aggregated per-file results.
* - doc-audit-report.md Human-readable summary.
*/
import { existsSync, readdirSync, statSync } from 'node:fs';
import { readFile, writeFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { findPartner } from './pairing.js';
import { loadSystemPrompt } from './prompt-loader.js';
const SOURCE_EXTS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
const MAX_FILE_CHARS = 24_000;
const MAX_AI_MD_CHARS = 16_000;
const DEFAULT_CONCURRENCY = 5;
interface AuditMissed {
function: string;
topic: string;
home: 'header' | 'source' | 'either';
current_state: 'absent' | 'wrong-home' | 'thin';
ai_md_quote: string;
}
interface AuditResult {
file: string;
ai_md_concepts: number;
translated: number;
missed: AuditMissed[];
verdict: 'rerun' | 'leave';
}
/**
* Recursively find C++ source files under a target path that have a
* sibling .ai.md.
*/
function findAuditTargets(target: string): string[] {
const absTarget = resolve(XRPLD_ROOT, target);
if (!existsSync(absTarget)) {
throw new Error(`Target does not exist: ${absTarget}`);
}
const out: string[] = [];
const consider = (file: string): void => {
const dotIdx = file.lastIndexOf('.');
if (dotIdx === -1) return;
const ext = file.slice(dotIdx);
if (!SOURCE_EXTS.has(ext)) return;
if (!existsSync(`${file}.ai.md`)) return;
out.push(file);
};
const stat = statSync(absTarget);
if (stat.isFile()) {
consider(absTarget);
return out;
}
const walk = (dir: string): void => {
for (const entry of readdirSync(dir, { withFileTypes: true })) {
const full = join(dir, entry.name);
if (entry.isDirectory()) walk(full);
else if (entry.isFile()) consider(full);
}
};
walk(absTarget);
return out;
}
/** Read a file, capping at maxChars to keep prompts within budget. */
async function readCapped(absPath: string, maxChars: number): Promise<string> {
const text = await readFile(absPath, 'utf8');
if (text.length <= maxChars) return text;
return `${text.slice(0, maxChars)}\n\n... [truncated, ${text.length - maxChars} bytes elided] ...`;
}
/** Extract a JSON object from a possibly-fenced model response. */
function extractJson(response: string): AuditResult | null {
const fenced = response.match(/```json\s*([\s\S]*?)```/);
const raw = fenced?.[1] ?? response.match(/(\{[\s\S]*\})/)?.[1];
if (raw === undefined) return null;
try {
return JSON.parse(raw) as AuditResult;
} catch {
return null;
}
}
/** Audit a single primary file against its .ai.md and partner context. */
async function auditFile(absPrimary: string): Promise<AuditResult | null> {
const relPrimary = relative(XRPLD_ROOT, absPrimary);
console.log(`\n=== Auditing: ${relPrimary} ===`);
const primary = await readCapped(absPrimary, MAX_FILE_CHARS);
const primaryAiMd = await readCapped(`${absPrimary}.ai.md`, MAX_AI_MD_CHARS);
const absPartner = findPartner(absPrimary);
const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
const partner = absPartner === null ? null : await readCapped(absPartner, MAX_FILE_CHARS);
const partnerAiMdPath = absPartner === null ? null : `${absPartner}.ai.md`;
const partnerAiMd =
partnerAiMdPath !== null && existsSync(partnerAiMdPath)
? await readCapped(partnerAiMdPath, MAX_AI_MD_CHARS)
: null;
const partnerBlock =
relPartner === null || partner === null
? ''
: `
## Partner File (${relPartner})
\`\`\`
${partner}
\`\`\`${
partnerAiMd === null
? ''
: `
## Partner's .ai.md (${relPartner}.ai.md)
${partnerAiMd}`
}`;
const userPrompt = `Audit the documentation coverage of this file against its authoritative .ai.md.
## Primary File (${relPrimary})
\`\`\`
${primary}
\`\`\`
## Primary's .ai.md (${relPrimary}.ai.md)
${primaryAiMd}${partnerBlock}
Output JSON per the schema in the system prompt. The "file" field MUST be
"${relPrimary}".`;
const systemPrompt = await loadSystemPrompt('audit-file', relPrimary);
let response = '';
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Read', 'Glob', 'Grep'],
permissionMode: 'acceptEdits',
},
});
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') response += block.text;
}
}
}
if (message.type === 'result') {
const cost = message.total_cost_usd?.toFixed(4) ?? '?';
const inTok = message.usage?.['input_tokens'] ?? 0;
const outTok = message.usage?.['output_tokens'] ?? 0;
console.log(` [Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
}
}
const parsed = extractJson(response);
if (parsed === null) {
console.warn(` No JSON output for ${relPrimary}, skipping`);
return null;
}
parsed.file = relPrimary;
return parsed;
}
/** Render the aggregated markdown report. */
function buildReport(results: readonly AuditResult[]): string {
const total = results.length;
const reruns = results.filter((r) => r.verdict === 'rerun');
const totalConcepts = results.reduce((s, r) => s + r.ai_md_concepts, 0);
const totalTranslated = results.reduce((s, r) => s + r.translated, 0);
const overallRate = totalConcepts === 0 ? 0 : Math.round((totalTranslated / totalConcepts) * 100);
const lines: string[] = [
'# Documentation Audit Report',
'',
`**Files audited:** ${total}`,
`**Overall translation rate:** ${overallRate}% (${totalTranslated} of ${totalConcepts} .ai.md concepts reflected in docstrings)`,
`**Files flagged for re-run:** ${reruns.length}`,
'',
'## Files flagged for re-run',
'',
];
if (reruns.length === 0) {
lines.push('_None — all audited files passed._', '');
} else {
lines.push('| File | Translated | Missed | Rate |', '|------|-----------:|-------:|-----:|');
for (const r of reruns.sort(
(a, b) =>
a.translated / Math.max(a.ai_md_concepts, 1) - b.translated / Math.max(b.ai_md_concepts, 1),
)) {
const rate = r.ai_md_concepts === 0 ? 0 : Math.round((r.translated / r.ai_md_concepts) * 100);
lines.push(`| \`${r.file}\` | ${r.translated} | ${r.missed.length} | ${rate}% |`);
}
lines.push('', '## Top missed concepts (sampled)', '');
for (const r of reruns.slice(0, 10)) {
if (r.missed.length === 0) continue;
lines.push(`### \`${r.file}\``, '');
for (const m of r.missed.slice(0, 5)) {
lines.push(`- **${m.function}** — ${m.topic}`);
lines.push(` > ${m.ai_md_quote.replace(/\n/g, ' ').slice(0, 200)}`);
}
lines.push('');
}
}
return lines.join('\n');
}
/**
* Run async work over a list of items with bounded concurrency. Mirrors the
* minimal slice of p-limit we actually need; collects results in input order.
*/
async function mapWithConcurrency<T, R>(
items: readonly T[],
limit: number,
worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
const results = new Array<R>(items.length);
let next = 0;
async function pump(): Promise<void> {
while (true) {
const index = next++;
if (index >= items.length) return;
// biome-ignore lint/style/noNonNullAssertion: index < items.length
results[index] = await worker(items[index]!, index);
}
}
const workers = Array.from({ length: Math.min(limit, items.length) }, pump);
await Promise.all(workers);
return results;
}
/**
* Audit every C++ file with a .ai.md sibling under the target path.
*
* Concurrency is read from the AUDIT_CONCURRENCY env var (default 5).
*/
export async function auditTarget(target: string): Promise<void> {
const files = findAuditTargets(target);
const concurrency = Number(process.env['AUDIT_CONCURRENCY']) || DEFAULT_CONCURRENCY;
console.log(
`Found ${files.length} file(s) with .ai.md siblings to audit (concurrency=${concurrency}).`,
);
let completed = 0;
const raw = await mapWithConcurrency(files, concurrency, async (file) => {
try {
const result = await auditFile(file);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return result;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.warn(` Audit failed for ${file}: ${message}`);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return null;
}
});
const results = raw.filter((r): r is AuditResult => r !== null);
const report = buildReport(results);
await writeFile('doc-audit-report.md', report);
await writeFile('doc-audit-report.json', JSON.stringify(results, null, 2));
const reruns = results.filter((r) => r.verdict === 'rerun').length;
console.log(`\nAudited: ${results.length}/${files.length}`);
console.log(`Flagged for re-run: ${reruns}`);
console.log('Reports: doc-audit-report.md, doc-audit-report.json');
}