mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 09:16:47 +00:00
296 lines
9.5 KiB
TypeScript
296 lines
9.5 KiB
TypeScript
/**
 * Audit mode: measure how completely each file's Doxygen documentation
 * reflects the authoritative design intent in its sibling .ai.md.
 *
 * For each C++ file under the target that has a .ai.md sibling:
 *   - Locate its header/source partner (if any) and the partner's .ai.md.
 *   - Send primary + partner files and both .ai.md files to the agent.
 *   - Parse a structured JSON verdict per file.
 *
 * Writes:
 *   - doc-audit-report.json   Aggregated per-file results.
 *   - doc-audit-report.md     Human-readable summary.
 */
|
|
|
|
import { existsSync, readdirSync, statSync } from 'node:fs';
|
|
import { readFile, writeFile } from 'node:fs/promises';
|
|
import { join, relative, resolve } from 'node:path';
|
|
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
import { MODEL, XRPLD_ROOT } from './config.js';
|
|
import { findPartner } from './pairing.js';
|
|
import { loadSystemPrompt } from './prompt-loader.js';
|
|
|
|
/** File extensions (leading dot included) that mark a file as an audit candidate. */
const SOURCE_EXTS: ReadonlySet<string> = new Set<string>(['.h', '.hpp', '.cpp']);

/** Cap, in characters, on how much of a source file is embedded in a prompt. */
const MAX_FILE_CHARS = 24_000;

/** Cap, in characters, on how much of a .ai.md file is embedded in a prompt. */
const MAX_AI_MD_CHARS = 16_000;

/** Number of audits run at once when AUDIT_CONCURRENCY does not supply a usable value. */
const DEFAULT_CONCURRENCY = 5;
|
|
|
|
/**
 * One .ai.md concept that the agent reports as not reflected in the audited
 * file's documentation. Instances come straight from the agent's JSON verdict.
 */
interface AuditMissed {
  /** Name shown in bold for this entry in the markdown report. */
  function: string;

  /** Short description printed next to the function name in the report. */
  topic: string;

  /**
   * Where the documentation belongs. Not read by the report code in this
   * file; presumably defined by the audit system prompt — confirm there.
   */
  home: 'header' | 'source' | 'either';

  /**
   * How the existing documentation falls short. Not read by the report code
   * in this file; presumably defined by the audit system prompt — confirm there.
   */
  current_state: 'absent' | 'wrong-home' | 'thin';

  /** Text quoted in the report (newlines flattened, first 200 characters). */
  ai_md_quote: string;
}
|
|
|
|
/** Verdict for a single audited file, as parsed from the agent's JSON output. */
interface AuditResult {
  /** Path of the audited file; the auditor overwrites it with the path relative to the repo root. */
  file: string;

  /** Count of concepts found in the .ai.md; denominator of the translation rate. */
  ai_md_concepts: number;

  /** Count of those concepts reflected in the docstrings; numerator of the translation rate. */
  translated: number;

  /** Concepts reported as not reflected in the documentation. */
  missed: AuditMissed[];

  /** 'rerun' lists the file under "Files flagged for re-run" in the report. */
  verdict: 'rerun' | 'leave';
}
|
|
|
|
/**
 * Collect the C++ files at or below `target` that are eligible for auditing.
 *
 * A file is eligible when its extension is in SOURCE_EXTS and a file named
 * `<file>.ai.md` exists next to it.
 *
 * @param target Path resolved against XRPLD_ROOT; may name a file or a directory.
 * @returns Absolute paths of eligible files, in directory-walk order.
 * @throws Error when the resolved target does not exist.
 */
function findAuditTargets(target: string): string[] {
  const root = resolve(XRPLD_ROOT, target);
  if (!existsSync(root)) {
    throw new Error(`Target does not exist: ${root}`);
  }

  // Extension is taken from the last '.' in the full path; the .ai.md probe
  // runs only for files whose extension already qualifies.
  const isAuditable = (path: string): boolean => {
    const dot = path.lastIndexOf('.');
    return dot !== -1 && SOURCE_EXTS.has(path.slice(dot)) && existsSync(`${path}.ai.md`);
  };

  // A single-file target yields either that file or nothing.
  if (statSync(root).isFile()) {
    return isAuditable(root) ? [root] : [];
  }

  const found: string[] = [];
  const visit = (dir: string): void => {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const path = join(dir, entry.name);
      if (entry.isDirectory()) {
        visit(path);
      } else if (entry.isFile() && isAuditable(path)) {
        found.push(path);
      }
    }
  };
  visit(root);
  return found;
}
|
|
|
|
/**
 * Read a UTF-8 file, capping the returned text to keep prompts within budget.
 *
 * Lengths are measured in UTF-16 code units (JavaScript string length), not
 * bytes. When the cap would fall between the two halves of a surrogate pair,
 * the cut moves back one unit so the result never ends in a lone surrogate.
 *
 * @param absPath Path of the file to read.
 * @param maxChars Maximum number of characters of file content to keep.
 * @returns The whole text when it fits; otherwise the kept prefix followed by
 *   a marker stating how many characters were dropped.
 */
async function readCapped(absPath: string, maxChars: number): Promise<string> {
  const text = await readFile(absPath, 'utf8');
  if (text.length <= maxChars) return text;

  let cut = maxChars;
  // A high surrogate as the last kept unit means its low half would be cut off.
  const lastKept = text.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;

  return `${text.slice(0, cut)}\n\n... [truncated, ${text.length - cut} chars elided] ...`;
}
|
|
|
|
/** True for a non-null, non-array object, i.e. a parsed JSON object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True when a `missed` entry is an object whose five fields are all strings. */
function isMissedEntry(entry: unknown): boolean {
  if (!isJsonObject(entry)) return false;
  const fields = ['function', 'topic', 'home', 'current_state', 'ai_md_quote'];
  return fields.every((field) => typeof entry[field] === 'string');
}

/**
 * True when a parsed value has the fields the report code depends on:
 * numeric counts, a known verdict, and a well-formed `missed` list.
 * `file` is deliberately not checked because the caller overwrites it.
 */
function hasAuditResultShape(value: unknown): boolean {
  if (!isJsonObject(value)) return false;
  const missed = value['missed'];
  return (
    typeof value['ai_md_concepts'] === 'number' &&
    typeof value['translated'] === 'number' &&
    (value['verdict'] === 'rerun' || value['verdict'] === 'leave') &&
    Array.isArray(missed) &&
    missed.every(isMissedEntry)
  );
}

/**
 * Extract an audit verdict from a possibly-fenced model response.
 *
 * Two candidates are tried in order: the body of the first ```json fence,
 * then the span from the first `{` to the last `}`. The second candidate
 * rescues responses where a ``` sequence inside a JSON string cuts the fence
 * match short. A candidate is accepted only if it parses as JSON and has the
 * shape the report builder relies on.
 *
 * @param response Concatenated assistant text.
 * @returns The parsed verdict, or null when no candidate parses and validates.
 */
function extractJson(response: string): AuditResult | null {
  const candidates: string[] = [];
  const fenced = response.match(/```json\s*([\s\S]*?)```/)?.[1];
  if (fenced !== undefined) candidates.push(fenced);
  const braced = response.match(/\{[\s\S]*\}/)?.[0];
  if (braced !== undefined) candidates.push(braced);

  for (const candidate of candidates) {
    let value: unknown;
    try {
      value = JSON.parse(candidate);
    } catch {
      continue;
    }
    // Shape was checked at runtime above, so the assertion is backed by a guard.
    if (hasAuditResultShape(value)) return value as AuditResult;
  }
  return null;
}
|
|
|
|
/**
 * Audit one primary file against its .ai.md, with its partner file as context.
 *
 * Builds a prompt from the (capped) primary file, its .ai.md and, when a
 * partner is found, the partner file and the partner's .ai.md if present.
 * The prompt is sent to the agent, the text of its assistant messages is
 * gathered, and a JSON verdict is parsed out of that text.
 *
 * @param absPrimary Absolute path of the file to audit; `<absPrimary>.ai.md` is read too.
 * @returns The parsed verdict with `file` set to the path relative to
 *   XRPLD_ROOT, or null when no verdict could be extracted.
 */
async function auditFile(absPrimary: string): Promise<AuditResult | null> {
  const relPrimary = relative(XRPLD_ROOT, absPrimary);
  console.log(`\n=== Auditing: ${relPrimary} ===`);

  const primary = await readCapped(absPrimary, MAX_FILE_CHARS);
  const primaryAiMd = await readCapped(`${absPrimary}.ai.md`, MAX_AI_MD_CHARS);

  // Partner context is optional: the block stays empty without a partner,
  // and the partner's .ai.md section is appended only when that file exists.
  let partnerBlock = '';
  const absPartner = findPartner(absPrimary);
  if (absPartner !== null) {
    const relPartner = relative(XRPLD_ROOT, absPartner);
    const partner = await readCapped(absPartner, MAX_FILE_CHARS);
    partnerBlock = `

## Partner File (${relPartner})
\`\`\`
${partner}
\`\`\``;

    const partnerAiMdPath = `${absPartner}.ai.md`;
    if (existsSync(partnerAiMdPath)) {
      const partnerAiMd = await readCapped(partnerAiMdPath, MAX_AI_MD_CHARS);
      partnerBlock += `

## Partner's .ai.md (${relPartner}.ai.md)
${partnerAiMd}`;
    }
  }

  const userPrompt = `Audit the documentation coverage of this file against its authoritative .ai.md.

## Primary File (${relPrimary})
\`\`\`
${primary}
\`\`\`

## Primary's .ai.md (${relPrimary}.ai.md)
${primaryAiMd}${partnerBlock}

Output JSON per the schema in the system prompt. The "file" field MUST be
"${relPrimary}".`;

  const systemPrompt = await loadSystemPrompt('audit-file', relPrimary);

  // NOTE(review): only read-style tools are listed, yet permissionMode is
  // 'acceptEdits' — confirm whether an audit that should not modify files
  // wants a stricter mode.
  const stream = query({
    prompt: userPrompt,
    options: {
      model: MODEL,
      systemPrompt,
      cwd: XRPLD_ROOT,
      allowedTools: ['Read', 'Glob', 'Grep'],
      permissionMode: 'acceptEdits',
    },
  });

  // Text blocks from every assistant message are appended in arrival order;
  // the result message is only used for cost and token logging.
  let response = '';
  for await (const message of stream) {
    switch (message.type) {
      case 'assistant': {
        const content = message.message?.content;
        if (!Array.isArray(content)) break;
        for (const block of content) {
          if (block.type === 'text') response += block.text;
        }
        break;
      }
      case 'result': {
        const cost = message.total_cost_usd?.toFixed(4) ?? '?';
        const inTok = message.usage?.['input_tokens'] ?? 0;
        const outTok = message.usage?.['output_tokens'] ?? 0;
        console.log(` [Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
        break;
      }
      default:
        break;
    }
  }

  const parsed = extractJson(response);
  if (parsed === null) {
    console.warn(` No JSON output for ${relPrimary}, skipping`);
    return null;
  }

  // The path is authoritative here, whatever the agent echoed back.
  parsed.file = relPrimary;
  return parsed;
}
|
|
|
|
/**
 * Render the aggregated markdown report.
 *
 * The report opens with overall totals, then lists the files whose verdict is
 * 'rerun' ordered from lowest to highest translation ratio. For the first ten
 * of those it samples up to five missed concepts each.
 *
 * @param results Per-file verdicts to summarise.
 * @returns The report as one string with lines joined by '\n'.
 */
function buildReport(results: readonly AuditResult[]): string {
  // Whole-number percentage; an empty denominator reads as 0%.
  const percent = (part: number, whole: number): number =>
    whole === 0 ? 0 : Math.round((part / whole) * 100);
  // Sort key; the denominator is floored at 1 so empty files sort first.
  const ratio = (r: AuditResult): number => r.translated / Math.max(r.ai_md_concepts, 1);

  let conceptSum = 0;
  let translatedSum = 0;
  for (const r of results) {
    conceptSum += r.ai_md_concepts;
    translatedSum += r.translated;
  }

  // filter() returns a fresh array, so sorting in place leaves `results` untouched.
  const flagged = results.filter((r) => r.verdict === 'rerun');
  flagged.sort((a, b) => ratio(a) - ratio(b));

  const out: string[] = [];
  out.push(
    '# Documentation Audit Report',
    '',
    `**Files audited:** ${results.length}`,
    `**Overall translation rate:** ${percent(translatedSum, conceptSum)}% (${translatedSum} of ${conceptSum} .ai.md concepts reflected in docstrings)`,
    `**Files flagged for re-run:** ${flagged.length}`,
    '',
    '## Files flagged for re-run',
    '',
  );

  if (flagged.length === 0) {
    out.push('_None — all audited files passed._', '');
    return out.join('\n');
  }

  out.push('| File | Translated | Missed | Rate |', '|------|-----------:|-------:|-----:|');
  for (const r of flagged) {
    const rate = percent(r.translated, r.ai_md_concepts);
    out.push(`| \`${r.file}\` | ${r.translated} | ${r.missed.length} | ${rate}% |`);
  }

  out.push('', '## Top missed concepts (sampled)', '');
  for (const r of flagged.slice(0, 10)) {
    if (r.missed.length === 0) continue;
    out.push(`### \`${r.file}\``, '');
    for (const m of r.missed.slice(0, 5)) {
      out.push(
        `- **${m.function}** — ${m.topic}`,
        ` > ${m.ai_md_quote.replace(/\n/g, ' ').slice(0, 200)}`,
      );
    }
    out.push('');
  }

  return out.join('\n');
}
|
|
|
|
/**
 * Run async work over a list of items with bounded concurrency. Mirrors the
 * minimal slice of p-limit we actually need; collects results in input order.
 *
 * Each lane repeatedly claims the next unprocessed index until the list is
 * exhausted. Results are stored by index, so output order matches input order
 * regardless of completion order.
 *
 * @param items Inputs to process.
 * @param limit Maximum number of workers in flight. Values below 1 and NaN
 *   are treated as 1 (a non-positive limit would otherwise start no lane and
 *   silently skip every item); fractional values are rounded down.
 * @param worker Async function applied to each item and its index.
 * @returns Worker results, positionally aligned with `items`.
 * @throws Rejects with the first worker error (Promise.all semantics); lanes
 *   already running are not cancelled.
 */
async function mapWithConcurrency<T, R>(
  items: readonly T[],
  limit: number,
  worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);
  let next = 0;

  async function pump(): Promise<void> {
    while (true) {
      // Claiming the index is synchronous, so no two lanes can take the same item.
      const index = next++;
      if (index >= items.length) return;
      // biome-ignore lint/style/noNonNullAssertion: index < items.length
      results[index] = await worker(items[index]!, index);
    }
  }

  // Guarantee at least one lane so every item is processed.
  const lanes = Number.isNaN(limit) ? 1 : Math.max(1, Math.floor(limit));
  const workers = Array.from({ length: Math.min(lanes, items.length) }, () => pump());
  await Promise.all(workers);
  return results;
}
|
|
|
|
/**
 * Audit every C++ file with a .ai.md sibling under the target path.
 *
 * Concurrency is read from the AUDIT_CONCURRENCY env var (default 5). A value
 * that is missing, not numeric, or below 1 falls back to the default, since
 * such a value would start no worker and produce empty reports. Fractional
 * values are rounded down.
 *
 * A file whose audit throws is logged and skipped; it does not abort the run.
 * Writes doc-audit-report.md and doc-audit-report.json to the current
 * working directory.
 *
 * @param target File or directory to audit, resolved by findAuditTargets.
 */
export async function auditTarget(target: string): Promise<void> {
  const files = findAuditTargets(target);

  const requested = Number(process.env['AUDIT_CONCURRENCY']);
  // NaN fails the comparison, so unset and non-numeric values use the default too.
  const concurrency = requested >= 1 ? Math.floor(requested) : DEFAULT_CONCURRENCY;
  console.log(
    `Found ${files.length} file(s) with .ai.md siblings to audit (concurrency=${concurrency}).`,
  );

  let completed = 0;
  const raw = await mapWithConcurrency(files, concurrency, async (file) => {
    try {
      return await auditFile(file);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.warn(` Audit failed for ${file}: ${message}`);
      return null;
    } finally {
      // Progress advances for successes and failures alike.
      completed++;
      console.log(` Progress: ${completed}/${files.length}`);
    }
  });
  const results = raw.filter((r): r is AuditResult => r !== null);

  const report = buildReport(results);
  await writeFile('doc-audit-report.md', report);
  await writeFile('doc-audit-report.json', JSON.stringify(results, null, 2));

  const reruns = results.filter((r) => r.verdict === 'rerun').length;
  console.log(`\nAudited: ${results.length}/${files.length}`);
  console.log(`Flagged for re-run: ${reruns}`);
  console.log('Reports: doc-audit-report.md, doc-audit-report.json');
}
|