fix workflow

This commit is contained in:
Denis Angell
2026-05-14 08:44:53 +02:00
parent a05f951a0c
commit 1159ee32d8
11 changed files with 647 additions and 142 deletions

295
.github/scripts/doc-agent/src/audit.ts vendored Normal file
View File

@@ -0,0 +1,295 @@
/**
* Audit mode: measure how completely each file's Doxygen documentation
* reflects the authoritative design intent in its sibling .ai.md.
*
* For each C++ file under the target that has a .ai.md sibling:
* - Locate its header/source partner (if any) and the partner's .ai.md.
* - Send primary + partner files and both .ai.md files to the agent.
* - Parse a structured JSON verdict per file.
*
* Writes:
* - doc-audit-report.json Aggregated per-file results.
* - doc-audit-report.md Human-readable summary.
*/
import { existsSync, readdirSync, statSync } from 'node:fs';
import { readFile, writeFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { findPartner } from './pairing.js';
import { loadSystemPrompt } from './prompt-loader.js';
const SOURCE_EXTS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
const MAX_FILE_CHARS = 24_000;
const MAX_AI_MD_CHARS = 16_000;
const DEFAULT_CONCURRENCY = 5;
interface AuditMissed {
function: string;
topic: string;
home: 'header' | 'source' | 'either';
current_state: 'absent' | 'wrong-home' | 'thin';
ai_md_quote: string;
}
interface AuditResult {
file: string;
ai_md_concepts: number;
translated: number;
missed: AuditMissed[];
verdict: 'rerun' | 'leave';
}
/**
* Recursively find C++ source files under a target path that have a
* sibling .ai.md.
*/
function findAuditTargets(target: string): string[] {
const absTarget = resolve(XRPLD_ROOT, target);
if (!existsSync(absTarget)) {
throw new Error(`Target does not exist: ${absTarget}`);
}
const out: string[] = [];
const consider = (file: string): void => {
const dotIdx = file.lastIndexOf('.');
if (dotIdx === -1) return;
const ext = file.slice(dotIdx);
if (!SOURCE_EXTS.has(ext)) return;
if (!existsSync(`${file}.ai.md`)) return;
out.push(file);
};
const stat = statSync(absTarget);
if (stat.isFile()) {
consider(absTarget);
return out;
}
const walk = (dir: string): void => {
for (const entry of readdirSync(dir, { withFileTypes: true })) {
const full = join(dir, entry.name);
if (entry.isDirectory()) walk(full);
else if (entry.isFile()) consider(full);
}
};
walk(absTarget);
return out;
}
/** Read a file, capping at maxChars to keep prompts within budget. */
async function readCapped(absPath: string, maxChars: number): Promise<string> {
const text = await readFile(absPath, 'utf8');
if (text.length <= maxChars) return text;
return `${text.slice(0, maxChars)}\n\n... [truncated, ${text.length - maxChars} bytes elided] ...`;
}
/** Extract a JSON object from a possibly-fenced model response. */
function extractJson(response: string): AuditResult | null {
const fenced = response.match(/```json\s*([\s\S]*?)```/);
const raw = fenced?.[1] ?? response.match(/(\{[\s\S]*\})/)?.[1];
if (raw === undefined) return null;
try {
return JSON.parse(raw) as AuditResult;
} catch {
return null;
}
}
/** Audit a single primary file against its .ai.md and partner context. */
async function auditFile(absPrimary: string): Promise<AuditResult | null> {
const relPrimary = relative(XRPLD_ROOT, absPrimary);
console.log(`\n=== Auditing: ${relPrimary} ===`);
const primary = await readCapped(absPrimary, MAX_FILE_CHARS);
const primaryAiMd = await readCapped(`${absPrimary}.ai.md`, MAX_AI_MD_CHARS);
const absPartner = findPartner(absPrimary);
const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
const partner = absPartner === null ? null : await readCapped(absPartner, MAX_FILE_CHARS);
const partnerAiMdPath = absPartner === null ? null : `${absPartner}.ai.md`;
const partnerAiMd =
partnerAiMdPath !== null && existsSync(partnerAiMdPath)
? await readCapped(partnerAiMdPath, MAX_AI_MD_CHARS)
: null;
const partnerBlock =
relPartner === null || partner === null
? ''
: `
## Partner File (${relPartner})
\`\`\`
${partner}
\`\`\`${
partnerAiMd === null
? ''
: `
## Partner's .ai.md (${relPartner}.ai.md)
${partnerAiMd}`
}`;
const userPrompt = `Audit the documentation coverage of this file against its authoritative .ai.md.
## Primary File (${relPrimary})
\`\`\`
${primary}
\`\`\`
## Primary's .ai.md (${relPrimary}.ai.md)
${primaryAiMd}${partnerBlock}
Output JSON per the schema in the system prompt. The "file" field MUST be
"${relPrimary}".`;
const systemPrompt = await loadSystemPrompt('audit-file', relPrimary);
let response = '';
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Read', 'Glob', 'Grep'],
permissionMode: 'acceptEdits',
},
});
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') response += block.text;
}
}
}
if (message.type === 'result') {
const cost = message.total_cost_usd?.toFixed(4) ?? '?';
const inTok = message.usage?.['input_tokens'] ?? 0;
const outTok = message.usage?.['output_tokens'] ?? 0;
console.log(` [Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
}
}
const parsed = extractJson(response);
if (parsed === null) {
console.warn(` No JSON output for ${relPrimary}, skipping`);
return null;
}
parsed.file = relPrimary;
return parsed;
}
/** Render the aggregated markdown report. */
function buildReport(results: readonly AuditResult[]): string {
const total = results.length;
const reruns = results.filter((r) => r.verdict === 'rerun');
const totalConcepts = results.reduce((s, r) => s + r.ai_md_concepts, 0);
const totalTranslated = results.reduce((s, r) => s + r.translated, 0);
const overallRate = totalConcepts === 0 ? 0 : Math.round((totalTranslated / totalConcepts) * 100);
const lines: string[] = [
'# Documentation Audit Report',
'',
`**Files audited:** ${total}`,
`**Overall translation rate:** ${overallRate}% (${totalTranslated} of ${totalConcepts} .ai.md concepts reflected in docstrings)`,
`**Files flagged for re-run:** ${reruns.length}`,
'',
'## Files flagged for re-run',
'',
];
if (reruns.length === 0) {
lines.push('_None — all audited files passed._', '');
} else {
lines.push('| File | Translated | Missed | Rate |', '|------|-----------:|-------:|-----:|');
for (const r of reruns.sort(
(a, b) =>
a.translated / Math.max(a.ai_md_concepts, 1) - b.translated / Math.max(b.ai_md_concepts, 1),
)) {
const rate = r.ai_md_concepts === 0 ? 0 : Math.round((r.translated / r.ai_md_concepts) * 100);
lines.push(`| \`${r.file}\` | ${r.translated} | ${r.missed.length} | ${rate}% |`);
}
lines.push('', '## Top missed concepts (sampled)', '');
for (const r of reruns.slice(0, 10)) {
if (r.missed.length === 0) continue;
lines.push(`### \`${r.file}\``, '');
for (const m of r.missed.slice(0, 5)) {
lines.push(`- **${m.function}** — ${m.topic}`);
lines.push(` > ${m.ai_md_quote.replace(/\n/g, ' ').slice(0, 200)}`);
}
lines.push('');
}
}
return lines.join('\n');
}
/**
* Run async work over a list of items with bounded concurrency. Mirrors the
* minimal slice of p-limit we actually need; collects results in input order.
*/
async function mapWithConcurrency<T, R>(
items: readonly T[],
limit: number,
worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
const results = new Array<R>(items.length);
let next = 0;
async function pump(): Promise<void> {
while (true) {
const index = next++;
if (index >= items.length) return;
// biome-ignore lint/style/noNonNullAssertion: index < items.length
results[index] = await worker(items[index]!, index);
}
}
const workers = Array.from({ length: Math.min(limit, items.length) }, pump);
await Promise.all(workers);
return results;
}
/**
* Audit every C++ file with a .ai.md sibling under the target path.
*
* Concurrency is read from the AUDIT_CONCURRENCY env var (default 5).
*/
export async function auditTarget(target: string): Promise<void> {
const files = findAuditTargets(target);
const concurrency = Number(process.env['AUDIT_CONCURRENCY']) || DEFAULT_CONCURRENCY;
console.log(
`Found ${files.length} file(s) with .ai.md siblings to audit (concurrency=${concurrency}).`,
);
let completed = 0;
const raw = await mapWithConcurrency(files, concurrency, async (file) => {
try {
const result = await auditFile(file);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return result;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.warn(` Audit failed for ${file}: ${message}`);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return null;
}
});
const results = raw.filter((r): r is AuditResult => r !== null);
const report = buildReport(results);
await writeFile('doc-audit-report.md', report);
await writeFile('doc-audit-report.json', JSON.stringify(results, null, 2));
const reruns = results.filter((r) => r.verdict === 'rerun').length;
console.log(`\nAudited: ${results.length}/${files.length}`);
console.log(`Flagged for re-run: ${reruns}`);
console.log('Reports: doc-audit-report.md, doc-audit-report.json');
}

View File

@@ -7,6 +7,7 @@ import { readFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { findPartner } from './pairing.js';
import { loadSystemPrompt } from './prompt-loader.js';
const CPP_EXTENSIONS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
@@ -65,6 +66,11 @@ async function readAiContext(absPath: string): Promise<string | null> {
/**
* Document a single file by running the documentation agent against it.
*
* Inject the partner file's path + its `.ai.md` (if any) into the prompt
* so the agent can apply the "contract on header, implementation on
* source" policy with full visibility into the other half. The agent
* Reads the partner only as reference; only the primary file is edited.
*/
async function documentFile(absPath: string): Promise<void> {
const relPath = relative(XRPLD_ROOT, absPath);
@@ -75,15 +81,33 @@ async function documentFile(absPath: string): Promise<void> {
const aiContextBlock =
aiContext === null
? ''
: `\n\n## Authoritative AI Context (${relPath}.ai.md)\n\nThe following is high-signal prose describing this file's purpose, design,\nand non-obvious behavior. Treat it as the source of truth for intent and\nbehavior. Your job is to translate this into structured Doxygen \`/** */\`\ncomments on the actual declarations.\n\n---\n\n${aiContext}\n---`;
: `\n\n## Primary's Authoritative AI Context (${relPath}.ai.md)\n\nThe following is high-signal prose describing this file's purpose, design,\nand non-obvious behavior. Treat it as the source of truth for intent and\nbehavior. Your job is to translate this into structured Doxygen \`/** */\`\ncomments on the actual declarations.\n\n---\n\n${aiContext}\n---`;
const absPartner = findPartner(absPath);
const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
const partnerAiContext = absPartner === null ? null : await readAiContext(absPartner);
const partnerBlock =
relPartner === null
? ''
: `\n\n## Partner File\n\nThis file's partner is **${relPartner}**. Use the Read tool to see its\ncurrent docstrings before deciding what belongs on the primary. A concept\nalready documented on the partner does not need to be duplicated here.\nConversely, an implementation-depth concept currently on the partner that\nbelongs on the source (or vice versa) should be moved.${
partnerAiContext === null
? ''
: `\n\n### Partner's Authoritative AI Context (${relPartner}.ai.md)\n\n---\n\n${partnerAiContext}\n---`
}`;
const userPrompt = `Add Doxygen documentation to: ${relPath}
The file is rooted at ${XRPLD_ROOT}. Use the Read tool to read it, the Edit
tool to add documentation, and Glob/Grep to find related tests or callers
when needed.
when needed.${
relPartner === null
? ''
: ` Use Read on the partner file (${relPartner}) to see what's already
documented there.`
}
Do not modify any code logic — only add documentation comments.${aiContextBlock}`;
Do not modify any code logic — only add documentation comments to the
primary file (${relPath}). Do NOT edit the partner file.${aiContextBlock}${partnerBlock}`;
const result = query({
prompt: userPrompt,

View File

@@ -10,6 +10,7 @@
* doc-agent regen-skills protocol
*/
import { auditTarget } from './audit.js';
import { documentTarget } from './document.js';
import { regenSkills } from './regen-skills.js';
import { reviewDiff } from './review.js';
@@ -21,6 +22,9 @@ Usage:
doc-agent document <file-or-directory> Add Doxygen documentation
doc-agent review <base>..<head> Detect doc drift in range
doc-agent review --pr <number> Detect doc drift for a PR
doc-agent audit <file-or-directory> Measure how completely each file's
docstrings reflect its .ai.md intent;
outputs doc-audit-report.{md,json}
doc-agent regen-skills <module> Regenerate docs/skills/soul/<module>.md
from sibling .ai.md files
@@ -62,6 +66,13 @@ async function main(): Promise<void> {
return;
}
if (mode === 'audit') {
const target = args[0];
if (target === undefined) printUsageAndExit(1);
await auditTarget(target);
return;
}
if (mode === 'regen-skills') {
const moduleName = args[0];
if (moduleName === undefined) printUsageAndExit(1);

View File

@@ -0,0 +1,47 @@
/**
* Header/source pairing for C++ files in the xrpld layout.
*
* libxrpl: src/libxrpl/<X>.cpp <-> include/xrpl/<X>.h
* xrpld: src/xrpld/<X>.cpp <-> src/xrpld/<X>.h (same directory)
*
* Inline-only headers may have no .cpp partner; standalone .cpp may have
* no .h partner.
*/
import { existsSync } from 'node:fs';
import { relative, resolve } from 'node:path';
import { XRPLD_ROOT } from './config.js';
/**
* Compute the partner file path for a given primary, by swapping the
* extension between header/source. Returns null if no candidate exists
* on disk.
*/
export function findPartner(absPrimary: string): string | null {
const rel = relative(XRPLD_ROOT, absPrimary);
const dotIdx = rel.lastIndexOf('.');
if (dotIdx === -1) return null;
const stem = rel.slice(0, dotIdx);
const ext = rel.slice(dotIdx);
const candidates: string[] = [];
if (ext === '.cpp') {
if (stem.startsWith('src/libxrpl/')) {
const tail = stem.slice('src/libxrpl/'.length);
candidates.push(`include/xrpl/${tail}.h`, `include/xrpl/${tail}.hpp`);
}
candidates.push(`${stem}.h`, `${stem}.hpp`);
} else if (ext === '.h' || ext === '.hpp') {
if (stem.startsWith('include/xrpl/')) {
candidates.push(`src/libxrpl/${stem.slice('include/xrpl/'.length)}.cpp`);
}
candidates.push(`${stem}.cpp`);
}
for (const candidate of candidates) {
const abs = resolve(XRPLD_ROOT, candidate);
if (existsSync(abs) && abs !== absPrimary) return abs;
}
return null;
}