Compare commits

..

17 Commits

Author SHA1 Message Date
Denis Angell
5e193157a6 remove the py script 2026-05-14 10:00:59 +02:00
Denis Angell
9294479a8a Merge remote-tracking branch 'origin/develop' into dangell7/docs
# Conflicts:
#	include/xrpl/protocol/STParsedJSON.h
#	include/xrpl/shamap/SHAMapTreeNode.h
#	src/libxrpl/ledger/helpers/TokenHelpers.cpp
#	src/libxrpl/protocol/STIssue.cpp
#	src/libxrpl/protocol/STParsedJSON.cpp
#	src/libxrpl/shamap/SHAMapTreeNode.cpp
#	src/libxrpl/tx/invariants/PermissionedDEXInvariant.cpp
#	src/libxrpl/tx/transactors/nft/NFTokenAcceptOffer.cpp
#	src/libxrpl/tx/transactors/vault/VaultWithdraw.cpp
#	src/xrpld/rpc/detail/Role.cpp
#	src/xrpld/rpc/detail/TransactionSign.cpp
#	src/xrpld/rpc/handlers/account/AccountObjects.cpp
2026-05-14 08:59:49 +02:00
Denis Angell
1159ee32d8 fix workflow 2026-05-14 08:44:53 +02:00
Denis Angell
a05f951a0c part 3 2026-05-14 08:43:32 +02:00
Denis Angell
315d1fdb06 part 3 2026-05-14 06:57:17 +02:00
Denis Angell
e635557235 part 2 2026-05-14 05:56:04 +02:00
Denis Angell
d8febb71bd part 1 2026-05-13 23:01:44 +02:00
Denis Angell
f3535b1158 Update doc-coverage.yml 2026-05-13 22:52:53 +02:00
Denis Angell
23a132c0d9 Update SCOPE_OF_WORK.md 2026-05-13 22:47:51 +02:00
Denis Angell
308f6c5375 regen skills 2026-05-13 20:57:25 +02:00
Denis Angell
b99440bd22 regen skills 2026-05-13 19:35:26 +02:00
Denis Angell
bb265dce80 fix production run 2026-05-13 19:35:19 +02:00
Denis Angell
96244b016a Create install-skills.sh 2026-05-13 19:25:09 +02:00
Denis Angell
a0782daf46 regen skills 2026-05-13 19:25:06 +02:00
Denis Angell
b2ef159aee move skills 2026-05-13 19:08:27 +02:00
Denis Angell
9032a31e26 add doc-agent 2026-05-13 18:54:45 +02:00
Denis Angell
536f87b952 github workflows 2026-05-13 18:21:39 +02:00
1182 changed files with 111545 additions and 36963 deletions

View File

@@ -171,7 +171,7 @@ CheckOptions:
readability-identifier-naming.EnumCase: CamelCase
readability-identifier-naming.EnumConstantCase: CamelCase
readability-identifier-naming.ScopedEnumConstantCase: CamelCase
readability-identifier-naming.GlobalConstantCase: CamelCase
readability-identifier-naming.GlobalConstantCase: UPPER_CASE
readability-identifier-naming.GlobalConstantPrefix: "k"
readability-identifier-naming.GlobalVariableCase: CamelCase
readability-identifier-naming.GlobalVariablePrefix: "g"
@@ -179,26 +179,25 @@ CheckOptions:
readability-identifier-naming.ConstexprMethodCase: camelBack
readability-identifier-naming.ClassMethodCase: camelBack
readability-identifier-naming.ClassMemberCase: camelBack
readability-identifier-naming.ClassConstantCase: CamelCase
readability-identifier-naming.ClassConstantCase: UPPER_CASE
readability-identifier-naming.ClassConstantPrefix: "k"
readability-identifier-naming.StaticConstantCase: CamelCase
readability-identifier-naming.StaticConstantCase: UPPER_CASE
readability-identifier-naming.StaticConstantPrefix: "k"
readability-identifier-naming.StaticVariableCase: camelBack
readability-identifier-naming.ConstexprVariableCase: camelBack
readability-identifier-naming.StaticVariableCase: UPPER_CASE
readability-identifier-naming.StaticVariablePrefix: "k"
readability-identifier-naming.ConstexprVariableCase: UPPER_CASE
readability-identifier-naming.ConstexprVariablePrefix: "k"
readability-identifier-naming.LocalConstantCase: camelBack
readability-identifier-naming.LocalVariableCase: camelBack
readability-identifier-naming.TemplateParameterCase: CamelCase
readability-identifier-naming.ParameterCase: camelBack
readability-identifier-naming.FunctionCase: camelBack
readability-identifier-naming.MemberCase: camelBack
readability-identifier-naming.PrivateMemberCase: camelBack
readability-identifier-naming.PrivateMemberSuffix: _
readability-identifier-naming.ProtectedMemberCase: camelBack
readability-identifier-naming.ProtectedMemberSuffix: _
readability-identifier-naming.PublicMemberCase: camelBack
readability-identifier-naming.PublicMemberSuffix: ""
readability-identifier-naming.GlobalFunctionIgnoredRegexp: "^(to_string|hash_append|tuple_hash)$"
HeaderFilterRegex: '^.*/(tests?|xrpl|xrpld)/.*\.(h|hpp|ipp)$'
HeaderFilterRegex: '^.*/(test|xrpl|xrpld)/.*\.(h|hpp|ipp)$'
ExcludeHeaderFilterRegex: '^.*/protocol_autogen/.*\.(h|hpp)$'
WarningsAsErrors: "*"

View File

@@ -37,12 +37,12 @@ runs:
run: |
echo 'Installing dependencies.'
conan install \
--profile ci \
--build="${BUILD_OPTION}" \
--options:host='&:tests=True' \
--options:host='&:xrpld=True' \
--settings:all build_type="${BUILD_TYPE}" \
--conf:all tools.build:jobs=${BUILD_NPROC} \
--conf:all tools.build:verbosity="${LOG_VERBOSITY}" \
--conf:all tools.compilation:verbosity="${LOG_VERBOSITY}" \
.
--profile ci \
--build="${BUILD_OPTION}" \
--options:host='&:tests=True' \
--options:host='&:xrpld=True' \
--settings:all build_type="${BUILD_TYPE}" \
--conf:all tools.build:jobs=${BUILD_NPROC} \
--conf:all tools.build:verbosity="${LOG_VERBOSITY}" \
--conf:all tools.compilation:verbosity="${LOG_VERBOSITY}" \
.

View File

@@ -15,7 +15,7 @@ runs:
shell: bash
env:
VERSION: ${{ github.ref_name }}
run: echo "VERSION=${VERSION}" >>"${GITHUB_ENV}"
run: echo "VERSION=${VERSION}" >> "${GITHUB_ENV}"
# When a tag is not pushed, then the version (e.g. 1.2.3-b0) is extracted
# from the BuildInfo.cpp file and the shortened commit hash appended to it.
@@ -28,17 +28,17 @@ runs:
echo 'Extracting version from BuildInfo.cpp.'
VERSION="$(cat src/libxrpl/protocol/BuildInfo.cpp | grep "versionString =" | awk -F '"' '{print $2}')"
if [[ -z "${VERSION}" ]]; then
echo 'Unable to extract version from BuildInfo.cpp.'
exit 1
echo 'Unable to extract version from BuildInfo.cpp.'
exit 1
fi
echo 'Appending shortened commit hash to version.'
SHA='${{ github.sha }}'
VERSION="${VERSION}+${SHA:0:7}"
echo "VERSION=${VERSION}" >>"${GITHUB_ENV}"
echo "VERSION=${VERSION}" >> "${GITHUB_ENV}"
- name: Output version
id: version
shell: bash
run: echo "version=${VERSION}" >>"${GITHUB_OUTPUT}"
run: echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"

22
.github/doc-coverage-thresholds.json vendored Normal file
View File

@@ -0,0 +1,22 @@
{
"global_minimum": 0,
"ratchet_mode": "no_decrease",
"new_file_minimum": 80,
"module_thresholds": {
"include/xrpl/basics/": 0,
"include/xrpl/crypto/": 0,
"include/xrpl/protocol/": 0,
"include/xrpl/ledger/": 0,
"include/xrpl/tx/": 0,
"include/xrpl/server/": 0,
"include/xrpl/nodestore/": 0,
"include/xrpl/shamap/": 0,
"include/xrpl/resource/": 0,
"xrpld/rpc/": 0,
"xrpld/overlay/": 0,
"xrpld/peerfinder/": 0,
"xrpld/consensus/": 0,
"xrpld/app/": 0,
"libxrpl/": 0
}
}

18
.github/scripts/doc-agent/.env.example vendored Normal file
View File

@@ -0,0 +1,18 @@
# Copy this file to .env and fill in your values.
# .env is gitignored and will never be committed.
# Required: Anthropic API key for the Claude Agent SDK.
ANTHROPIC_API_KEY=sk-ant-...
# Optional: Override the path to the xrpld repo root.
# Defaults to three levels up from this directory (the repo this lives in).
# XRPLD_ROOT=/path/to/xrpld
# Optional: Override the model used by the agent.
# Defaults to claude-opus-4-7.
# DOC_AGENT_MODEL=claude-opus-4-7
# Max output tokens per model turn (passed through to Claude Code).
# Default in Claude Code is 8192. Bump for skill regeneration so large
# modules don't truncate.
CLAUDE_CODE_MAX_OUTPUT_TOKENS=32000

7
.github/scripts/doc-agent/.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
node_modules/
dist/
*.log
.env
.env.local
doc-review-report.md
doc-review-comments.json

122
.github/scripts/doc-agent/README.md vendored Normal file
View File

@@ -0,0 +1,122 @@
# doc-agent
Automated documentation agent for the xrpld C++ codebase. Built on the
Claude Agent SDK.
## What it does
Three modes:
- **document** — Add Doxygen `/** */` documentation to a C++ file or
directory. For each target file, the agent reads the sibling
`<file>.ai.md` (high-signal prose generated by the athenah-ai pipeline),
the module skill, and the file itself, then writes Doxygen comments per
the standards in `docs/DOCUMENTATION_STANDARDS.md`.
- **review** — Given a git diff range, detect documentation drift. Used by
the `doc-review` GitHub Action and locally for testing.
- **regen-skills** — Rebuild a module's skill file at
`docs/skills/soul/<module>.md` from the `.ai.md` files in that module
and the existing skill content.
## Requirements
- Node.js >= 20.12 (for native `--env-file` support)
- `ANTHROPIC_API_KEY` (in `.env` or exported in shell)
- Tools the agent uses: `git`, `gh` (for `--pr`)
## Install
```sh
cd .github/scripts/doc-agent
npm install
cp .env.example .env
# edit .env and set ANTHROPIC_API_KEY
```
The npm scripts auto-load `.env` via Node's `--env-file-if-exists` flag.
You can also export the variables in your shell — both work.
## Build and lint
```sh
npm run typecheck # type check without emitting
npm run build # compile to dist/
npm run lint # biome lint
npm run format # biome format --write
npm run check # lint + format check (read-only)
npm run check:fix # lint + format + fix
```
## Usage
```sh
# Document a single file (reads sibling .ai.md if present)
npm run document include/xrpl/basics/base_uint.h
# Document an entire module
npm run document include/xrpl/basics/
# Review a git range
npm run review develop..HEAD
# Review a PR
npm run review -- --pr 1234
# Regenerate a skill file from this module's .ai.md inputs
npm run regen-skills protocol
npm run regen-skills ledger
```
When invoked outside the xrpld repo, set `XRPLD_ROOT` in `.env` to the path
of the checkout you want to operate on.
## ai.md context files
The doc-agent reads a sibling `<file>.ai.md` next to each source file when
documenting it. These are produced by the upstream `athenah-ai` pipeline
and treated as the authoritative source of intent. They are gitignored
(`*.ai.md` in `.gitignore`) and should be removed once the initial
documentation pass is complete.
## Outputs
The `review` mode writes two files in the current directory:
- `doc-review-report.md` — markdown summary, posted as the PR comment
- `doc-review-comments.json` — array of inline review comments, posted
individually on the PR diff
## Layout
```
doc-agent/
├── package.json
├── tsconfig.json
├── biome.json
├── prompts/
│ ├── document-file.md # System prompt for documentation mode
│ ├── review-diff.md # System prompt for review mode
│ └── regen-skill.md # System prompt for regen-skills mode
└── src/
├── index.ts # CLI entry point
├── config.ts # Paths, model, module-skill map
├── prompt-loader.ts # Loads prompts + module skill context
├── document.ts # Document mode
├── review.ts # Review mode
├── regen-skills.ts # Regen-skills mode
└── types.ts # Shared types
```
## Module skills
The agent injects per-module context from `docs/skills/soul/*.md` into its
system prompt based on the file path being processed. The mapping lives in
`src/config.ts` (`MODULE_SKILL_MAP`).
## Notes
- Prompts live in markdown files, not source, so they can be edited without
touching code.
- The `document` mode uses `permissionMode: 'acceptEdits'` so the agent
writes directly to the target files. Run against a clean git tree so you
can review and revert if needed.

57
.github/scripts/doc-agent/biome.json vendored Normal file
View File

@@ -0,0 +1,57 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"vcs": {
"enabled": true,
"clientKind": "git",
"useIgnoreFile": true
},
"files": {
"ignoreUnknown": false,
"ignore": ["dist", "node_modules"]
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2,
"lineWidth": 100,
"lineEnding": "lf"
},
"javascript": {
"formatter": {
"quoteStyle": "single",
"trailingCommas": "all",
"semicolons": "always",
"arrowParentheses": "always"
}
},
"linter": {
"enabled": true,
"rules": {
"recommended": true,
"correctness": {
"noUnusedVariables": "error",
"noUnusedImports": "error",
"useExhaustiveDependencies": "error"
},
"style": {
"useConst": "error",
"useTemplate": "error",
"useImportType": "error",
"useExportType": "error",
"noNonNullAssertion": "warn"
},
"suspicious": {
"noExplicitAny": "error",
"noConsoleLog": "off"
},
"complexity": {
"noUselessTypeConstraint": "error",
"useArrowFunction": "error",
"useLiteralKeys": "off"
}
}
},
"organizeImports": {
"enabled": true
}
}

30
.github/scripts/doc-agent/install-skills.sh vendored Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
SRC_DIR="$REPO_ROOT/docs/skills"
DEST_DIR="$REPO_ROOT/.claude/skills"
if [ ! -d "$SRC_DIR" ]; then
echo "Source directory not found: $SRC_DIR" >&2
exit 1
fi
mkdir -p "$DEST_DIR"
shopt -s nullglob
moved=0
for src in "$SRC_DIR"/*.md; do
name="$(basename "$src" .md)"
[ "$name" = "index" ] && continue
skill_dir="$DEST_DIR/$name"
mkdir -p "$skill_dir"
cp "$src" "$skill_dir/SKILL.md"
echo "Installed: $name -> $skill_dir/SKILL.md"
moved=$((moved + 1))
done
echo "Done. Installed $moved skill(s) to $DEST_DIR"

1123
.github/scripts/doc-agent/package-lock.json generated vendored Normal file

File diff suppressed because it is too large Load Diff

36
.github/scripts/doc-agent/package.json vendored Normal file
View File

@@ -0,0 +1,36 @@
{
"name": "xrpld-doc-agent",
"version": "0.1.0",
"description": "Automated documentation agent for the xrpld C++ codebase. Uses the Claude Agent SDK to generate Doxygen documentation and detect doc drift on PRs.",
"type": "module",
"private": true,
"bin": {
"doc-agent": "./dist/index.js"
},
"scripts": {
"build": "tsc",
"start": "node --env-file-if-exists=.env dist/index.js",
"dev": "tsx --env-file-if-exists=.env src/index.ts",
"document": "tsx --env-file-if-exists=.env src/index.ts document",
"review": "tsx --env-file-if-exists=.env src/index.ts review",
"audit": "tsx --env-file-if-exists=.env src/index.ts audit",
"regen-skills": "tsx --env-file-if-exists=.env src/index.ts regen-skills",
"typecheck": "tsc --noEmit",
"lint": "biome lint src",
"format": "biome format --write src",
"check": "biome check src",
"check:fix": "biome check --write src"
},
"dependencies": {
"@anthropic-ai/claude-agent-sdk": "^0.1.10"
},
"devDependencies": {
"@biomejs/biome": "^1.9.4",
"@types/node": "^22.10.0",
"tsx": "^4.19.2",
"typescript": "^5.7.0"
},
"engines": {
"node": ">=20.12"
}
}

View File

@@ -0,0 +1,105 @@
You are auditing a C++ source file in the xrpld (XRP Ledger daemon)
codebase to determine how completely the file's existing Doxygen
documentation reflects the authoritative design intent captured in its
sibling `.ai.md` file.
This is a read-only audit. Do NOT modify the file.
## Input
You receive up to four pieces of context:
- A **primary** C++ file (.h, .hpp, or .cpp) — the file this audit is
scoped to.
- The **primary's `.ai.md`** — authoritative prose about the primary file's
purpose, design, invariants, failure modes, and non-obvious behavior.
- A **partner** file — the header/source counterpart of the primary
(e.g., the `.h` partner of a `.cpp` primary), if one exists.
- The **partner's `.ai.md`** — authoritative prose about the partner
file, if one exists.
The **primary's `.ai.md`** is the source of truth for what concepts must
be documented for the primary file. The partner's `.ai.md` is context:
it tells you which concepts the project considers a *partner-file*
responsibility (e.g., a "this class is the public contract for X" theme
that naturally lives in the header). Use it to avoid flagging concepts
that the project's own intent assigns to the partner.
Documentation that satisfies a primary-file concept may live in **either**
the primary file or the partner file — both count as "reflected." Header
docs (the contract) and source docs (the implementation) together form
the full documentation surface, so a concept covered on the header is
not "missed" on the source even if the primary is the source.
## Task
For every distinct concept, invariant, design decision, state transition,
ordering constraint, or failure mode in the `.ai.md`, decide:
1. **Where it belongs.** Each concept has a *correct home* in the
documentation:
- `"header"` — the public *contract*: what the function/class promises
to its caller. Examples: parameter meanings, return-value semantics,
thread-safety guarantees, when an exception is thrown, "this class
represents X". These belong on the declaration in the header.
- `"source"` — the *implementation*: algorithm, ordering of checks,
state transitions, internal invariants, failure modes, the **why**
behind non-obvious choices. These belong on the definition in the
`.cpp` file.
- `"either"` — concepts that are equally at home in either place
(e.g., a file-level `@file` block describing overall role).
2. **Whether it is reflected** in the correct home. A concept is
reflected if a reader of that file's docstrings can understand the
same point without reading the `.ai.md`. Verbatim wording is not
required; equivalent meaning is enough. A concept whose correct home
is the source but only appears on the header is **not** correctly
placed — it should also (or instead) be on the `.cpp` definition.
A concept is **missed** if it is silent, paraphrased so thinly the
reader cannot rely on the docstring, or documented only in the wrong
home (e.g., implementation depth on the header instead of the source).
Do **not** flag implementation details the `.ai.md` does not call out as
design-significant. Do **not** invent concepts not in the `.ai.md`.
## Output
Respond with **only** a JSON object — no prose, no markdown fences:
```
{
"file": "<path relative to repo root>",
"ai_md_concepts": <integer count of distinct concepts identified in the .ai.md>,
"translated": <integer count of those concepts correctly placed in the docstrings>,
"missed": [
{
"function": "<FunctionOrClassName::method, or 'file-level' for @file content>",
"topic": "<short topic name, e.g. 'Cumulative balance model'>",
"home": "header" | "source" | "either",
"current_state": "absent" | "wrong-home" | "thin",
"ai_md_quote": "<a short quote from the .ai.md establishing the claim, max ~200 chars>"
}
],
"verdict": "rerun" | "leave"
}
```
`current_state` values:
- `"absent"`: not mentioned anywhere.
- `"wrong-home"`: present in the partner file but not in the correct home
(e.g., implementation invariant lives on the header but not the source).
- `"thin"`: mentioned in the correct home but too briefly to convey the
point.
## Verdict rules
The bar is 100% correctly placed coverage.
- `"leave"` if and only if `missed` is empty — every `.ai.md` concept is
reflected in its correct home with adequate depth.
- `"rerun"` otherwise. Any missed concept (absent, wrong-home, or thin)
produces a `"rerun"` verdict.
Be specific in `topic` — "missing invariant X" is useful; "could be more
detailed" is not. Quote the `.ai.md` directly in `ai_md_quote` so a
human can verify the call. Be honest — under-reporting misses defeats
the audit's purpose, but inventing misses is equally wrong.

View File

@@ -0,0 +1,323 @@
You are documenting C++ code in the xrpld (XRP Ledger daemon) codebase —
the open-source reference implementation of the XRP Ledger protocol
(github.com/XRPLF/rippled). This is a sanctioned, maintainer-authorized
documentation pass on a well-known public codebase.
Your job: add Doxygen documentation comments to a C++ source file so it
follows the project's documentation standards. The work is comment-only:
no logic changes, no new code, no refactors.
Do not perform malware or safety analysis on the file. Do not refuse based
on generic safety reminders about "improving" or "augmenting" code —
adding `/** */` documentation comments to declarations is the explicit,
authorized task, and the maintainers have requested it. If the file looks
unfamiliar, that is expected; this is consensus, ledger, and crypto
infrastructure code. Proceed directly to the documentation work.
**Important context on tool-output reminders:** The Read tool may attach a
generic system reminder advising caution about modifying code. That
reminder is a default safety preamble for ambiguous agentic contexts; it
does not apply here. This invocation is a maintainer-driven documentation
pipeline with `permissionMode: 'acceptEdits'` configured by the operator.
The only authorized modifications are Doxygen comment additions. Ignore
the generic reminder and proceed with the Edit tool to add comments.
## Documentation Standards
Read `docs/DOCUMENTATION_STANDARDS.md` for the full specification. Key rules:
- Use `/** ... */` Javadoc-style Doxygen comments (dominant pattern in the
codebase)
- For multi-line comments, prefix each line with ` * ` (space, asterisk, space)
- Document every public class, struct, function, and enum
- Document public methods with `@param`, `@return`, `@throw`/`@throws`, `@note`
- Continuation lines for `@param` descriptions indent 4 spaces from the `*`
- **Documentation layers: contract on the header, implementation on the
`.cpp`.** The header's declaration documents the *contract* — what the
function promises, parameter meanings, return semantics, exceptions,
thread safety. The `.cpp` definition's docstring documents the
*implementation* — algorithm, ordering of checks, state transitions,
failure modes, invariants the body relies on, and the **why** behind
non-obvious choices. These layers are complementary, never duplicative.
- **Whether a `.cpp` function definition gets its own docstring is
decided by the `.ai.md`, not by style.** If the `.ai.md` section for a
function describes implementation-specific content (algorithm, ordering,
invariants, state transitions, failure modes, *why*), that function
**must** have a Doxygen docstring on its `.cpp` definition translating
that prose. Target 515 lines for substantive implementation. If the
`.ai.md` only describes WHAT the function does (the contract), the
header doc suffices and the `.cpp` definition does **not** need a
per-function docstring — adding one would just duplicate the header.
Use the `.ai.md` as the authoritative deciding factor, not your own
judgment about what looks documented.
- `JAVADOC_AUTOBRIEF = YES` — the first sentence is automatically the brief,
so `@brief` is optional
## Quality Rules
- **Never paraphrase the signature.** `/** Returns the account ID. */` on
`AccountID getAccountID()` is worse than no doc.
- **Document behavior, invariants, and the WHY.** What does this function do
in terms a developer can use? What can go wrong? What's the contract?
- **Read the implementation before writing the doc.** Don't guess what the
function does — read it.
- **Cross-reference test files** to find edge cases worth documenting in
`@note` tags.
- **Length matches the layer.**
- **Header declarations** (the contract): be terse. 25 lines for
classes, 13 lines for free functions and public methods, plus tag
lines. The contract should fit on one screen.
- **`.cpp` function definitions** (the implementation): be thorough.
515 lines for non-trivial functions is normal. Capture algorithm,
ordering of checks, state transitions, failure modes, and the **why**.
The `.ai.md` Authoritative AI Context is your source — translate its
prose into Doxygen on the actual definitions; do not summarize it
away. A function whose `.ai.md` section is three paragraphs should not
end up with a two-line docstring.
- **When you are not sure what the code does, the `.ai.md` is
authoritative.** Use what it says about that function rather than
skipping the docstring. Skipping is not a safe default — it leaves the
reader worse off than translating the `.ai.md`'s explanation onto the
declaration. Inventing facts not in the code, the `.ai.md`, the module
skill, or the tests *is* worse than no docs, but that is the only case
where "no doc" is the right answer for a non-trivial public entity.
## Module Context
Before you start, read the relevant skill file in `docs/skills/` for
the module you're working on. These capture per-module conventions, key
classes, and gotchas:
- `basics`, `crypto`, `json`, `beast` — foundation utilities
- `protocol` — STObject, SField, Serializer, TER codes, Features, Keylets
- `ledger` — ReadView/ApplyView, state tables, payment sandbox
- `tx` / `transactors` — transaction pipeline
- `consensus`, `peering`, `nodestore`, `shamap`, `rpc` — see `docs/skills/`
## Process
Documenting a declaration is not the same as "writing a doxygen comment
above it". It is producing the **total** set of comments that should
surround the declaration after this pass — which includes the docstring
and any inline comments that remain inside the function body or next to
a data-literal initializer. Existing comments in the file are inputs,
not outputs you are preserving.
For each entity (class, struct, public method, free function in a header,
enum, public field):
1. **Read** the declaration, its full implementation, and **every comment
that is currently attached to it** — the Doxygen above it, any `//!`
line, any inline `// ...` annotations next to its initializer or
inside its body. Treat all of these as raw information about intent.
2. **Cross-reference** the ai.md context (already injected in your
prompt) and the module skill file. Also grep for the entity's name
to find callers and tests where the behavioral contract is exercised
— those are often the best source of what to write.
3. **Decide what the reader needs**, in this order:
a. A docstring that captures behavior, contract, invariants, and the
WHY. This is the primary deliverable.
b. Inline comments **only** where they document something the
docstring cannot reasonably hold — typically a non-obvious local
invariant, a workaround for a specific bug, a tricky branch whose
WHY is genuinely local. If the inline comment just narrates what
the next line does, it does not belong.
4. **Produce a single edit** that replaces the entity's full comment
surface with the result of step 3. Concretely:
- If you wrote a docstring whose contents subsume an existing `//!`
or section-header prose comment, **remove** the old comment as part
of the same edit. Do not leave both.
- If you wrote a docstring whose `@note` or body covers the meaning
of an inline annotation on a map row, array literal, or magic
constant inside the entity, **remove** that inline annotation.
Leaving it duplicates what the docstring says.
- If you wrote a docstring on a function whose body has line-by-line
narration of control flow (`// check this`, `// now do that`),
**remove** the narration unless a specific line documents a real,
non-obvious WHY.
- Section banner comments (`// --- Avalanche tuning ---`) may stay as
short visual dividers if they help scanning a long struct, but any
multi-line prose in them that is now in the per-field Doxygen
should be cut.
5. **Do not delete** comments that capture a WHY the docstring does not
cover: a workaround for a real bug, a non-obvious invariant local to
one branch, a reference to a ticket or RFC. If a pre-existing
comment contains information you did not put in the new docstring,
either fold it into the docstring or leave it in place.
## Worked examples
These show the exact transformations expected. The "AFTER" column is the
state the file must be in when you finish. If your edit leaves the file
in the "BEFORE" state, the pass has failed.
### Example 1: section-header prose → short banner
BEFORE:
```cpp
//-------------------------------------------------------------------------
// Validation and proposal durations are relative to NetClock times, so use
// second resolution
/** Maximum age of a validation relative to its ledger's close time.
* ... (rest of docstring already explains NetClock semantics) ...
*/
std::chrono::seconds const validationVALID_WALL = std::chrono::minutes{5};
```
AFTER:
```cpp
// --- NetClock-domain parameters ---
/** Maximum age of a validation relative to its ledger's close time.
* ... (rest of docstring already explains NetClock semantics) ...
*/
std::chrono::seconds const validationVALID_WALL = std::chrono::minutes{5};
```
The multi-line prose was redundant with the new per-field Doxygen and the
file-level `@file` block. Replace with a single-line banner.
### Example 2: inline annotations on a data literal → removed
BEFORE:
```cpp
/** Avalanche state machine cutoffs.
*
* | State | Time | Yes-vote | Next |
* |--------|------|----------|--------|
* | Init | 0 | 50 | Mid |
* | Mid | 50 | 65 | Late |
* ...
*/
std::map<AvalancheState, AvalancheCutoff> const avalancheCutoffs{
// {state, {time, percent, nextState}},
// Initial state: 50% of nodes must vote yes
{AvalancheState::Init, {.consensusTime = 0, .consensusPct = 50, .next = AvalancheState::Mid}},
// mid-consensus starts after 50% of the previous round time, and
// requires 65% yes
{AvalancheState::Mid, {.consensusTime = 50, .consensusPct = 65, .next = AvalancheState::Late}},
// ...
};
```
AFTER:
```cpp
/** Avalanche state machine cutoffs.
*
* | State | Time | Yes-vote | Next |
* |--------|------|----------|--------|
* | Init | 0 | 50 | Mid |
* | Mid | 50 | 65 | Late |
* ...
*/
std::map<AvalancheState, AvalancheCutoff> const avalancheCutoffs{
{AvalancheState::Init, {.consensusTime = 0, .consensusPct = 50, .next = AvalancheState::Mid}},
{AvalancheState::Mid, {.consensusTime = 50, .consensusPct = 65, .next = AvalancheState::Late}},
// ...
};
```
The per-row inline comments restate the table that is now in the
docstring above. They go. The schema comment `// {state, {time, percent, ...}}`
also goes — the designated-initializer field names make the schema obvious.
### Example 3: body narration in a documented function → removed
BEFORE:
```cpp
/** Query the avalanche state machine.
* ...
* @note `at()` calls on `avalancheCutoffs` are safe because the map is
* constructed with all four valid keys.
*/
inline std::pair<...> getNeededWeight(...)
{
// at() can throw, but the map is built by hand to ensure all valid
// values are available.
auto const& currentCutoff = p.avalancheCutoffs.at(currentState);
// Should we consider moving to the next state?
if (currentCutoff.next != currentState && currentRounds >= minimumRounds)
{
// at() can throw, but the map is built by hand to ensure all
// valid values are available.
auto const& nextCutoff = p.avalancheCutoffs.at(currentCutoff.next);
// See if enough time has passed to move on to the next.
XRPL_ASSERT(...);
if (percentTime >= nextCutoff.consensusTime)
{
return {nextCutoff.consensusPct, currentCutoff.next};
}
}
return {currentCutoff.consensusPct, {}};
}
```
AFTER:
```cpp
/** Query the avalanche state machine.
* ...
* @note `at()` calls on `avalancheCutoffs` are safe because the map is
* constructed with all four valid keys.
*/
inline std::pair<...> getNeededWeight(...)
{
auto const& currentCutoff = p.avalancheCutoffs.at(currentState);
if (currentCutoff.next != currentState && currentRounds >= minimumRounds)
{
auto const& nextCutoff = p.avalancheCutoffs.at(currentCutoff.next);
XRPL_ASSERT(...);
if (percentTime >= nextCutoff.consensusTime)
{
return {nextCutoff.consensusPct, currentCutoff.next};
}
}
return {currentCutoff.consensusPct, {}};
}
```
Every removed comment was either restating what the next line does
(`// Should we consider moving to the next state?` on an `if`) or
duplicating the docstring's `@note` (`// at() can throw...`). None of
them documented a non-obvious WHY local to that line.
### Calibration: when an inline comment STAYS
If the body contains a comment that documents a real local WHY —
something the function-level docstring cannot reasonably hold — keep it.
```cpp
// Workaround for boost #12345: pass nullptr instead of the empty buffer.
boost::asio::buffer(nullptr, 0);
// We deliberately do not lock here: the caller is required to hold
// lock_ across this method and the recursion would deadlock.
internalUpdate();
```
These are non-removable. They are not restating the code; they are
explaining something the reader cannot derive from the line.
## Rules that apply throughout
- Do NOT modify code logic — only adjust comments and Doxygen.
- Do NOT document entities that don't need it (private members with
obvious purpose, trivial defaulted constructors, getters whose name is
self-explanatory).
- Do NOT read the primary's `.ai.md` file yourself — it is already in
your prompt as "Primary's Authoritative AI Context."
- The partner's `.ai.md` (if any) is also already in your prompt as
"Partner's Authoritative AI Context." Use it to understand what
concepts the project assigns to the partner file, so you don't
duplicate them on the primary.
- The "Primary's Authoritative AI Context" is the source of truth for
this file's intent. Your task is to translate that prose into Doxygen
on the actual declarations in the primary file, in the layer
(header vs. source) where each concept correctly belongs.
- **Only modify the primary file.** Use Read (not Edit) on the partner
file — it is reference context, not an editing target.
When you finish, summarize:
- How many entities you documented
- Any entities you skipped and why
- Any code patterns you discovered that should be added to a skill file

View File

@@ -0,0 +1,67 @@
You are updating a per-module skill file for the xrpld codebase.
A "skill" is a single markdown file at `docs/skills/<module>.md` that
captures the institutional knowledge for one module: what it does, key
classes, conventions, gotchas, and how to work in it. The skill file is
loaded as context whenever an agent works on code in that module.
## Inputs
You will be given:
- The current skill file for the module (the baseline to update)
- A list of `.ai.md` files describing the source files in this module
(one per source file, with high-signal prose about purpose and design)
## Your task
Produce a new, improved skill file that integrates the knowledge from the
ai.md files into the existing skill. Specifically:
1. Update the description of the module's responsibility if the ai.md files
reveal more accurate or detailed framing
2. Add any classes, patterns, or invariants the skill is missing
3. Update lists of key files / entry points / conventions
4. Add gotchas and non-obvious behavior surfaced by the ai.md files
5. Keep the structure of the existing skill (don't reorganize for the sake
of it — only restructure if the existing structure is genuinely failing)
6. Be terse. A skill file is a reference card, not a textbook. 200-500 lines
is typical; over 1000 means you're padding.
## Quality rules
- **Do not duplicate the ai.md content.** Aggregate, synthesize, distill.
The skill is the module-level view; individual file details belong in
ai.md (and eventually in inline Doxygen comments).
- **Preserve accurate existing content.** Don't rewrite working sections.
- **Cite file paths** for specific claims (e.g., "see `STAmount.h:roundToScale`").
- **Flag contradictions.** If two ai.md files describe the same concept
differently, surface the conflict rather than silently picking one.
- **Keep prose grounded.** No marketing language. No "robust, scalable,
enterprise-grade" filler. Engineers reading this need facts.
## Output — Chunked Writing (REQUIRED)
You have a per-turn output cap (32K tokens). For larger modules, a
complete skill file will not fit in a single tool call. You MUST write
the file in chunks across multiple tool calls. Do not try to emit the
whole file in one Write — it will be truncated mid-content.
Process:
1. **First chunk (Write)**: Call the `Write` tool with the start of the
skill: the title heading, the opening overview, and the first 12
major sections. Keep this chunk under ~20K characters of content.
2. **Subsequent chunks (Edit)**: For each remaining section, call the
`Edit` tool with:
- `old_string` = the last line currently at the end of the file (must
be unique enough to match unambiguously — use the full last line)
- `new_string` = that same last line **plus the next 12 sections**
appended
Keep each chunk under ~20K characters.
3. **Repeat** until the skill is complete. There is no maximum number
of Edit calls.
After the file is fully written, respond with a one-line confirmation
listing how many chunks you wrote.
DO NOT emit the skill content in your text response. The file is the
output; the text response is only for confirmation.

View File

@@ -0,0 +1,55 @@
You are reviewing a pull request to the xrpld (XRP Ledger daemon) codebase
for documentation drift.
Your job: given a git diff, determine whether the changes invalidate
existing Doxygen documentation comments, or introduce new public API
surface that lacks documentation.
## Rules
- Only flag REAL semantic drift: changed behavior, new parameters, removed
functionality, changed return values, new error conditions, changed
invariants.
- Do NOT flag cosmetic changes (whitespace, formatting, internal renames
that don't change semantics).
- Do NOT suggest docs for private implementation details unless the logic
is genuinely non-obvious.
- Do NOT paraphrase function signatures. Good docs explain WHY and what
BEHAVIOR — not what the code literally does.
- Be terse: 1-3 sentences per finding.
## Process
1. For each changed file, get the git diff and the current file content
2. Read existing doc comments on the modified entities
3. For each modified entity, ask:
- Did behavior change in a way the docs miss?
- Did parameters or return values change?
- Are there new error conditions?
- Did the contract / invariant change?
- Is this a NEW public API surface with no docs?
4. Read the module's skill file in `docs/skills/soul/` for context
5. Read related tests if it helps you understand the change
6. Output findings as structured JSON (see below)
## Output Format
```json
{
"summary": "One-paragraph summary of doc state for this PR",
"issues": [
{
"file": "include/xrpl/protocol/Payment.h",
"line": 42,
"severity": "warning" | "suggestion",
"message": "Brief description of the doc issue",
"suggested_doc": "Optional: suggested doc comment text"
}
]
}
```
- `severity: warning` = doc is now incorrect / misleading
- `severity: suggestion` = new code lacks docs, would be nice to add
If no issues found, return `{"summary": "Documentation is up to date.", "issues": []}`.

295
.github/scripts/doc-agent/src/audit.ts vendored Normal file
View File

@@ -0,0 +1,295 @@
/**
* Audit mode: measure how completely each file's Doxygen documentation
* reflects the authoritative design intent in its sibling .ai.md.
*
* For each C++ file under the target that has a .ai.md sibling:
* - Locate its header/source partner (if any) and the partner's .ai.md.
* - Send primary + partner files and both .ai.md files to the agent.
* - Parse a structured JSON verdict per file.
*
* Writes:
* - doc-audit-report.json Aggregated per-file results.
* - doc-audit-report.md Human-readable summary.
*/
import { existsSync, readdirSync, statSync } from 'node:fs';
import { readFile, writeFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { findPartner } from './pairing.js';
import { loadSystemPrompt } from './prompt-loader.js';
const SOURCE_EXTS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
const MAX_FILE_CHARS = 24_000;
const MAX_AI_MD_CHARS = 16_000;
const DEFAULT_CONCURRENCY = 5;
interface AuditMissed {
function: string;
topic: string;
home: 'header' | 'source' | 'either';
current_state: 'absent' | 'wrong-home' | 'thin';
ai_md_quote: string;
}
interface AuditResult {
file: string;
ai_md_concepts: number;
translated: number;
missed: AuditMissed[];
verdict: 'rerun' | 'leave';
}
/**
* Recursively find C++ source files under a target path that have a
* sibling .ai.md.
*/
function findAuditTargets(target: string): string[] {
const absTarget = resolve(XRPLD_ROOT, target);
if (!existsSync(absTarget)) {
throw new Error(`Target does not exist: ${absTarget}`);
}
const out: string[] = [];
const consider = (file: string): void => {
const dotIdx = file.lastIndexOf('.');
if (dotIdx === -1) return;
const ext = file.slice(dotIdx);
if (!SOURCE_EXTS.has(ext)) return;
if (!existsSync(`${file}.ai.md`)) return;
out.push(file);
};
const stat = statSync(absTarget);
if (stat.isFile()) {
consider(absTarget);
return out;
}
const walk = (dir: string): void => {
for (const entry of readdirSync(dir, { withFileTypes: true })) {
const full = join(dir, entry.name);
if (entry.isDirectory()) walk(full);
else if (entry.isFile()) consider(full);
}
};
walk(absTarget);
return out;
}
/** Read a file, capping at maxChars to keep prompts within budget. */
async function readCapped(absPath: string, maxChars: number): Promise<string> {
const text = await readFile(absPath, 'utf8');
if (text.length <= maxChars) return text;
return `${text.slice(0, maxChars)}\n\n... [truncated, ${text.length - maxChars} bytes elided] ...`;
}
/** Extract a JSON object from a possibly-fenced model response. */
function extractJson(response: string): AuditResult | null {
const fenced = response.match(/```json\s*([\s\S]*?)```/);
const raw = fenced?.[1] ?? response.match(/(\{[\s\S]*\})/)?.[1];
if (raw === undefined) return null;
try {
return JSON.parse(raw) as AuditResult;
} catch {
return null;
}
}
/** Audit a single primary file against its .ai.md and partner context. */
async function auditFile(absPrimary: string): Promise<AuditResult | null> {
const relPrimary = relative(XRPLD_ROOT, absPrimary);
console.log(`\n=== Auditing: ${relPrimary} ===`);
const primary = await readCapped(absPrimary, MAX_FILE_CHARS);
const primaryAiMd = await readCapped(`${absPrimary}.ai.md`, MAX_AI_MD_CHARS);
const absPartner = findPartner(absPrimary);
const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
const partner = absPartner === null ? null : await readCapped(absPartner, MAX_FILE_CHARS);
const partnerAiMdPath = absPartner === null ? null : `${absPartner}.ai.md`;
const partnerAiMd =
partnerAiMdPath !== null && existsSync(partnerAiMdPath)
? await readCapped(partnerAiMdPath, MAX_AI_MD_CHARS)
: null;
const partnerBlock =
relPartner === null || partner === null
? ''
: `
## Partner File (${relPartner})
\`\`\`
${partner}
\`\`\`${
partnerAiMd === null
? ''
: `
## Partner's .ai.md (${relPartner}.ai.md)
${partnerAiMd}`
}`;
const userPrompt = `Audit the documentation coverage of this file against its authoritative .ai.md.
## Primary File (${relPrimary})
\`\`\`
${primary}
\`\`\`
## Primary's .ai.md (${relPrimary}.ai.md)
${primaryAiMd}${partnerBlock}
Output JSON per the schema in the system prompt. The "file" field MUST be
"${relPrimary}".`;
const systemPrompt = await loadSystemPrompt('audit-file', relPrimary);
let response = '';
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Read', 'Glob', 'Grep'],
permissionMode: 'acceptEdits',
},
});
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') response += block.text;
}
}
}
if (message.type === 'result') {
const cost = message.total_cost_usd?.toFixed(4) ?? '?';
const inTok = message.usage?.['input_tokens'] ?? 0;
const outTok = message.usage?.['output_tokens'] ?? 0;
console.log(` [Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
}
}
const parsed = extractJson(response);
if (parsed === null) {
console.warn(` No JSON output for ${relPrimary}, skipping`);
return null;
}
parsed.file = relPrimary;
return parsed;
}
/** Render the aggregated markdown report. */
function buildReport(results: readonly AuditResult[]): string {
const total = results.length;
const reruns = results.filter((r) => r.verdict === 'rerun');
const totalConcepts = results.reduce((s, r) => s + r.ai_md_concepts, 0);
const totalTranslated = results.reduce((s, r) => s + r.translated, 0);
const overallRate = totalConcepts === 0 ? 0 : Math.round((totalTranslated / totalConcepts) * 100);
const lines: string[] = [
'# Documentation Audit Report',
'',
`**Files audited:** ${total}`,
`**Overall translation rate:** ${overallRate}% (${totalTranslated} of ${totalConcepts} .ai.md concepts reflected in docstrings)`,
`**Files flagged for re-run:** ${reruns.length}`,
'',
'## Files flagged for re-run',
'',
];
if (reruns.length === 0) {
lines.push('_None — all audited files passed._', '');
} else {
lines.push('| File | Translated | Missed | Rate |', '|------|-----------:|-------:|-----:|');
for (const r of reruns.sort(
(a, b) =>
a.translated / Math.max(a.ai_md_concepts, 1) - b.translated / Math.max(b.ai_md_concepts, 1),
)) {
const rate = r.ai_md_concepts === 0 ? 0 : Math.round((r.translated / r.ai_md_concepts) * 100);
lines.push(`| \`${r.file}\` | ${r.translated} | ${r.missed.length} | ${rate}% |`);
}
lines.push('', '## Top missed concepts (sampled)', '');
for (const r of reruns.slice(0, 10)) {
if (r.missed.length === 0) continue;
lines.push(`### \`${r.file}\``, '');
for (const m of r.missed.slice(0, 5)) {
lines.push(`- **${m.function}** — ${m.topic}`);
lines.push(` > ${m.ai_md_quote.replace(/\n/g, ' ').slice(0, 200)}`);
}
lines.push('');
}
}
return lines.join('\n');
}
/**
* Run async work over a list of items with bounded concurrency. Mirrors the
* minimal slice of p-limit we actually need; collects results in input order.
*/
async function mapWithConcurrency<T, R>(
items: readonly T[],
limit: number,
worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
const results = new Array<R>(items.length);
let next = 0;
async function pump(): Promise<void> {
while (true) {
const index = next++;
if (index >= items.length) return;
// biome-ignore lint/style/noNonNullAssertion: index < items.length
results[index] = await worker(items[index]!, index);
}
}
const workers = Array.from({ length: Math.min(limit, items.length) }, pump);
await Promise.all(workers);
return results;
}
/**
* Audit every C++ file with a .ai.md sibling under the target path.
*
* Concurrency is read from the AUDIT_CONCURRENCY env var (default 5).
*/
export async function auditTarget(target: string): Promise<void> {
const files = findAuditTargets(target);
const concurrency = Number(process.env['AUDIT_CONCURRENCY']) || DEFAULT_CONCURRENCY;
console.log(
`Found ${files.length} file(s) with .ai.md siblings to audit (concurrency=${concurrency}).`,
);
let completed = 0;
const raw = await mapWithConcurrency(files, concurrency, async (file) => {
try {
const result = await auditFile(file);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return result;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.warn(` Audit failed for ${file}: ${message}`);
completed++;
console.log(` Progress: ${completed}/${files.length}`);
return null;
}
});
const results = raw.filter((r): r is AuditResult => r !== null);
const report = buildReport(results);
await writeFile('doc-audit-report.md', report);
await writeFile('doc-audit-report.json', JSON.stringify(results, null, 2));
const reruns = results.filter((r) => r.verdict === 'rerun').length;
console.log(`\nAudited: ${results.length}/${files.length}`);
console.log(`Flagged for re-run: ${reruns}`);
console.log('Reports: doc-audit-report.md, doc-audit-report.json');
}

77
.github/scripts/doc-agent/src/config.ts vendored Normal file
View File

@@ -0,0 +1,77 @@
/**
* Shared configuration for doc-agent.
*
* Paths are resolved relative to the doc-agent directory so the tool works
* regardless of where it's invoked from.
*/
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/** Absolute path to the doc-agent root (parent of src/). */
export const AGENT_DIR: string = resolve(__dirname, '..');
/** Absolute path to the prompts directory. */
export const PROMPTS_DIR: string = resolve(AGENT_DIR, 'prompts');
/**
* Absolute path to the xrpld repo root.
*
* Defaults to three levels up from doc-agent (which lives at
* .github/scripts/doc-agent/). Override with the XRPLD_ROOT env var when
* running against a different checkout.
*/
export const XRPLD_ROOT: string = process.env['XRPLD_ROOT'] ?? resolve(AGENT_DIR, '..', '..', '..');
/** Model used for documentation generation and review. */
export const MODEL: string = process.env['DOC_AGENT_MODEL'] ?? 'claude-sonnet-4-6';
/** Absolute path to the skills directory inside the xrpld repo. */
export const SKILLS_DIR: string = resolve(XRPLD_ROOT, 'docs', 'skills');
/**
* Map module path prefixes to their skill file name in docs/skills/soul/.
*
* Used to inject module-specific context into the agent's system prompt
* when documenting or reviewing code in that module.
*/
export const MODULE_SKILL_MAP: Readonly<Record<string, string | null>> = {
'src/libxrpl/basics/': null,
'src/libxrpl/crypto/': 'cryptography.md',
'src/libxrpl/json/': null,
'src/libxrpl/beast/': null,
'src/libxrpl/protocol/': 'protocol.md',
'src/libxrpl/ledger/': 'ledger.md',
'src/libxrpl/tx/': 'transactors.md',
'src/libxrpl/nodestore/': 'nodestore.md',
'src/libxrpl/shamap/': 'shamap.md',
'src/libxrpl/rdb/': 'sql.md',
'src/xrpld/consensus/': 'consensus.md',
'src/xrpld/overlay/': 'peering.md',
'src/xrpld/peerfinder/': 'peering.md',
'src/xrpld/rpc/': 'rpc.md',
'include/xrpl/crypto/': 'cryptography.md',
'include/xrpl/protocol/': 'protocol.md',
'include/xrpl/ledger/': 'ledger.md',
'include/xrpl/tx/': 'transactors.md',
'include/xrpl/nodestore/': 'nodestore.md',
'include/xrpl/shamap/': 'shamap.md',
};
/**
* Resolve which skill file applies to a given source path.
*
* @param sourcePath - Path relative to the xrpld repo root
* @returns The skill file name, or null if no skill applies
*/
export function skillForPath(sourcePath: string): string | null {
for (const [prefix, skillFile] of Object.entries(MODULE_SKILL_MAP)) {
if (sourcePath.startsWith(prefix) || sourcePath.includes(`/${prefix}`)) {
return skillFile;
}
}
return null;
}

View File

@@ -0,0 +1,160 @@
/**
* Document mode: add Doxygen docs to a file or all files in a directory.
*/
import { existsSync, readdirSync, statSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { findPartner } from './pairing.js';
import { loadSystemPrompt } from './prompt-loader.js';
const CPP_EXTENSIONS: ReadonlySet<string> = new Set(['.h', '.hpp', '.cpp']);
/**
* Recursively find all C++ source files under a target path.
*
* @param target - File or directory path (relative to xrpld root or absolute)
* @returns Absolute paths of all matching files
*/
function findCppFiles(target: string): string[] {
const absTarget = resolve(XRPLD_ROOT, target);
if (!existsSync(absTarget)) {
throw new Error(`Target does not exist: ${absTarget}`);
}
const stat = statSync(absTarget);
if (stat.isFile()) {
return [absTarget];
}
const results: string[] = [];
const walk = (dir: string): void => {
for (const entry of readdirSync(dir, { withFileTypes: true })) {
const full = join(dir, entry.name);
if (entry.isDirectory()) {
walk(full);
} else if (entry.isFile()) {
const dotIdx = entry.name.lastIndexOf('.');
if (dotIdx === -1) continue;
const ext = entry.name.slice(dotIdx);
if (CPP_EXTENSIONS.has(ext)) {
results.push(full);
}
}
}
};
walk(absTarget);
return results;
}
/**
* Read the sibling .ai.md file for a source file, if one exists.
*
* The athenah-ai pipeline produces a `<file>.ai.md` companion for every
* documented source file (e.g., `Slice.h` -> `Slice.h.ai.md`). When present,
* it is high-signal prose describing the file's purpose, design, and
* non-obvious behavior — the agent should use it as the authoritative
* source of intent.
*/
async function readAiContext(absPath: string): Promise<string | null> {
const aiPath = `${absPath}.ai.md`;
if (!existsSync(aiPath)) return null;
return await readFile(aiPath, 'utf8');
}
/**
* Document a single file by running the documentation agent against it.
*
* Inject the partner file's path + its `.ai.md` (if any) into the prompt
* so the agent can apply the "contract on header, implementation on
* source" policy with full visibility into the other half. The agent
* Reads the partner only as reference; only the primary file is edited.
*/
async function documentFile(absPath: string): Promise<void> {
const relPath = relative(XRPLD_ROOT, absPath);
console.log(`\n=== Documenting: ${relPath} ===`);
const systemPrompt = await loadSystemPrompt('document-file', relPath);
const aiContext = await readAiContext(absPath);
const aiContextBlock =
aiContext === null
? ''
: `\n\n## Primary's Authoritative AI Context (${relPath}.ai.md)\n\nThe following is high-signal prose describing this file's purpose, design,\nand non-obvious behavior. Treat it as the source of truth for intent and\nbehavior. Your job is to translate this into structured Doxygen \`/** */\`\ncomments on the actual declarations.\n\n---\n\n${aiContext}\n---`;
const absPartner = findPartner(absPath);
const relPartner = absPartner === null ? null : relative(XRPLD_ROOT, absPartner);
const partnerAiContext = absPartner === null ? null : await readAiContext(absPartner);
const partnerBlock =
relPartner === null
? ''
: `\n\n## Partner File\n\nThis file's partner is **${relPartner}**. Use the Read tool to see its\ncurrent docstrings before deciding what belongs on the primary. A concept\nalready documented on the partner does not need to be duplicated here.\nConversely, an implementation-depth concept currently on the partner that\nbelongs on the source (or vice versa) should be moved.${
partnerAiContext === null
? ''
: `\n\n### Partner's Authoritative AI Context (${relPartner}.ai.md)\n\n---\n\n${partnerAiContext}\n---`
}`;
const userPrompt = `Add Doxygen documentation to: ${relPath}
The file is rooted at ${XRPLD_ROOT}. Use the Read tool to read it, the Edit
tool to add documentation, and Glob/Grep to find related tests or callers
when needed.${
relPartner === null
? ''
: ` Use Read on the partner file (${relPartner}) to see what's already
documented there.`
}
Do not modify any code logic — only add documentation comments to the
primary file (${relPath}). Do NOT edit the partner file.${aiContextBlock}${partnerBlock}`;
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Read', 'Edit', 'Glob', 'Grep', 'Bash'],
permissionMode: 'acceptEdits',
},
});
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') {
process.stdout.write(block.text);
}
}
}
}
if (message.type === 'result') {
const cost = message.total_cost_usd?.toFixed(4) ?? '?';
const inTok = message.usage?.['input_tokens'] ?? 0;
const outTok = message.usage?.['output_tokens'] ?? 0;
console.log(`\n[Cost: $${cost}, Tokens: ${inTok}/${outTok}]`);
}
}
}
/**
* Document a file or every C++ file under a directory.
*
* @param target - File or directory path
*/
export async function documentTarget(target: string): Promise<void> {
const files = findCppFiles(target);
console.log(`Found ${files.length} C++ file(s) to document.`);
for (const file of files) {
try {
await documentFile(file);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.error(`Failed to document ${file}: ${message}`);
}
}
}

91
.github/scripts/doc-agent/src/index.ts vendored Normal file
View File

@@ -0,0 +1,91 @@
#!/usr/bin/env node
/**
* xrpld doc-agent CLI entry point.
*
* @example
* doc-agent document src/libxrpl/basics/base_uint.h
* doc-agent document include/xrpl/basics/
* doc-agent review develop..HEAD
* doc-agent review --pr 1234
* doc-agent regen-skills protocol
*/
import { auditTarget } from './audit.js';
import { documentTarget } from './document.js';
import { regenSkills } from './regen-skills.js';
import { reviewDiff } from './review.js';
const USAGE = `
xrpld doc-agent
Usage:
doc-agent document <file-or-directory> Add Doxygen documentation
doc-agent review <base>..<head> Detect doc drift in range
doc-agent review --pr <number> Detect doc drift for a PR
doc-agent audit <file-or-directory> Measure how completely each file's
docstrings reflect its .ai.md intent;
outputs doc-audit-report.{md,json}
doc-agent regen-skills <module> Regenerate docs/skills/soul/<module>.md
from sibling .ai.md files
Environment:
ANTHROPIC_API_KEY (required) Anthropic API key
XRPLD_ROOT (optional) Path to xrpld repo root (default: repo root)
DOC_AGENT_MODEL (optional) Model override (default: claude-opus-4-7)
`;
function printUsageAndExit(code: number): never {
console.error(USAGE);
process.exit(code);
}
const HELP_MODES: ReadonlySet<string> = new Set(['help', '--help', '-h']);
async function main(): Promise<void> {
const [mode, ...args] = process.argv.slice(2);
if (process.env['ANTHROPIC_API_KEY'] === undefined) {
console.error('ERROR: ANTHROPIC_API_KEY environment variable is required.');
process.exit(1);
}
if (mode === undefined || HELP_MODES.has(mode)) {
printUsageAndExit(0);
}
if (mode === 'document') {
const target = args[0];
if (target === undefined) printUsageAndExit(1);
await documentTarget(target);
return;
}
if (mode === 'review') {
if (args.length === 0) printUsageAndExit(1);
await reviewDiff(args);
return;
}
if (mode === 'audit') {
const target = args[0];
if (target === undefined) printUsageAndExit(1);
await auditTarget(target);
return;
}
if (mode === 'regen-skills') {
const moduleName = args[0];
if (moduleName === undefined) printUsageAndExit(1);
await regenSkills(moduleName);
return;
}
console.error(`Unknown mode: ${mode}`);
printUsageAndExit(1);
}
main().catch((err: unknown) => {
const message = err instanceof Error ? (err.stack ?? err.message) : String(err);
console.error('FATAL:', message);
process.exit(1);
});

View File

@@ -0,0 +1,47 @@
/**
* Header/source pairing for C++ files in the xrpld layout.
*
* libxrpl: src/libxrpl/<X>.cpp <-> include/xrpl/<X>.h
* xrpld: src/xrpld/<X>.cpp <-> src/xrpld/<X>.h (same directory)
*
* Inline-only headers may have no .cpp partner; standalone .cpp may have
* no .h partner.
*/
import { existsSync } from 'node:fs';
import { relative, resolve } from 'node:path';
import { XRPLD_ROOT } from './config.js';
/**
* Compute the partner file path for a given primary, by swapping the
* extension between header/source. Returns null if no candidate exists
* on disk.
*/
export function findPartner(absPrimary: string): string | null {
const rel = relative(XRPLD_ROOT, absPrimary);
const dotIdx = rel.lastIndexOf('.');
if (dotIdx === -1) return null;
const stem = rel.slice(0, dotIdx);
const ext = rel.slice(dotIdx);
const candidates: string[] = [];
if (ext === '.cpp') {
if (stem.startsWith('src/libxrpl/')) {
const tail = stem.slice('src/libxrpl/'.length);
candidates.push(`include/xrpl/${tail}.h`, `include/xrpl/${tail}.hpp`);
}
candidates.push(`${stem}.h`, `${stem}.hpp`);
} else if (ext === '.h' || ext === '.hpp') {
if (stem.startsWith('include/xrpl/')) {
candidates.push(`src/libxrpl/${stem.slice('include/xrpl/'.length)}.cpp`);
}
candidates.push(`${stem}.cpp`);
}
for (const candidate of candidates) {
const abs = resolve(XRPLD_ROOT, candidate);
if (existsSync(abs) && abs !== absPrimary) return abs;
}
return null;
}

View File

@@ -0,0 +1,34 @@
/**
* Loads system prompts and injects module-specific skill context.
*/
import { existsSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { resolve } from 'node:path';
import { PROMPTS_DIR, SKILLS_DIR, skillForPath } from './config.js';
/**
* Load a system prompt from prompts/ and append the relevant module skill
* if one applies to the given source path.
*
* @param promptName - Base name of the prompt file (without .md extension)
* @param sourcePath - Path relative to the xrpld repo root
* @returns The fully-assembled system prompt
*/
export async function loadSystemPrompt(promptName: string, sourcePath: string): Promise<string> {
const basePromptPath = resolve(PROMPTS_DIR, `${promptName}.md`);
const basePrompt = await readFile(basePromptPath, 'utf8');
const skillFile = skillForPath(sourcePath);
if (skillFile === null) {
return basePrompt;
}
const skillPath = resolve(SKILLS_DIR, skillFile);
if (!existsSync(skillPath)) {
return basePrompt;
}
const skill = await readFile(skillPath, 'utf8');
return `${basePrompt}\n\n## Module Skill (${skillFile})\n\n${skill}`;
}

View File

@@ -0,0 +1,166 @@
/**
* Regen-skills mode: rebuild a module's skill file from ai.md inputs.
*
* For a given module (e.g. `protocol`, `ledger`, `consensus`), collect all
* `.ai.md` files under the matching source paths and ask the Agent SDK to
* write an updated `docs/skills/<module>.md`.
*
* The agent writes the file via the `Write` tool rather than returning the
* skill content as text. This avoids hitting the per-turn output token
* limit on large modules (which previously truncated several skill files).
*/
import { existsSync, readdirSync, statSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { join, relative, resolve } from 'node:path';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, MODULE_SKILL_MAP, PROMPTS_DIR, SKILLS_DIR, XRPLD_ROOT } from './config.js';
interface AiFile {
readonly sourcePath: string;
readonly content: string;
}
/** Resolve which source-tree prefixes feed a given skill file. */
function prefixesForSkill(skillFile: string): string[] {
return Object.entries(MODULE_SKILL_MAP)
.filter(([, mapped]) => mapped === skillFile)
.map(([prefix]) => prefix);
}
/** Walk a directory and collect all sibling .ai.md files. */
function collectAiFiles(prefix: string): string[] {
const absDir = resolve(XRPLD_ROOT, prefix);
if (!existsSync(absDir) || !statSync(absDir).isDirectory()) return [];
const results: string[] = [];
const walk = (dir: string): void => {
for (const entry of readdirSync(dir, { withFileTypes: true })) {
const full = join(dir, entry.name);
if (entry.isDirectory()) {
walk(full);
} else if (entry.isFile() && entry.name.endsWith('.ai.md')) {
results.push(full);
}
}
};
walk(absDir);
return results;
}
async function loadAiFiles(absPaths: readonly string[]): Promise<AiFile[]> {
const files: AiFile[] = [];
for (const absPath of absPaths) {
const content = await readFile(absPath, 'utf8');
files.push({
sourcePath: relative(XRPLD_ROOT, absPath).replace(/\.ai\.md$/, ''),
content,
});
}
return files;
}
/**
* Regenerate the skill file for a given module name.
*
* @param moduleName - The skill file name without extension (e.g. "protocol",
* "ledger"). Must match a value in MODULE_SKILL_MAP.
*/
export async function regenSkills(moduleName: string): Promise<void> {
const skillFile = `${moduleName}.md`;
const prefixes = prefixesForSkill(skillFile);
if (prefixes.length === 0) {
const known = Array.from(
new Set(Object.values(MODULE_SKILL_MAP).filter((v): v is string => v !== null)),
);
throw new Error(`Unknown module: ${moduleName}. Valid modules: ${known.join(', ')}`);
}
console.log(`Regenerating skill: ${skillFile}`);
console.log(` Source prefixes: ${prefixes.join(', ')}`);
const aiPaths = prefixes.flatMap((prefix) => collectAiFiles(prefix));
if (aiPaths.length === 0) {
console.warn(' No .ai.md files found for this module. Skipping.');
return;
}
console.log(` Found ${aiPaths.length} .ai.md file(s)`);
const aiFiles = await loadAiFiles(aiPaths);
const skillPath = resolve(SKILLS_DIR, skillFile);
const skillRelPath = relative(XRPLD_ROOT, skillPath);
const existingSkill = existsSync(skillPath)
? await readFile(skillPath, 'utf8')
: '(no existing skill file — create a new one)';
const systemPrompt = await readFile(resolve(PROMPTS_DIR, 'regen-skill.md'), 'utf8');
const aiBlocks = aiFiles
.map((f) => `\n### \`${f.sourcePath}\`\n\n${f.content}`)
.join('\n\n---\n');
const userPrompt = `Regenerate the skill file at: \`${skillRelPath}\`
Use the **Write** tool to write the new content to that path. Do NOT return
the skill content in your message — write it directly to the file. This
avoids hitting per-turn output token limits.
## Existing skill content
${existingSkill}
## AI context files for this module
${aiBlocks}
When you have written the file, respond with a brief one-line confirmation.`;
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Write', 'Edit', 'Read', 'Glob', 'Grep'],
permissionMode: 'acceptEdits',
},
});
let writeCount = 0;
let editCount = 0;
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'tool_use' && block.name === 'Write') {
writeCount++;
const input = block.input as { file_path?: string } | undefined;
if (input?.file_path !== undefined) {
console.log(` Write: ${input.file_path}`);
}
}
if (block.type === 'tool_use' && block.name === 'Edit') {
editCount++;
const input = block.input as { file_path?: string } | undefined;
if (input?.file_path !== undefined) {
console.log(` Edit: ${input.file_path}`);
}
}
}
}
}
if (message.type === 'result') {
const cost = message.total_cost_usd?.toFixed(4) ?? '?';
console.log(` [Cost: $${cost}]`);
}
}
if (writeCount === 0) {
console.error(' Agent did not call Write — skill file not updated.');
return;
}
console.log(` Wrote: ${skillRelPath} (${writeCount} Write + ${editCount} Edit calls)`);
}

222
.github/scripts/doc-agent/src/review.ts vendored Normal file
View File

@@ -0,0 +1,222 @@
/**
* Review mode: detect documentation drift in a git diff range.
*
* Used by the doc-review GitHub Action and locally for testing.
*/
import { execSync } from 'node:child_process';
import { writeFile } from 'node:fs/promises';
import { query } from '@anthropic-ai/claude-agent-sdk';
import { MODEL, XRPLD_ROOT } from './config.js';
import { loadSystemPrompt } from './prompt-loader.js';
import type { FileReviewResult, GitRange, ReviewIssue, ReviewOutput } from './types.js';
const MAX_DIFF_CHARS = 12_000;
const TRACKED_PATH_PATTERN = /^(include|src\/libxrpl|src\/xrpld)\//;
const CPP_FILE_PATTERN = /\.(h|hpp|cpp)$/;
/**
* Parse the CLI arguments into a base..head git range.
*
* Accepts either:
* - `base..head` (e.g. `develop..HEAD`)
* - `--pr <number>` (resolves via `gh pr view`)
*/
function parseRangeArgs(args: readonly string[]): GitRange {
const first = args[0];
if (first === undefined) {
throw new Error('Expected range as base..head or --pr <number>');
}
if (first === '--pr') {
const pr = args[1];
if (pr === undefined) {
throw new Error('--pr requires a PR number');
}
const base = execSync(`gh pr view ${pr} --json baseRefOid -q .baseRefOid`, {
cwd: XRPLD_ROOT,
})
.toString()
.trim();
const head = execSync(`gh pr view ${pr} --json headRefOid -q .headRefOid`, {
cwd: XRPLD_ROOT,
})
.toString()
.trim();
return { base, head };
}
const match = first.match(/^([^.]+)\.\.([^.]+)$/);
if (match === null || match[1] === undefined || match[2] === undefined) {
throw new Error('Expected range as base..head or --pr <number>');
}
return { base: match[1], head: match[2] };
}
/**
* Get the list of C++ source files changed in the given git range,
* filtered to paths the doc-agent cares about.
*/
function getChangedCppFiles(range: GitRange): string[] {
const out = execSync(`git diff --name-only ${range.base}...${range.head}`, {
cwd: XRPLD_ROOT,
}).toString();
return out
.split('\n')
.filter((line) => line.length > 0)
.filter((file) => CPP_FILE_PATTERN.test(file))
.filter((file) => TRACKED_PATH_PATTERN.test(file));
}
/** Get the unified diff for a single file in the given range. */
function getFileDiff(range: GitRange, file: string): string {
return execSync(`git diff ${range.base}...${range.head} -- "${file}"`, {
cwd: XRPLD_ROOT,
maxBuffer: 10 * 1024 * 1024,
}).toString();
}
/** Extract a JSON object from a possibly-fenced model response. */
function extractJson(response: string): ReviewOutput | null {
const fenced = response.match(/```json\s*([\s\S]*?)```/);
const raw = fenced?.[1] ?? response.match(/(\{[\s\S]*\})/)?.[1];
if (raw === undefined) return null;
try {
return JSON.parse(raw) as ReviewOutput;
} catch {
return null;
}
}
/** Send one file's diff to the agent and parse the response. */
async function reviewFile(range: GitRange, file: string): Promise<FileReviewResult | null> {
console.log(`\n=== Reviewing: ${file} ===`);
const diff = getFileDiff(range, file);
if (diff.trim().length === 0) return null;
const systemPrompt = await loadSystemPrompt('review-diff', file);
const userPrompt = `Review this diff for documentation drift:
## File: ${file}
## Diff
\`\`\`
${diff.slice(0, MAX_DIFF_CHARS)}
\`\`\`
Use the Read tool to inspect the current state of the file, related tests,
or callers if needed. Output findings as JSON per the schema in the system
prompt.`;
let response = '';
const result = query({
prompt: userPrompt,
options: {
model: MODEL,
systemPrompt,
cwd: XRPLD_ROOT,
allowedTools: ['Read', 'Glob', 'Grep', 'Bash'],
permissionMode: 'acceptEdits',
},
});
for await (const message of result) {
if (message.type === 'assistant') {
const content = message.message?.content;
if (Array.isArray(content)) {
for (const block of content) {
if (block.type === 'text') {
response += block.text;
}
}
}
}
}
const parsed = extractJson(response);
if (parsed === null) {
console.warn(` No JSON output for ${file}, skipping`);
return null;
}
const issues: ReviewIssue[] = parsed.issues.map((issue) => ({
file: issue.file ?? file,
line: issue.line,
severity: issue.severity,
message: issue.message,
...(issue.suggested_doc !== undefined && { suggestedDoc: issue.suggested_doc }),
}));
return { file, summary: parsed.summary, issues };
}
/** Build the markdown report posted to the PR. */
function buildReport(fileCount: number, results: readonly FileReviewResult[]): string {
const issues = results.flatMap((r) => r.issues);
const warnings = issues.filter((i) => i.severity === 'warning').length;
const suggestions = issues.length - warnings;
const lines: string[] = ['## Documentation Review Report', ''];
lines.push(
issues.length === 0
? 'No documentation issues found.'
: `Found **${issues.length}** issue(s) across **${fileCount}** changed file(s): ${warnings} warning(s), ${suggestions} suggestion(s).`,
);
lines.push('');
for (const result of results) {
if (result.issues.length === 0) continue;
lines.push(`### \`${result.file}\``, '', result.summary, '');
for (const issue of result.issues) {
const tag = issue.severity === 'warning' ? '**Warning:**' : '**Suggestion:**';
lines.push(`- ${tag} Line ${issue.line}: ${issue.message}`);
}
lines.push('');
}
lines.push('---', '*Automated review by doc-agent.*');
return lines.join('\n');
}
/**
* Review the documentation drift introduced by a git range or PR.
*
* Writes two output files in the current working directory:
* - doc-review-report.md (markdown summary for PR comment)
* - doc-review-comments.json (inline review comments)
*/
export async function reviewDiff(args: readonly string[]): Promise<void> {
const range = parseRangeArgs(args);
console.log(`Reviewing range: ${range.base}...${range.head}`);
const files = getChangedCppFiles(range);
if (files.length === 0) {
console.log('No C++ files changed in this range.');
await writeFile('doc-review-report.md', '## Documentation Review\n\nNo C++ files changed.\n');
await writeFile('doc-review-comments.json', '[]');
return;
}
console.log(`Found ${files.length} changed C++ file(s).`);
const results: FileReviewResult[] = [];
for (const file of files) {
try {
const result = await reviewFile(range, file);
if (result !== null) results.push(result);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
console.warn(` Review failed for ${file}: ${message}`);
}
}
const report = buildReport(files.length, results);
const allIssues = results.flatMap((r) => r.issues);
await writeFile('doc-review-report.md', report);
await writeFile('doc-review-comments.json', JSON.stringify(allIssues, null, 2));
console.log('\nReport: doc-review-report.md');
console.log(`Inline comments: doc-review-comments.json (${allIssues.length} issues)`);
}

37
.github/scripts/doc-agent/src/types.ts vendored Normal file
View File

@@ -0,0 +1,37 @@
/**
* Shared type definitions for the doc-agent.
*/
export type Severity = 'warning' | 'suggestion';
export interface ReviewIssue {
file: string;
line: number;
severity: Severity;
message: string;
suggestedDoc?: string;
}
export interface FileReviewResult {
file: string;
summary: string;
issues: ReviewIssue[];
}
export interface ReviewOutput {
summary: string;
issues: Array<{
file?: string;
line: number;
severity: Severity;
message: string;
suggested_doc?: string;
}>;
}
export interface GitRange {
base: string;
head: string;
}
export type AgentMode = 'document' | 'review';

39
.github/scripts/doc-agent/tsconfig.json vendored Normal file
View File

@@ -0,0 +1,39 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2023"],
"outDir": "dist",
"rootDir": "src",
"strict": true,
"noImplicitAny": true,
"strictNullChecks": true,
"strictFunctionTypes": true,
"strictBindCallApply": true,
"strictPropertyInitialization": true,
"noImplicitThis": true,
"alwaysStrict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"exactOptionalPropertyTypes": true,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noPropertyAccessFromIndexSignature": true,
"allowUnusedLabels": false,
"allowUnreachableCode": false,
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"skipLibCheck": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"resolveJsonModule": true,
"isolatedModules": true,
"verbatimModuleSyntax": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}

277
.github/scripts/doc-coverage-check.py vendored Normal file
View File

@@ -0,0 +1,277 @@
#!/usr/bin/env python3
"""
Documentation coverage checker for xrpld.
Parses coverxygen LCOV output, compares against per-module thresholds
defined in .github/doc-coverage-thresholds.json, and generates a
markdown report suitable for posting as a PR comment.
Usage:
python3 doc-coverage-check.py \
--lcov-file doc-coverage.info \
--threshold-file .github/doc-coverage-thresholds.json \
--output doc-coverage-report.md \
[--base-lcov-file base-doc-coverage.info]
"""
import argparse
import json
import re
import sys
from collections import defaultdict
from pathlib import Path
def parse_lcov(lcov_path: str) -> dict[str, dict[str, int]]:
"""Parse LCOV-format file into per-file coverage data.
Returns a dict mapping file paths to {"documented": N, "total": N}.
"""
coverage = {}
current_file = None
documented = 0
total = 0
with open(lcov_path) as f:
for line in f:
line = line.strip()
if line.startswith("SF:"):
current_file = line[3:]
documented = 0
total = 0
elif line.startswith("DA:"):
parts = line[3:].split(",")
if len(parts) >= 2:
total += 1
if int(parts[1]) > 0:
documented += 1
elif line == "end_of_record":
if current_file:
coverage[current_file] = {
"documented": documented,
"total": total,
}
current_file = None
return coverage
def compute_module_coverage(
coverage: dict[str, dict[str, int]],
module_prefixes: list[str],
) -> dict[str, dict[str, int | float]]:
"""Aggregate file-level coverage into module-level stats."""
modules = {}
for prefix in module_prefixes:
doc = 0
tot = 0
for filepath, stats in coverage.items():
if filepath.startswith(prefix) or f"/{prefix}" in filepath:
doc += stats["documented"]
tot += stats["total"]
pct = (doc / tot * 100) if tot > 0 else 0.0
modules[prefix] = {"documented": doc, "total": tot, "percent": round(pct, 1)}
return modules
def compute_global_coverage(
coverage: dict[str, dict[str, int]],
) -> dict[str, int | float]:
"""Compute overall coverage across all files."""
doc = sum(s["documented"] for s in coverage.values())
tot = sum(s["total"] for s in coverage.values())
pct = (doc / tot * 100) if tot > 0 else 0.0
return {"documented": doc, "total": tot, "percent": round(pct, 1)}
def check_ratchet(
current: dict[str, dict[str, int | float]],
base: dict[str, dict[str, int | float]] | None,
current_global: dict[str, int | float],
base_global: dict[str, int | float] | None,
) -> list[str]:
"""Check that no module or global coverage decreased vs base branch."""
violations = []
if base_global and current_global["percent"] < base_global["percent"]:
violations.append(
f"Global coverage decreased: {base_global['percent']}% -> "
f"{current_global['percent']}%"
)
if base:
for module, stats in current.items():
if module in base and stats["percent"] < base[module]["percent"]:
violations.append(
f"`{module}` coverage decreased: "
f"{base[module]['percent']}% -> {stats['percent']}%"
)
return violations
def check_new_files(
coverage: dict[str, dict[str, int]],
new_files: list[str],
min_coverage: int,
) -> list[str]:
"""Check that new files meet minimum documentation coverage."""
violations = []
for filepath in new_files:
for covered_path, stats in coverage.items():
if filepath in covered_path or covered_path.endswith(filepath):
if stats["total"] > 0:
pct = stats["documented"] / stats["total"] * 100
if pct < min_coverage:
violations.append(
f"`{filepath}` has {pct:.0f}% doc coverage "
f"(minimum {min_coverage}%)"
)
break
return violations
def coverage_emoji(pct: float) -> str:
if pct >= 80:
return "+"
if pct >= 50:
return "~"
return "-"
def generate_report(
global_stats: dict[str, int | float],
module_stats: dict[str, dict[str, int | float]],
thresholds: dict,
violations: list[str],
new_file_violations: list[str],
) -> str:
"""Generate a markdown report for the PR comment."""
lines = []
lines.append("## Documentation Coverage Report")
lines.append("")
passed = not violations and not new_file_violations
status = "PASSED" if passed else "FAILED"
lines.append(f"**Status:** {status}")
lines.append(
f"**Global Coverage:** {global_stats['percent']}% "
f"({global_stats['documented']}/{global_stats['total']} entities documented)"
)
lines.append(
f"**Minimum Threshold:** {thresholds.get('global_minimum', 0)}%"
)
lines.append("")
if violations or new_file_violations:
lines.append("### Violations")
lines.append("")
for v in violations + new_file_violations:
lines.append(f"- {v}")
lines.append("")
lines.append("### Module Coverage")
lines.append("")
lines.append("| Module | Coverage | Documented | Total | Threshold |")
lines.append("|--------|----------|------------|-------|-----------|")
module_thresholds = thresholds.get("module_thresholds", {})
for module in sorted(module_stats.keys()):
stats = module_stats[module]
threshold = module_thresholds.get(module, 0)
emoji = coverage_emoji(stats["percent"])
lines.append(
f"| `{module}` | {stats['percent']}% | "
f"{stats['documented']} | {stats['total']} | {threshold}% |"
)
lines.append("")
lines.append(
"*Coverage measured by [coverxygen](https://github.com/psycofdj/coverxygen). "
"See [docs/DOCUMENTATION_STANDARDS.md](../docs/DOCUMENTATION_STANDARDS.md) "
"for documentation guidelines.*"
)
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(description="Check documentation coverage")
parser.add_argument("--lcov-file", required=True, help="Path to LCOV coverage file")
parser.add_argument(
"--threshold-file", required=True, help="Path to thresholds JSON"
)
parser.add_argument("--output", required=True, help="Path to write markdown report")
parser.add_argument(
"--base-lcov-file", default=None, help="Path to base branch LCOV file"
)
parser.add_argument(
"--new-files",
default="",
help="Comma-separated list of new C++ files in this PR",
)
args = parser.parse_args()
with open(args.threshold_file) as f:
thresholds = json.load(f)
coverage = parse_lcov(args.lcov_file)
module_prefixes = list(thresholds.get("module_thresholds", {}).keys())
module_stats = compute_module_coverage(coverage, module_prefixes)
global_stats = compute_global_coverage(coverage)
base_coverage = None
base_module_stats = None
base_global_stats = None
if args.base_lcov_file and Path(args.base_lcov_file).exists():
base_coverage = parse_lcov(args.base_lcov_file)
base_module_stats = compute_module_coverage(base_coverage, module_prefixes)
base_global_stats = compute_global_coverage(base_coverage)
violations = []
if global_stats["percent"] < thresholds.get("global_minimum", 0):
violations.append(
f"Global coverage {global_stats['percent']}% is below minimum "
f"{thresholds['global_minimum']}%"
)
for module, threshold in thresholds.get("module_thresholds", {}).items():
if module in module_stats and module_stats[module]["percent"] < threshold:
violations.append(
f"`{module}` coverage {module_stats[module]['percent']}% is below "
f"threshold {threshold}%"
)
if thresholds.get("ratchet_mode") == "no_decrease":
violations.extend(
check_ratchet(
module_stats, base_module_stats, global_stats, base_global_stats
)
)
new_file_violations = []
if args.new_files:
new_files = [f.strip() for f in args.new_files.split(",") if f.strip()]
new_file_min = thresholds.get("new_file_minimum", 80)
new_file_violations = check_new_files(coverage, new_files, new_file_min)
report = generate_report(
global_stats, module_stats, thresholds, violations, new_file_violations
)
with open(args.output, "w") as f:
f.write(report)
print(report)
if violations or new_file_violations:
print(f"\nFAILED: {len(violations) + len(new_file_violations)} violation(s)")
sys.exit(1)
else:
print("\nPASSED: All coverage thresholds met")
sys.exit(0)
if __name__ == "__main__":
main()

View File

@@ -1,403 +0,0 @@
#!/usr/bin/env python3
"""
Format embedded shell snippets using the shfmt hook configured in
.pre-commit-config.yaml.
Two shapes are recognised:
* YAML workflow/action files: literal block-scalar runs (`run: |`) and
single-line runs (`run: some command`). A single-line run is upgraded to
a `run: |` block scalar if shfmt's output spans multiple lines.
* Markdown files: ``` ```bash ``` fenced code blocks.
Any block that shfmt cannot parse is skipped with a warning on stderr, so
the file is left untouched and surrounding blocks still get formatted.
For each occurrence the body is dedented, written to a temp .sh file,
formatted via `pre-commit run shfmt --files <temp>` (falling back to
`prek`), then re-indented and written back in place.
When invoked without arguments, every .yml/.yaml under .github/ plus every
.md file in the repo is scanned. When invoked with file arguments (the
pre-commit case), only those files are processed.
"""
from __future__ import annotations
import re
import shutil
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Union
REPO = Path(__file__).resolve().parents[2]
_HOOK_RUNNER = next((cmd for cmd in ("pre-commit", "prek") if shutil.which(cmd)), None)
if _HOOK_RUNNER is None:
sys.exit("error: neither `pre-commit` nor `prek` found on PATH")
RUN_BLOCK_RE = re.compile(r"^(?P<prefix>[ \t]*(?:- )?)run:[ \t]*\|[+-]?[ \t]*$")
RUN_INLINE_RE = re.compile(
r"^(?P<prefix>[ \t]*(?:- )?)run:[ \t]+" r"(?P<value>(?!\|[+-]?[ \t]*$)\S.*?)[ \t]*$"
)
MD_BASH_OPEN_RE = re.compile(r"^(?P<indent>[ ]{0,3})`{3}bash[ \t]*$")
MD_FENCE_CLOSE_RE = re.compile(r"^[ ]{0,3}`{3,}[ \t]*$")
@dataclass(frozen=True)
class BlockRun:
"""A `run: |` block scalar; `body_start:body_end` slices into `lines`."""
body_start: int
body_end: int
body_indent: int
@dataclass(frozen=True)
class InlineRun:
"""A single-line `run: value` at `line_idx`."""
line_idx: int
prefix: str
value: str
@dataclass(frozen=True)
class MdBashBlock:
"""A markdown ``` ```bash ``` fenced code block.
`body_start:body_end` slices into the file's lines; `open_line_idx`
points at the opening fence line.
"""
open_line_idx: int
body_start: int
body_end: int
body_indent: int
RunItem = Union[BlockRun, InlineRun]
def _scan_block_body(
lines: list[str], body_start: int, run_col: int
) -> tuple[int | None, int]:
"""Locate the body of a `run: |` block scalar starting at `body_start`.
Returns `(body_indent, scan_end)`. `scan_end` is the line index where the
outer scanner should resume. `body_indent` is `None` when no body is
present (the scalar is empty, or the next non-blank line has indent
`<= run_col`).
"""
body_indent: int | None = None
scan_end = len(lines)
for idx in range(body_start, len(lines)):
line = lines[idx]
if line.strip() == "":
continue
indent = len(line) - len(line.lstrip(" "))
if body_indent is None:
if indent > run_col:
body_indent = indent
else:
scan_end = idx
break
elif indent < body_indent:
scan_end = idx
break
if body_indent is not None:
while scan_end > body_start and lines[scan_end - 1].strip() == "":
scan_end -= 1
if scan_end <= body_start:
body_indent = None
return body_indent, scan_end
def find_run_blocks(lines: list[str]) -> list[RunItem]:
"""Return run items in document order."""
items: list[RunItem] = []
line_idx = 0
while line_idx < len(lines):
line = lines[line_idx]
if block_match := RUN_BLOCK_RE.match(line):
run_col = len(block_match.group("prefix"))
body_start = line_idx + 1
body_indent, scan_end = _scan_block_body(lines, body_start, run_col)
if body_indent is not None:
items.append(
BlockRun(
body_start=body_start,
body_end=scan_end,
body_indent=body_indent,
)
)
line_idx = scan_end
continue
if inline_match := RUN_INLINE_RE.match(line):
items.append(
InlineRun(
line_idx=line_idx,
prefix=inline_match.group("prefix"),
value=inline_match.group("value"),
)
)
line_idx += 1
return items
def find_md_bash_blocks(lines: list[str]) -> list[MdBashBlock]:
"""Return ``` ```bash ``` fenced code blocks in document order."""
blocks: list[MdBashBlock] = []
line_idx = 0
while line_idx < len(lines):
open_match = MD_BASH_OPEN_RE.match(lines[line_idx])
if not open_match:
line_idx += 1
continue
body_start = line_idx + 1
close_idx = next(
(
j
for j in range(body_start, len(lines))
if MD_FENCE_CLOSE_RE.match(lines[j])
),
None,
)
if close_idx is None:
line_idx = body_start
continue
body = lines[body_start:close_idx]
non_blank = [b for b in body if b.strip()]
body_indent = (
min(len(b) - len(b.lstrip(" ")) for b in non_blank)
if non_blank
else len(open_match.group("indent"))
)
blocks.append(
MdBashBlock(
open_line_idx=line_idx,
body_start=body_start,
body_end=close_idx,
body_indent=body_indent,
)
)
line_idx = close_idx + 1
return blocks
def dedent(lines: list[str], n: int) -> list[str]:
pad = " " * n
return [
(
""
if line.strip() == ""
else (line[n:] if line.startswith(pad) else line.lstrip(" "))
)
for line in lines
]
def reindent(lines: list[str], n: int) -> list[str]:
pad = " " * n
return [pad + line if line else "" for line in lines]
_SHFMT_ERR_RE = re.compile(r"\.sh:\d+:\d+:\s")
_GHA_EXPR_RE = re.compile(r"\$\{\{.*?\}\}", re.DOTALL)
_GHA_PLACEHOLDER_RE = re.compile(r"__GHA_EXPR_(\d+)__")
def _encode_gha_exprs(text: str) -> tuple[str, list[str]]:
"""Replace `${{ ... }}` expressions with bash-safe placeholder identifiers."""
exprs: list[str] = []
def repl(match: re.Match[str]) -> str:
exprs.append(match.group(0))
return f"__GHA_EXPR_{len(exprs) - 1}__"
return _GHA_EXPR_RE.sub(repl, text), exprs
def _decode_gha_exprs(text: str, exprs: list[str]) -> str:
"""Restore `${{ ... }}` expressions from placeholder identifiers."""
return _GHA_PLACEHOLDER_RE.sub(lambda m: exprs[int(m.group(1))], text)
def shfmt_via_hook(tmp_path: Path) -> tuple[bool, str]:
# `${{ ... }}` is not valid shell, so swap it for a placeholder identifier
# that shfmt can parse, then restore it after formatting.
encoded, exprs = _encode_gha_exprs(tmp_path.read_text())
if exprs:
tmp_path.write_text(encoded)
res = subprocess.run(
[_HOOK_RUNNER, "run", "shfmt", "--files", str(tmp_path)],
cwd=REPO,
capture_output=True,
text=True,
)
output = res.stdout + res.stderr
# shfmt emits parse errors as "<path>:<line>:<col>: <message>".
parse_err = bool(_SHFMT_ERR_RE.search(output))
# A non-zero exit that is neither a parse error nor pre-commit's "I had
# to modify files" signal means the hook itself failed to run (missing
# binary, install failure, bad config, ...). Surface that loudly rather
# than silently treating it as a no-op.
if (
res.returncode != 0
and not parse_err
and "files were modified by this hook" not in output
):
sys.exit(
f"error: `{_HOOK_RUNNER} run shfmt` failed with exit {res.returncode}:\n{output}"
)
if exprs and not parse_err:
tmp_path.write_text(_decode_gha_exprs(tmp_path.read_text(), exprs))
return not parse_err, output
def _skip(path: Path, where: int, kind: str, output: str) -> None:
print(
f" shfmt could not parse {kind} at {path}:{where + 1} — skipped",
file=sys.stderr,
)
print(f" {output.strip()}", file=sys.stderr)
def process_yaml_file(path: Path, tmp_path: Path) -> int:
text = path.read_text()
had_nl = text.endswith("\n")
lines = text.split("\n")
if had_nl:
lines = lines[:-1]
items = find_run_blocks(lines)
if not items:
return 0
changed = 0
# Process in reverse so earlier indices remain valid as we splice.
for item in reversed(items):
if isinstance(item, BlockRun):
body = lines[item.body_start : item.body_end]
tmp_path.write_text("\n".join(dedent(body, item.body_indent)) + "\n")
ok, output = shfmt_via_hook(tmp_path)
if not ok:
_skip(path, item.body_start, "block", output)
continue
formatted = tmp_path.read_text().rstrip("\n")
new_body = reindent(formatted.split("\n"), item.body_indent)
if new_body != body:
lines[item.body_start : item.body_end] = new_body
changed += 1
else:
tmp_path.write_text(item.value + "\n")
ok, output = shfmt_via_hook(tmp_path)
if not ok:
_skip(path, item.line_idx, "inline run", output)
continue
formatted = tmp_path.read_text().rstrip("\n")
if formatted == item.value:
continue
formatted_lines = formatted.split("\n")
if len(formatted_lines) == 1:
lines[item.line_idx] = f"{item.prefix}run: {formatted}"
else:
body_indent = len(item.prefix) + 2
lines[item.line_idx : item.line_idx + 1] = [
f"{item.prefix}run: |",
*reindent(formatted_lines, body_indent),
]
changed += 1
new_text = "\n".join(lines) + ("\n" if had_nl else "")
if new_text != text:
path.write_text(new_text)
return changed
def process_md_file(path: Path, tmp_path: Path) -> int:
text = path.read_text()
had_nl = text.endswith("\n")
lines = text.split("\n")
if had_nl:
lines = lines[:-1]
blocks = find_md_bash_blocks(lines)
if not blocks:
return 0
changed = 0
for block in reversed(blocks):
body = lines[block.body_start : block.body_end]
tmp_path.write_text("\n".join(dedent(body, block.body_indent)) + "\n")
ok, output = shfmt_via_hook(tmp_path)
if not ok:
_skip(path, block.open_line_idx, "```bash block", output)
continue
formatted = tmp_path.read_text().rstrip("\n")
formatted_lines = formatted.split("\n") if formatted else []
new_body = reindent(formatted_lines, block.body_indent)
if new_body != body:
lines[block.body_start : block.body_end] = new_body
changed += 1
new_text = "\n".join(lines) + ("\n" if had_nl else "")
if new_text != text:
path.write_text(new_text)
return changed
def process_file(path: Path, tmp_path: Path) -> int:
if path.suffix in (".yml", ".yaml"):
return process_yaml_file(path, tmp_path)
if path.suffix == ".md":
return process_md_file(path, tmp_path)
return 0
def gather_files(argv: list[str]) -> list[Path]:
"""Return YAML workflow/action files and markdown files that we should
process — either the paths in `argv` or, when `argv` is empty, every
such file in the repo (skipping `external/`)."""
if argv:
candidates: list[Path] = [
(REPO / a).resolve() if not Path(a).is_absolute() else Path(a) for a in argv
]
else:
gh = REPO / ".github"
candidates = [
*gh.rglob("*.yml"),
*gh.rglob("*.yaml"),
*(
p
for p in REPO.rglob("*.md")
if "external" not in p.relative_to(REPO).parts
),
]
return sorted(
p
for p in candidates
if p.exists()
and (
(p.suffix in (".yml", ".yaml") and ".github" in p.parts)
or p.suffix == ".md"
)
)
def main(argv: list[str]) -> int:
files = gather_files(argv)
if not files:
return 0
with tempfile.TemporaryDirectory(prefix="format-inline-bash-") as tmpdir:
tmp_path = Path(tmpdir) / "shfmt.sh"
total = 0
for f in files:
n = process_file(f, tmp_path)
if n:
print(f"{f.relative_to(REPO)}: reformatted {n} block(s)")
total += n
return 1 if total else 0
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))

0
.github/scripts/levelization/generate.py vendored Executable file → Normal file
View File

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi

View File

@@ -8,12 +8,12 @@ set -e
SED_COMMAND=sed
HEAD_COMMAND=head
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi
SED_COMMAND=gsed
if ! command -v ghead &>/dev/null; then
if ! command -v ghead &> /dev/null; then
echo "Error: ghead is not installed. Please install it using 'brew install coreutils'."
exit 1
fi
@@ -74,10 +74,10 @@ if grep -q '"xrpld"' cmake/XrplCore.cmake; then
# The script has been rerun, so just restore the name of the binary.
${SED_COMMAND} -i 's/"xrpld"/"rippled"/' cmake/XrplCore.cmake
elif ! grep -q '"rippled"' cmake/XrplCore.cmake; then
${HEAD_COMMAND} -n -1 cmake/XrplCore.cmake >cmake.tmp
echo ' # For the time being, we will keep the name of the binary as it was.' >>cmake.tmp
echo ' set_target_properties(xrpld PROPERTIES OUTPUT_NAME "rippled")' >>cmake.tmp
tail -1 cmake/XrplCore.cmake >>cmake.tmp
${HEAD_COMMAND} -n -1 cmake/XrplCore.cmake > cmake.tmp
echo ' # For the time being, we will keep the name of the binary as it was.' >> cmake.tmp
echo ' set_target_properties(xrpld PROPERTIES OUTPUT_NAME "rippled")' >> cmake.tmp
tail -1 cmake/XrplCore.cmake >> cmake.tmp
mv cmake.tmp cmake/XrplCore.cmake
fi

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi
@@ -62,7 +62,7 @@ ${SED_COMMAND} -i 's@ripple/@xrpld/@g' src/test/core/Config_test.cpp
${SED_COMMAND} -i 's/Rippled/File/g' src/test/core/Config_test.cpp
# Restore the old config file name in the code that maintains support for now.
${SED_COMMAND} -i 's/kConfigLegacyName = "xrpld.cfg"/kConfigLegacyName = "rippled.cfg"/g' src/xrpld/core/detail/Config.cpp
${SED_COMMAND} -i 's/kCONFIG_LEGACY_NAME = "xrpld.cfg"/kCONFIG_LEGACY_NAME = "rippled.cfg"/g' src/xrpld/core/detail/Config.cpp
# Restore an URL.
${SED_COMMAND} -i 's/connect-your-xrpld-to-the-xrp-test-net.html/connect-your-rippled-to-the-xrp-test-net.html/g' cfg/xrpld-example.cfg

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi
@@ -62,37 +62,37 @@ done
# restoring the verbiage that is already present in LICENSE.md. Ensure that if
# the script is run multiple times, duplicate notices are not added.
if ! grep -q 'Raw Material Software' include/xrpl/beast/core/CurrentThreadName.h; then
echo -e "// Portions of this file are from JUCE (http://www.juce.com).\n// Copyright (c) 2013 - Raw Material Software Ltd.\n// Please visit http://www.juce.com\n\n$(cat include/xrpl/beast/core/CurrentThreadName.h)" >include/xrpl/beast/core/CurrentThreadName.h
echo -e "// Portions of this file are from JUCE (http://www.juce.com).\n// Copyright (c) 2013 - Raw Material Software Ltd.\n// Please visit http://www.juce.com\n\n$(cat include/xrpl/beast/core/CurrentThreadName.h)" > include/xrpl/beast/core/CurrentThreadName.h
fi
if ! grep -q 'Dev Null' src/test/app/NetworkID_test.cpp; then
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/NetworkID_test.cpp)" >src/test/app/NetworkID_test.cpp
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/NetworkID_test.cpp)" > src/test/app/NetworkID_test.cpp
fi
if ! grep -q 'Dev Null' src/test/app/tx/apply_test.cpp; then
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/tx/apply_test.cpp)" >src/test/app/tx/apply_test.cpp
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/tx/apply_test.cpp)" > src/test/app/tx/apply_test.cpp
fi
if ! grep -q 'Dev Null' src/test/rpc/ManifestRPC_test.cpp; then
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ManifestRPC_test.cpp)" >src/test/rpc/ManifestRPC_test.cpp
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ManifestRPC_test.cpp)" > src/test/rpc/ManifestRPC_test.cpp
fi
if ! grep -q 'Dev Null' src/test/rpc/ValidatorInfo_test.cpp; then
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ValidatorInfo_test.cpp)" >src/test/rpc/ValidatorInfo_test.cpp
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ValidatorInfo_test.cpp)" > src/test/rpc/ValidatorInfo_test.cpp
fi
if ! grep -q 'Dev Null' src/xrpld/rpc/handlers/server_info/Manifest.cpp; then
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/server_info/Manifest.cpp)" >src/xrpld/rpc/handlers/server_info/Manifest.cpp
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/server_info/Manifest.cpp)" > src/xrpld/rpc/handlers/server_info/Manifest.cpp
fi
if ! grep -q 'Dev Null' src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp; then
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp)" >src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp)" > src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp
fi
if ! grep -q 'Bougalis' include/xrpl/basics/SlabAllocator.h; then
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/SlabAllocator.h)" >include/xrpl/basics/SlabAllocator.h # cspell: ignore Nikolaos Bougalis nikb
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/SlabAllocator.h)" > include/xrpl/basics/SlabAllocator.h # cspell: ignore Nikolaos Bougalis nikb
fi
if ! grep -q 'Bougalis' include/xrpl/basics/spinlock.h; then
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/spinlock.h)" >include/xrpl/basics/spinlock.h # cspell: ignore Nikolaos Bougalis nikb
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/spinlock.h)" > include/xrpl/basics/spinlock.h # cspell: ignore Nikolaos Bougalis nikb
fi
if ! grep -q 'Bougalis' include/xrpl/basics/tagged_integer.h; then
echo -e "// Copyright (c) 2014, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/tagged_integer.h)" >include/xrpl/basics/tagged_integer.h # cspell: ignore Nikolaos Bougalis nikb
echo -e "// Copyright (c) 2014, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/tagged_integer.h)" > include/xrpl/basics/tagged_integer.h # cspell: ignore Nikolaos Bougalis nikb
fi
if ! grep -q 'Ritchford' include/xrpl/beast/utility/Zero.h; then
echo -e "// Copyright (c) 2014, Tom Ritchford <tom@swirly.com>\n\n$(cat include/xrpl/beast/utility/Zero.h)" >include/xrpl/beast/utility/Zero.h # cspell: ignore Ritchford
echo -e "// Copyright (c) 2014, Tom Ritchford <tom@swirly.com>\n\n$(cat include/xrpl/beast/utility/Zero.h)" > include/xrpl/beast/utility/Zero.h # cspell: ignore Ritchford
fi
# Restore newlines and tabs in string literals in the affected file.

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi
@@ -90,7 +90,7 @@ ${SED_COMMAND} -i 's/www.ripple.com/www.xrpl.org/g' src/test/protocol/Seed_test.
# Restore specific changes.
${SED_COMMAND} -i 's@b5efcc/src/xrpld@b5efcc/src/ripple@' include/xrpl/protocol/README.md
${SED_COMMAND} -i 's/dbPrefix_ = "xrpldb"/dbPrefix_ = "rippledb"/' src/xrpld/app/misc/SHAMapStoreImp.h # cspell: disable-line
${SED_COMMAND} -i 's/kConfigLegacyName = "xrpld.cfg"/kConfigLegacyName = "rippled.cfg"/' src/xrpld/core/detail/Config.cpp
${SED_COMMAND} -i 's/kCONFIG_LEGACY_NAME = "xrpld.cfg"/kCONFIG_LEGACY_NAME = "rippled.cfg"/' src/xrpld/core/detail/Config.cpp
popd
echo "Renaming complete."

View File

@@ -6,7 +6,7 @@ set -e
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
SED_COMMAND=sed
if [[ "${OSTYPE}" == 'darwin'* ]]; then
if ! command -v gsed &>/dev/null; then
if ! command -v gsed &> /dev/null; then
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
exit 1
fi

View File

@@ -32,32 +32,7 @@ We will further set additional CMake arguments as follows:
"""
def build_config_name(os_entry: dict[str, str], platform: str, build_type: str) -> str:
parts = [os_entry["distro_name"]]
for key in ("distro_version", "compiler_name", "compiler_version"):
if value := os_entry[key]:
parts.append(value)
parts.append("arm64" if "arm64" in platform else "amd64")
parts.append(build_type.lower())
return "-".join(parts)
def generate_packaging_matrix(config: Config) -> list[dict]:
"""Emit one entry per os entry with `package: true`. Architecture is
hardcoded to linux/amd64 here (and the runner is hardcoded at the
workflow level) until arm64 packaging is ready.
"""
return [
{
"artifact_name": f"xrpld-{build_config_name(os, 'linux/amd64', 'Release')}",
"os": os,
}
for os in config.os
if os.get("package", False)
]
def generate_strategy_matrix(all: bool, config: Config) -> list[dict]:
def generate_strategy_matrix(all: bool, config: Config) -> list:
configurations = []
for architecture, os, build_type, cmake_args in itertools.product(
config.architecture, config.os, config.build_type, config.cmake_args
@@ -126,15 +101,14 @@ def generate_strategy_matrix(all: bool, config: Config) -> list[dict]:
continue
# RHEL:
# - 9 using GCC 12: Debug and Release on linux/amd64
# (Release is required for RPM packaging).
# - 9 using GCC 12: Debug on linux/amd64.
# - 10 using Clang: Release on linux/amd64.
if os["distro_name"] == "rhel":
skip = True
if os["distro_version"] == "9":
if (
f"{os['compiler_name']}-{os['compiler_version']}" == "gcc-12"
and build_type in ["Debug", "Release"]
and build_type == "Debug"
and architecture["platform"] == "linux/amd64"
):
skip = False
@@ -149,8 +123,7 @@ def generate_strategy_matrix(all: bool, config: Config) -> list[dict]:
continue
# Ubuntu:
# - Jammy using GCC 12: Debug on linux/arm64, Release on
# linux/amd64 (Release is required for DEB packaging).
# - Jammy using GCC 12: Debug on linux/arm64.
# - Noble using GCC 14: Release on linux/amd64.
# - Noble using Clang 18: Debug on linux/amd64.
# - Noble using Clang 19: Release on linux/arm64.
@@ -163,12 +136,6 @@ def generate_strategy_matrix(all: bool, config: Config) -> list[dict]:
and architecture["platform"] == "linux/arm64"
):
skip = False
if (
f"{os['compiler_name']}-{os['compiler_version']}" == "gcc-12"
and build_type == "Release"
and architecture["platform"] == "linux/amd64"
):
skip = False
elif os["distro_version"] == "noble":
if (
f"{os['compiler_name']}-{os['compiler_version']}" == "gcc-14"
@@ -251,7 +218,17 @@ def generate_strategy_matrix(all: bool, config: Config) -> list[dict]:
# Generate a unique name for the configuration, e.g. macos-arm64-debug
# or debian-bookworm-gcc-12-amd64-release.
config_name = build_config_name(os, architecture["platform"], build_type)
config_name = os["distro_name"]
if (n := os["distro_version"]) != "":
config_name += f"-{n}"
if (n := os["compiler_name"]) != "":
config_name += f"-{n}"
if (n := os["compiler_version"]) != "":
config_name += f"-{n}"
config_name += (
f"-{architecture['platform'][architecture['platform'].find('/')+1:]}"
)
config_name += f"-{build_type.lower()}"
if "-Dcoverage=ON" in cmake_args:
config_name += "-coverage"
if "-Dunity=ON" in cmake_args:
@@ -355,19 +332,10 @@ if __name__ == "__main__":
required=False,
type=Path,
)
parser.add_argument(
"-p",
"--packaging",
help="Emit the packaging matrix (derived from the 'package' field on os entries) instead of the build/test matrix.",
action="store_true",
)
args = parser.parse_args()
matrix = []
if args.packaging:
config_path = args.config if args.config else THIS_DIR / "linux.json"
matrix += generate_packaging_matrix(read_config(config_path))
elif args.config is None or args.config == "":
if args.config is None or args.config == "":
matrix += generate_strategy_matrix(
args.all, read_config(THIS_DIR / "linux.json")
)

View File

@@ -127,8 +127,7 @@
"distro_version": "9",
"compiler_name": "gcc",
"compiler_version": "12",
"image_sha": "4c086b9",
"package": true
"image_sha": "4c086b9"
},
{
"distro_name": "rhel",
@@ -170,8 +169,7 @@
"distro_version": "jammy",
"compiler_name": "gcc",
"compiler_version": "12",
"image_sha": "4c086b9",
"package": true
"image_sha": "4c086b9"
},
{
"distro_name": "ubuntu",

View File

@@ -6,16 +6,14 @@ on:
- develop
paths:
- ".github/workflows/build-nix-image.yml"
- ".github/workflows/reusable-build-docker-image.yml"
- "docker/**"
- "docker/nix.Dockerfile"
- "flake.nix"
- "flake.lock"
- "nix/**"
pull_request:
paths:
- ".github/workflows/build-nix-image.yml"
- ".github/workflows/reusable-build-docker-image.yml"
- "docker/**"
- "docker/nix.Dockerfile"
- "flake.nix"
- "flake.lock"
- "nix/**"
@@ -29,81 +27,75 @@ defaults:
run:
shell: bash
env:
UBUNTU_VERSION: "20.04"
RHEL_VERSION: "9"
DEBIAN_VERSION: "bookworm"
jobs:
build:
name: Build ${{ matrix.distro.name }} (${{ matrix.target.platform }})
permissions:
contents: read
packages: write
strategy:
fail-fast: false
matrix:
# The base images are the oldest supported version of each distro
# that we want to build images for.
distro:
- name: nixos
base_image: nixos/nix:latest
- name: ubuntu
base_image: ubuntu:20.04
- name: rhel
base_image: registry.access.redhat.com/ubi9/ubi:latest
- name: debian
base_image: debian:bookworm
target:
- platform: linux/amd64
runner: ubuntu-latest
- platform: linux/arm64
runner: ubuntu-24.04-arm
uses: ./.github/workflows/reusable-build-docker-image.yml
with:
image_name: ghcr.io/xrplf/xrpld/nix-${{ matrix.distro.name }}
dockerfile: docker/nix.Dockerfile
base_image: ${{ matrix.distro.base_image }}
platform: ${{ matrix.target.platform }}
runner: ${{ matrix.target.runner }}
push: ${{ github.repository == 'XRPLF/rippled' && github.event_name == 'push' }}
merge:
name: Merge ${{ matrix.distro }} manifest
needs: build
if: ${{ github.repository == 'XRPLF/rippled' && github.event_name == 'push' }}
name: Build and push Nix image (${{ matrix.distro }})
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
fail-fast: false
matrix:
distro: [nixos, ubuntu, rhel, debian]
env:
IMAGE_NAME: ghcr.io/xrplf/xrpld/nix-${{ matrix.distro }}
include:
- distro: nixos
- distro: ubuntu
- distro: rhel
- distro: debian
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Docker metadata
id: meta
uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0
with:
images: ${{ env.IMAGE_NAME }}
tags: |
type=sha,prefix=sha-,format=short
type=raw,value=latest
- name: Determine base image
id: vars
run: |
case "${{ matrix.distro }}" in
nixos)
echo "base_image=nixos/nix:latest" >> $GITHUB_OUTPUT
;;
ubuntu)
echo "base_image=ubuntu:${UBUNTU_VERSION}" >> $GITHUB_OUTPUT
;;
rhel)
echo "base_image=registry.access.redhat.com/ubi${RHEL_VERSION}/ubi:latest" >> $GITHUB_OUTPUT
;;
debian)
echo "base_image=debian:${DEBIAN_VERSION}" >> $GITHUB_OUTPUT
;;
esac
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
- name: Login to GitHub Container Registry
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
if: github.event_name == 'push'
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create multi-arch manifests
run: |
for tag in $(jq -cr '.tags[]' <<<"$DOCKER_METADATA_OUTPUT_JSON"); do
docker buildx imagetools create -t "$tag" "${tag}-amd64" "${tag}-arm64"
done
- name: Docker metadata
id: meta
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
with:
images: ghcr.io/xrplf/ci/nix-${{ matrix.distro }}
tags: |
type=sha,prefix=sha-,format=short
type=raw,value=latest
- name: Inspect image
run: |
docker buildx imagetools inspect "${IMAGE_NAME}:${{ steps.meta.outputs.version }}"
- name: Build and push
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
with:
context: .
file: docker/nix.Dockerfile
platforms: linux/amd64
push: ${{ github.event_name == 'push' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: BASE_IMAGE=${{ steps.vars.outputs.base_image }}

View File

@@ -5,17 +5,8 @@ on:
types:
- checks_requested
pull_request:
types:
- opened
- edited
- reopened
- synchronize
- ready_for_review
branches:
- develop
- "release-*"
- "release/*"
- "staging/*"
types: [opened, edited, reopened, synchronize, ready_for_review]
branches: [develop]
jobs:
check_description:
@@ -29,11 +20,11 @@ jobs:
env:
PR_BODY: ${{ github.event.pull_request.body }}
if: ${{ github.event_name == 'pull_request' }}
run: printenv PR_BODY >pr_body.md
run: printenv PR_BODY > pr_body.md
- name: Check PR description differs from template
if: ${{ github.event_name == 'pull_request' }}
run: |
python .github/scripts/check-pr-description.py \
--template-file .github/pull_request_template.md \
--pr-body-file pr_body.md
run: >
python .github/scripts/check-pr-description.py
--template-file .github/pull_request_template.md
--pr-body-file pr_body.md

View File

@@ -5,19 +5,10 @@ on:
types:
- checks_requested
pull_request:
types:
- opened
- edited
- reopened
- synchronize
- ready_for_review
branches:
- develop
- "release-*"
- "release/*"
- "staging/*"
types: [opened, edited, reopened, synchronize, ready_for_review]
branches: [develop]
jobs:
check_title:
if: ${{ github.event.pull_request.draft != true }}
uses: XRPLF/actions/.github/workflows/check-pr-title.yml@cba1f0891650baf1a9c88624dc2d72573be2eb81
uses: XRPLF/actions/.github/workflows/check-pr-title.yml@a5d8dd35be543365e90a11358447130c8763871d

View File

@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check if PRs are dirty
uses: eps1lon/actions-label-merge-conflict@0273be72a0bbd58fcd71d0d6c02c209b50d1e5e1 # v3.1.0
uses: eps1lon/actions-label-merge-conflict@1df065ebe6e3310545d4f4c4e862e43bdca146f0 # v3.0.3
with:
dirtyLabel: "PR: has conflicts"
repoToken: "${{ secrets.GITHUB_TOKEN }}"

90
.github/workflows/doc-review.yml vendored Normal file
View File

@@ -0,0 +1,90 @@
name: Documentation Review
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- 'include/**/*.h'
- 'src/libxrpl/**/*.h'
- 'src/libxrpl/**/*.cpp'
- 'src/xrpld/**/*.h'
- 'src/xrpld/**/*.cpp'
concurrency:
group: doc-review-${{ github.ref }}
cancel-in-progress: true
defaults:
run:
shell: bash
jobs:
review:
if: github.head_ref != 'dangell7/docs'
runs-on: ubuntu-latest
permissions:
pull-requests: write
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: .github/scripts/doc-agent/package-lock.json
- name: Install doc-agent dependencies
working-directory: .github/scripts/doc-agent
run: npm ci
- name: Run documentation review
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: |
cd .github/scripts/doc-agent
npm run review -- "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}"
- name: Post review summary
if: always()
uses: marocchino/sticky-pull-request-comment@67d0dec7b07ed060a405f9b2a64b8ab319fdd7db # v2.9.2
with:
header: doc-review
path: .github/scripts/doc-agent/doc-review-report.md
- name: Post inline review comments
if: always()
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
const fs = require('fs');
const path = '.github/scripts/doc-agent/doc-review-comments.json';
if (!fs.existsSync(path)) return;
const comments = JSON.parse(fs.readFileSync(path, 'utf8'));
if (comments.length === 0) return;
const pull_number = context.payload.pull_request.number;
const owner = context.repo.owner;
const repo = context.repo.repo;
for (const comment of comments) {
try {
await github.rest.pulls.createReviewComment({
owner,
repo,
pull_number,
body: comment.body,
commit_id: '${{ github.event.pull_request.head.sha }}',
path: comment.path,
line: comment.line,
side: 'RIGHT',
});
} catch (e) {
console.log(`Failed to post comment on ${comment.path}:${comment.line}: ${e.message}`);
}
}

View File

@@ -64,13 +64,11 @@ jobs:
.github/workflows/reusable-build-test-config.yml
.github/workflows/reusable-build-test.yml
.github/workflows/reusable-clang-tidy.yml
.github/workflows/reusable-package.yml
.github/workflows/reusable-strategy-matrix.yml
.github/workflows/reusable-test.yml
.github/workflows/reusable-upload-recipe.yml
.clang-tidy
.codecov.yml
cfg/**
cmake/**
conan/**
external/**
@@ -80,10 +78,6 @@ jobs:
CMakeLists.txt
conanfile.py
conan.lock
LICENSE.md
package/**
README.md
- name: Check whether to run
# This step determines whether the rest of the workflow should
# run. The rest of the workflow will run if this job runs AND at
@@ -98,7 +92,7 @@ jobs:
READY: ${{ contains(github.event.pull_request.labels.*.name, 'Ready to merge') }}
MERGE: ${{ github.event_name == 'merge_group' }}
run: |
echo "go=${{ (env.DRAFT != 'true' && env.READY == 'true') || env.FILES == 'true' || env.MERGE == 'true' }}" >>"${GITHUB_OUTPUT}"
echo "go=${{ (env.DRAFT != 'true' && env.READY == 'true') || env.FILES == 'true' || env.MERGE == 'true' }}" >> "${GITHUB_OUTPUT}"
cat "${GITHUB_OUTPUT}"
outputs:
go: ${{ steps.go.outputs.go == 'true' }}
@@ -140,11 +134,6 @@ jobs:
secrets:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
package:
needs: [should-run, build-test]
if: ${{ needs.should-run.outputs.go == 'true' }}
uses: ./.github/workflows/reusable-package.yml
upload-recipe:
needs:
- should-run
@@ -168,9 +157,9 @@ jobs:
PR_URL: ${{ github.event.pull_request.html_url }}
run: |
gh api --method POST -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" \
/repos/xrplf/clio/dispatches -f "event_type=check_libxrpl" \
-F "client_payload[ref]=${{ needs.upload-recipe.outputs.recipe_ref }}" \
-F "client_payload[pr_url]=${PR_URL}"
/repos/xrplf/clio/dispatches -f "event_type=check_libxrpl" \
-F "client_payload[ref]=${{ needs.upload-recipe.outputs.recipe_ref }}" \
-F "client_payload[pr_url]=${PR_URL}"
passed:
if: failure() || cancelled()
@@ -179,7 +168,6 @@ jobs:
- check-rename
- clang-tidy
- build-test
- package
- upload-recipe
- notify-clio
runs-on: ubuntu-latest

View File

@@ -1,5 +1,5 @@
# This workflow uploads the libxrpl recipe to the Conan remote and builds
# release packages when a versioned tag is pushed.
# This workflow uploads the libxrpl recipe to the Conan remote when a versioned
# tag is pushed.
name: Tag
on:
@@ -22,22 +22,3 @@ jobs:
secrets:
remote_username: ${{ secrets.CONAN_REMOTE_USERNAME }}
remote_password: ${{ secrets.CONAN_REMOTE_PASSWORD }}
build-test:
if: ${{ github.repository == 'XRPLF/rippled' }}
uses: ./.github/workflows/reusable-build-test.yml
strategy:
fail-fast: true
matrix:
os: [linux]
with:
ccache_enabled: false
os: ${{ matrix.os }}
strategy_matrix: minimal
secrets:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
package:
if: ${{ github.repository == 'XRPLF/rippled' }}
needs: build-test
uses: ./.github/workflows/reusable-package.yml

View File

@@ -21,13 +21,11 @@ on:
- ".github/workflows/reusable-build-test-config.yml"
- ".github/workflows/reusable-build-test.yml"
- ".github/workflows/reusable-clang-tidy.yml"
- ".github/workflows/reusable-package.yml"
- ".github/workflows/reusable-strategy-matrix.yml"
- ".github/workflows/reusable-test.yml"
- ".github/workflows/reusable-upload-recipe.yml"
- ".clang-tidy"
- ".codecov.yml"
- "cfg/**"
- "cmake/**"
- "conan/**"
- "external/**"
@@ -37,9 +35,6 @@ on:
- "CMakeLists.txt"
- "conanfile.py"
- "conan.lock"
- "LICENSE.md"
- "package/**"
- "README.md"
# Run at 06:32 UTC on every day of the week from Monday through Friday. This
# will force all dependencies to be rebuilt, which is useful to verify that
@@ -100,7 +95,3 @@ jobs:
secrets:
remote_username: ${{ secrets.CONAN_REMOTE_USERNAME }}
remote_password: ${{ secrets.CONAN_REMOTE_PASSWORD }}
package:
needs: build-test
uses: ./.github/workflows/reusable-package.yml

View File

@@ -14,7 +14,7 @@ on:
jobs:
# Call the workflow in the XRPLF/actions repo that runs the pre-commit hooks.
run-hooks:
uses: XRPLF/actions/.github/workflows/pre-commit.yml@cba1f0891650baf1a9c88624dc2d72573be2eb81
uses: XRPLF/actions/.github/workflows/pre-commit.yml@5e942d61bf32f7557a7c159cfac4712a687b3e3a
with:
runs_on: ubuntu-latest
container: '{ "image": "ghcr.io/xrplf/ci/tools-rippled-pre-commit:sha-41ec7c1" }'

View File

@@ -1,6 +1,7 @@
# This workflow builds the documentation for the repository, and publishes it to
# GitHub Pages when changes are merged into the default branch.
name: Build and publish documentation
# Builds Doxygen XML + HTML in a single pass, runs documentation coverage
# checks on pull requests, and publishes the HTML to GitHub Pages when changes
# land on `develop`.
name: Documentation (build, coverage, publish)
on:
push:
@@ -8,6 +9,8 @@ on:
- "develop"
paths:
- ".github/workflows/publish-docs.yml"
- ".github/doc-coverage-thresholds.json"
- ".github/scripts/doc-coverage-check.py"
- "*.md"
- "**/*.md"
- "docs/**"
@@ -17,6 +20,8 @@ on:
pull_request:
paths:
- ".github/workflows/publish-docs.yml"
- ".github/doc-coverage-thresholds.json"
- ".github/scripts/doc-coverage-check.py"
- "*.md"
- "**/*.md"
- "docs/**"
@@ -42,9 +47,14 @@ jobs:
build:
runs-on: ubuntu-latest
container: ghcr.io/xrplf/ci/tools-rippled-documentation:sha-a8c7be1
permissions:
pull-requests: write
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
- name: Prepare runner
uses: XRPLF/actions/prepare-runner@90f11ee655d1687824fb8793db770477d52afbab
@@ -57,21 +67,25 @@ jobs:
with:
subtract: ${{ env.NPROC_SUBTRACT }}
- name: Install coverxygen
# TODO: drop pin once upstream fixes the 1.8.x regression.
# 1.8.2 crashes on enums when no --exclude is configured:
# AttributeError: 'str' object has no attribute 'iter'
# at coverxygen/__init__.py extract_enum_qualified_name
run: pip install 'coverxygen<1.8'
- name: Check configuration
run: |
echo 'Checking path.'
echo ${PATH} | tr ':' '\n'
echo 'Checking environment variables.'
env | sort
echo 'Checking CMake version.'
cmake --version
echo 'Checking Doxygen version.'
doxygen --version
- name: Build documentation
- name: Build documentation (PR/HEAD)
env:
BUILD_NPROC: ${{ steps.nproc.outputs.nproc }}
run: |
@@ -80,6 +94,91 @@ jobs:
cmake -Donly_docs=ON ..
cmake --build . --target docs --parallel ${BUILD_NPROC}
- name: Determine changed C++ files
if: github.event_name == 'pull_request'
id: changed
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files: |
include/**/*.h
src/**/*.h
src/**/*.cpp
- name: Cache base-branch Doxygen XML
if: github.event_name == 'pull_request'
id: base-cache
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: build-base/docs/xml
key: doxygen-xml-${{ github.event.pull_request.base.sha }}-${{ hashFiles('docs/Doxyfile') }}
- name: Build base-branch Doxygen XML (cache miss)
if: github.event_name == 'pull_request' && steps.base-cache.outputs.cache-hit != 'true'
env:
BUILD_NPROC: ${{ steps.nproc.outputs.nproc }}
run: |
git checkout ${{ github.event.pull_request.base.sha }}
mkdir -p build-base
cd build-base
if ! cmake -Donly_docs=ON .. > cmake.log 2>&1; then
echo "::warning::Base-branch cmake configure failed; ratchet disabled for this PR"
cat cmake.log
elif ! cmake --build . --target docs --parallel ${BUILD_NPROC} > build.log 2>&1; then
echo "::warning::Base-branch Doxygen build failed; ratchet disabled for this PR"
tail -50 build.log
fi
cd ..
git checkout ${{ github.event.pull_request.head.sha }}
- name: Generate coverage report (PR)
if: github.event_name == 'pull_request'
run: |
python3 -m coverxygen \
--xml-dir ${BUILD_DIR}/docs/xml \
--src-dir . \
--output doc-coverage.info \
--kind class,struct,function,enum,typedef,variable \
--scope public
- name: Generate coverage report (base)
if: github.event_name == 'pull_request'
run: |
if [ -d "build-base/docs/xml" ]; then
python3 -m coverxygen \
--xml-dir build-base/docs/xml \
--src-dir . \
--output base-doc-coverage.info \
--kind class,struct,function,enum,typedef,variable \
--scope public || true
fi
- name: Check coverage thresholds
if: github.event_name == 'pull_request'
run: |
BASE_FLAG=""
if [ -f "base-doc-coverage.info" ]; then
BASE_FLAG="--base-lcov-file base-doc-coverage.info"
fi
NEW_FILES=""
if [ -n "${{ steps.changed.outputs.added_files }}" ]; then
NEW_FILES="--new-files ${{ steps.changed.outputs.added_files }}"
fi
python3 .github/scripts/doc-coverage-check.py \
--lcov-file doc-coverage.info \
--threshold-file .github/doc-coverage-thresholds.json \
--output doc-coverage-report.md \
${BASE_FLAG} \
${NEW_FILES} || true
- name: Post coverage report to PR
if: github.event_name == 'pull_request' && always()
uses: marocchino/sticky-pull-request-comment@67d0dec7b07ed060a405f9b2a64b8ab319fdd7db # v2.9.2
with:
header: doc-coverage
path: doc-coverage-report.md
- name: Create documentation artifact
if: ${{ github.event.repository.visibility == 'public' && github.event_name == 'push' }}
uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5.0.0

View File

@@ -1,89 +0,0 @@
# Build a single-platform Docker image. On push, the image is pushed to
# GHCR with arch-suffixed tags (e.g. `:latest-amd64`, `:sha-abc-amd64`)
# so the calling workflow can stitch per-arch builds into a multi-arch
# manifest without needing to pass digests around.
name: Reusable build Docker image (single platform)
on:
workflow_call:
inputs:
image_name:
description: "Full image name without tag (e.g. 'ghcr.io/xrplf/xrpld/nix-ubuntu')"
required: true
type: string
dockerfile:
description: "Path to the Dockerfile, relative to the repository root"
required: true
type: string
base_image:
description: "Value passed to the Dockerfile as the BASE_IMAGE build arg"
required: true
type: string
platform:
description: "Docker platform string, e.g. linux/amd64"
required: true
type: string
runner:
description: "GitHub Actions runner label to build on"
required: true
type: string
push:
description: "Whether to push the image to GHCR"
required: true
type: boolean
defaults:
run:
shell: bash
jobs:
build:
name: Build (${{ inputs.platform }})
runs-on: ${{ inputs.runner }}
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Determine arch
id: vars
env:
PLATFORM: ${{ inputs.platform }}
run: |
echo "arch=${PLATFORM##*/}" >>$GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
- name: Login to GitHub Container Registry
if: inputs.push
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Docker metadata
id: meta
uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0
with:
images: ${{ inputs.image_name }}
tags: |
type=sha,prefix=sha-,format=short
type=raw,value=latest
flavor: |
suffix=-${{ steps.vars.outputs.arch }},onlatest=true
- name: Build and push
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
with:
context: .
file: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platform }}
push: ${{ inputs.push }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: BASE_IMAGE=${{ inputs.base_image }}

View File

@@ -113,7 +113,7 @@ jobs:
- name: Set ccache log file
if: ${{ inputs.ccache_enabled && runner.debug == '1' }}
run: echo "CCACHE_LOGFILE=${{ runner.temp }}/ccache.log" >>"${GITHUB_ENV}"
run: echo "CCACHE_LOGFILE=${{ runner.temp }}/ccache.log" >> "${GITHUB_ENV}"
- name: Print build environment
uses: XRPLF/actions/print-build-env@59dec886e4afb05a1724443af08baccbc045b574
@@ -146,11 +146,11 @@ jobs:
CMAKE_ARGS: ${{ inputs.cmake_args }}
run: |
cmake \
-G '${{ runner.os == 'Windows' && 'Visual Studio 17 2022' || 'Ninja' }}' \
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
${CMAKE_ARGS} \
..
-G '${{ runner.os == 'Windows' && 'Visual Studio 17 2022' || 'Ninja' }}' \
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
${CMAKE_ARGS} \
..
- name: Check protocol autogen files are up-to-date
working-directory: ${{ env.BUILD_DIR }}
@@ -172,32 +172,32 @@ jobs:
cmake --build . --target code_gen
DIFF=$(git -C .. status --porcelain -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen)
if [ -n "${DIFF}" ]; then
echo "::error::Generated protocol files are out of date"
git -C .. diff -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen
echo "${MESSAGE}"
exit 1
echo "::error::Generated protocol files are out of date"
git -C .. diff -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen
echo "${MESSAGE}"
exit 1
fi
- name: Build the binary
working-directory: ${{ env.BUILD_DIR }}
env:
BUILD_NPROC: ${{ steps.nproc.outputs.nproc }}
BUILD_NPROC: ${{ runner.os == 'Linux' && '16' || steps.nproc.outputs.nproc }}
BUILD_TYPE: ${{ inputs.build_type }}
CMAKE_TARGET: ${{ inputs.cmake_target }}
run: |
cmake \
--build . \
--config "${BUILD_TYPE}" \
--parallel "${BUILD_NPROC}" \
--target "${CMAKE_TARGET}"
--build . \
--config "${BUILD_TYPE}" \
--parallel "${BUILD_NPROC}" \
--target "${CMAKE_TARGET}"
- name: Show ccache statistics
if: ${{ inputs.ccache_enabled }}
run: |
ccache --show-stats -vv
if [ '${{ runner.debug }}' = '1' ]; then
cat "${CCACHE_LOGFILE}"
curl ${CCACHE_REMOTE_STORAGE%|*}/status || true
cat "${CCACHE_LOGFILE}"
curl ${CCACHE_REMOTE_STORAGE%|*}/status || true
fi
- name: Upload the binary (Linux)
@@ -214,7 +214,7 @@ jobs:
working-directory: ${{ env.BUILD_DIR }}
run: |
set -o pipefail
./xrpld --definitions | python3 -m json.tool >server_definitions.json
./xrpld --definitions | python3 -m json.tool > server_definitions.json
- name: Upload server definitions
if: ${{ github.event.repository.visibility == 'public' && inputs.config_name == 'debian-bookworm-gcc-13-amd64-release' }}
@@ -231,10 +231,10 @@ jobs:
run: |
ldd ./xrpld
if [ "$(ldd ./xrpld | grep -E '(libstdc\+\+|libgcc)' | wc -l)" -eq 0 ]; then
echo 'The binary is statically linked.'
echo 'The binary is statically linked.'
else
echo 'The binary is dynamically linked.'
exit 1
echo 'The binary is dynamically linked.'
exit 1
fi
- name: Verify presence of instrumentation (Linux)
@@ -250,12 +250,12 @@ jobs:
run: |
ASAN_OPTS="include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-asan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/asan.supp"
if [[ "${CONFIG_NAME}" == *gcc* ]]; then
ASAN_OPTS="${ASAN_OPTS}:alloc_dealloc_mismatch=0"
ASAN_OPTS="${ASAN_OPTS}:alloc_dealloc_mismatch=0"
fi
echo "ASAN_OPTIONS=${ASAN_OPTS}" >>${GITHUB_ENV}
echo "TSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-tsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/tsan.supp" >>${GITHUB_ENV}
echo "UBSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-ubsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/ubsan.supp" >>${GITHUB_ENV}
echo "LSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-lsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/lsan.supp" >>${GITHUB_ENV}
echo "ASAN_OPTIONS=${ASAN_OPTS}" >> ${GITHUB_ENV}
echo "TSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-tsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/tsan.supp" >> ${GITHUB_ENV}
echo "UBSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-ubsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/ubsan.supp" >> ${GITHUB_ENV}
echo "LSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-lsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/lsan.supp" >> ${GITHUB_ENV}
- name: Run the separate tests
if: ${{ !inputs.build_only }}
@@ -266,9 +266,9 @@ jobs:
PARALLELISM: ${{ runner.os == 'Windows' && '1' || steps.nproc.outputs.nproc }}
run: |
ctest \
--output-on-failure \
-C "${BUILD_TYPE}" \
-j "${PARALLELISM}"
--output-on-failure \
-C "${BUILD_TYPE}" \
-j "${PARALLELISM}"
- name: Run the embedded tests
if: ${{ !inputs.build_only }}
@@ -278,7 +278,7 @@ jobs:
run: |
set -o pipefail
# Coverage builds are slower due to instrumentation; use fewer parallel jobs to avoid flakiness
[ "$COVERAGE_ENABLED" = "true" ] && BUILD_NPROC=$((BUILD_NPROC - 2))
[ "$COVERAGE_ENABLED" = "true" ] && BUILD_NPROC=$(( BUILD_NPROC - 2 ))
./xrpld --unittest --unittest-jobs "${BUILD_NPROC}" 2>&1 | tee unittest.log
- name: Show test failure summary
@@ -287,19 +287,19 @@ jobs:
WORKING_DIR: ${{ runner.os == 'Windows' && format('{0}\{1}', env.BUILD_DIR, inputs.build_type) || env.BUILD_DIR }}
run: |
if [ ! -d "${WORKING_DIR}" ]; then
echo "Working directory '${WORKING_DIR}' does not exist."
exit 0
echo "Working directory '${WORKING_DIR}' does not exist."
exit 0
fi
cd "${WORKING_DIR}"
if [ ! -f unittest.log ]; then
echo "unittest.log not found; embedded tests may not have run."
exit 0
echo "unittest.log not found; embedded tests may not have run."
exit 0
fi
if ! grep -E "failed" unittest.log; then
echo "Log present but no failure lines found in unittest.log."
echo "Log present but no failure lines found in unittest.log."
fi
- name: Debug failure (Linux)
if: ${{ failure() && runner.os == 'Linux' && !inputs.build_only }}
@@ -317,14 +317,14 @@ jobs:
BUILD_TYPE: ${{ inputs.build_type }}
run: |
cmake \
--build . \
--config "${BUILD_TYPE}" \
--parallel "${BUILD_NPROC}" \
--target coverage
--build . \
--config "${BUILD_TYPE}" \
--parallel "${BUILD_NPROC}" \
--target coverage
- name: Upload coverage report
if: ${{ github.repository == 'XRPLF/rippled' && !inputs.build_only && env.COVERAGE_ENABLED == 'true' }}
uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
with:
disable_search: true
disable_telem: true

View File

@@ -38,9 +38,9 @@ jobs:
run: |
DIFF=$(git status --porcelain)
if [ -n "${DIFF}" ]; then
# Print the differences to give the contributor a hint about what to
# expect when running levelization on their own machine.
git diff
echo "${MESSAGE}"
exit 1
# Print the differences to give the contributor a hint about what to
# expect when running levelization on their own machine.
git diff
echo "${MESSAGE}"
exit 1
fi

View File

@@ -48,9 +48,9 @@ jobs:
run: |
DIFF=$(git status --porcelain)
if [ -n "${DIFF}" ]; then
# Print the differences to give the contributor a hint about what to
# expect when running the renaming scripts on their own machine.
git diff
echo "${MESSAGE}"
exit 1
# Print the differences to give the contributor a hint about what to
# expect when running the renaming scripts on their own machine.
git diff
echo "${MESSAGE}"
exit 1
fi

View File

@@ -70,13 +70,13 @@ jobs:
working-directory: ${{ env.BUILD_DIR }}
run: |
cmake \
-G 'Ninja' \
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-Dtests=ON \
-Dwerr=ON \
-Dxrpld=ON \
..
-G 'Ninja' \
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-Dtests=ON \
-Dwerr=ON \
-Dxrpld=ON \
..
# clang-tidy needs headers generated from proto files
- name: Build libxrpl.libpb
@@ -133,7 +133,7 @@ jobs:
- name: Write issue header
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
run: |
cat >"${ISSUE_FILE}" <<EOF
cat > "${ISSUE_FILE}" <<EOF
## Clang-tidy Check Failed
### Clang-tidy Output:
@@ -144,30 +144,30 @@ jobs:
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
run: |
if [ -f "${OUTPUT_FILE}" ]; then
# Extract lines containing 'error:', 'warning:', or 'note:'
grep -E '(error:|warning:|note:)' "${OUTPUT_FILE}" >filtered-output.txt || true
# Extract lines containing 'error:', 'warning:', or 'note:'
grep -E '(error:|warning:|note:)' "${OUTPUT_FILE}" > filtered-output.txt || true
# If filtered output is empty, use original (might be a different error format)
if [ ! -s filtered-output.txt ]; then
cp "${OUTPUT_FILE}" filtered-output.txt
fi
# If filtered output is empty, use original (might be a different error format)
if [ ! -s filtered-output.txt ]; then
cp "${OUTPUT_FILE}" filtered-output.txt
fi
# Truncate if too large
head -c 60000 filtered-output.txt >>"${ISSUE_FILE}"
if [ "$(wc -c <filtered-output.txt)" -gt 60000 ]; then
echo "" >>"${ISSUE_FILE}"
echo "... (output truncated, see artifacts for full output)" >>"${ISSUE_FILE}"
fi
# Truncate if too large
head -c 60000 filtered-output.txt >> "${ISSUE_FILE}"
if [ "$(wc -c < filtered-output.txt)" -gt 60000 ]; then
echo "" >> "${ISSUE_FILE}"
echo "... (output truncated, see artifacts for full output)" >> "${ISSUE_FILE}"
fi
rm filtered-output.txt
rm filtered-output.txt
else
echo "No output file found" >>"${ISSUE_FILE}"
echo "No output file found" >> "${ISSUE_FILE}"
fi
- name: Append issue footer
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
run: |
cat >>"${ISSUE_FILE}" <<EOF
cat >> "${ISSUE_FILE}" <<EOF
\`\`\`
---
@@ -176,7 +176,7 @@ jobs:
- name: Create issue
if: ${{ steps.run_clang_tidy.outcome != 'success' && inputs.create_issue_on_failure }}
uses: XRPLF/actions/create-issue@2b8bc36af85b88bca0dd7bfac2e2dc05f94ad712
uses: XRPLF/actions/create-issue@fbcc16eb7f20dc3199eaf1aed0d3523a5ba9008c
with:
title: "Clang-tidy check failed"
body_file: ${{ env.ISSUE_FILE }}

View File

@@ -1,99 +0,0 @@
# Build Linux packages (DEB and RPM) from pre-built binary artifacts.
# Discovers which configurations to package from linux.json (os entries
# with "package": true) and fans out one job per entry. Today only
# linux/amd64 is emitted; the architecture is hardcoded both here
# (runner) and in generate.py.
name: Package
on:
workflow_call:
inputs:
pkg_release:
description: "Package release number. Increment when repackaging the same executable."
required: false
type: string
default: "1"
defaults:
run:
shell: bash
env:
BUILD_DIR: build
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.generate.outputs.matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: 3.13
- name: Generate packaging matrix
id: generate
working-directory: .github/scripts/strategy-matrix
run: |
./generate.py --packaging --config=linux.json >>"${GITHUB_OUTPUT}"
generate-version:
runs-on: ubuntu-latest
outputs:
version: ${{ steps.version.outputs.version }}
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
sparse-checkout: |
.github/actions/generate-version
src/libxrpl/protocol/BuildInfo.cpp
- name: Generate version
id: version
uses: ./.github/actions/generate-version
package:
needs: [generate-matrix, generate-version]
if: ${{ github.event.repository.visibility == 'public' }}
strategy:
fail-fast: false
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
name: "${{ matrix.artifact_name }}"
permissions:
contents: read
runs-on: ["self-hosted", "Linux", "X64", "heavy"]
container: ${{ format('ghcr.io/xrplf/ci/{0}-{1}:{2}-{3}-sha-{4}', matrix.os.distro_name, matrix.os.distro_version, matrix.os.compiler_name, matrix.os.compiler_version, matrix.os.image_sha) }}
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Download pre-built binary
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: ${{ matrix.artifact_name }}
path: ${{ env.BUILD_DIR }}
- name: Make binary executable
run: chmod +x "${BUILD_DIR}/xrpld"
- name: Build package
env:
PKG_VERSION: ${{ needs.generate-version.outputs.version }}
PKG_RELEASE: ${{ inputs.pkg_release }}
run: ./package/build_pkg.sh
- name: Upload package artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: ${{ matrix.artifact_name }}-pkg-${{ needs.generate-version.outputs.version }}
path: |
${{ env.BUILD_DIR }}/debbuild/*.deb
${{ env.BUILD_DIR }}/debbuild/*.ddeb
${{ env.BUILD_DIR }}/rpmbuild/RPMS/**/*.rpm
if-no-files-found: error

View File

@@ -42,4 +42,4 @@ jobs:
env:
GENERATE_CONFIG: ${{ inputs.os != '' && format('--config={0}.json', inputs.os) || '' }}
GENERATE_OPTION: ${{ inputs.strategy_matrix == 'all' && '--all' || '' }}
run: ./generate.py ${GENERATE_OPTION} ${GENERATE_CONFIG} >>"${GITHUB_OUTPUT}"
run: ./generate.py ${GENERATE_OPTION} ${GENERATE_CONFIG} >> "${GITHUB_OUTPUT}"

10
.gitignore vendored
View File

@@ -1,6 +1,10 @@
# .gitignore
# cspell: disable
# AI-generated documentation source (temporary, used by doc-agent
# during the initial documentation pass; removed once docs are merged).
*.ai.md
# Macintosh Desktop Services Store files.
.DS_Store
@@ -15,6 +19,12 @@ Release/
/.build/
/.venv/
/build/
/build-base/
/doc-coverage.info
/base-doc-coverage.info
/doc-coverage-report.md
/doc-review-report.md
/doc-review-comments.json
/db/
/out.txt
/Testing/

View File

@@ -37,50 +37,37 @@ repos:
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: dd18dad857d6133e90bbe478f4f2f22ec0030269 # frozen: v22.1.5
rev: cd481d7b0bfb5c7b3090c21846317f9a8262e891 # frozen: v22.1.0
hooks:
- id: clang-format
args: [--style=file]
"types_or": [c++, c, proto]
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
- repo: https://github.com/BlankSpruce/gersemi-pre-commit
rev: faadd6a9d852369ca94f4d15b2404c967ba8cb01 # frozen: 0.27.6
- repo: https://github.com/BlankSpruce/gersemi
rev: 0.26.0
hooks:
- id: gersemi
- repo: https://github.com/rbubley/mirrors-prettier
rev: 515f543f5718ebfd6ce22e16708bb32c68ff96e1 # frozen: v3.8.3
rev: c2bc67fe8f8f549cc489e00ba8b45aa18ee713b1 # frozen: v3.8.1
hooks:
- id: prettier
args: [--end-of-line=auto]
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 4160603246a6b365d4a2af661c6d71b0a0f50478 # frozen: 26.5.1
rev: ea488cebbfd88a5f50b8bd95d5c829d0bb76feb8 # frozen: 26.1.0
hooks:
- id: black
- repo: https://github.com/scop/pre-commit-shfmt
rev: 05c1426671b9237fb5e1444dd63aa5731bec0dfb # frozen: v3.13.1-1
- repo: https://github.com/openstack/bashate
rev: 5798d24d571676fc407e81df574c1ef57b520f23 # frozen: 2.1.1
hooks:
- id: shfmt
args: [--write, --indent=4, --case-indent=true]
- repo: local
hooks:
- id: format-inline-bash-workflows
name: "format `run:` blocks in workflows/actions"
entry: ./.github/scripts/format-inline-bash.py
language: python
files: ^\.github/(workflows|actions)/.*\.ya?ml$
- id: format-inline-bash-markdown
name: "format ```bash blocks in markdown"
entry: ./.github/scripts/format-inline-bash.py
language: python
files: \.md$
- id: bashate
args: ["--ignore=E006"]
- repo: https://github.com/streetsidesoftware/cspell-cli
rev: 4643f154907327ee0a2c7038f0296e0dd77d9776 # frozen: v10.0.0
rev: a42085ade523f591dca134379a595e7859986445 # frozen: v9.7.0
hooks:
- id: cspell # Spell check changed files
exclude: |

View File

@@ -151,8 +151,8 @@ git init
git remote add origin git@github.com:XRPLF/conan-center-index.git
git sparse-checkout init
for recipe in "${recipes[@]}"; do
echo "Checking out recipe '${recipe}'..."
git sparse-checkout add recipes/${recipe}
echo "Checking out recipe '${recipe}'..."
git sparse-checkout add recipes/${recipe}
done
git fetch origin master
git checkout master
@@ -180,7 +180,7 @@ the new recipe will be automatically pulled from the official Conan Center.
If you see an error similar to the following after running `conan profile show`:
```text
```bash
ERROR: Invalid setting '17' is not a valid 'settings.compiler.version' value.
Possible values are ['5.0', '5.1', '6.0', '6.1', '7.0', '7.3', '8.0', '8.1',
'9.0', '9.1', '10.0', '11.0', '12.0', '13', '13.0', '13.1', '14', '14.0', '15',
@@ -427,19 +427,16 @@ install ccache --version 4.11.3 --allow-downgrade`.
Single-config generators:
```
cmake --build . --parallel N
cmake --build .
```
Multi-config generators:
```
cmake --build . --config Release --parallel N
cmake --build . --config Debug --parallel N
cmake --build . --config Release
cmake --build . --config Debug
```
Replace the `--parallel` parameter N with the desired number of parallel jobs. A common starting point is half of the number of available CPU
cores.
5. Test xrpld.
Single-config generators:

View File

@@ -134,7 +134,6 @@ endif()
include(XrplCore)
include(XrplProtocolAutogen)
include(XrplInstall)
include(XrplPackaging)
include(XrplValidatorKeys)
if(tests)

395
SCOPE_OF_WORK.md Normal file
View File

@@ -0,0 +1,395 @@
# XRPLD Automated Documentation System — Scope of Work
## 1. Problem Statement
The XRP Ledger daemon (`xrpld`) is a ~275,000 line C++ codebase with 1,183
source files across the core library, protocol layer, and application server.
It is the single implementation of the XRP Ledger protocol and processes
billions of dollars in value.
Despite this criticality, the codebase has minimal inline documentation. Only
569 of 1,183 files contain any Doxygen-style doc comments, and most of those
are sparse — a class-level sentence or two, rarely covering individual methods,
parameters, or behavioral invariants.
The only formal documentation effort — an external specification by Common
Prefix — has fundamental structural problems:
- **Drift is the default state.** The spec lives in a separate repository
with no CI linkage to the codebase. Every commit to `rippled` that changes
behavior silently invalidates the spec. Even one week of drift makes
the spec unreliable.
- **Separate repo, separate context.** No contributor has both repos open.
When a bug comes in, the developer reads the code, not the spec. A
recent bug would have been caught if the code itself was documented.
- **No code-level documentation.** The spec describes system-level behavior
(payment engine, DEX) but does not document individual functions, classes,
parameters, or invariants. A developer working on a specific function
gets no help.
- **Vendor dependency.** Ripple has a critical documentation dependency on a
single external firm. If the contract ends, the spec orphans.
- **Perverse incentive.** The vendor profits from complexity and drift.
Cleaner code and better inline docs reduce the need for external
specification work.
## 2. Solution as Built
An automated, in-repo documentation system with five components, all living
alongside the code with no external repos and no external vendor dependency:
1. **Module skills** — Per-module knowledge files in [docs/skills/](docs/skills/)
that capture the "soul" of each subsystem (key files, patterns, pitfalls,
invariants). These are the durable, human-maintained context that the
automated agent and human contributors both consult.
2. **doc-agent (Claude Agent SDK app)** — A TypeScript tool at
[.github/scripts/doc-agent/](.github/scripts/doc-agent/) with three modes:
`document` (write Doxygen comments), `review` (detect drift on a diff),
and `regen-skills` (rebuild a skill file from current code).
3. **Doc-review GitHub Action** — Runs the review mode on every PR; posts
inline comments and a sticky summary. Currently warning-only.
4. **Coverage enforcement** — CI-enforced documentation coverage thresholds
that ratchet up over time, preventing regression.
5. **Developer slash commands** — Claude Code commands in
[.claude/commands/](.claude/commands/) for onboarding, architecture
questions, doc review, and bug pattern detection.
Documentation accuracy is enforced by CI the same way code style and test
coverage are enforced today.
## 3. Deliverables — Built
### 3.1 Documentation Standards
[docs/DOCUMENTATION_STANDARDS.md](docs/DOCUMENTATION_STANDARDS.md) — canonical
format guide defining:
- Javadoc-style `/** ... */` Doxygen comments (matches existing convention)
- Documentation levels: file, class, public method, free function, enum
- Required Doxygen tags: `@param`, `@return`, `@note`, `@invariant`
- Quality rules: document behavior and invariants, never paraphrase
signatures, terse style (25 lines for classes, 13 for functions)
### 3.2 Doxygen Configuration Changes
[docs/Doxyfile](docs/Doxyfile):
- `EXTRACT_ALL = NO` (was `YES`) — undocumented entities are flagged rather
than silently extracted
- `GENERATE_XML = YES` (was `NO`) — required for coverxygen to parse and
measure documentation coverage
### 3.3 Module Skills
Thirteen module-level skill files in [docs/skills/](docs/skills/), each one
a self-contained guide to a subsystem's responsibilities, key types, control
flow, conventions, and common pitfalls:
| Skill | Covers |
|-------|--------|
| [consensus.md](docs/skills/consensus.md) | XRPL consensus algorithm + RCL adapters |
| [cryptography.md](docs/skills/cryptography.md) | CSPRNG, secure erasure, key handling |
| [ledger.md](docs/skills/ledger.md) | ReadView/ApplyView, state tables, sandbox |
| [nodestore.md](docs/skills/nodestore.md) | RocksDB/NuDB/Memory backends |
| [peering.md](docs/skills/peering.md) | Overlay + peerfinder |
| [protocol.md](docs/skills/protocol.md) | STObject, SField, Serializer, TER, Keylets |
| [rpc.md](docs/skills/rpc.md) | RPC handler conventions |
| [shamap.md](docs/skills/shamap.md) | SHA-256 Merkle radix tree |
| [sql.md](docs/skills/sql.md) | SOCI database wrapper, checkpointing |
| [test.md](docs/skills/test.md) | Beast unit test framework conventions |
| [transactors.md](docs/skills/transactors.md) | Full transactor template |
| [websockets.md](docs/skills/websockets.md) | WS subscriptions/streams |
| [index.md](docs/skills/index.md) | Top-level codebase map |
These skills serve a dual purpose: they are reference docs for human
contributors, and they are injected as system-prompt context by the
doc-agent (mapping in [src/config.ts](.github/scripts/doc-agent/src/config.ts)).
[install-skills.sh](.github/scripts/doc-agent/install-skills.sh) installs
the same files as Claude Code skills under `.claude/skills/<name>/SKILL.md`,
so any Claude Code session in the repo picks them up automatically.
### 3.4 doc-agent (Claude Agent SDK)
A TypeScript application at [.github/scripts/doc-agent/](.github/scripts/doc-agent/),
built on `@anthropic-ai/claude-agent-sdk`. Three modes:
| Mode | Purpose |
|------|---------|
| `document` | Add Doxygen comments to a file or directory. Reads sibling `<file>.ai.md` context, the module skill, and the source file; uses `permissionMode: 'acceptEdits'` to write directly. |
| `review` | Given a git range or PR number, detect doc drift. Emits `doc-review-report.md` (sticky comment) and `doc-review-comments.json` (inline comments). |
| `regen-skills` | Rebuild a module's skill file at `docs/skills/<module>.md` from the module's `.ai.md` files plus existing skill content. |
Layout:
```
doc-agent/
├── package.json # Node >= 20.12, @anthropic-ai/claude-agent-sdk
├── biome.json # lint + format
├── install-skills.sh # copies docs/skills/*.md → .claude/skills/*/SKILL.md
├── prompts/ # System prompts as markdown (editable without code changes)
│ ├── document-file.md
│ ├── review-diff.md
│ └── regen-skill.md
└── src/
├── index.ts # CLI entry (document | review | regen-skills)
├── config.ts # Paths, model, MODULE_SKILL_MAP
├── prompt-loader.ts # Loads prompts + injects module skill
├── document.ts
├── review.ts
├── regen-skills.ts
└── types.ts
```
Notable design decisions:
- **Prompts as markdown, not strings.** Operators tune prompts without
touching TypeScript or redeploying.
- **`.ai.md` sidecar input.** When documenting a file, the agent reads a
sibling `<file>.ai.md` (high-signal prose generated upstream by the
`athenah-ai` pipeline) as the authoritative source of intent. These are
gitignored (`*.ai.md` in [.gitignore](.gitignore)) and discarded once
the initial pass is complete.
- **Model selection via env.** `DOC_AGENT_MODEL` env var; default
`claude-sonnet-4-6`.
- **Repo root override.** `XRPLD_ROOT` env var allows running the agent
against a different checkout (useful in CI and local testing).
### 3.5 Documentation Coverage Pipeline
| File | Purpose |
|------|---------|
| [.github/doc-coverage-thresholds.json](.github/doc-coverage-thresholds.json) | Per-module thresholds + quarterly ratchet schedule |
| [.github/scripts/doc-coverage-check.py](.github/scripts/doc-coverage-check.py) | Parses coverxygen LCOV, checks thresholds, generates PR report |
| [.github/workflows/doc-coverage.yml](.github/workflows/doc-coverage.yml) | CI workflow: builds Doxygen XML, runs coverxygen, posts coverage to PR |
| [cmake/XrplDocs.cmake](cmake/XrplDocs.cmake) | `docs` CMake target wiring |
Flow:
1. On every PR touching C++ files, the workflow builds Doxygen XML for
both the PR branch and the base branch (using
`ghcr.io/xrplf/ci/tools-rippled-documentation`).
2. Coverxygen generates LCOV-format coverage from the XML.
3. The check script compares coverage against per-module thresholds.
4. Ratchet mode (`no_decrease`) prevents any PR from reducing coverage.
5. New files added in a PR require ≥ 80% doc coverage.
6. Results are posted as a sticky PR comment with per-module breakdown.
### 3.6 Doc-Review GitHub Action
| File | Purpose |
|------|---------|
| [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) | CI workflow: runs on PR, posts review |
The workflow invokes the doc-agent `review` mode (Section 3.4) directly —
there is no separate CI script. The same code path serves CI and local use,
so prompt and logic changes are tested in one place.
Flow:
1. On every PR, the workflow runs `npm run review -- "$BASE..$HEAD"` in the
doc-agent directory.
2. doc-agent enumerates C++ files changed in the range, extracts diff
hunks plus existing doc comments, and asks Claude per file whether the
docs are still accurate.
3. Outputs `doc-review-report.md` (sticky PR comment) and
`doc-review-comments.json` (inline review comments via
`actions/github-script`).
4. Runs in **warning-only mode** — does not block merge.
Local invocation uses the same command:
`npm run review develop..HEAD` or `npm run review -- --pr 1234`.
Cost: only changed files and changed hunks within those files are
processed. Estimated ~$0.050.15 per PR.
### 3.7 Claude Code Slash Commands
Four developer-facing commands in [.claude/commands/](.claude/commands/):
| Command | Purpose |
|---------|---------|
| [doc-review](.claude/commands/doc-review.md) | Review doc accuracy for files changed on current branch |
| [explain-module](.claude/commands/explain-module.md) | Explain a module's architecture, classes, control flow, entry points |
| [how-does-x-work](.claude/commands/how-does-x-work.md) | Trace a feature through the codebase with file/line references |
| [find-bug-patterns](.claude/commands/find-bug-patterns.md) | Scan code for common xrpld bug patterns (unchecked TER, integer overflow, missing amendment gates, etc.) |
### 3.8 Full Codebase Documentation
The initial documentation pass covers 1,183 C++ files organized into 21
module-level PRs (see Section 5). The doc-agent `document` mode produces
each PR in parallel across modules; each file's output is then
domain-expert reviewed before merge.
## 4. Resources Required
### 4.1 People
| Role | Responsibility |
|------|---------------|
| **Documentation lead** | Runs `doc-agent document` per module, reviews output, submits PRs, iterates on prompts in [prompts/](.github/scripts/doc-agent/prompts/) |
| **Domain reviewers** (rotating) | Review doc PRs for semantic accuracy in their area of expertise |
| **CI/infrastructure** | Deploys workflows, monitors costs, tunes false-positive rate on doc-review action |
### 4.2 Infrastructure & Tools
| Resource | Purpose |
|----------|---------|
| **Anthropic API access** | Powers the doc-agent (`document`, `review`, `regen-skills`) and the doc-review GitHub Action |
| **Claude Agent SDK** | `@anthropic-ai/claude-agent-sdk` Node package |
| **Node.js >= 20.12** | Native `--env-file` support; runs the doc-agent |
| **GitHub Actions minutes** | Doc-coverage workflow (Doxygen XML build + coverxygen) and doc-review workflow |
| **Coverxygen** | Python package, open source (MIT) |
| **Doxygen** | Already configured — uses existing `ghcr.io/xrplf/ci/tools-rippled-documentation` container |
| **GitHub Actions secret** | `ANTHROPIC_API_KEY` — for doc-review workflow |
| **athenah-ai pipeline output** | Generates `.ai.md` sidecar context files consumed by `doc-agent document`; gitignored, removed post-pass |
### 4.3 Access & Permissions
- Write access to the `rippled` repository (or a fork for initial PRs)
- Ability to add GitHub Actions secrets (`ANTHROPIC_API_KEY`)
- Ability to modify required status checks (when promoting doc-review from
warning to required)
## 5. Execution Plan
Module passes run in parallel — the doc-agent operates per-module
independently, so foundation, protocol, and application layers are
generated concurrently rather than sequentially. Module groupings below
reflect dependency layering for review purposes, not a serial schedule.
### Phase 0: Infrastructure — Complete
Tooling shipped as the foundation PR:
- [x] [docs/DOCUMENTATION_STANDARDS.md](docs/DOCUMENTATION_STANDARDS.md)
- [x] [docs/Doxyfile](docs/Doxyfile) modifications
- [x] [docs/skills/](docs/skills/) — 13 module skills + index
- [x] [.github/scripts/doc-agent/](.github/scripts/doc-agent/) — Agent SDK app (document / review / regen-skills)
- [x] [.github/scripts/doc-agent/install-skills.sh](.github/scripts/doc-agent/install-skills.sh)
- [x] [.github/doc-coverage-thresholds.json](.github/doc-coverage-thresholds.json)
- [x] [.github/scripts/doc-coverage-check.py](.github/scripts/doc-coverage-check.py)
- [x] [.github/workflows/doc-coverage.yml](.github/workflows/doc-coverage.yml)
- [x] [cmake/XrplDocs.cmake](cmake/XrplDocs.cmake)
- [x] [.github/workflows/doc-review.yml](.github/workflows/doc-review.yml) — invokes doc-agent `review` mode directly
- [x] [.claude/commands/](.claude/commands/) — 4 developer slash commands
**Exit criteria met:** All workflows pass on a test PR. Coverage report
renders correctly. Doc-review action posts comments without false positives
on a sample PR.
### Phase 1: Foundation Modules
Lowest-level modules — everything else depends on these:
| PR | Module | ~Files | ~Lines |
|----|--------|--------|--------|
| 1 | `include/xrpl/basics/` + `src/libxrpl/basics/` | 63 | ~15K |
| 2 | `include/xrpl/crypto/` + `src/libxrpl/crypto/` | 6 | ~1.5K |
| 3 | `include/xrpl/json/` + `src/libxrpl/json/` | 18 | ~4K |
| 4 | `include/xrpl/beast/` + `src/libxrpl/beast/` | 88 | ~20K |
**Process per PR:**
1. Create branch `docs/module-<name>` from `develop`.
2. Run `npm run document <path>` from `.github/scripts/doc-agent/`. The
agent reads each file's `.ai.md` sidecar, the matching module skill,
and the file itself, then writes Doxygen comments per the standards.
3. Domain expert reviews for semantic accuracy.
4. Run Doxygen build to validate no doc errors.
5. Merge; ratchet that module's threshold up to actual coverage level.
### Phase 2: Protocol & Transaction Engine
| PR | Module | ~Files |
|----|--------|--------|
| 5 | `include/xrpl/protocol/` + `src/libxrpl/protocol/` | 150 |
| 6 | `include/xrpl/ledger/` + `src/libxrpl/ledger/` | 68 |
| 7 | `include/xrpl/conditions/` + `src/libxrpl/conditions/` | 8 |
| 8 | `include/xrpl/tx/` (core framework: Transactor, ApplyContext) | 15 |
| 9 | Payment transactors | 9 |
| 10 | DEX/AMM transactors | 25 |
| 11 | Escrow transactors | 7 |
| 12 | Other transactors (NFT, token, vault, check, etc.) | 60 |
| 13 | Pathfinding + invariants | 30 |
### Phase 3: Server & Application Layer
| PR | Module | ~Files |
|----|--------|--------|
| 14 | `include/xrpl/server/` + `src/libxrpl/server/` | 35 |
| 15 | `include/xrpl/nodestore/` + `src/libxrpl/nodestore/` | 30 |
| 16 | SHAMap | 25 |
| 17 | Resource management | 17 |
| 18 | Overlay + peerfinder | 56 |
| 19 | Consensus | 15 |
| 20 | Application core (ledger, main, misc, rdb) | 133 |
| 21 | RPC handlers | 131 |
Once Phases 13 are merged, the doc-review action is promoted from
warning to a **required check**.
### Phase 4: Tests & Polish
- Document test files (brief docs only — test name + what it validates)
- Remove `.ai.md` sidecar files (they were transitional input only)
- Retrospective: false-positive rate, API costs, contributor feedback
## 6. Coverage Threshold Ratchet
Coverage thresholds are enforced per-module via
[.github/doc-coverage-thresholds.json](.github/doc-coverage-thresholds.json):
- **`no_decrease` ratchet** — no PR may reduce coverage on a module
below its current level.
- **New files** require ≥ 80% doc coverage regardless of module threshold.
- **Per-module floors** are raised manually as each module's PR lands,
pinning the achieved coverage as the new floor.
There is no calendar-based ratchet; thresholds advance with the work.
## 7. Risk Assessment
| Risk | Likelihood | Impact | Mitigation |
|------|-----------|--------|------------|
| LLM generates plausible but wrong docs | Medium | High | Every doc PR requires human domain expert review. `.ai.md` sidecars (athenah-ai) ground the agent in source-derived intent rather than free generation. |
| Doc-review action false positives annoy contributors | Medium | Medium | Warning-only mode initially. Promote to required only when FP rate < 5%. Prompts live in markdown ([prompts/](.github/scripts/doc-agent/prompts/)) and can be tuned without a code release. |
| Coverage enforcement blocks unrelated PRs | Low | Medium | `no_decrease` ratchet only; per-module floors raised manually as modules land. |
| Reviewer bandwidth bottleneck | Medium | Medium | PRs scoped to single modules. Reviewers rotate. |
| API costs exceed budget | Low | Low | Only diff hunks processed. Monthly budget cap with alerting. |
| Doxygen XML build adds CI time | Low | Low | Runs in parallel with existing checks. Uses existing documentation container. |
| Doc comments add code noise | Low | Low | Terse style enforced by standards. 25 lines per class, 13 per function. |
| Skill files drift from code | Medium | Medium | `doc-agent regen-skills <module>` rebuilds a skill from current `.ai.md` files; intended to be run periodically. |
## 8. Success Metrics
| Metric | Measurement |
|--------|-------------|
| Documentation coverage (public API) | Coverxygen LCOV reports in CI |
| Doc drift catch rate | Sample audit of merged PRs vs doc-review output |
| False positive rate (doc-review action) | Track dismissed vs accepted suggestions |
| Spec-vs-code contradictions | Bug reports citing wrong documentation |
| Contributor satisfaction | Periodic survey: "docs helped me understand the code" |
| Onboarding time | Measure across new contributors before/after |
| API cost | Anthropic API billing dashboard |
## 9. What This Replaces
This system does **not** replace the Common Prefix formal verification
work directly formal verification and code documentation solve different
problems. However, it eliminates the need for an external specification as
the "source of truth" for how xrpld behaves:
| Need | Before | After |
|------|--------|-------|
| "What does this function do?" | Read the code, guess | Read the inline Doxygen doc |
| "How does the payment engine work?" | Read Common Prefix spec (maybe stale) | Read [docs/skills/transactors.md](docs/skills/transactors.md) or run `/explain-module` |
| "Did this PR break any documented behavior?" | Manual review, hope someone notices | Doc-review action flags it automatically |
| "What's our documentation coverage?" | Unknown | Measured per-module in every PR |
| "Is the spec up to date?" | Check manually, probably not | Docs are in-repo, enforced by CI |
| "Where do I start in module X?" | Ask in chat | Read the module skill in [docs/skills/](docs/skills/) |
## 10. Out of Scope
- **Formal verification.** This project documents code behavior; it does
not prove correctness. Formal verification is a separate discipline.
- **External-facing API documentation.** This covers the C++ source code,
not the JSON-RPC API documentation on xrpl.org.
- **Test coverage.** Test file documentation is brief and optional. Test
coverage measurement is handled by existing Codecov integration.
- **Architectural decision records.** Module-level READMEs already exist
for key subsystems. This project adds function/class-level docs and the
module skills layer, not system-level ADRs.

View File

@@ -1,8 +1,8 @@
#!/bin/bash
if [[ $# -ne 1 || "$1" == "--help" || "$1" == "-h" ]]; then
name=$(basename $0)
cat <<-USAGE
name=$( basename $0 )
cat <<- USAGE
Usage: $name <username>
Where <username> is the Github username of the upstream repo. e.g. XRPLF
@@ -14,7 +14,7 @@ fi
shift
user="$1"
# Get the origin URL. Expect it be an SSH-style URL
origin=$(git remote get-url origin)
origin=$( git remote get-url origin )
if [[ "${origin}" == "" ]]; then
echo Invalid origin remote >&2
exit 1
@@ -22,11 +22,11 @@ fi
# echo "Origin: ${origin}"
# Parse the origin
ifs_orig="${IFS}"
IFS=':' read remote originpath <<<"${origin}"
IFS=':' read remote originpath <<< "${origin}"
# echo "Remote: ${remote}, Originpath: ${originpath}"
IFS='@' read sshuser server <<<"${remote}"
IFS='@' read sshuser server <<< "${remote}"
# echo "SSHUser: ${sshuser}, Server: ${server}"
IFS='/' read originuser repo <<<"${originpath}"
IFS='/' read originuser repo <<< "${originpath}"
# echo "Originuser: ${originuser}, Repo: ${repo}"
if [[ "${sshuser}" == "" || "${server}" == "" || "${originuser}" == "" || "${repo}" == "" ]]; then
echo "Can't parse origin URL: ${origin}" >&2
@@ -35,9 +35,9 @@ fi
upstream="https://${server}/${user}/${repo}"
upstreampush="${remote}:${user}/${repo}"
upstreamgroup="upstream upstream-push"
current=$(git remote get-url upstream 2>/dev/null)
currentpush=$(git remote get-url upstream-push 2>/dev/null)
currentgroup=$(git config remotes.upstreams)
current=$( git remote get-url upstream 2>/dev/null )
currentpush=$( git remote get-url upstream-push 2>/dev/null )
currentgroup=$( git config remotes.upstreams )
if [[ "${current}" == "${upstream}" ]]; then
echo "Upstream already set up correctly. Skip"
elif [[ -n "${current}" && "${current}" != "${upstream}" && "${current}" != "${upstreampush}" ]]; then
@@ -45,9 +45,9 @@ elif [[ -n "${current}" && "${current}" != "${upstream}" && "${current}" != "${u
else
if [[ "${current}" == "${upstreampush}" ]]; then
echo "Upstream set to dangerous push URL. Update."
_run git remote rename upstream upstream-push ||
_run git remote remove upstream
currentpush=$(git remote get-url upstream-push 2>/dev/null)
_run git remote rename upstream upstream-push || \
_run git remote remove upstream
currentpush=$( git remote get-url upstream-push 2>/dev/null )
fi
_run git remote add upstream "${upstream}"
fi

View File

@@ -1,8 +1,8 @@
#!/bin/bash
if [[ $# -lt 3 || "$1" == "--help" || "$1" = "-h" ]]; then
name=$(basename $0)
cat <<-USAGE
name=$( basename $0 )
cat <<- USAGE
Usage: $name workbranch base/branch user/branch [user/branch [...]]
* workbranch will be created locally from base/branch
@@ -16,7 +16,7 @@ fi
work="$1"
shift
branches=($(echo "${@}" | sed "s/:/\//"))
branches=( $( echo "${@}" | sed "s/:/\//" ) )
base="${branches[0]}"
unset branches[0]
@@ -24,10 +24,10 @@ set -e
users=()
for b in "${branches[@]}"; do
users+=($(echo $b | cut -d/ -f1))
users+=( $( echo $b | cut -d/ -f1 ) )
done
users=($(printf '%s\n' "${users[@]}" | sort -u))
users=( $( printf '%s\n' "${users[@]}" | sort -u ) )
git fetch --multiple upstreams "${users[@]}"
git checkout -B "$work" --no-track "$base"
@@ -40,7 +40,7 @@ done
# Make sure the commits look right
git log --show-signature "$base..HEAD"
parts=($(echo $base | sed "s/\// /"))
parts=( $( echo $base | sed "s/\// /" ) )
repo="${parts[0]}"
b="${parts[1]}"
push=$repo
@@ -50,7 +50,7 @@ fi
if [[ "$repo" == "upstream" ]]; then
repo="upstreams"
fi
cat <<PUSH
cat << PUSH
-------------------------------------------------------------------
This script will not push. Verify everything is correct, then push

View File

@@ -1,8 +1,8 @@
#!/bin/bash
if [[ $# -ne 3 || "$1" == "--help" || "$1" = "-h" ]]; then
name=$(basename $0)
cat <<-USAGE
name=$( basename $0 )
cat <<- USAGE
Usage: $name workbranch base/branch version
* workbranch will be created locally from base/branch. If it exists,
@@ -16,7 +16,7 @@ fi
work="$1"
shift
base=$(echo "$1" | sed "s/:/\//")
base=$( echo "$1" | sed "s/:/\//" )
shift
version=$1
@@ -28,16 +28,16 @@ git fetch upstreams
git checkout -B "${work}" --no-track "${base}"
push=$(git rev-parse --abbrev-ref --symbolic-full-name '@{push}' \
2>/dev/null) || true
push=$( git rev-parse --abbrev-ref --symbolic-full-name '@{push}' \
2>/dev/null ) || true
if [[ "${push}" != "" ]]; then
echo "Warning: ${push} may already exist."
fi
build=$(find -name BuildInfo.cpp)
sed 's/\(^.*versionString =\).*$/\1 "'${version}'"/' ${build} >version.cpp &&
diff "${build}" version.cpp && exit 1 ||
mv -vi version.cpp ${build}
build=$( find -name BuildInfo.cpp )
sed 's/\(^.*versionString =\).*$/\1 "'${version}'"/' ${build} > version.cpp && \
diff "${build}" version.cpp && exit 1 || \
mv -vi version.cpp ${build}
git diff
@@ -47,7 +47,7 @@ git commit -S -m "Set version to ${version}"
git log --oneline --first-parent ${base}^..
cat <<PUSH
cat << PUSH
-------------------------------------------------------------------
This script will not push. Verify everything is correct, then push

View File

@@ -168,13 +168,7 @@ def main():
if not os.environ.get("TIDY"):
return 0
repo_root = Path(
subprocess.check_output(
["git", "rev-parse", "--show-toplevel"],
cwd=Path(__file__).parent,
text=True,
).strip()
)
repo_root = Path(__file__).parent.parent
files = staged_files(repo_root)
if not files:
return 0

View File

@@ -28,7 +28,7 @@
# https://vl.ripple.com
# https://unl.xrplf.org
# http://127.0.0.1:8000
# file:///etc/xrpld/vl.txt
# file:///etc/opt/xrpld/vl.txt
#
# [validator_list_keys]
#

View File

@@ -527,17 +527,6 @@
#
# The current default (which is subject to change) is 300 seconds.
#
# verify_endpoints = <0 | 1>
#
# If set to 0, the server will skip validation of endpoint
# addresses received in TMEndpoints peer protocol messages,
# allowing addresses that are not publicly routable or have a
# port of 0. The default is 1 (verification enabled).
#
# WARNING: Disabling this option is a security risk and should
# only be used for local testing and debugging. Do not disable
# on mainnet.
#
#
# [transaction_queue] EXPERIMENTAL
#
@@ -953,21 +942,6 @@
#
# Optional keys for NuDB and RocksDB:
#
# cache_size Size of cache for database records. Default is 16384.
# Setting this value to 0 will use the default value.
#
# cache_age Length of time in minutes to keep database records
# cached. Default is 5 minutes. Setting this value to
# 0 will use the default value.
#
# Note: if cache_size or cache_age is not specified,
# default values will be used for the unspecified
# parameter.
#
# Note: the cache will not be created if online_delete
# is specified, because the rotating NodeStore does
# not use this cache).
#
# fast_load Boolean. If set, load the last persisted ledger
# from disk upon process start before syncing to
# the network. This is likely to improve performance
@@ -1481,7 +1455,10 @@ admin = 127.0.0.1
protocol = http
[port_peer]
port = 2459
# Many servers still use the legacy port of 51235, so for backward-compatibility
# we maintain that port number here. However, for new servers we recommend
# changing this to the default port of 2459.
port = 51235
ip = 0.0.0.0
# alternatively, to accept connections on IPv4 + IPv6, use:
#ip = ::

View File

@@ -89,3 +89,30 @@ add_custom_target(
DEPENDS "${doxygen_index_file}"
SOURCES "${dependencies}"
)
# Documentation coverage target using coverxygen.
# Generates LCOV-format coverage report from Doxygen XML output.
# Requires: pip install coverxygen
set(doxygen_xml_dir "${doxygen_output_directory}/xml")
set(doc_coverage_file "${CMAKE_BINARY_DIR}/doc-coverage.info")
add_custom_command(
OUTPUT "${doc_coverage_file}"
COMMAND
coverxygen
--xml-dir "${doxygen_xml_dir}"
--src-dir "${CMAKE_CURRENT_SOURCE_DIR}"
--output "${doc_coverage_file}"
--kind class,struct,function,enum,typedef,variable
--scope public
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
DEPENDS docs
COMMENT "Generating documentation coverage report"
)
add_custom_target(
docs-coverage
DEPENDS "${doc_coverage_file}"
COMMAND
"${CMAKE_COMMAND}" -E echo
"Documentation coverage report: ${doc_coverage_file}"
)

View File

@@ -1,44 +0,0 @@
#[===================================================================[
Linux packaging support: 'package' target.
The packaging script (package/build_pkg.sh) installs to FHS-standard
paths (/usr/bin, /etc/xrpld, etc.) regardless of CMAKE_INSTALL_PREFIX,
so no prefix guard is needed here.
#]===================================================================]
if(NOT is_linux)
message(STATUS "Packaging not supported on non-Linux hosts")
return()
endif()
if(NOT DEFINED pkg_release)
set(pkg_release 1)
endif()
find_program(RPMBUILD_EXECUTABLE rpmbuild)
find_program(DPKG_BUILDPACKAGE_EXECUTABLE dpkg-buildpackage)
if(NOT (RPMBUILD_EXECUTABLE OR DPKG_BUILDPACKAGE_EXECUTABLE))
message(
STATUS
"Neither rpmbuild nor dpkg-buildpackage found; 'package' target not available"
)
return()
endif()
set(package_env
SRC_DIR=${CMAKE_SOURCE_DIR}
BUILD_DIR=${CMAKE_BINARY_DIR}
PKG_VERSION=${xrpld_version}
PKG_RELEASE=${pkg_release}
)
add_custom_target(
package
COMMAND
${CMAKE_COMMAND} -E env ${package_env}
${CMAKE_SOURCE_DIR}/package/build_pkg.sh
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS xrpld
COMMENT "Building Linux package (deb/rpm inferred from host tooling)"
VERBATIM
)

View File

@@ -1,13 +0,0 @@
# Python dependencies for XRP Ledger code generation scripts
#
# These packages are required to run the code generation scripts that
# parse macro files and generate C++ wrapper classes.
# C preprocessor for Python - used to preprocess macro files
pcpp>=1.30
# Parser combinator library - used to parse the macro DSL
pyparsing>=3.0.0
# Template engine - used to generate C++ code from templates
Mako>=1.2.2

View File

@@ -1,105 +1,13 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in --generate-hashes --output-file requirements.txt
mako==1.3.12 \
--hash=sha256:8f61569480282dbf557145ce441e4ba888be453c30989f879f0d652e39f53ea9 \
--hash=sha256:9f778e93289bd410bb35daadeb4fc66d95a746f0b75777b942088b7fd7af550a
# via -r requirements.in
markupsafe==3.0.3 \
--hash=sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f \
--hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \
--hash=sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf \
--hash=sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19 \
--hash=sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf \
--hash=sha256:0f4b68347f8c5eab4a13419215bdfd7f8c9b19f2b25520968adfad23eb0ce60c \
--hash=sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175 \
--hash=sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219 \
--hash=sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb \
--hash=sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6 \
--hash=sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab \
--hash=sha256:15d939a21d546304880945ca1ecb8a039db6b4dc49b2c5a400387cdae6a62e26 \
--hash=sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1 \
--hash=sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce \
--hash=sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218 \
--hash=sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634 \
--hash=sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695 \
--hash=sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad \
--hash=sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73 \
--hash=sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c \
--hash=sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe \
--hash=sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa \
--hash=sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559 \
--hash=sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa \
--hash=sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37 \
--hash=sha256:3537e01efc9d4dccdf77221fb1cb3b8e1a38d5428920e0657ce299b20324d758 \
--hash=sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f \
--hash=sha256:38664109c14ffc9e7437e86b4dceb442b0096dfe3541d7864d9cbe1da4cf36c8 \
--hash=sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d \
--hash=sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c \
--hash=sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97 \
--hash=sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a \
--hash=sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19 \
--hash=sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9 \
--hash=sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9 \
--hash=sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc \
--hash=sha256:591ae9f2a647529ca990bc681daebdd52c8791ff06c2bfa05b65163e28102ef2 \
--hash=sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4 \
--hash=sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354 \
--hash=sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50 \
--hash=sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698 \
--hash=sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9 \
--hash=sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b \
--hash=sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc \
--hash=sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115 \
--hash=sha256:7c3fb7d25180895632e5d3148dbdc29ea38ccb7fd210aa27acbd1201a1902c6e \
--hash=sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485 \
--hash=sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f \
--hash=sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12 \
--hash=sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025 \
--hash=sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009 \
--hash=sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d \
--hash=sha256:949b8d66bc381ee8b007cd945914c721d9aba8e27f71959d750a46f7c282b20b \
--hash=sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a \
--hash=sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5 \
--hash=sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f \
--hash=sha256:a320721ab5a1aba0a233739394eb907f8c8da5c98c9181d1161e77a0c8e36f2d \
--hash=sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1 \
--hash=sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287 \
--hash=sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6 \
--hash=sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f \
--hash=sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581 \
--hash=sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed \
--hash=sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b \
--hash=sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c \
--hash=sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026 \
--hash=sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8 \
--hash=sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676 \
--hash=sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6 \
--hash=sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e \
--hash=sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d \
--hash=sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d \
--hash=sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01 \
--hash=sha256:df2449253ef108a379b8b5d6b43f4b1a8e81a061d6537becd5582fba5f9196d7 \
--hash=sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419 \
--hash=sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795 \
--hash=sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1 \
--hash=sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5 \
--hash=sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d \
--hash=sha256:e8fc20152abba6b83724d7ff268c249fa196d8259ff481f3b1476383f8f24e42 \
--hash=sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe \
--hash=sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda \
--hash=sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e \
--hash=sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737 \
--hash=sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523 \
--hash=sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591 \
--hash=sha256:f71a396b3bf33ecaa1626c255855702aca4d3d9fea5e051b41ac59a9c1c41edc \
--hash=sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a \
--hash=sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50
# via mako
pcpp==1.30 \
--hash=sha256:05fe08292b6da57f385001c891a87f40d6aa7f46787b03e8ba326d20a3297c6e \
--hash=sha256:5af9fbce55f136d7931ae915fae03c34030a3b36c496e72d9636cedc8e2543a1
# via -r requirements.in
pyparsing==3.3.2 \
--hash=sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d \
--hash=sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc
# via -r requirements.in
# Python dependencies for XRP Ledger code generation scripts
#
# These packages are required to run the code generation scripts that
# parse macro files and generate C++ wrapper classes.
# C preprocessor for Python - used to preprocess macro files
pcpp>=1.30
# Parser combinator library - used to parse the macro DSL
pyparsing>=3.0.0
# Template engine - used to generate C++ code from templates
Mako>=1.2.2

View File

@@ -33,7 +33,7 @@ public:
* @brief Construct a ${name} ledger entry wrapper from an existing SLE object.
* @throws std::runtime_error if the ledger entry type doesn't match.
*/
explicit ${name}(SLE::const_pointer sle)
explicit ${name}(std::shared_ptr<SLE const> sle)
: LedgerEntryBase(std::move(sle))
{
// Verify ledger entry type
@@ -168,7 +168,7 @@ ${field['typeData']['setter_type']} ${field['paramName']}${',' if i < len(requir
* @param sle The existing ledger entry to copy from.
* @throws std::runtime_error if the ledger entry type doesn't match.
*/
${name}Builder(SLE::const_pointer sle)
${name}Builder(std::shared_ptr<SLE const> sle)
{
if (sle->at(sfLedgerEntryType) != ${tag})
{

View File

@@ -93,22 +93,18 @@ words:
- daria
- dcmake
- dearmor
- dedented
- deleteme
- demultiplexer
- deserializaton
- desync
- desynced
- determ
- disablerepo
- distro
- doxyfile
- dxrpl
- enabled
- enablerepo
- endmacro
- exceptioned
- EXPECT_STREQ
- Falco
- fcontext
- finalizers
@@ -166,7 +162,6 @@ words:
- Merkle
- Metafuncton
- misprediction
- missingok
- mptbalance
- MPTDEX
- mptflags
@@ -198,15 +193,11 @@ words:
- NOLINT
- NOLINTNEXTLINE
- nonxrp
- noreplace
- noripple
- nostdinc
- notifempty
- nudb
- nullptr
- nunl
- Nyffenegger
- onlatest
- ostr
- pargs
- partitioner
@@ -222,7 +213,6 @@ words:
- preauthorize
- preauthorizes
- preclaim
- preun
- protobuf
- protos
- ptrs
@@ -257,15 +247,12 @@ words:
- sfields
- shamap
- shamapitem
- shfmt
- shlibs
- sidechain
- SIGGOOD
- sle
- sles
- soci
- socidb
- SRPMS
- sslws
- statsd
- STATSDCOLLECTOR
@@ -293,8 +280,8 @@ words:
- txn
- txns
- txs
- ubsan
- UBSAN
- ubsan
- umant
- unacquired
- unambiguity
@@ -302,7 +289,6 @@ words:
- unauthorizing
- unergonomic
- unfetched
- unfindable
- unflatten
- unfund
- unimpair
@@ -332,6 +318,7 @@ words:
- xbridge
- xchain
- ximinez
- EXPECT_STREQ
- XMACRO
- xrpkuwait
- xrpl
@@ -339,4 +326,3 @@ words:
- xrplf
- xxhash
- xxhasher
- CGNAT

View File

@@ -1,12 +0,0 @@
#!/bin/bash
case "$(uname -m)" in
x86_64) LOADER=/lib64/ld-linux-x86-64.so.2 ;;
aarch64) LOADER=/lib/ld-linux-aarch64.so.1 ;;
*)
echo "Unsupported arch: $(uname -m)" >&2
exit 1
;;
esac
echo "${LOADER}"

View File

@@ -27,9 +27,7 @@ RUN mkdir /tmp/nix-store-closure && \
cp -R $(nix-store -qR result/) /tmp/nix-store-closure
# Final image
FROM ${BASE_IMAGE} AS final
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# bash is not located at /bin/bash in nixos/nix, so we need to create a symlink to it.
RUN if [ -d /nix ]; then \
@@ -45,30 +43,10 @@ ENTRYPOINT ["/bin/bash"]
COPY --from=builder /tmp/nix-store-closure /nix/store
COPY --from=builder /tmp/build/result /nix/ci-env
ENV PATH="/nix/ci-env/bin:${PATH}"
# Externally-built dynamically-linked ELF binaries hard-code the loader path
# (e.g. /lib64/ld-linux-x86-64.so.2) in their PT_INTERP header. Install it
# from the Nix store when the base image doesn't already provide one.
COPY docker/loader-path.sh /tmp/loader-path.sh
RUN <<EOF
target="$(/tmp/loader-path.sh)"
if [ ! -e "${target}" ]; then
# Use the loader from the same glibc that gcc links libc against, so
# ld-linux and libc/libpthread share GLIBC_PRIVATE symbols at runtime.
src="$(dirname "$(gcc -print-file-name=libc.so.6)")/$(basename "${target}")"
[ -e "${src}" ] || { echo "ld-linux not found at ${src}" >&2; exit 1; }
mkdir -p "$(dirname "${target}")"
cp "${src}" "${target}"
fi
EOF
ENV PATH="/nix/ci-env/bin:$PATH"
RUN <<EOF
ccache --version
clang --version
clang++ --version
clang-format --version
cmake --version
conan --version
@@ -86,17 +64,3 @@ python3 --version
run-clang-tidy --help
vim --version
EOF
# Sanity-check that the sanitizer runtimes shipped with g++/clang++ are able to build binaries
COPY docker/test_files/cpp_sources/ /tmp/cpp_sources/
COPY docker/test_files/compile-cpp-sources.sh /tmp/compile-cpp-sources.sh
RUN /tmp/compile-cpp-sources.sh /tmp/cpp_sources /tmp/bins
# Sanity-check that the built binaries are able to run.
# We only support running the test binaries on Ubuntu and NixOS right now (will be fixed in the future)
#
# When build and test images will be separate, we will be to run on vanilla images.
COPY docker/test_files/run-test-binaries.sh /tmp/run-test-binaries.sh
RUN if echo "${BASE_IMAGE}" | grep -qiE '(ubuntu|nixos)'; then \
/tmp/run-test-binaries.sh /tmp/bins; \
fi

View File

@@ -1,50 +0,0 @@
#!/bin/bash
# Compile all C++ test binaries during the Docker image build.
# Each binary has the target system's ELF PT_INTERP (dynamic-linker path)
# baked in so it can run on the (potentially minimal) final BASE_IMAGE.
set -eo pipefail
src_dir="${1:?usage: $0 <src_dir> <dst_dir>}"
dst_dir="${2:?usage: $0 <src_dir> <dst_dir>}"
loader="$(/tmp/loader-path.sh)"
mkdir -p "${dst_dir}"
function compile() {
local compiler="${1}"
local name="${2}"
local san_flag="${3:-}"
local src="${src_dir}/${name}.cpp"
local binary="${dst_dir}/${name}-${compiler}"
echo "=== Compile ${name} with ${compiler} ==="
cmd="${compiler} -std=c++23 -O1 -g \
-pthread \
-Wl,--dynamic-linker=${loader} \
${san_flag} \
${src} -o ${binary}"
echo "Command: ${cmd}"
eval "${cmd}"
}
declare -A sanitize=(
[regular]=""
[asan]="-fsanitize=address"
[tsan]="-fsanitize=thread"
[ubsan]="-fsanitize=undefined -fno-sanitize-recover=all"
)
for name in regular asan tsan ubsan; do
san_flag="${sanitize[${name}]}"
for compiler in g++ clang++; do
compile "${compiler}" "${name}" "${san_flag}"
done
done
echo "=== All binaries compiled ==="
ls -la "${dst_dir}"

View File

@@ -1,28 +0,0 @@
#include <atomic>
#include <cstddef>
#include <iostream>
#if defined(__clang__) || defined(__GNUC__)
__attribute__((noinline))
#elif defined(_MSC_VER)
__declspec(noinline)
#endif
int
read_after_free(volatile int* array, std::size_t index)
{
std::atomic_signal_fence(std::memory_order_seq_cst);
int value = array[index];
std::atomic_signal_fence(std::memory_order_seq_cst);
return value;
}
int
main()
{
int* array = new int[5]{10, 20, 30, 40, 50};
delete[] array;
std::cout << "Value at index 2: " << read_after_free(array, 2) << std::endl;
return 0;
}

View File

@@ -1,28 +0,0 @@
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>
static std::mutex gMutex;
void
worker(int id)
{
std::lock_guard<std::mutex> lock(gMutex);
std::cout << "Hello from thread " << id << "\n";
}
int
main()
{
constexpr int kNumThreads = 10;
std::vector<std::thread> threads;
threads.reserve(kNumThreads);
for (int i = 0; i < kNumThreads; ++i)
threads.emplace_back(worker, i);
for (auto& t : threads)
t.join();
std::cout << "Hello from main thread\n";
return 0;
}

View File

@@ -1,26 +0,0 @@
#include <iostream>
#include <thread>
static int kCounter = 0;
void
increment()
{
for (int i = 0; i < 100'000; ++i)
{
++kCounter;
}
}
int
main()
{
std::thread t1(increment);
std::thread t2(increment);
t1.join();
t2.join();
std::cout << "Final counter value: " << kCounter << std::endl;
return 0;
}

View File

@@ -1,13 +0,0 @@
#include <iostream>
#include <limits>
int
main()
{
int maxInt = std::numeric_limits<int>::max();
int volatile one = 1;
std::cout << "Current max: " << maxInt << std::endl;
int overflowed = maxInt + one;
std::cout << "Overflowed result: " << overflowed << std::endl;
return 0;
}

View File

@@ -1,62 +0,0 @@
#!/bin/bash
# Run pre-compiled sanitizer binaries and confirm each emits its expected diagnostic.
# Binaries must already exist in <bins_dir> with the layout:
# <name>-g++ and <name>-clang++ for name in {regular,asan,tsan,ubsan}
set -eo pipefail
bins_dir="${1:?usage: $0 <bins_dir>}"
# Run a binary and verify its exit code and output.
# Usage: run <binary> <expected_output> <expected_rc>
function run() {
local binary="${1}"
local expected_output="${2}"
local expected_rc="${3}"
local out_file
out_file="$(mktemp)"
echo "=== Run ${binary} ==="
local rc=0
"${binary}" >"${out_file}" 2>&1 || rc=$?
cat "${out_file}"
if [ "${expected_rc}" = "nonzero" ]; then
if [ "${rc}" -eq 0 ]; then
echo "ERROR: expected non-zero exit code from ${binary}, got ${rc}" >&2
exit 1
fi
elif [ "${rc}" -ne "${expected_rc}" ]; then
echo "ERROR: expected exit code ${expected_rc} from ${binary}, got ${rc}" >&2
exit 1
fi
grep -q "${expected_output}" "${out_file}" ||
{
echo "ERROR: expected '${expected_output}' from ${binary}" >&2
exit 1
}
echo "OK: '${expected_output}' detected"
}
declare -A expect=(
[regular]="Hello from main thread"
[asan]="heap-use-after-free"
[tsan]="data race"
[ubsan]="signed integer overflow"
)
for compiler in g++ clang++; do
for name in regular asan tsan ubsan; do
binary="${bins_dir}/${name}-${compiler}"
if [ "${name}" = "regular" ]; then
expected_rc=0
else
expected_rc=nonzero
fi
run "${binary}" "${expect[$name]}" "${expected_rc}"
done
done

View File

@@ -0,0 +1,192 @@
# XRPLD Documentation Standards
This document defines the canonical format for inline code documentation in the
xrpld codebase. All new and updated code must follow these standards.
## Comment Style
Use Javadoc-style Doxygen comments (`/** ... */`). This matches the dominant
convention in the codebase: ~5,200 existing instances across 569 files.
```cpp
/** Brief description of the entity. */
```
For multi-line documentation, each line is prefixed with ` * ` (space, asterisk,
space):
```cpp
/** Brief description of the entity.
*
* Extended description with behavioral details, invariants,
* and constraints that are not obvious from the signature.
*/
```
`JAVADOC_AUTOBRIEF = YES` is enabled in the Doxyfile, so the first sentence
of any `/** */` block is automatically treated as the brief. An explicit
`@brief` tag is accepted but not required.
The `///` triple-slash style appears in ~37 files (340 instances). It is
valid Doxygen and will not be removed where it exists, but new code should
use `/** */` for consistency with the majority style.
## What to Document
### File-Level (Optional)
The `@file` tag is not currently used anywhere in the codebase. Adding
file-level documentation is encouraged for complex modules where a
high-level overview helps, but it is not required:
```cpp
/** @file
* Defines the Payment transactor for the XRP Ledger.
*
* The Payment transactor handles direct XRP transfers, cross-currency
* payments via the pathfinding engine, and partial payments.
*/
```
Module-level READMEs (e.g., `src/xrpld/peerfinder/README.md`) remain the
primary place for architectural documentation.
### Class / Struct Level
Every class and struct gets a doc block describing:
- What it does (1-2 sentences)
- Key invariants or constraints, if any
- Thread-safety guarantees, if relevant
- Lifecycle notes, if relevant
```cpp
/** Executes a Payment transaction on the XRP Ledger.
*
* Supports direct XRP payments, cross-currency payments via RippleCalc,
* and partial payments (tfPartialPayment). Path count is limited to 6
* with max path length of 8.
*/
class Payment : public Transactor { ... };
```
Target: 2-5 lines for most classes. Complex classes may need more.
### Public Methods and Free Functions
Every public method and free function in headers gets:
- Brief description of behavior (not a restatement of the signature)
- `@param` for each parameter
- `@return` describing what is returned
- `@throw` if it can throw (either `@throw` or `@throws` is acceptable —
the codebase uses both)
- `@note` for non-obvious constraints or edge cases
When a `@param` description wraps, continuation lines are indented with
4 spaces from the `*`:
```cpp
/** Round a Number value to the precision of a given asset.
*
* For IOUs, rounds to the IOU's scale. For XRP and MPT, no rounding
* is performed.
*
* @param asset The relevant asset
* @param value The value to be rounded
* @param scale An exponent value to establish the precision limit of
* `value`. Should be larger than `value.exponent()`.
* @return The rounded Number.
*/
[[nodiscard]] Number
roundToAsset(Asset const& asset, Number const& value, int scale);
```
Target: 1-3 lines of description plus `@param`/`@return`.
### Private Methods
Document private methods only when the logic is non-obvious. A brief
one-line comment is sufficient.
### Enums and Constants
All enums get a brief class-level description. Individual enum values get
inline documentation when the meaning is not self-evident:
```cpp
/** Result codes for transaction processing. */
enum TERCode
{
tesSUCCESS = 0, /**< Transaction succeeded. */
tecCLAIM = 100, /**< Fee claimed; transaction failed. */
tecPATH_PARTIAL = 101, /**< Path could not deliver full amount. */
};
```
## What NOT to Document
- Do not paraphrase the function signature. `/** Returns the account ID. */`
on `AccountID getAccountID()` adds zero information.
- Do not document what is obvious from well-named identifiers.
- Do not reference specific issues, PRs, or task numbers. These belong in
commit messages and rot as the codebase evolves.
- Do not add multi-paragraph docstrings. If it takes that long to explain,
the code may need restructuring.
- Do not document `.cpp` implementation files exhaustively. Focus docs on
headers where the public interface is defined.
## Quality Over Quantity
Wrong documentation is worse than no documentation. Every doc comment must
accurately describe the current behavior. When in doubt:
- Read the implementation before writing the doc
- Cross-reference against test files for edge cases
- Use `@note` to flag subtle behavior that has caught contributors before
## Doxygen Tags Reference
Tags in regular use across the codebase:
| Tag | Codebase Usage | Purpose |
|-----|----------------|---------|
| `@brief` | ~2,500 instances | Brief description (optional — autobrief is enabled) |
| `@param` | ~2,400 instances | Function parameter description |
| `@return` | ~2,200 instances | Return value description |
| `@note` | ~270 instances | Important behavioral note or caveat |
| `@throw` / `@throws` | ~450 instances combined | Exception specification |
| `@see` | ~64 instances | Cross-reference to related entities |
| `@tparam` | ~43 instances | Template parameter description |
Tags used rarely but accepted:
| Tag | Codebase Usage | Purpose |
|-----|----------------|---------|
| `@invariant` | ~13 instances | Property that must always hold |
| `@pre` | ~3 instances | Precondition |
| `@file` | 0 instances | File-level description (new convention, optional) |
## Enforcement
Documentation coverage is measured by [coverxygen](https://github.com/psycofdj/coverxygen)
and enforced in CI:
- PRs cannot decrease documentation coverage (`no_decrease` ratchet mode)
- New files added in a PR require >= 80% doc coverage
- Module-specific thresholds increase quarterly
- The doc-review GitHub Action checks whether code changes invalidate
existing documentation
Coverage is measured against public API surface: classes, structs,
functions, enums, typedefs, and variables. Private implementation details
are not counted.
## Style Notes
- Doc comments go immediately before the entity they describe (no blank
line between the comment and the declaration)
- Keep `@param` descriptions on a single line when possible
- For wrapped `@param` descriptions, indent continuation lines 4 spaces
from the `*`
- Use `@see` sparingly — only when the relationship is non-obvious
- Code style (braces, line width, formatting) is governed by `.clang-format`
and is independent of these documentation standards

View File

@@ -49,7 +49,7 @@ LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_ALL = NO
EXTRACT_PRIVATE = YES
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = YES
@@ -257,7 +257,7 @@ MAN_LINKS = NO
#---------------------------------------------------------------------------
# Configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = YES

256
docs/skills/consensus.md Normal file
View File

@@ -0,0 +1,256 @@
# Consensus
Template-based state machine in `Consensus.h` parameterized by an `Adaptor` (production: `RCLConsensus`). Three phases: `open → establish → accepted`. Four modes: `proposing`, `observing`, `wrongLedger`, `switchedLedger`. Header-only because of templating; policy decisions (`shouldCloseLedger`, `checkConsensus`, `checkConsensusReached`) live as free functions in `Consensus.cpp` for independent testability.
## Architecture
The consensus engine is fully decoupled from XRPL types via the `Adaptor` template parameter. `Adaptor` provides four type aliases (`Ledger_t`, `TxSet_t`, `NodeID_t`, `PeerPosition_t`) plus callbacks (`onClose`, `onAccept`, `onForceAccept`, `onModeChange`) and queries (`proposersValidated`, `proposersFinished`, `getPrevLedger`). Networking is hooked via `propose()` and three `share()` overloads (position, tx set, individual tx).
The engine itself has no thread or timer — it is driven externally by `timerEntry()` calls. Thread safety is the caller's responsibility.
## Key Invariants
- A ledger cannot close until the previous ledger reaches consensus AND (has transactions OR close time reached)
- Proposals must have strictly increasing sequence numbers per peer; stale proposals are silently dropped
- `ConsensusResult` constructor asserts `txns.id() == position.position()` — a node's declared position is always a commitment to a specific tx set
- The Avalanche state machine progressively raises consensus thresholds over time (`init → mid → late → stuck`) to force convergence
- `minCONSENSUS_PCT = 80` is the baseline for `checkConsensus`; timing: `ledgerMIN_CONSENSUS = 1950ms`, `ledgerMAX_CONSENSUS = 15s`, `ledgerABANDON_CONSENSUS = 120s`
- `ledgerMAX_CONSENSUS` must stay below `validationFRESHNESS` so waiting validators aren't mistaken for offline
- Dead nodes (`deadNodes_`) are permanently excluded for the round once they bow out
- LedgerTrie compression invariant: non-root nodes with zero `tipSupport` must have ≥2 children
- `ConsensusResult::disputes` holds only genuinely-differing transactions; `compares` set prevents O(n²) work when multiple peers share a tx set
## Phases and Modes
### Phase transitions (`ConsensusPhase` in `ConsensusTypes.h`)
```
"close" "accept"
open --------> establish ---------> accepted
^ | |
|---------------| |
| "startRound" |
|------------------------------------|
```
Mid-`establish` re-entry to `open` happens inside `handleWrongLedger()` — it preserves surrounding state rather than aborting. `timerEntry`, `gotTxSet`, and `peerProposal` all short-circuit when phase is `accepted`.
### Mode transitions (`ConsensusMode`)
```
proposing observing
\ /
\---> wrongLedger <---/
^
v
switchedLedger
```
`switchedLedger` is a distinct mode (not just `observing`) because close-time logic checks the mode label when deciding whether the previous ledger's close time is authoritative. `MonitoredMode` inner class wraps the enum to make silent mode changes structurally impossible — every `set()` calls `adaptor_.onModeChange(before, after)`.
## Phase Logic
### Open phase
`shouldCloseLedger()` is called per timer tick. Priority order (`Consensus.cpp`):
1. Sanity bounds — close immediately if `prevRoundTime` or `timeSincePrevClose` outside `[-1s, 10min]`
2. Majority closed — close if `proposersClosed + proposersValidated > prevProposers / 2`
3. Idle case — only close on `timeSincePrevClose >= ledgerIDLE_INTERVAL` (15s) when no transactions
4. Minimum open time — never close before `ledgerMIN_CLOSE` (2s)
5. Rate limit — block close if `openTime < prevRoundTime / 2` (prevents fast node from outrunning slower validators)
Close-time reference: if mode is `wrongLedger` or close-time wasn't agreed, use internal `prevCloseTime_` rather than the ledger's recorded close time.
### Establish phase
Per tick: `updateOurPositions()``shouldPause()``haveConsensus()`. `ledgerMIN_CONSENSUS` is enforced before any position updates. `updateOurPositions()`:
- Prunes stale peer proposals (older than `proposeFRESHNESS` = 20s)
- Calls `dispute.updateVote(convergePercent_, ...)` on each `DisputedTx`
- Rebuilds the `MutableTxSet` if any vote flipped, re-shares + re-proposes
`shouldPause()` uses a 5-phase cycle (04) keyed off `(ahead - 1) % 5`. Each phase requires progressively more validators current; phase 4 requires all. This cycles to avoid any single threshold being universally right.
### checkConsensus outcomes (`ConsensusState` in `ConsensusTypes.h`)
- `No` — insufficient agreement
- `Yes` — local + network agree on tx set (80% with self counted, via `proposing` flag in `checkConsensusReached`)
- `MovedOn` — 80% of peers finished without us (self not counted); we lost the race
- `Expired` — abandoned after `prevAgreeTime * ledgerABANDON_CONSENSUS_FACTOR` (factor=10), clamped to `[ledgerMAX_CONSENSUS, ledgerABANDON_CONSENSUS]`
The zero-peer case in `checkConsensusReached` deliberately refuses consensus until `reachedMax` — prevents premature self-close on a network slow to deliver proposals. The `stalled` case bypasses the percentage check entirely; when all disputed transactions have clear supermajority agreement either way, network commits immediately.
## Avalanche Voting
Four states defined in `ConsensusParms.h` as `std::map<AvalancheState, AvalancheCutoff>` (data-driven, not switch — supports hypothetical loops):
| State | Time threshold (% of prior round) | Required yes-vote | Next |
|---------|-----------------------------------|-------------------|--------|
| `init` | 0% | 50% | `mid` |
| `mid` | 50% | 65% | `late` |
| `late` | 85% | 70% | `stuck`|
| `stuck` | 200% | 95% | `stuck`|
`getNeededWeight()` returns `(consensusPct, optional<nextState>)`; caller does the actual state update. `avMIN_ROUNDS` prevents premature escalation on clock jitter; `avalancheCounter_` resets to zero on every state transition.
`DisputedTx::updateVote()` behaves asymmetrically:
- Proposing: `weight = (yays_*100 + (ourVote_?100:0)) / (nays_+yays_+1)`; `newPosition = weight > requiredPct`
- Not proposing: `newPosition = yays_ > nays_`, `weight = -1`. Observer never distorts proposers' weighted vote.
`DisputedTx` uses `boost::container::flat_map<NodeID_t, bool>` for peer votes (cache-friendly for small sets), pre-reserved to `numPeers`. `yays_` and `nays_` counters allow O(1) percentage computation without scanning the map. `setVote()` returns `true` on any change (including a new vote), which feeds `peerUnchangedCounter_` tracking.
Stall detection (`DisputedTx::stalled`) — all must hold:
1. `nextCutoff.consensusTime <= currentCutoff.consensusTime` (terminal `stuck` state)
2.`avMIN_ROUNDS` rounds in state
3. `peersUnchanged >= avSTALLED_ROUNDS` **OR** `currentVoteCounter_ >= avSTALLED_ROUNDS` (OR not AND — defends against a peer flip-flopping to reset the counter)
4. Vote split exceeds `minCONSENSUS_PCT` (80%) in either direction
`peerUnchangedCounter_` resets to 0 on any peer vote change in `updateDisputes()`. Close-time consensus uses a separate threshold `avCT_CONSENSUS_PCT` (75%) — close-time agreement is a simpler majority, not a multi-round ratchet.
## Proposals (`ConsensusProposal.h`)
Five fields hashed for signing: `HashPrefix::proposal`, `proposeSeq_`, `closeTime_`, `prevLedgerID_`, `position_`. Hash is `mutable std::optional<uint256>`, lazily computed; `changePosition()` and `bowOut()` must call `signingHash_.reset()` before mutating.
Sequence sentinels:
- `seqJoin = 0` — initial proposal (`isInitial()`); `ConsensusCloseTimes` collects these for clock-drift measurement
- `seqLeave = 0xffffffff` — bow-out; `changePosition()` refuses to increment past this
`seenTime()` is local wall-clock time when last updated, NOT `closeTime_` (the proposer's estimate of when the ledger should close in `NetClock`). Don't conflate them. `isStale(cutoff)` uses `seenTime()`. `operator==` includes `seenTime()`, so logically-identical proposals seen at different times don't compare equal.
The production wrapper `RCLCxPeerPos` (in `app/consensus/`) adds cryptographic signature and public key for network propagation. Template parameters `(NodeID_t, LedgerID_t, Position_t)` allow unit-test instantiation over simple integer types.
## `ConsensusTypes.h` — Vocabulary Types
- **`ConsensusTimer`**: dual `tick()` overloads — wall-clock (`steady_clock::time_point`) and fixed-increment (for deterministic simulation). Both update `dur_`; `read()` always valid. Backing `roundTime` in `ConsensusResult` feeds `prevRoundTime_`.
- **`ConsensusCloseTimes`**: `peers` is `std::map<NetClock::time_point, int>` (ordered for deterministic traversal when resolving close time); `self` is local estimate. Collects initial (`seqJoin`) proposals for clock-drift measurement.
- **`ConsensusResult`**: instantiated once per round by `closeLedger`, lives in `Consensus::result_` as `std::optional`. Holds `disputes`, `compares` work-avoidance set, `proposers` snapshot. `state` field records `ConsensusState` outcome for diagnostics.
## Wrong-Ledger Recovery
At every `timerEntry()`, `checkLedger()` calls `adaptor_.getPrevLedger()`. If diverged, `handleWrongLedger()`:
1. Calls `leaveConsensus()` — broadcasts bow-out, drops to `observing`
2. Clears peer state
3. Calls `playbackProposals()` — replays proposals from `recentPeerPositions_` (capped at 10/peer, stored regardless of ledger ID)
4. If correct ledger acquired: `startRoundInternal()` in `switchedLedger` mode; else: stays in `wrongLedger`
The bounded `recentPeerPositions_` buffer is a deliberate trade-off: small bounded buffer beats dropping proposals during switches. Recovery re-enters `open` phase mid-`establish` via `handleWrongLedger()`, preserving surrounding state.
## Common Bug Patterns
- Proposals referencing a stale `prevLedgerID_` after a ledger switch cause split-brain; always check `newPeerProp.prevLedger() != prevLedgerID_` before processing
- Resetting the consensus timer during `establish` phase causes re-convergence and potential split; timer must only reset on phase transitions
- `DisputedTx::updateVote` changes local vote based on peer pressure; bugs here cause determinism failures across nodes
- `createDisputes()` deduplicates via `result_->compares` set; missing this check creates duplicate disputes that skew vote counts
- The `peerUnchangedCounter_` is reset to 0 when any vote changes; bugs in this counter cause premature consensus declaration
- Forgetting `signingHash_.reset()` before mutating a `ConsensusProposal` returns stale hashes
- Comparing wall-clock `seenTime()` against `NetClock` `closeTime_` is a type-shaped bug waiting to happen
- Two temporal domains in `ConsensusParms`: validation/proposal parms use **NetClock seconds**; consensus-loop timers use **steady-clock milliseconds** — mixing them produces subtle bugs
## Key Code Patterns
### Proposal Validation
```cpp
if (newPeerProp.prevLedger() != prevLedgerID_)
{
JLOG(j_.debug()) << "Got proposal for " << newPeerProp.prevLedger()
<< " but we are on " << prevLedgerID_;
return;
}
```
### Complete Bow-Out Handling
```cpp
if (newPeerProp.isBowOut())
{
if (result_)
for (auto& it : result_->disputes)
it.second.unVote(peerID);
if (currPeerPositions_.find(peerID) != currPeerPositions_.end())
currPeerPositions_.erase(peerID);
deadNodes_.insert(peerID); // permanently excluded this round
}
```
### CLOG diagnostic pattern
Most methods take `std::unique_ptr<std::stringstream> const& clog = {}`. `CLOG(clog)` macro appends only when non-null — full round trace available without paying formatting cost on the hot path.
## Validations (`Validations.h`)
`Validations<Adaptor>` is templated; production uses `RCLValidationsAdaptor`. Five coordinated structures under one `mutex_`:
- `current_`: most recent per node, fast-path for quorum
- `byLedger_`: aged unordered map keyed by ledger ID
- `bySequence_`: aged unordered map for Byzantine detection
- `trie_`: `LedgerTrie<Ledger>` for preferred-ledger calc
- `acquiring_`: validations waiting on locally-unavailable ledgers
`ValidationParms` windows: `validationCURRENT_WALL=5min`, `validationCURRENT_LOCAL=3min`, `validationCURRENT_EARLY=3min`, `validationSET_EXPIRES=10min`, `validationFRESHNESS=20s` (used only for laggard detection, not staleness). Fields are mutable instance members, not `constexpr` — simulations inject alternate values.
`isCurrent()` checks two clocks independently: signer's wall time and our local steady-clock first-observation time. Arithmetic promotes to signed 64-bit to avoid underflow on untrusted `signTime`.
`SeqEnforcer<Seq>` rejects regressed/duplicate sequences but resets its high-water mark after `validationSET_EXPIRES` with no new validation — long-offline validators can rejoin.
`add()` classification (in order):
- Same seq, different ledger/sign time → `ValStatus::conflicting` (possible Byzantine)
- Same seq + ledger, different cookie → `ValStatus::multiple` (misconfig/duplicate)
- Otherwise → `ValStatus::badSeq`
All trie queries go through `withTrie()`, which first flushes stale entries via `current()` then promotes newly-available ledgers via `checkAcquired()`. `lastLedger_` tracks each node's trie contribution so `removeTrie()` can atomically undo before re-inserting.
`getPreferred(curr)` fallback: trie → `acquiring_` (max waiters) → `nullopt`. Conservative switch rule: if preferred is an immediate child of current working ledger, stay put.
`trustChanged()` iterates `current_` and full `byLedger_` to propagate UNL changes — trie reflects only currently trusted validators.
`setSeqToKeep([low, high))` pins a range against eviction by "touching" entries near expiry. Throttled to once per `(validationSET_EXPIRES - validationFRESHNESS)` window.
## LedgerTrie (`LedgerTrie.h`)
Compressed prefix trie over ledger ancestry — ledger history is treated as a string over the alphabet of ledger IDs. Each `Node` carries a `Span` (half-open `[start_, end_)`), two counters, raw parent pointer, owned children.
- `tipSupport`: validations exactly matching this node's tip
- `branchSupport`: `tipSupport` + sum of descendants' `branchSupport`
Counters propagate up the parent chain on every `insert`/`remove`. Non-root nodes with zero tip and ≤1 child violate the compression invariant and are merged.
`insert()` may do up to two structural ops:
1. Split — extract suffix into new child inheriting children + counts, truncate found node
2. Branch — append new leaf
`remove()` uses `findByLedgerID()` (O(n) exact match), not the prefix-based `find()`.
`getPreferred(largestIssued)` — the algorithmic heart. Walks from root using "preferred by branch": validators with last validation below the current frontier are *uncommitted* (could swing any branch). A branch advances only when `branchSupport` exceeds *uncommitted*, and a child wins only when its `branchSupport` lead over the runner-up exceeds *uncommitted* (with `startID()` tie-break). The strictly-greater-than margin prevents thrashing when validators lag.
`seqSupport: std::map<Seq, uint32_t>` (ordered for in-sequence walk) drives the uncommitted accounting.
`checkInvariants()` does full DFS — used heavily in tests; verifies compression rule, counter consistency, parent links, and `seqSupport` sums.
`Ledger` template contract: cheap copy, `seq()`, `operator[](Seq)` returning `ID{0}` for unknowns, `MakeGenesis{}` tag, free `mismatch(Ledger,Ledger)`. Unique history invariant: agreement on any ancestor ID implies agreement on all earlier ancestors.
`SpanTip<Ledger>` is the return type of `getPreferred()` — a lightweight struct with the tip's seq, ID, and a ledger copy for ancestor lookups. `Span::diff()` delegates to `mismatch()` to find first divergence point.
## Amendments
- 80% validator support for 2 weeks to enable; tracked via `AmendmentTable` with `amendmentMap_`
- New amendments: add to `features.macro` with `XRPL_FEATURE`/`XRPL_FIX`, increment `numFeatures` in `Feature.h`
- Unsupported enabled amendment blocks the server (`setAmendmentBlocked`); no mechanism to disable/revoke
- Voting happens each consensus round in `doVoting`; votes persisted in `FeatureVotes` SQLite table
- `fixAmendmentMajorityCalc` changed the threshold calculation; check which applies
## UNL and Negative UNL
- N-UNL temporarily disables unreliable validators (max 25% of UNL: `negativeUNLMaxListed = 0.25`)
- Scoring via `buildScoreTable` over recent ledger history; low watermark 50% = disable candidate, high 80% = re-enable
- Candidate selection deterministic via previous ledger hash as randomizing pad
- `newValidatorDisableSkip = FLAG_LEDGER_INTERVAL * 2` prevents disabling newly joined validators
## Transaction Ordering
- `CanonicalTXSet`: salted account key (XOR random salt) → seq proxy → tx ID. Salt prevents ordering manipulation
- `TxQ` uses `OrderCandidates`: higher fee level first, then `txID XOR parentHash` tiebreaker
- Per-account limit `maximumTxnPerAccount`; blocked transactions held until blocker resolves
## Key Files
- `src/xrpld/consensus/Consensus.h` — state machine (header-only template)
- `src/xrpld/consensus/Consensus.cpp` — free policy functions (`shouldCloseLedger`, `checkConsensus`, `checkConsensusReached`)
- `src/xrpld/consensus/ConsensusParms.h` — all numeric thresholds; dual-clock (NetClock seconds vs steady ms)
- `src/xrpld/consensus/ConsensusTypes.h``ConsensusMode`, `ConsensusPhase`, `ConsensusState`, `ConsensusTimer`, `ConsensusCloseTimes`, `ConsensusResult`
- `src/xrpld/consensus/ConsensusProposal.h` — proposal record with sequence protocol and lazy signing hash
- `src/xrpld/consensus/DisputedTx.h` — per-tx avalanche voting and stall detection
- `src/xrpld/consensus/Validations.h` — validation tracking, indexing, trie integration
- `src/xrpld/consensus/LedgerTrie.h` — compressed ancestry trie for preferred-ledger calc
- `src/xrpld/app/consensus/RCLConsensus.cpp` — XRPL `Adaptor` implementation
- `src/xrpld/app/misc/detail/AmendmentTable.cpp` — amendment voting logic
- `src/xrpld/app/misc/NegativeUNLVote.cpp` — N-UNL voting
- `src/xrpld/app/misc/CanonicalTXSet.h` — tx ordering

148
docs/skills/cryptography.md Normal file
View File

@@ -0,0 +1,148 @@
# Cryptography
XRPL supports secp256k1 (ECDSA) and ed25519 key types. All crypto uses OpenSSL + dedicated libs (libsecp256k1, ed25519-donna). The `xrpl::crypto` layer provides three foundational utilities — a CSPRNG, secure memory erasure, and RFC 1751 mnemonic encoding — that underpin all key/seed handling.
## Module Layout
Three small, focused TUs form the foundation; protocol-level types (`SecretKey`, `PublicKey`, `Seed`) in `src/libxrpl/protocol/` consume them.
| File | Purpose |
|------|---------|
| `include/xrpl/crypto/csprng.h` / `src/libxrpl/crypto/csprng.cpp` | `csprng_engine` + `crypto_prng()` singleton; wraps OpenSSL `RAND_bytes`/`RAND_add`/`RAND_poll` |
| `include/xrpl/crypto/secure_erase.h` / `src/libxrpl/crypto/secure_erase.cpp` | One-line delegation to `OPENSSL_cleanse`; canonical wipe primitive |
| `include/xrpl/crypto/RFC1751.h` / `src/libxrpl/crypto/RFC1751.cpp` | Static class; 2048-word mnemonic codec + `getWordFromBlob` utility |
## Key Invariants
- `SecretKey` and `Seed` destructors call `secure_erase` on their internal buffer as the very first action; any new sensitive type must follow this pattern (covers exception unwind paths too)
- ed25519 public keys are prefixed with `0xED` (33 bytes total); secp256k1 keys are 33-byte compressed
- `sha512Half` (first 32 bytes of SHA-512) is the standard hash used throughout XRPL for node hashing, signing, etc.
- `RIPEMD-160(SHA-256(x))` is used for account ID derivation (`ripesha_hasher`)
- Base58 encoding includes a type byte prefix and 4-byte checksum (double SHA-256)
- All randomness for cryptographic material flows through `crypto_prng()`; never call OpenSSL's `RAND_bytes` directly and never use `std::rand`/`rand()`
- `csprng_engine` is non-copyable and non-movable (deleted ops); the singleton must be accessed by reference via `crypto_prng()`
- `csprng_engine` satisfies the C++ *UniformRandomNumberEngine* named requirement (`result_type` = `std::uint64_t`, `operator()()`, `constexpr min()`/`max()`) — plugs into `std::uniform_int_distribution`, `beast::rngfill`, etc.
- RFC 1751 dictionary has exactly 2^11 = 2048 entries; indices 0570 are words ≤ 3 chars, 5712047 are exactly 4 chars (exploited in `wsrch` to halve binary search range)
- Each RFC 1751 word encodes exactly 11 bits; a 64-bit block uses 6 words (66 bits = 64 data + 2 parity); a 128-bit key uses two such blocks → 12 words total
## Common Bug Patterns
- Mixing up key types: secp256k1 signing hashes the message with `sha512Half` first; ed25519 signs the raw message
- `signDigest` only works with secp256k1; calling it with ed25519 throws a logic error
- Signature canonicality: ed25519 `verify` checks canonicality before calling `ed25519_sign_open`; non-canonical signatures are rejected
- Overlay handshake uses `signDigest` to sign the session fingerprint (`sharedValue`); the signature binds the TLS session to the node identity
- Relying on a naive `memset` to wipe key material — optimizer will eliminate it as a dead store; must use `secure_erase`
- Forgetting to wipe *intermediate* derivation buffers (SHA-512 halves, scratch arrays) after the final `SecretKey` has taken its copy
- Constructing a second `csprng_engine` instance: forbidden by deleted ctors; sharing one OpenSSL pool through the singleton is required
- Passing `mix_entropy` a buffer and assuming OpenSSL credits it as entropy — the entropy estimate passed to `RAND_add` is always `0` (deliberately conservative; `std::random_device` may be weak on some platforms)
- RFC 1751 decode: distinguish `1` (success), `0` (unknown word), `-1` (malformed input), `-2` (parity failure) — do not collapse all failures into a single error
- `insert()` in RFC 1751 uses bitwise OR, not assignment — output buffer must start zero-initialized
- Treating RFC 1751 parity as cryptographic integrity — it's a 2-bit transcription check, not a MAC
- Using `getWordFromBlob` for anything cryptographic — it's a Jenkins one-at-a-time hash and explicitly insecure
## Review Checklist
- New crypto code must use `crypto_prng()` singleton for randomness, never raw `rand()` or direct OpenSSL `RAND_*`
- Secret key buffers must be `secure_erase`d after use (destructors *and* intermediate scratch buffers)
- Verify that key type dispatch handles both secp256k1 and ed25519 (or explicitly rejects one with a clear error)
- Any new sensitive type should follow the `SecretKey`/`Seed` pattern: destructor calls `secure_erase` as its first/only action
- New OpenSSL touchpoints should respect the `OPENSSL_VERSION_NUMBER < 0x10100000L` thread-safety guard pattern used in `csprng.cpp`
- CSPRNG failures (`RAND_bytes`/`RAND_poll` ≠ 1) must propagate via `Throw<>` (logs stack trace) — never silently fall back
- `RAND_cleanup()` must only be called for OpenSSL `< 1.1.0`; modern versions handle cleanup via `atexit`
## Key Patterns
### Secure Erasure
```cpp
// REQUIRED: destructor must erase secret material
SecretKey::~SecretKey()
{
secure_erase(buf_, sizeof(buf_));
}
// REQUIRED: erase intermediate buffers after use
beast::rngfill(buf, sizeof(buf), crypto_prng());
SecretKey sk(Slice{buf, sizeof(buf)});
secure_erase(buf, sizeof(buf)); // MUST erase raw buffer
```
`secure_erase` delegates to `OPENSSL_cleanse`, which uses volatile writes / opaque function-pointer calls to defeat dead-store elimination. Lives in a separate TU (`secure_erase.cpp`) so the call site cannot inline it away — the out-of-line call forces the compiler to treat it as an opaque side effect. It does **not** clear CPU registers or caches — best-effort for heap/stack only (see Percival 2014). Takes raw `void*` + byte count with no null/zero guards; callers must supply valid arguments.
Wrapping behind `xrpl::secure_erase` provides one auditable choke point if the underlying strategy ever changes (e.g., switching to `explicit_bzero`). `OPENSSL_cleanse` is preferred over platform-specific alternatives (`memset_s`, `explicit_bzero`, `SecureZeroMemory`) because OpenSSL already centralizes cross-platform portability for the rest of the crypto stack.
### CSPRNG Usage
```cpp
// Singleton access; never copy/store by value
auto& rng = crypto_prng();
// Bulk fill — preferred for key material
std::uint8_t buf[32];
rng(buf, sizeof(buf)); // operator()(void*, size_t)
// Or via beast adapter satisfying UniformRandomNumberEngine
beast::rngfill(buf, sizeof(buf), crypto_prng());
```
Constructor calls `RAND_poll()` eagerly to surface entropy failures at startup rather than at first key gen. Failure throws `std::runtime_error("CSPRNG: Insufficient entropy")` via `Throw<>`; callers generally do not catch — propagation halts the operation, which is correct.
### Key Type Dispatch
```cpp
// REQUIRED: handle both key types or explicitly reject
if (type == KeyType::ed25519)
{ /* ed25519 path */ }
else if (type == KeyType::secp256k1)
{ /* secp256k1 path */ }
else
LogicError("unknown key type"); // MUST NOT fall through silently
```
### RFC 1751 Mnemonic Encoding
```cpp
// 16-byte (128-bit) seed <-> 12-word mnemonic
std::string words;
RFC1751::getEnglishFromKey(words, std::string{seedBytes, 16});
std::string roundTrip;
int rc = RFC1751::getKeyFromEnglish(roundTrip, words);
// rc: 1=success, 0=unknown word, -1=malformed, -2=parity mismatch
```
`Seed.cpp` reverses the 16 bytes before/after RFC 1751 encoding to match the RFC's big-endian convention. `standard()` normalizes input by uppercasing and applying visual substitutions `1→L`, `0→O`, `5→S` for handwritten/OCR tolerance. The 2-bit parity per 8-byte half is a transcription check, **not** a cryptographic integrity check.
`getKeyFromEnglish` uses `boost::algorithm::split` with `token_compress_on` for whitespace tolerance. Encoder (`getEnglishFromKey`) has no return code — encoding is lossless and cannot fail on valid 16-byte input. Decoder (`getKeyFromEnglish`) has a 4-valued return code — it must validate user-supplied strings.
`getWordFromBlob` is a separate utility: Jenkins one-at-a-time hash → `% 2048` → one dictionary word. Explicitly **not** cryptographically secure; used in `NetworkOPs.cpp` for `shroudedHostId` (privacy-preserving node label in logs/RPC). Reuses the RFC 1751 dictionary purely for its vetted set of short, pronounceable words.
## CSPRNG Internals
- Constructor calls `RAND_poll()` eagerly; destructor calls `RAND_cleanup()` only for OpenSSL `< 1.1.0` (modern versions clean up via `atexit`)
- Thread-safety mutex is compile-time gated: `#if (OPENSSL_VERSION_NUMBER < 0x10100000L) || !defined(OPENSSL_THREADS)` — modern builds elide the lock on the hot path (`RAND_bytes` is internally thread-safe in OpenSSL ≥ 1.1.0). The mutex is *always* held around `RAND_add` in `mix_entropy` regardless of version
- `mix_entropy` reads 128 values from `std::random_device` *before* locking (independently thread-safe), then locks for `RAND_add`
- `mix_entropy` passes entropy estimate `0` to `RAND_add` — never claim entropy for `std::random_device` or caller-supplied buffers (conservative accounting prevents prematurely satisfying OpenSSL's seeding threshold)
- `mix_entropy` is called on a timer from `Application.cpp` to stir fresh OS entropy during the node's lifetime
- Singleton is a function-local `static` (Meyers singleton); C++11 guarantees thread-safe one-time init
- Scalar `operator()()` delegates to buffer-fill overload with `sizeof(result_type)` (8 bytes) — both paths share validation/error handling
## RFC 1751 Internals
- `extract(s, start, length)` / `insert(s, x, start, length)`: read/write `length ≤ 11` bits at arbitrary offset across a 9-byte buffer; guarded by `XRPL_ASSERT` (stripped in release). Both work across byte boundaries by assembling 23 adjacent bytes into a 24-bit window
- `insert` uses bitwise OR (not assignment) — output buffer must start zero-initialized; partial writes accumulate safely
- `btoe` appends a 9th byte for 2-bit parity computed by summing all 32 bit-pairs across the 64-bit payload; parity occupies bit positions 6465
- `etob` validates: exactly 6 words, each 14 chars, all in dictionary, parity matches — distinct error codes per failure mode (`0` unknown, `-1` malformed, `-2` parity)
- `wsrch` halves the binary search range based on input word length: `[0, 571)` for ≤3-char words, `[571, 2048)` for 4-char words
- No exceptions used anywhere in RFC 1751 — all errors are integer return codes
- All methods are static; `RFC1751` is a pure stateless utility class — instantiation is never needed
## Key Files
- `include/xrpl/protocol/SecretKey.h` / `PublicKey.h` — key types
- `src/libxrpl/protocol/SecretKey.cpp` — signing, key generation; canonical example of CSPRNG + `secure_erase` discipline
- `src/libxrpl/protocol/PublicKey.cpp` — verification
- `src/libxrpl/protocol/Seed.cpp` — 128-bit seed; uses RFC 1751 for mnemonic encoding (reverses bytes for big-endian convention)
- `include/xrpl/protocol/digest.h` — hash functions (`sha512Half`, `ripesha_hasher`, etc.)
- `include/xrpl/crypto/csprng.h` + `src/libxrpl/crypto/csprng.cpp` — CSPRNG engine and singleton
- `include/xrpl/crypto/secure_erase.h` + `src/libxrpl/crypto/secure_erase.cpp` — memory wipe primitive
- `include/xrpl/crypto/RFC1751.h` + `src/libxrpl/crypto/RFC1751.cpp` — mnemonic codec
- `src/xrpld/overlay/detail/Handshake.cpp` — overlay handshake crypto
- `src/xrpld/app/main/Application.cpp` — periodic `mix_entropy` calls
- `src/xrpld/app/misc/NetworkOPs.cpp` — uses `getWordFromBlob` for `shroudedHostId`

126
docs/skills/index.md Normal file
View File

@@ -0,0 +1,126 @@
# xrpld Codebase Skills Index
## Description
This is the top-level guide for all best-practices skills in this repository. Use this to understand the codebase organization and find the right skill for any task.
## When to Use Skills
Reference a skill whenever you are:
- **Writing new code** in a module - check the skill first for established patterns
- **Modifying existing code** - verify your changes follow module conventions
- **Adding a new transaction type** - see `libxrpl/tx/transactors.md` for the full template
- **Debugging** - skills list key files and common pitfalls per module
- **Reviewing code** - skills document what "correct" looks like for each module
## Codebase Architecture
The codebase is split into two main areas:
### `src/libxrpl/` — The Library (skills in `.claude/skills/libxrpl/`)
Reusable library code: data types, serialization, cryptography, ledger state, transaction processing, and storage. This is the **protocol layer**.
| Module | Responsibility |
|--------|---------------|
| `basics` | Foundational types: Buffer, Slice, base_uint, Number, logging, error contracts |
| `beast` | Support layer: Journal logging, test framework, instrumentation, IP types |
| `conditions` | Crypto-conditions (RFC): fulfillment validation, DER encoding |
| `core` | Job queue, load monitoring, hash-based message dedup |
| `crypto` | CSPRNG, secure erasure, RFC1751 encoding |
| `json` | Json::Value, parsing, serialization, StaticString optimization |
| `ledger` | ReadView/ApplyView, state tables, payment sandbox, credit ops |
| `net` | HTTP/HTTPS client, SSL certs, async I/O |
| `nodestore` | Persistent node storage: RocksDB, NuDB, Memory backends |
| `protocol` | STObject hierarchy, SField, Serializer, TER codes, Features, Keylets |
| `proto` | Protocol Buffer generated headers (gRPC API definitions) |
| `rdb` | SOCI database wrapper, checkpointing |
| `resource` | Rate limiting, endpoint tracking, abuse prevention |
| `server` | Port config, SSL/TLS, WebSocket, admin networks |
| `shamap` | SHA-256 Merkle radix tree (16-way branching, COW) |
| `tx` | Transaction pipeline: Transactor base, preflight/preclaim/doApply |
### `src/xrpld/` — The Server Application (skills in `.claude/skills/xrpld/`)
The running rippled server: application lifecycle, consensus, networking, RPC, and peer management. This is the **application layer**.
| Module | Responsibility |
|--------|---------------|
| `app` | Application singleton, ledger management, consensus adapters, services |
| `app/main` | Application initialization and lifecycle |
| `app/ledger` | Ledger storage, retrieval, immutable state management |
| `app/consensus` | RCL consensus adapters (bridges generic algorithm to rippled) |
| `app/misc` | Fee voting, amendments, SHAMapStore, TxQ, validators, NetworkOPs |
| `app/paths` | Payment path finding algorithm, trust line caching |
| `app/rdb` | Application-level database operations |
| `app/tx` | Application-level transaction handling |
| `consensus` | Generic consensus algorithm (CRTP-based, app-independent) |
| `core` | Configuration (Config.h), time keeping, network ID |
| `overlay` | P2P networking: peer connections, protocol buffers, clustering |
| `peerfinder` | Network discovery: bootcache, livecache, slot management |
| `perflog` | Performance logging and instrumentation |
| `rpc` | RPC handler dispatch, coroutine suspension, 40+ command handlers |
| `shamap` | Application-level SHAMap operations (NodeFamily) |
### `include/xrpl/` — Header Files
Headers live in `include/xrpl/` and mirror the `src/libxrpl/` structure. Each skill already references its corresponding headers in the "Key Files" section.
## Cross-Cutting Conventions
### Error Handling
- **Transaction errors**: Return `TER` enum (tesSUCCESS, tecFROZEN, temBAD_AMOUNT, etc.)
- **Logic errors**: `Throw<std::runtime_error>()`, `LogicError()`, `XRPL_ASSERT()`
- **I/O errors**: `Status` enum or `boost::system::error_code`
- **RPC errors**: Inject via `context.params`
### Assertions
```cpp
XRPL_ASSERT(condition, "ClassName::method : description"); // Debug only
XRPL_VERIFY(condition, "ClassName::method : description"); // Always enabled
```
### Logging
```cpp
JLOG(j_.warn()) << "Message"; // Always wrap in JLOG macro
```
### Memory Management
- `IntrusiveRefCounts` + `SharedIntrusive` for shared ownership in libxrpl
- `std::shared_ptr` for shared ownership in xrpld
- `std::unique_ptr` for exclusive ownership
- `CountedObject<T>` mixin for instance tracking
### Feature Gating
```cpp
if (ctx.rules.enabled(featureMyFeature)) { /* new behavior */ }
```
### Code Organization
- Headers in `include/xrpl/`, implementations in `src/libxrpl/` or `src/xrpld/`
- `#pragma once` (never `#ifndef` guards)
- `namespace xrpl { }` for all code
- `detail/` namespace for internal helpers
- Factory functions: `make_*()` returning `unique_ptr` or `shared_ptr`
## Subsystem Skills (soul/)
Concise invariants, bug patterns, and review checklists for each subsystem:
| Subsystem | File | Focus |
|-----------|------|-------|
| Consensus | `soul/consensus.md` | State machine, amendments, UNL, validations, TX ordering |
| Cryptography | `soul/cryptography.md` | Key types, signing, hashing, handshake |
| Ledger | `soul/ledger.md` | Immutability, acquisition, entry types |
| NodeStore | `soul/nodestore.md` | Backends, caching, online deletion |
| Peering | `soul/peering.md` | Overlay, connections, squelching |
| Protocol | `soul/protocol.md` | Macros, serialization format, field ordering |
| RPC | `soul/rpc.md` | Handler dispatch, roles, subscriptions |
| SHAMap | `soul/shamap.md` | COW, node types, sync, proofs |
| SQL | `soul/sql.md` | Schema, config, checkpointing |
| Testing | `soul/test.md` | JTx framework, Env setup, TER expectations, amendment gating |
| Transactors | `soul/transactors.md` | Pipeline, new TX types, signing, transactor template |
| WebSockets | `soul/websockets.md` | Session lifecycle, flow control |
### Workflow & Processes
| Skill | File |
|-------|------|
| Workflow Orchestration | `workflow.md` |
| Task Management | `task-management.md` |
| Amendment Creation | `amendment.md` |
| Merge Conflicts | `merge-conflicts.md` |

320
docs/skills/ledger.md Normal file
View File

@@ -0,0 +1,320 @@
# Ledger
Each ledger is an immutable snapshot: header (seq, hashes, close time) + state SHAMap + transaction SHAMap. `LedgerMaster` is the central coordinator. The module spans `Ledger` itself, the view hierarchy (`ReadView``ApplyView``OpenView`/`Sandbox`/`PaymentSandbox`), directory primitives, subscription/order-book fan-out (`BookListeners`, `OrderBookDB`), governance state (`AmendmentTable`), pending-save bookkeeping, and a large family of per-object-type helper free functions.
## Key Invariants
- Once `setImmutable()` is called, the ledger and its SHAMaps cannot change; only immutable ledgers can be shared across threads. Mutable ledgers must not be shared.
- Every server always has an open ledger; the open ledger cannot close until previous consensus completes.
- Ledger header hashes to the ledger's identity hash; includes state root, tx root, parent hash, total coins, close time.
- `LedgerMaster` tracks: `mPubLedger` (last published), `mValidLedger` (last validated), `mLedgerHistory` (cache).
- Validation requires minimum trusted validations (`minVal`); filtered by Negative UNL.
- Open ledgers store transactions without metadata; closed ledgers store `addVL(tx)||addVL(meta)` and produce `TxMeta` on apply.
- Trust-line `sfBalance` is always stored "low account's perspective"; helpers negate when querying from high side. The `sfLowLimit.account < sfHighLimit.account` ordering is the trust-line orientation invariant — every access decides which side is "us" via `AccountID` comparison.
- Directory invariant: page keys are chosen so the low 96 bits of every token in an NFT page are strictly less than the page key's low 96 bits; for owner/order-book directories, page 0 is the anchor and `sfIndexPrevious` on root points to the tail.
- `PendingSaves` invariant: exactly one of `saveLedgerAsync`/`pendSaveValidated` may run for a given ledger sequence at a time; second caller observes `started == true` and bails.
- `ApplyStateTable` pointer-identity invariant: `erase(sle)` and `update(sle)` require the exact same `shared_ptr` obtained from `peek()` on the same view instance. Crossing views is a `LogicError`.
- `RawStateTable` state-machine collapse: `erase` after `insert` removes the entry entirely (net zero); `insert` after `erase` upgrades to `replace`; double-erase is a `LogicError`.
## Common Bug Patterns
- Modifying a ledger after `setImmutable()` corrupts shared state; always check `mImmutable` before mutation.
- Gap detection: if ledgers 603 and 600 exist but 601-602 are missing, `LedgerMaster` requests 602 first, then backfills 601.
- `InboundLedger::gotData()` queues data for processing; calling `done()` before all data arrives creates incomplete ledgers.
- `checkAccept` won't accept a ledger that isn't ahead of the last validated ledger; stale validations are silently ignored.
- Calling `ApplyViewImpl::apply()` twice or using the view after apply: the only valid operation post-apply is destruction.
- Passing an SLE from `peek()` on view A to `erase()`/`update()` on view B: `ApplyStateTable` enforces pointer identity and `LogicError`s.
- Forgetting that `read()` is change-aware but `slesBegin/End` iterates only the base — pending inserts won't appear in SLE iteration on `ApplyViewBase`.
- Comparing iterators across different `ReadView` instances: `XRPL_ASSERT` fires in debug; UB in release.
- Stale `OpenView::txCount` ordinal in nested/batch views — must use `batch_view_t` constructor to capture `baseTxCount_`.
- Calling `removeExpired`/`deleteSLE` in preclaim — preclaim is `ReadView`-only; expiry-driven deletion only happens in doApply.
- Forgetting that `directSendNoFee` is not `[[nodiscard]]` (for `DirectStep.cpp` compatibility) — its return must still be inspected.
- Accessing trust-line endpoints by raw low/high without first computing orientation — use the trust-line helpers that take `(account, peer)` and resolve which slot to touch.
- Constructing a `PaymentSandbox` from `ApplyView&` when nested within another payment: the nested form takes `PaymentSandbox const*` so `DeferredCredits` chain to the parent. Wrong constructor = double-spend escape.
- Updating an SLE then publishing subscriptions before `havePublished` advances: re-entrant publishers see partial state.
- Submitting to `OrderBookDB` while holding `mLock` from a callback — `setup_` ingests under its own write lock and may re-enter.
- `AcceptedLedgerTx` constructor asserts `!ledger->open()` — constructing from an open ledger aborts in debug.
- Calling `closeChannel` before `sfAmount >= sfBalance` invariant holds — the `XRPL_ASSERT` inside will abort; fix the calling transactor, not the helper.
- `ReadView` copy/move constructors always re-initialize `sles(*this)` and `txs(*this)` — subclasses must not rely on memberwise copy of those range objects.
- `forEachItemAfter` hint-page optimization: if the hint is stale the code falls back to linear scan but still returns `false` if `after` key is never found; callers must handle this as an invalid cursor.
- `dirIsEmpty` requires both empty `sfIndexes` *and* zero `sfIndexNext` — an empty root page does not mean an empty directory if subsequent pages exist.
## Ledger Entry Types
- Defined in `ledger_entries.macro` using `LEDGER_ENTRY(type, code, class, name, fields)`.
- Each entry has an `SOTemplate` defining required/optional fields.
- Key computation: `Indexes.cpp` computes unique keys (keylets) for each ledger object type.
- `STLedgerEntry` wraps the serialized data with type-safe field access.
- Pseudo-account types (AMM, Vault, LoanBroker) are discovered by scanning `ltACCOUNT_ROOT` SOTemplate for `SField::sMD_PseudoAccount`-flagged fields; no manual registration.
## View Hierarchy
```
ReadView (abstract, read-only)
└── DigestAwareReadView (adds per-entry digest for CachedView)
└── Ledger (final; owns stateMap_ + txMap_)
└── OpenView (mutable; ReadView + TxsRawView; delta over base)
└── detail::ApplyViewBase (ApplyView + RawView; buffered via ApplyStateTable)
├── ApplyViewImpl (commit path; produces TxMeta; carries deliver_)
├── Sandbox (discardable; flush via apply(RawView&))
└── PaymentSandbox (overrides credit/balance hooks; DeferredCredits)
```
- `ApplyStateTable` (per-tx buffer): actions `cache`/`insert`/`modify`/`erase`; generates `TxMeta` with `sfPreviousFields`/`sfFinalFields`/`sfNewFields` driven by `SField::sMD_*` flags; threads `sfPreviousTxnID`/`sfPreviousTxnLgrSeq` on affected account roots and trust-line endpoints. On `apply()` the buffer is flushed into the base `RawView` and the table is reset; using the table afterward is UB. A byte-equal `sfModifiedNode` is silently omitted from metadata.
- `RawStateTable` (used by `OpenView` and `RawStateTable::apply` flush): three actions only; state-machine collapse — `insert + erase → removed entirely`; `insert + replace → insert with new SLE`; `erase + insert (modify) → replace`. No `TxMeta` is produced because `RawView` is the post-apply mutation surface.
- Both tables use `boost::container::pmr::monotonic_buffer_resource` with a 256 KB initial arena; `unique_ptr` for stable address so map allocators work after move. Memory is released only when the table is destroyed — long-lived tables leak peak working set.
- `ReadViewFwdRange` is the iterator/range adapter used for `sles`/`txs` traversal; iterator equality is anchored to the owning view (`XRPL_ASSERT` cross-view comparisons in debug). Virtual clone via `iter_base::copy()` enables value-semantics copying. Deferred dereference caches the result in `mutable std::optional<value_type> cache_`; `operator++` clears the cache.
- `CachedView` (`CachedLedger = CachedView<Ledger>`): two-level cache — per-view `map_<key, digest>` plus process-wide `CachedSLEs` (`TaggedCache<uint256, SLE const>`) keyed by digest. The per-view map uses `hardened_hash` to defeat hash-flooding from adversarial keylet sequences. Hit/hitExpired/miss counters distinguish full hit, digest-known-but-SLE-evicted, and cold miss. The expensive `base_.digest()` and `base_.read()` calls happen outside the lock; `mutex_` protects only the key→digest map.
- Hooks pattern: `balanceHookIOU/MPT`, `ownerCountHook` (read side, on `ReadView`) and `creditHookIOU/MPT`, `adjustOwnerCountHook`, `issuerSelfDebitHookMPT` (write side, on `ApplyView`) are no-ops by default; `PaymentSandbox` overrides them to prevent within-payment double-spend.
- `isDryRun` path in `apply(OpenView&...)`: full `TxMeta` is built and returned as `std::optional<TxMeta>`, but state changes and `rawTxInsert` are suppressed. Used for fee simulation.
## Directory Structures
Three distinct paged-list flavors, all `ltDIR_NODE`-based:
- **Owner / book directories** (`ApplyView::dirInsert`/`dirAppend`/`dirRemove`/`dirDelete`): root at page 0; `sfIndexNext`/`sfIndexPrevious` linked; root's `sfIndexPrevious` points to tail for O(1) append. `dirAppend` preserves insertion order (offers only, asserted); `dirInsert` keeps sorted order within each page. Page overflow detected via deliberate `uint64_t` wraparound (compile-time `static_assert`ed). `describe` callback brands each new page with type-specific fields (e.g., `sfOwner`).
- **NFToken pages** (`NFTokenHelpers`): tokens packed into `STArray`-bearing pages, sorted by `compareTokens()` (low 96 bits, then full ID). Last page anchored at `keylet::nftpage_max(owner)`. Split algorithm respects equivalence groups (identical low 96 bits); merge across adjacent pages on remove. `fixNFTokenPageLinks` amendment changes empty-last-page handling.
- **Quality-keyed order books** (`BookDirs`): two-level — `succ()` finds next quality directory in `[root_, getQualityNext(root_))`, then `cdirFirst`/`cdirNext` walks pages within that quality. `BookDirs` iterator transparently crosses quality boundaries.
The `Dir` class is a simple range adaptor (NFTokenOffer directories + unit tests); `next_page()` is public to allow page-skipping traversal (used by `notTooManyOffers`).
## Helper Module (`include/xrpl/ledger/helpers/`)
Free functions per ledger-object type. The asset-agnostic dispatcher is `TokenHelpers.h`, which routes `Asset` (`std::variant<Issue, MPTIssue>`) via `std::visit` to `RippleStateHelpers` (IOU) or `MPTokenHelpers` (MPT).
Conventional split:
- Stateless / preflight-safe checks: take `ReadView const&`
- State-mutating: take `ApplyView&`
- Two-phase pattern: read-only preclaim function (`credentials::valid`/`validDomain`) paired with mutating doApply counterpart (`verifyDepositPreauth`/`verifyValidDomain`) that prunes expired entries
Key files: `AMMHelpers`, `AccountRootHelpers`, `CredentialHelpers`, `DelegateHelpers`, `DirectoryHelpers`, `EscrowHelpers`, `MPTokenHelpers`, `NFTokenHelpers`, `OfferHelpers`, `PaymentChannelHelpers`, `PermissionedDEXHelpers`, `RippleStateHelpers`, `TokenHelpers`, `VaultHelpers`.
Policy enums (used to avoid bare bools): `FreezeHandling`, `AuthHandling`, `SpendableHandling`, `WaiveTransferFee`, `AllowMPTOverflow`, `AuthType` (`StrongAuth`/`WeakAuth`/`Legacy` — Legacy maps to StrongAuth for MPT, WeakAuth for IOU), `TruncateShares`.
## AMM Rounding Contract
The pool invariant `sqrt(asset1 × asset2) >= LPTokenBalance` is non-negotiable, so every formula has explicit directional rounding:
- Swap-in: output rounds **down** (trader gets less, pool retains).
- Swap-out: input rounds **up** (trader pays more).
- LP token deposit: tokens **down**, assets **up**.
- LP token withdrawal: tokens **up**, assets **down**.
`fixAMMv1_1` introduced per-step rounding (vs end-only); `fixAMMv1_3` extended this discipline to LP/deposit/withdraw paths via `multiply(balance, frac, mode)` and the `getRoundedAsset`/`getRoundedLPTokens` wrappers (two overloads each — direct and lambda-deferred). Pre-amendment paths must be preserved for historic replay.
`adjustLPTokens`: avoids precision loss when adding small token amounts to large `LPTokensBalance` by computing `(balance + tokens) - balance` rather than `tokens`. Becomes a no-op under `fixAMMv1_3`.
`changeSpotPriceQuality`: aligns AMM synthetic offer to CLOB best quality. Solves a quadratic (or linear, for the alternate constraint) and takes the smaller binding result. `fixAMMv1_1` switched the starting side to always-XRP-first to avoid XRP-drop discretization undershoot. `detail::reduceOffer` applies 0.01% rescue multiplier when quality still falls below target.
`solveQuadraticEqSmallest()` uses the numerically stable "citardauq" formula (Blinn's paper) to avoid catastrophic cancellation when `b > 0` in the standard form.
## MPT Specifics
- `OutstandingAmount` can transiently exceed `MaximumAmount` during payment-engine routing — `AllowMPTOverflow::Yes` raises the ceiling to `UINT64_MAX` for that case; direct sends use `No` and strict cap. The `fixSecurity3_1_3` amendment makes `accountSendMulti` accumulate in exact `uint64_t` (not `STAmount`/`Number`) to avoid 19-digit precision loss in aggregate overflow checks.
- `selfDebit` field on `IssuerValueMPT` in `PaymentSandbox::DeferredCredits` tracks issuer-as-seller offers because the payment engine credits first; `balanceHookSelfIssueMPT` caps available issuance at `origBalance - selfDebit`.
- Two-phase auth: `requireAuth` (preclaim, `ReadView`) checks the static authorization predicates; `enforceMPTokenAuthorization` (doApply, `ApplyView`) handles the case where a domain-authorized holder lacks an `MPToken` SLE in preclaim — it lazily allocates the SLE on the fly and consumes the `priorBalance` reserve.
- `lockEscrowMPT` does NOT change `OutstandingAmount` (tokens are still in circulation while escrowed); `unlockEscrowMPT` decreases `OutstandingAmount` only by the fee delta (gross - net) under `fixTokenEscrowV1`.
- `isVaultPseudoAccountFrozen()` recursively checks whether a vault-backed MPT issuance is frozen by the vault's underlying asset; a `depth` parameter bounds recursion (purely defensive — nested vaults cannot currently be created).
## IOU Trust-Line Specifics
- Orientation: every trust line stores its endpoints in fixed `sfLowLimit`/`sfHighLimit` slots based on `AccountID` comparison. Helpers like `accountHolds`, `accountFunds`, `trustCreate`, `trustDelete` take `(account, peer)` and resolve the slot internally — never index by raw low/high outside the helpers.
- Three freeze tiers: `isIndividualFrozen` (issuer froze this specific holder), `isFrozen` (global freeze flag), `isDeepFrozen` (transitive freeze through deep-freeze chains). Each has distinct policy implications for sends, offers, and AMM participation; helper queries take a `FreezeHandling` enum to select policy.
- `rippleCredit`/`rippleSend` apply transfer fees only when the issuer is neither endpoint and `WaiveTransferFee::No`; reserve/quality limits enforced via `accountFunds` rather than raw balance.
- Trust-line deletion (`trustDelete`) requires both endpoints to be at default state (zero balance, default limits/flags); helpers compute "default" against the post-tx state, not raw fields.
- `updateTrustLine` (static helper in `RippleStateHelpers.cpp`): when a sender's balance crosses zero and meets seven specific conditions (zero limit, zero quality flags, no freeze, etc.), it releases the sender's reserve and signals the caller to delete the line.
## Credential System
- Two-phase enforcement mirrors the `ReadView`/`ApplyView` split: `credentials::valid()` and `credentials::validDomain()` are read-only (preclaim); `verifyDepositPreauth()` and `verifyValidDomain()` are mutating (doApply) and delete expired credential objects as a side effect.
- `credentials::deleteSLE()` handles two owner directories (issuer and subject) with reserve accounting that shifts ownership at `lsfAccepted` time. Before acceptance only the issuer pays the reserve; after, the subject does.
- `checkFields()` validates `sfCredentialIDs` (`STVector256`); `checkArray()` validates `STArray` credential pairs and uses `sha512Half(issuer, credentialType)` for duplicate detection.
- `authorizedDepositPreauth()` uses a `lifeExtender` vector to keep `SLE const` shared pointers alive while the `Slice`-based set is in scope — forgetting this causes dangling pointer reads.
## Pseudo-Accounts
Synthetic `AccountRoot` SLEs owned by protocol objects (AMM, Vault, LoanBroker). Address derived from `sha512Half(attempt, parentHash, ownerKey)` → RIPESHA in a loop up to `maxAccountAttempts = 256` (consensus-critical constant). Flags `lsfDisableMaster | lsfDefaultRipple | lsfDepositAuth`; `sfSequence = 0` under `featureSingleAssetVault`/`featureLendingProtocol`. `isPseudoAccount(sle, filter?)` checks for any field tagged `SField::sMD_PseudoAccount` (currently `sfAMMID`, `sfVaultID`, `sfLoanBrokerID`). Pseudo-accounts bypass reserve requirements in `xrpLiquid`.
Amendment checks are the *caller's* responsibility, not `createPseudoAccount`'s. The function asserts the passed `ownerField` carries `sMD_PseudoAccount`.
## Vault Math
`VaultHelpers` converts between asset and share denominations using `sfScale` (decimal precision) and tracks `sfLossUnrealized` separately on deposit vs withdraw paths — the asymmetry is intentional: deposits price against gross `sfAssetsTotal`; withdrawals subtract `sfLossUnrealized` first (existing holders bear the loss, not new depositors). `TruncateShares` policy controls whether sub-unit share remainders are rounded to zero or rejected.
Empty-vault bootstrap: when `sfAssetsTotal == 0`, initial share allocation is `assets × 10^sfScale` (truncated), establishing the starting exchange rate. This reduces susceptibility to the first-depositor donation attack.
## Ledger Timing (`LedgerTiming.h`)
Adaptive close-time binning prevents clock-skew disagreements. Resolutions: `{10, 20, 30, 60, 90, 120}` seconds; default 30, genesis 10. Adjustment is asymmetric:
- On disagreement, coarsen every ledger (`decreaseLedgerTimeResolutionEvery = 1`)
- On agreement, refine only every 8th ledger (`increaseLedgerTimeResolutionEvery = 8`)
`roundCloseTime` is epoch-anchored (uses `time_since_epoch()`, not local offset) for deterministic agreement. `effCloseTime` enforces strict monotonicity: `max(rounded, priorCloseTime + 1s)`. `time_point{}` is the sentinel for "no agreed close time" and is returned unchanged.
## Skip List
Two-tier on-ledger structure for historical hash lookup:
- `keylet::skip()` (the rolling 256-page): hashes of the 256 immediate ancestors; updated every ledger.
- `keylet::skip(seq)`: every 256-aligned ledger stores a permanent record. `getCandidateLedger(seq)` rounds up to the nearest 256-aligned sequence.
Non-aligned ledgers older than 256 are unreachable — `hashOfSeq` returns `nullopt`.
## Canonical Transaction Ordering (`CanonicalTXSet`)
Retry queue between consensus passes. Sort key: `(account ⊕ salt, SeqProxy, txId)`.
- **Salt**: `LedgerHash` XORed into `AccountID`; prevents account-address mining for persistent ordering advantage. Refreshed by `reset()` each round.
- **`SeqProxy`**: sequences sort before tickets unconditionally (so `TicketCreate` always applies before ticket consumers).
- **`popAcctTransaction()`**: returns next eligible same-account tx — either ticket-based, or sequence exactly `+1` from current.
`Key::operator==` compares only `txId_` (asymmetric with `operator<`).
## Subscription Fan-Out
`BookListeners` and `OrderBookDB` together drive WebSocket book-stream subscriptions:
- `BookListeners`: per-book (`Book`-keyed) registry of `InfoSub::wptr`. `publish` builds a `MultiApiJson` once and dispatches by API version, expiring dead weak pointers in-place. Locking is per-listener-set (`std::recursive_mutex`), not global.
- `OrderBookDB`: scans the ledger's `dirNode` index to build `xrpBooks_` (XRP-side, O(1) checked) and `allBooks_` (IOU/MPT, scanned). Scans are throttled by `setup_seq_` — only re-scan on ledger change. `publishOrderBook` walks affected listeners; `havePublished` dedups so the same ledger isn't re-fanned-out across overlapping subscription updates.
- Ingest path under `mLock` (write); subscribe/unsubscribe under read lock. Callbacks must not re-enter `setup_`.
- `havePublished` is a `hash_set<uint64_t>` shared across all `BookListeners::publish()` calls for a single transaction, preventing duplicate delivery to subscribers registered on multiple affected books.
## Accepted Ledger Tx (`AcceptedLedgerTx`)
Eagerly-materialized projection of a transaction-in-ledger: tx, meta, deserialized account fields, affected-accounts list, JSON-serialization cache. Construction is the heavyweight step; downstream consumers (RPC, subscription) read fields by reference. The class is immutable post-construction; thread-safe to share.
Constructor asserts `!ledger->open()`. For `ttOFFER_CREATE` transactions where `account != amount.getIssuer()`, `owner_funds` is injected into `mJson` via `accountFunds(fhIGNORE_FREEZE, ahIGNORE_AUTH)` — a read-time annotation for order-book subscribers, not ledger state.
`getEscMeta()` returns `mRawMeta` as a SQL blob literal; used by `Node.cpp` for transaction DB inserts.
## Pending Saves (`PendingSaves`)
State machine tracking in-flight `Ledger` → DB writes. Each entry keyed by ledger sequence; transitions `requested → started → finished`. `pendSave` is idempotent — second caller for the same sequence is a no-op. Synchronous callers in `shouldWork(seq, true)` block on a `condition_variable` until the in-progress write completes. `getSnapshot()` returns a copy of the map; `LedgerMaster::getValidatedRange()` uses it to exclude in-progress sequences from the reported range.
## Amendment Table (`AmendmentTable`)
Governance state for protocol amendments. Each amendment has a `VoteBehavior`:
- `DefaultYes` — vote yes unless config overrides
- `DefaultNo` — vote no unless config overrides
- `Obsolete` — never vote yes, even with config override; `LogicError` if config tries
`TrustedVotes` tracks per-validator amendment votes across ledgers with anti-flapping (a validator must consistently vote for N consecutive flag ledgers before counting toward majority). The flag-ledger cadence and majority threshold are consensus-critical constants. `doVoting` produces the `EnableAmendment`/`Veto` pseudo-transactions inserted at flag ledger close.
Two-layer API: the pure-virtual `doVoting(LedgerIndex, set<uint256>, majorityAmendments_t)` is wrapped by a non-virtual `doVoting(shared_ptr<ReadView const>, ...)` that calls `getEnabledAmendments()` and `getMajorityAmendments()` from `View.h`. This decouples the implementation from ledger view types.
`needValidatedLedger(seq)` is an optimization gate — most ledgers have no effect on amendment voting; only flag ledgers need the full `doValidatedLedger` pass.
## Review Checklist
- New ledger entry types: add to `ledger_entries.macro`, implement keylet in `Indexes.cpp`, verify acquisition code in `InboundLedger`/`LedgerMaster`, check `LedgerCleaner` handling.
- New pseudo-account types: tag the key field with `SField::sMD_PseudoAccount` in `sfields.macro`; `isPseudoAccount` picks it up automatically. Caller, not `createPseudoAccount`, owns the amendment gate.
- New asset operations: extend `Asset` variant + add branches in `TokenHelpers.h` dispatchers. Don't reach into IOU- or MPT-specific helpers directly unless intentionally bypassing the dispatch layer.
- Helper functions touching balances: respect the read/write hook protocol so `PaymentSandbox` correctly defers credits.
- AMM math changes: gate behind an amendment; preserve the pre-amendment formula path.
- Directory changes: account for both legacy (unsorted) and modern pages in iteration; verify `cdirNext`/`dirNext` cursor semantics if deleting during iteration (see `cleanupOnAccountDelete` workaround in `View.cpp` around line 485).
- New amendment: register `VoteBehavior`; if `Obsolete`, ensure no config path can flip it; add to `TrustedVotes` accounting if it should be majority-tracked.
- New subscription stream: if order-book-related, register with `BookListeners`; for ledger-wide streams use `OrderBookDB::havePublished` to dedup.
- Escrow with non-XRP assets: use `escrowUnlockApplyHelper<T>` (template specialized for `Issue` / `MPTIssue`); check `createAsset` flag gating and `lockedRate` capping logic.
- Delegate transactions: call `checkTxPermission` first; only call `loadGranularPermission` when broad permission check fails.
## Key Patterns
### Immutability Guard
```cpp
// After this, no mutations allowed on the ledger or its SHAMaps
inline void SHAMap::setImmutable()
{
XRPL_ASSERT(state_ != SHAMapState::Invalid, "...");
state_ = SHAMapState::Immutable;
}
// VERIFY: code never calls peek()/insert()/erase() after setImmutable()
```
### New Ledger Entry Keylet
```cpp
// REQUIRED: every new ledger entry type needs unique keylet computation
Keylet keylet::myEntry(AccountID const& id)
{
return {ltMY_ENTRY,
sha512Half(std::uint16_t(spaceMyEntry), id)};
}
// Also add to ledger_entries.macro and Indexes.cpp
```
### Peek/Update Contract
```cpp
// peek() returns shared_ptr<SLE>; you MUST call update() or erase() with
// the SAME pointer on the SAME view. Crossing views is a LogicError.
auto sle = view.peek(keylet::account(id));
sle->setFieldU32(sfSequence, seq + 1);
view.update(sle); // promotes cache → modify in ApplyStateTable
```
### Two-Phase Expiry Cleanup
```cpp
// preclaim (ReadView) — detect but don't mutate
if (credentials::validDomain(view, domainID, account) == tecEXPIRED)
/* allow through; doApply will clean up */;
// doApply (ApplyView) — mutate
if (auto ter = verifyValidDomain(view, domainID, account, j); ter != tesSUCCESS)
return ter; // expired credentials deleted as side effect
```
### Directional Multiply (AMM)
```cpp
// post-fixAMMv1_3: rounding mode at the final multiply, not at toSTAmount
auto const tokens = getRoundedLPTokens(
rules,
lptAMMBalance,
[&] { return /* fractional formula */; },
IsDeposit::Yes); // → Number::downward
```
### Nested PaymentSandbox
```cpp
// Inside a payment step that needs its own sandbox: chain DeferredCredits
PaymentSandbox inner(&outer); // PaymentSandbox const* form
// ... inner.creditHookIOU(...) defers against the outer's table too
inner.apply(outer); // flushes deferred credits up one level
```
### Subscription Fan-Out
```cpp
// publish once, dispatch by version; havePublished shared across all books
MultiApiJson msg = buildBookUpdate(...);
listeners.publish(msg, havePublished);
db.havePublished(ledgerSeq); // dedup across overlapping streams
```
### Sandbox Commit-or-Discard
```cpp
Sandbox sb(&ctx_.view());
// ... all mutations through sb ...
if (result == tesSUCCESS)
sb.apply(ctx_.rawView());
// else sb destroyed → changes evaporate
```
## Key Files
- `src/xrpld/app/ledger/Ledger.h` / `src/libxrpl/ledger/Ledger.cpp` — ledger class, genesis/successor/load constructors, immutable transition, skip list, NegUNL
- `src/xrpld/app/ledger/detail/LedgerMaster.cpp` — central coordinator
- `src/xrpld/app/ledger/detail/InboundLedger.cpp` — ledger acquisition
- `include/xrpl/protocol/detail/ledger_entries.macro` — entry type definitions
- `src/libxrpl/protocol/Indexes.cpp` — keylet computation
- `include/xrpl/ledger/ReadView.h` + `ApplyView.h` + `OpenView.h` + `RawView.h` — view interface hierarchy
- `include/xrpl/ledger/detail/ApplyStateTable.h` / `RawStateTable.h` / `ApplyViewBase.h` / `ReadViewFwdRange.h` + `.ipp` — buffered mutation tables, range adapters, `TxMeta` generation
- `include/xrpl/ledger/Sandbox.h` / `PaymentSandbox.h` / `ApplyViewImpl.h` — concrete view types
- `include/xrpl/ledger/CachedView.h` / `CachedSLEs.h` — two-level SLE cache (hardened-hash inner map)
- `include/xrpl/ledger/CanonicalTXSet.h` — retry-pass deterministic ordering
- `include/xrpl/ledger/LedgerTiming.h` — close-time binning
- `include/xrpl/ledger/Dir.h` / `BookDirs.h` — directory iteration
- `include/xrpl/ledger/BookListeners.h` / `OrderBookDB.h` — subscription fan-out
- `include/xrpl/ledger/AcceptedLedgerTx.h` — eager tx-in-ledger projection
- `include/xrpl/ledger/AmendmentTable.h` — governance state, `VoteBehavior`, `TrustedVotes`
- `include/xrpl/ledger/PendingSaves.h` — in-flight DB write coalescing
- `include/xrpl/ledger/View.h` — free-function utility layer (expiry, `hashOfSeq`, `areCompatible`, `dirLink`, `canWithdraw`, `cleanupOnAccountDelete`)
- `include/xrpl/ledger/helpers/*` — per-object-type free functions
- `src/libxrpl/ledger/helpers/*` — implementations (AMM math, NFT page split, credential lifecycle, MPT overflow, vault math)
- `src/libxrpl/ledger/ApplyView.cpp` — directory `createRoot`, `findPreviousPage`, `insertKey`, `insertPage` (in `namespace xrpl::directory`)
- `src/libxrpl/ledger/PaymentSandbox.cpp``DeferredCredits` IOU/MPT shadow tables, post-switchover balance algorithm
- `src/libxrpl/ledger/OpenView.cpp``RawStateTable` + `txs_map` delta accumulation, PMR arena

209
docs/skills/nodestore.md Normal file
View File

@@ -0,0 +1,209 @@
# NodeStore
Persistent key-value store for `NodeObject`s (ledger entries). Every piece of ledger state — account states, transactions, ledger headers, SHAMap nodes — is serialized as a `NodeObject` keyed by its 256-bit hash and persisted here. Backends are pluggable (NuDB, RocksDB, in-memory, null) and selected via the `[node_db]` config section. The layer above (`Database`) adds an async read thread pool, batching, and statistics; `Backend` is the narrow storage interface.
## Architecture
Four layers, each with a narrow contract:
1. **`NodeObject`** (`include/xrpl/nodestore/NodeObject.h`) — immutable (type, hash, blob). Constructed only via `createObject()` factory; the `PrivateAccess` tag struct makes the public constructor effectively private while remaining compatible with `std::make_shared`. Hash is *not* verified against data — trust the caller. Inherits `CountedObject<NodeObject>` for live-instance diagnostics.
2. **`Backend`** (`include/xrpl/nodestore/Backend.h`) — pure abstract key/value interface. `fetch`/`store` are concurrent; `storeBatch`/`for_each` are not. Two-phase init: construct, then `open()`.
3. **`Database`** (`include/xrpl/nodestore/Database.h`) — owns the async read pool and stats; defines pure-virtual `fetchNodeObject(hash, seq, FetchReport&, duplicate)`. Public non-virtual `fetchNodeObject` instruments the private virtual one (timing, hit/miss, scheduler callback) — subclasses cannot bypass metrics.
4. **`Manager`** (`include/xrpl/nodestore/Manager.h`) — singleton registry mapping config `type=` strings to `Factory` instances. `make_Backend()` and `make_Database()` are the construction entry points.
Two concrete `Database` subclasses: `DatabaseNodeImp` (single backend) and `DatabaseRotatingImp` (two backends for online deletion).
## Key Invariants
- `NodeObjectType` values: `hotUNKNOWN=0`, `hotLEDGER=1`, `hotACCOUNT_NODE=3`, `hotTRANSACTION_NODE=4`, `hotDUMMY=512`. Value 2 is a historical gap. `hotDUMMY` is deliberately outside the contiguous range so it cannot collide with valid types — used as a cache sentinel meaning "confirmed missing."
- `NodeObject` lives in the `xrpl` namespace (not `xrpl::NodeStore`) because it is consumed broadly by SHAMap, ledger, and serialization layers.
- Preferred backends: NuDB (append-mostly, default) and RocksDB; Memory and Null are for tests / configured ephemerality.
- `Database` instrumentation is structural: the public `fetchNodeObject()` measures elapsed time, increments atomic counters, and calls `scheduler_.onFetch()` around every private virtual call.
- `Backend::fetch` and `Backend::store` are called concurrently from many threads; `storeBatch` and `for_each` are not. Implementations must reflect this.
- `DatabaseRotatingImp::isSameDB()` and `DatabaseNodeImp::isSameDB()` both return `true` unconditionally — the rotating store is one logical namespace despite physical split.
- Batch writes accumulate up to `batchWriteLimitSize = 65536` objects; peak in-flight memory can be ~2× this because a new batch accumulates while the previous one is being flushed (`Types.h`).
- `EncodedBlob` / `DecodedBlob` define the on-disk format: bytes 07 zero-padded (legacy ledger-index field), byte 8 = `NodeObjectType`, bytes 9+ = payload. Both must change together.
- `NuDBBackend::fdRequired()` returns 3 (data, key, log files). `RocksDBBackend::fdRequired()` returns `max_open_files + 128`. `DatabaseRotatingImp` sums both backends' values.
- `storeStats()` asserts `count <= sz` — byte total must be ≥ item count, guards against accounting bugs in subclasses.
## On-Disk Format
Defined jointly by `EncodedBlob` (write) and `DecodedBlob` (read) in `include/xrpl/nodestore/detail/`:
```
Bytes 07 Zero (historically ledger index; ignored on read)
Byte 8 NodeObjectType (one byte)
Bytes 9+ Raw serialized payload
```
The 32-byte hash is the storage key, kept separate from the value.
- `EncodedBlob` embeds a 1033-byte stack buffer (`payload_`) sized for header + 1024-byte payload (most objects). Only blobs exceeding 1024 bytes heap-allocate. `ptr_` is `uint8_t* const` set at construction; destructor frees iff `ptr_ != payload_.data()`. ~94% of real objects fit the stack buffer. The destructor `XRPL_ASSERT` verifies pointer/size coherence to catch any drift.
- `DecodedBlob` is a non-owning view into the raw buffer. Validation is by `wasOk()` flag, not exceptions. `createObject()` asserts `m_success` and copies the payload into an owning `Blob` for the returned `NodeObject`. `hotDUMMY` (512) falls through the type `switch` and leaves `m_success = false`.
NuDB additionally runs the encoded blob through `nodeobject_compress` (see Compression below).
## Compression (`include/xrpl/nodestore/detail/codec.h`)
NuDB-only. Four type tags prefix every stored blob (as a varint):
| Type | Format |
|---|---|
| 0 | Uncompressed (legacy; never written, still read) |
| 1 | LZ4-compressed |
| 2 | Sparse inner-node (bitmask + present hashes) |
| 3 | Full inner-node (all 16 hashes, no bitmask) |
**Inner-node fast path**: SHAMap inner nodes are exactly 525 bytes with `HashPrefix::innerNode` at offset 9. The compressor recognizes this and either packs only non-zero child hashes with a 16-bit presence bitmask (type 2) or stores all 16 hashes contiguously (type 3). Reconstruction zeros the `index`, `unused`, and `kind` fields — so a round-trip is *lossy* on those fields. `filter_inner()` pre-zeros them on the source side to make import-time `memcmp` verification succeed.
**LZ4 path**: `lz4_compress` stores a varint decompressed-size prefix then LZ4 payload. `lz4_decompress` validates the varint (overflow checked before `static_cast<int>`) then pre-allocates the exact output buffer before calling `LZ4_decompress_safe`. All size mismatches throw `std::runtime_error`.
**Varint** (`varint.h`): base-127 (not base-128) LEB-style encoding; bit 7 is continuation. Used for the type tag and the LZ4 decompressed-size prefix. The base-127 choice means `0x7F` never appears as a payload byte. Functions are function templates (not plain functions) to satisfy ODR across TUs. `read_varint` returns 0 on empty buffer, overrun, or overflow.
**BufferFactory pattern**: All codec functions take a callable `void*(size_t)` so allocation policy stays with the caller (NuDB passes its scratch buffer). Codecs never free memory; the factory object's lifetime governs cleanup.
## Async Read Pool (`Database`)
The base class spawns `readThreads` detached threads at construction. Each loops on `readCondVar_`, dequeues from `read_` (a `std::map<uint256, vector<pair<seq, callback>>>`), and calls the subclass's private `fetchNodeObject`.
**Hash coalescing**: Multiple concurrent `asyncFetch()` calls for the same hash collapse into a single map entry; one backend read fires all callbacks. For different `seq` values on the same hash, `isSameDB(seq1, seq2)` decides whether to reuse the fetched object or issue a second fetch.
**Batched dequeue**: Each worker extracts up to `requestBundle_` entries per lock acquisition (default 4, clamped 164 via `rq_bundle` config) to amortize mutex cost. Uses `read_.extract()` (C++17 node-handle) to move entries without copying.
**Threads are detached** (not joined), controlled by `readStopping_` atomic + `readThreads_` counter. `stop()` clears `read_`, broadcasts the condvar, and spin-yields until `readThreads_` reaches zero (asserted within 30s).
**Shutdown ordering — critical**: Derived classes *must* call `stop()` in their own destructors. The base destructor calls `stop()` as a safety net, but by then the derived vtable is already gone — a worker thread blocked in `fetchNodeObject` would invoke a destroyed vtable entry. See Common Bug Patterns.
**`asyncFetch()` during shutdown**: Silently discards the request if `isStopping()` is already true — callers during shutdown get no callback.
**`runningThreads_` vs `readThreads_`**: Workers increment `runningThreads_` on wake and decrement before waiting, so `getCountsJson()` can distinguish actively-processing threads from threads blocked on I/O. `readThreads_` is decremented on thread exit; reaching zero confirms all threads fully exited.
**Diagnostics**: `getCountsJson()` surfaces queue depth, thread counts, `rq_bundle`, write/read counts, bytes, hit counts, and total read duration (µs) for the `get_counts` RPC.
## BatchWriter (`include/xrpl/nodestore/detail/BatchWriter.h`)
Coalesces individual writes into batches for backends that benefit (RocksDB uses it; NuDB does not — NuDB's `do_insert` is synchronous).
- Privately inherits `Task`; the backend inherits `BatchWriter::Callback` and provides `writeBatch(Batch const&)`. Same object plays both roles, no extra allocation.
- **Double-buffer swap**: `store()` pushes into `mWriteSet`. `writeBatch()` holds the lock only long enough to swap `mWriteSet` with a fresh local vector, then releases before doing I/O. New stores accumulate concurrently with the flush. After each flush the loop re-checks the buffer before clearing `mWritePending` so no items are dropped.
- **`std::recursive_mutex` + `std::condition_variable_any`**: A synchronous scheduler (e.g., `DummyScheduler`) calls `performScheduledTask()` inline on the producer thread, which re-enters `writeBatch` on the same thread — plain `std::mutex` would deadlock.
- **Backpressure**: `store()` blocks on `mWriteCondition` when `mWriteSet.size() >= batchWriteLimitSize` (65536). Peak memory ≈ 2× the limit.
- **`mWritePending` flag**: Ensures only one scheduler task outstanding. First `store()` that finds flag clear raises it and calls `scheduleTask()`.
- **`getWriteLoad()`**: Returns `max(mWriteLoad, mWriteSet.size())` — conservative estimate reflecting both in-flight write count and pending accumulation count.
- **Destructor** calls `waitForWriting()` — no data is abandoned on backend teardown.
- **Telemetry**: After each flush, records count + wall-clock duration in `BatchWriteReport` and passes to `m_scheduler.onBatchWrite()`.
## Scheduler
Pure abstract (`include/xrpl/nodestore/Scheduler.h`): `scheduleTask(Task&)`, `onFetch(FetchReport)`, `onBatchWrite(BatchWriteReport)`. Contract: scheduler may invoke task on calling thread *or* a foreign thread — both are valid. Two implementations:
- **`DummyScheduler`**: `scheduleTask` runs `task.performScheduledTask()` synchronously on the calling thread; `onFetch`/`onBatchWrite` are no-ops. Used by every NodeStore test and by the bulk-import path in `Application.cpp`.
- **`NodeStoreScheduler`** (production, in `xrpld/app`): posts a `jtWRITE` job to the `JobQueue` (synchronous fallback if queue is stopped); routes `FetchReport` to `jtNS_SYNC_READ`/`jtNS_ASYNC_READ` load events.
`Task` (`Task.h`) is just a virtual `performScheduledTask()` + virtual destructor. `BatchWriter` inherits it privately so external code cannot treat it as a `Task`. The recursive mutex in `BatchWriter` is required precisely because `DummyScheduler` can call back synchronously.
`FetchReport` has a `const FetchType` member set at construction; `elapsed` is zero-initialized. `BatchWriteReport` carries elapsed + writeCount. Both are plain-old-data stack values passed by value.
## Rotating Backend / Online Deletion
`DatabaseRotatingImp` (`src/libxrpl/nodestore/DatabaseRotatingImp.cpp`) holds two `shared_ptr<Backend>`s: writable + archive. `SHAMapStoreImp` (in `xrpld/app`) drives the rotation policy; `DatabaseRotatingImp` only handles the atomic swap.
**`rotate(newBackend, callback)` sequence** (under `mutex_`):
1. `setDeletePath()` on the old archive, move it into a local `shared_ptr oldArchiveBackend`.
2. Promote `writableBackend_``archiveBackend_`.
3. Install `newBackend``writableBackend_`.
Then release the lock and invoke `callback(newWritableName, newArchiveName)`. The callback persists names to the SQL state DB. `oldArchiveBackend` falls out of scope *after* the callback returns — so the directory is deleted only after the state DB knows the new layout. Crash between swap and persist → recoverable from state DB on restart.
**Snapshot-and-release locking**: Every read/write copies the relevant `shared_ptr` under `mutex_`, releases the lock, then does I/O via the local. Locking across disk I/O would serialize all readers. Exception: `sync()` holds the lock for the full call (maintenance path, not latency-sensitive).
**Fetch fallthrough + promotion**: `fetchNodeObject` tries writable, then archive. If found in archive and `duplicate=true`, re-snapshots `writableBackend_` (to handle a rotation racing with the archive read) and writes the object into the *current* writable. Objects not promoted before the next rotation are gone forever — promotion is the migration mechanism.
**`newWritableBackendName`** is captured *before* the lock (calling `getName()` on the new backend); `newArchiveBackendName` is captured *inside* the lock from the demoted former writable. Both are passed to the callback.
## Manager / Factory Registration
`ManagerImp` is a Meyers singleton (`static ManagerImp _` in `instance()`). Its constructor calls four free functions (`registerNuDBFactory`, `registerRocksDBFactory`, `registerNullFactory`, `registerMemoryFactory`), each of which creates a function-local `static` factory whose constructor calls `Manager::insert(*this)`.
**Why this pattern**: Factories are never globals. If a global `Factory` destructor called `Manager::instance().erase()` after `ManagerImp` had been destroyed, the result would be UB (no order guarantee across translation units). Function-local statics initialize after `ManagerImp` and destroy before it — safe.
- `Manager::find()` is case-insensitive (`boost::iequals`) so `"NuDB"`, `"nudb"`, `"NUDB"` all match.
- `make_Backend()` throws `std::runtime_error` with operator-facing message on missing or unknown `type` key. Same `missing_backend()` helper covers both the absent-key and unrecognized-name paths.
- `make_Database()` = `make_Backend()` + `backend->open()` + wrap in `DatabaseNodeImp`. The explicit `open()` separation lets I/O errors surface before the full `Database` stack is built.
- Registry mutex protects `list_` (a `vector<Factory*>` of non-owning pointers). `erase()` uses `XRPL_ASSERT` — removing an unknown factory is a programming error.
- `Factory::createInstance` second overload accepts `nudb::context&` for shared I/O threads across NuDB shards. Non-NuDB backends inherit a default that returns `{}` (null), prompting the caller to fall back to the simpler overload.
## Backend-Specific Notes
**NuDB** (`backend/NuDBFactory.cpp`): Three on-disk files (`nudb.dat`, `nudb.key`, `nudb.log`); `fdRequired()` = 3. `appnum = 1` embedded in the header, sanity-checked on every open. `nudb_block_size` config key must be a power of 2 between 4096 and 32768; defaults to `nudb::block_size()` (filesystem-native). `for_each()` and `verify()` *close and reopen* the database — incompatible with concurrent access. `fetch()` uses a zero-copy callback into NuDB's internal buffer; decompression must happen inside the callback (buffer only valid during callback). `db_.insert()` returning `nudb::error::key_exists` is silently ignored (content-addressed: same hash → same data). `burstSize` set via `db_.set_burst()` after open — important performance parameter. Destructor catches `nudb::system_error` from `close()` because destructors must not propagate exceptions.
**RocksDB** (`backend/RocksDBFactory.cpp`): `RocksDBEnv` overrides `StartThread` to name threads `"rocksdb #N"` (using an atomic counter) for profiler visibility. `hard_set` config flag: when false (default), small `cache_mb`/`open_files` values are silently escalated to production-appropriate defaults (1024 MB cache, 8000 FDs). Implements both `Backend` and `BatchWriter::Callback`; uses `BatchWriter` for writes. `storeBatch` is atomic (single `WriteBatch` in WAL); `fetchBatch` is a serial loop with no atomicity. `sync()` is empty — WAL provides durability. Key passed to RocksDB via `std::bit_cast<char const*>` over the `uint256` — no copy. Raw option strings accepted via `bbt_options` and `options` config keys; throw on parse failure.
**Memory** (`backend/MemoryFactory.cpp`): `MemoryFactory` owns named `MemoryDB` instances in a case-insensitive (`boost::beast::iless`) map; multiple `MemoryBackend`s opened with the same path share the same `MemoryDB`. Survives backend close/reopen within a process. The `db.open` guard in `MemoryFactory::open()` is dead code (`open` is never set to `true`). `for_each` reads without holding the mutex — caller must ensure no concurrent writes. Module-level raw pointer `memoryFactory` allows `MemoryBackend::open()` to call back without holding a reference.
**Null** (`backend/NullFactory.cpp`): All operations no-op; `fetch` returns `notFound`. Exists so `type=none` is a valid config value. Doubles as a minimal reference implementation of `Backend`. `isOpen()` always returns `false`.
## Common Bug Patterns
- **Forgetting `stop()` in derived `Database` destructor**: Symptom is a crash during teardown in a worker thread invoking a destroyed vtable. The base `~Database()` calls `stop()` as a fallback but the derived vtable is already gone by then.
- **Holding `DatabaseRotatingImp::mutex_` across I/O**: Will serialize all readers. Always snapshot the `shared_ptr` under lock, release, then I/O.
- **Forgetting `duplicate=true` when archive fallback matters**: Objects fetched from archive are not promoted to writable; next rotation discards them silently.
- **Treating `hotDUMMY` as a real object**: Cache lookups must check the type before dereferencing.
- **Exceeding `batchWriteLimitSize`**: `BatchWriter::store()` blocks the caller; not a silent truncation, but unexpected backpressure can cause deadlock if the same thread is needed to drain.
- **Inaccurate `fdRequired()`**: Causes silent backend failures when the process file descriptor limit is exceeded. Aggregated across all backends by the base `Database`.
- **Changing `EncodedBlob` without `DecodedBlob`**: Breaks on-disk read compatibility silently — both classes define the format jointly.
- **Calling `for_each` / `verify` on NuDB concurrently with reads**: Both close and reopen the database; concurrent access is undefined.
- **Lossy inner-node round-trip**: `nodeobject_compress` zeros `index`/`unused`/`kind` on reconstruction. Code that verifies blobs after compress→decompress must call `filter_inner()` first.
- **Not calling `backend->open()` before wrapping in `DatabaseNodeImp`**: `make_Database()` does this correctly but manual construction may miss it; errors surface much later in I/O paths.
- **Using `MemoryBackend::for_each` concurrently**: No lock held; caller must guarantee no concurrent writes (implicit contract, not enforced).
- **`fetchBatch` atomicity assumption on RocksDB**: `storeBatch` is atomic (one `WriteBatch`); `fetchBatch` is a serial loop — no group atomicity on reads.
## Review Checklist
- Config: `[node_db]` has valid `type`, `path`, and (for NuDB) `nudb_block_size` if present.
- Online deletion: `SHAMapStoreImp` coordinates rotation with application lifecycle; rotation callback must persist new names before old archive can be deleted.
- New backend implementations: full `Backend` interface including `fdRequired()`, both `open()` overloads (default-throws is OK for non-NuDB), accurate concurrency guarantees on `fetch`/`store`, no-op `verify()` if not implementing.
- Derived `Database` subclasses: `stop()` in destructor; override `fetchNodeObject(hash, seq, FetchReport&, duplicate)` not the public non-virtual.
- Any change to on-disk format: update both `EncodedBlob` and `DecodedBlob`; consider backward-compat type tag (codec.h type 0 path is the precedent).
- Inner-node codec changes: update `filter_inner()` alongside compressor/decompressor; verify round-trip with zeroed metadata fields.
- New varint usages: confirm base-127 (not base-128) arithmetic; use `varint_traits<T>::max` for stack buffer sizing.
## Key Files
### Public interfaces
- `include/xrpl/nodestore/NodeObject.h` — immutable object, factory-only construction, `CountedObject` live-count
- `include/xrpl/nodestore/Backend.h` — pluggable storage interface; concurrency contracts annotated inline
- `include/xrpl/nodestore/Database.h` — async read pool, instrumented fetch, Template Method pattern
- `include/xrpl/nodestore/DatabaseRotating.h` — adds `rotate()`
- `include/xrpl/nodestore/Manager.h` — singleton factory registry
- `include/xrpl/nodestore/Factory.h` — abstract factory; two `createInstance` overloads (plain + nudb::context)
- `include/xrpl/nodestore/Scheduler.h` / `Task.h` — async dispatch + telemetry
- `include/xrpl/nodestore/DummyScheduler.h` — synchronous, for tests + import
- `include/xrpl/nodestore/Types.h``Status`, `Batch`, batch size constants
### Implementations (detail/)
- `include/xrpl/nodestore/detail/DatabaseNodeImp.h` — single-backend; `isSameDB` always true; ledgerSeq ignored
- `include/xrpl/nodestore/detail/DatabaseRotatingImp.h` — rotation + promotion; snapshot-and-release locking
- `include/xrpl/nodestore/detail/ManagerImp.h` — singleton + registry; `missing_backend()` static helper
- `include/xrpl/nodestore/detail/BatchWriter.h` — write coalescing + backpressure; recursive mutex
- `include/xrpl/nodestore/detail/EncodedBlob.h` — serializer; 1033-byte stack buffer, heap fallback
- `include/xrpl/nodestore/detail/DecodedBlob.h` — parser; non-owning view, `wasOk()` flag
- `include/xrpl/nodestore/detail/codec.h` — LZ4 + inner-node compression; BufferFactory pattern
- `include/xrpl/nodestore/detail/varint.h` — base-127 varint; function templates for ODR safety
### Source (libxrpl/nodestore/)
- `src/libxrpl/nodestore/Database.cpp` — read pool, hash coalescing, shutdown, `importInternal()`
- `src/libxrpl/nodestore/DatabaseNodeImp.cpp` — simple backend wrapper; `fetchBatch` resize guard
- `src/libxrpl/nodestore/DatabaseRotatingImp.cpp` — rotation, promotion, snapshot locking
- `src/libxrpl/nodestore/BatchWriter.cpp` — double-buffer swap, backpressure, load estimation
- `src/libxrpl/nodestore/ManagerImp.cpp` — singleton init + factory registration
- `src/libxrpl/nodestore/DecodedBlob.cpp` — format parsing; legacy 8-byte prefix handling
- `src/libxrpl/nodestore/DummyScheduler.cpp` — synchronous no-op scheduler
- `src/libxrpl/nodestore/NodeObject.cpp``PrivateAccess` factory; `CountedObject` wiring
- `src/libxrpl/nodestore/backend/NuDBFactory.cpp` — production default; compression pipeline
- `src/libxrpl/nodestore/backend/RocksDBFactory.cpp` — alternate production; `RocksDBEnv` thread naming
- `src/libxrpl/nodestore/backend/MemoryFactory.cpp` — test/ephemeral; shared `MemoryDB` by path
- `src/libxrpl/nodestore/backend/NullFactory.cpp``type=none`; reference `Backend` skeleton
### Lifecycle orchestration
- `src/xrpld/app/misc/SHAMapStoreImp.cpp` — drives `rotate()`, manages state DB, enforces min rotation interval (256 ledgers networked / 8 standalone)

317
docs/skills/peering.md Normal file
View File

@@ -0,0 +1,317 @@
# Overlay Peering
P2P network using persistent TCP/IP connections. Messages serialized via Protocol Buffers. `OverlayImpl` manages connections; `PeerImp` handles per-peer logic. `PeerFinder` (sub-module under `peerfinder/`) handles peer discovery, slot accounting, and address caches.
## Key Invariants
- Connection preference order: Fixed Peers → Livecache → Bootcache
- Cluster connections and reserved (`PeerReservationTable`) connections do NOT count toward slot limits in `Counts::can_activate` — they bypass `m_in_active`/`m_out_active` caps
- Validators are forced `peerPrivate=true` by `Config::makeConfig` even without explicit `[peer_private]`; this is "soft" privacy (still accepts inbound, but asks peers not to gossip address). `wantIncoming` is derived *before* the validator key check fires, so a validator with a key still advertises inbound willingness internally.
- Protobuf message changes MUST maintain wire compatibility or risk network partitioning
- Squelching: after `MAX_SELECTED_PEERS=5` peers each cross `MAX_MESSAGE_THRESHOLD=20` messages, a random 5-peer subset becomes "Selected"; rest are muted via `TMSquelch` for a randomized window in `[MIN_UNSQUELCH_EXPIRE=300s, MAX_UNSQUELCH_EXPIRE_PEERS=3600s]`
- Reduce-relay does not activate for `WAIT_ON_BOOTUP=10min` after process start (`Slots::reduceRelayReady`)
- Handshake binds TLS session to node identity via signature of `makeSharedValue` (SHA-512 XOR of TLS finished messages, then `sha512Half`); a zero shared value (degenerate XOR) is rejected
- Wire format: 6-byte header uncompressed, 10-byte compressed; 26-bit payload size field caps messages at `maximumMessageSize = 64 MiB`
- Hop count cap: `Endpoint` constructor clamps `hops` to `maxHops+1=7`; `Logic::preprocess` drops `hops > maxHops=6` and increments surviving hops by 1 before storage
- TX reduce-relay queue is bounded by `MAX_TX_QUEUE_SIZE=10000` hashes per peer; required to stay under the 64 MiB protocol limit at high TPS
- `peersWithMessage_` (in `Slots`) is `inline static` — shared across all instantiations, not per-instance
- `Bootcache` valence is a *streak* counter: clamped to 0 before crossing sign, so a failing peer resets to 0 before going positive. Static peers receive `staticValence=32`.
- `Livecache` uses `push_front` insertion — MUST `shuffle()` before handout to prevent topology manipulation by an adversary repeatedly advertising its own address
- `SourceStrings::fetch()` silently drops malformed addresses — no error returned for bad config entries
- `Checker` destructor calls `wait()` only; must call `stop()` first explicitly before destruction to cancel pending probes
## Common Bug Patterns
- PeerFinder slot exhaustion: if `inPeers`/`outPeers` is reached, new connections silently fail; check `Counts::can_activate` and `attempts_needed`
- `HashRouter::shouldRelay` prevents duplicate relay; bypassing it causes message storms (`OverlayImpl::relay` enforces this)
- `ConnectAttempt::processResponse` on HTTP 503 parses `peer-ips` JSON array for redirect; malformed entries are validated as endpoints before being passed to `peerFinder().onRedirects`
- `PeerImp::close` must run on the strand; calling from wrong thread causes race conditions on socket and timer state
- Destructor chain: `~PeerImp``deletePeer``onPeerDeactivate``on_closed``remove`; interrupting this chain leaks slots
- `~ConnectAttempt` releases the PeerFinder slot via `on_closed(slot_)` only if `slot_ != nullptr`; on successful promotion to `PeerImp`, `slot_` is moved out and must be left null
- `tryAsyncShutdown()` must defer SSL shutdown until `!readPending_ && !writePending_`; calling `async_shutdown` while async I/O is in flight is undefined behavior
- `dynamic_pointer_cast<SlotImp>` is required wherever `Manager` API takes `shared_ptr<Slot>` but `Logic` needs `SlotImp`
- A compressed message from a peer that did NOT negotiate compression is a hard `protocol_error` in `invokeProtocolMessage` (prevents CPU forcing attack)
- Self-squelch attempt (peer sends `TMSquelch` for our own validation key) is silently dropped in `PeerImp::onMessage(TMSquelch)` — never trust a peer to silence us
- `Cluster::for_each` callback must NOT call `Cluster::update` — same non-recursive mutex, deadlock
- `ZeroCopyOutputStream` destructor MUST flush trailing `commit_` — protobuf doesn't guarantee terminal `BackUp` or `Next` call; missing flush silently drops bytes
- `Bootcache` erase-then-reinsert pattern in `on_success`/`on_failure`: bimap values are logically const after insert, so valence updates require erase + reinsert
- `SlotImp::state(active)` is forbidden — must use `activate()` which also sets `whenAcceptEndpoints`; bypassing this leaves the flood-control timestamp unset
- `SourceStrings::fetch()` has an idempotent retry loop quirk: if `from_string()` fails, it retries the same string (no-op); effective behavior is just "drop invalid entries"
## Connection Lifecycle
### Outbound (`ConnectAttempt`)
1. `OverlayImpl::connect` → resource check → `peerFinder().new_outbound_slot()` → create `ConnectAttempt`
2. Five-phase chain: `async_connect` → TLS `async_handshake` → HTTP write → HTTP read → `processResponse`
3. Dual-timer scheme: global 25s ceiling (`connectTimeout`) + per-step timers (8/8/3/3/2s); both share `onTimer` callback distinguishing by expiry comparison. Global timer armed once (guarded by epoch-check), step timer reset at each phase.
4. `ioPending_` flag prevents starting SSL shutdown while another async op is pending on the stream
5. On HTTP 101: `verifyHandshake` → create `PeerImp` → move `slot_` and `stream_ptr_` into peer → `overlay_.add_active(peer)`
6. On HTTP 503 with JSON `peer-ips`: forward to `peerFinder().onRedirects`
7. `verify_none` on TLS — security comes from node-key signature over `makeSharedValue`, not cert chain
### Inbound (`OverlayImpl::onHandoff`)
1. HTTP server hands off TLS stream + upgrade request
2. Sequential gates: `processRequest` (for `/crawl`, `/health`, `/vl/`) → resource limit → `new_inbound_slot``negotiateProtocolVersion``makeSharedValue``verifyHandshake`
3. Create `PeerImp`, insert into `m_peers` (slot-keyed); `peer->run()` MUST be called while holding `mutex_` (race vs `stop()` draining list)
4. `m_peers` populated here, but `ids_` only after `activate()` post-protocol-handshake
## Two-Phase Peer Registration
- `m_peers`: `PeerFinder::Slot → weak_ptr<PeerImp>` — populated at handshake start, used for slot management
- `ids_`: `Peer::id_t → weak_ptr<PeerImp>` — populated at `activate()` after protocol handshake; used for broadcast and relay
- Outbound peers (via `ConnectAttempt`) populate both maps together in `add_active`
## Review Checklist
- Verify resource manager checks on both inbound and outbound connections
- New protocol messages: update protobuf definitions AND verify wire compatibility; add LZ4 eligibility list in `Message::compress()` if bulk
- Squelch changes: test with high peer counts; incorrect squelch logic can silence validators
- Header parsing changes (`ProtocolMessage.h`): the high-bit format guard (`*iter & 0x80`) and reserved-bit checks (`*iter & 0x0C == 0`) MUST remain
- Adding a new compression `Algorithm` enum value: must have high bit set, low nibble zero (so it's extractable via `*iter & 0xF0`); update `Compression.h` dispatch switches or the `UNREACHABLE` guard fires
- Strand discipline: any new method touching socket/queue state must guard with `if (!strand_.running_in_this_thread()) return post(strand_, ...)`
- `ZeroCopyOutputStream` use: always ensure the object goes out of scope (destructor flush) before the caller reads from the streambuf
- Bootcache changes: remember valence updates require erase-then-reinsert (bimap), and the cooldown (60s) batches writes
## Key Patterns
### Strand Execution
```cpp
// REQUIRED: socket operations must run on the strand
if (!strand_.running_in_this_thread())
return post(strand_, std::bind(
&PeerImp::close, shared_from_this()));
// Calling socket ops from wrong thread causes races on state
```
### Duplicate Relay Prevention
```cpp
// REQUIRED: check HashRouter before relaying
if (!hashRouter_.shouldRelay(hash))
return; // already relayed — suppress duplicate
overlay_.relay(message, hash);
```
### Shared Lazy Compression
```cpp
// Message::getBuffer(Compressed::On) — compresses once, shared across N peers
std::call_once(once_flag_, &Message::compress, this);
// Eligible types only (mtTRANSACTION, mtLEDGER_DATA, mtVALIDATOR_LIST, ...);
// Latency-sensitive types (mtPING, mtVALIDATION, mtPROPOSE_LEDGER) excluded.
// Falls back to uncompressed if savings < 4 bytes (compressed header overhead).
// Messages <= 70 bytes are never compressed.
```
### Resource Charging Batches
```cpp
// PeerImp::onMessageBegin resets fee_; onMessageEnd applies charge once per
// message via charge(). Handlers escalate via fee_.update() (monotonic).
```
### Exception-Based Handshake Failures
```cpp
// verifyHandshake() throws std::runtime_error on any check failure;
// callers (ConnectAttempt, PeerImp::doAccept) wrap in try/catch and tear down.
```
### Traffic Categorization Double-Call
```cpp
// categorize() called once at Message construction (outbound, inbound=false).
// addCount() called twice per message: once for category, once for 'total'.
// 'unknown' is NOT rolled into 'total'.
```
## Reduce-Relay (Squelch) Architecture
Two halves, decoupled:
- **Upstream (`Slot`/`Slots` in `OverlayImpl`)**: counts inbound validator messages per peer, selects 5 sources, calls `SquelchHandler::squelch()` (implemented by `OverlayImpl`) which sends `TMSquelch` over the wire. Uses `UptimeClock`.
- **Downstream (`Squelch` in `PeerImp`)**: receives `TMSquelch`, stores expiry in `hash_map<PublicKey, time_point>`. `PeerImp::send()` calls `expireSquelch(validator)` before transmitting any validator-keyed message; `false` return → drop, count under `TrafficCount::squelch_suppressed`.
All `OverlayImpl::updateSlotAndSquelch` calls are dispatched to `strand_` because `Slots<UptimeClock>` is not thread-safe.
Squelch expiry is lazy: no background timer. `expireSquelch` removes stale entries on next send. Out-of-bounds durations in incoming `TMSquelch` trigger `feeInvalidData` and `removeSquelch` (defensive clear).
### Slot Selection Algorithm (`Slot<clock_type>::update`)
Two-threshold design: peers enter the *considered pool* at `MIN_MESSAGE_THRESHOLD=19` messages; selection fires when `MAX_SELECTED_PEERS=5` peers individually reach `MAX_MESSAGE_THRESHOLD=20`. The one-message gap lets the system confirm a peer has continued sending before committing it as a candidate. If fewer than 5 non-idle peers are available at selection time, `initCounting()` resets and defers — never squelches with incomplete picture.
Inactivity (`IDLED=8s`): idle selected peer → unsquelch all + revert to `Counting`. Slots whose `lastSelected_` is older than `MAX_UNSQUELCH_EXPIRE_DEFAULT=600s` are deleted by `deleteIdlePeers()`.
Squelch duration scaled by peer count: `min(max(600s, 10s × npeers), 3600s)`.
## TX Reduce-Relay
When `txReduceRelayEnabled_` (negotiated via `FEATURE_TXRR`):
- Full transactions go to a quota of peers (computed from `TX_REDUCE_RELAY_MIN_PEERS` and `TX_RELAY_PERCENTAGE`)
- Remaining peers get hash announcements via `addTxQueue` → batched `TMHaveTransactions` flushed by periodic `sendTxQueue`
- Peers without the feature always get full message (back-compat)
- Peer list is shuffled with `default_prng()` to avoid systematic bias
- `MAX_TX_QUEUE_SIZE=10000` cap; `doTransactions` rejects requests exceeding this as malformed
## Tracking State
`tracking_` (atomic `Tracking` enum): `unknown`, `converged`, `diverged`. Thresholds from `Tuning.h`:
- `convergedLedgerLimit=24` — within this many ledgers of validated index
- `divergedLedgerLimit=128` — beyond this, mark diverged and start the `MAX_DIVERGED_TIME` countdown
Hysteresis (24 vs 128) prevents oscillation on slightly-behind peers.
## Send Queue Backpressure (`Tuning.h`)
Three tiers:
- `targetSendQueue=128` — below this, peer is healthy; resets `large_sendq_` counter
- `sendqIntervals=4` — consecutive 1-second ticks at-or-above target before disconnect
- `dropSendQueue=192` — refuse new query responses (don't do expensive lookups for stuck peer)
- `sendQueueLogFreq=64` — log every 64th enqueue when queue is large (throttle log spam)
Other key tuning constants:
- `softMaxReplyNodes=8192`/`hardMaxReplyNodes=12288` — soft/hard caps for `TMLedgerData` node counts
- `maxQueryDepth=3` — recursion limit for `TMGetLedger`; deeper queries rejected as `badData`
- `checkIdlePeers=4` — modulo for timer-driven idle peer scan
- `readBufferBytes=16384``constexpr size_t` for socket read buffer (separate from enum for type reasons)
## PeerFinder Sub-Module
Implements peer address discovery, slot accounting, and reachability checks. Owned by `OverlayImpl` via `make_Manager()`. Hidden behind `Manager` abstract interface; concrete `ManagerImp` lives in `detail/PeerfinderManager.cpp`.
### Components
- **`Logic<Checker>`**: central decision engine. Holds `slots_`, `connectedAddresses_` (multiset for IP limit), `keys_` (dedup public keys), `fixed_`, `livecache_`, `bootcache_`. Guarded by `std::recursive_mutex lock_`. Recursive mutex needed because `on_closed()` calls `remove()` independently.
- **`Livecache`**: ~30s TTL gossip cache (`Tuning::liveCacheSecondsToLive`). `beast::aged_map` + `boost::intrusive::list` per hop bucket (size `maxHops+2=9`, indices 08). MUST `shuffle()` before handout — `push_front` insertion is exploitable otherwise.
- **`Bootcache`**: persistent (SQLite via `StoreSqdb`). Bimap (`unordered_set_of` by endpoint, `multiset_of` by valence) for O(1) update and ranked iteration. Valence is a streak counter (clamped to 0 before crossing sign). `staticValence=32` for `[ips]`/`[ips_fixed]`. Throttled writes: 60s cooldown via `flagForUpdate`/`checkUpdate`; destructor force-flushes. Pruning: remove bottom 10% when over 1000 entries.
- **`Checker<Protocol>`**: async TCP probe for verifying peer's advertised listening port. Self-managing `async_op` via `shared_ptr` capture in handler; `~Checker` calls `wait()` (not `stop()` — must call `stop()` first explicitly).
- **`Counts`**: pure bookkeeping, no own mutex (relies on `Logic::lock_`). All updates funnel through private `adjust(slot, ±1)`. Fixed/reserved bypass active caps in `can_activate`. `isConnectedToNetwork()` returns `true` only when `m_out_max == 0` (pure listener mode).
- **`SlotImp`**: concrete slot state. Two constructors: inbound takes both endpoints, sets `checked=false,canAccept=false`; outbound only takes remote, sets `checked=true,canAccept=true` since TCP connect itself proves reachability. State machine enforced by `XRPL_ASSERT` in `state()` and `activate()`. `m_listening_port` is `std::atomic<int32_t>` with `-1` sentinel.
- **`Fixed`**: per-fixed-peer backoff. Fibonacci sequence in minutes: `{1,1,2,3,5,8,13,21,34,55}`, clamped to last index. `failure()` advances; `success()` resets.
- **`Source`**: abstract; only concrete is `SourceStrings` (config `[ips]`). `cancel()` is a no-op for all current (synchronous) implementations but exists as an extension point for future async sources.
### Autoconnect Tier Order
`Logic::autoconnect()` strictly returns at first non-empty tier:
1. Fixed peers (via `get_fixed`, respecting `Fixed::when()` backoff)
2. Livecache (shuffled, reverse hop order — far peers first for topological diversity)
3. (Bootcache refill placeholder for DNS)
4. Bootcache fallback
`m_squelches` aged set (60s TTL, `Tuning::recentAttemptDuration`) suppresses rapid retries to same address across calls.
### Endpoint Gossip
- **Receiving (`on_endpoints` + `preprocess`)**: rate-limited via per-slot `whenAcceptEndpoints` (`Tuning::secondsPerMessage=151s`, a prime to desync nodes). Random sample-down if oversized. `hops==0` entry's IP replaced with sender's socket address (peer doesn't know own public IP). All surviving hops incremented by 1 before livecache insert. First-hop entries trigger `Checker::async_connect` for reachability test.
- **Sending (`buildEndpointsForPeers`)**: shuffle slots, use `SlotHandouts` per peer, run `handout()` algorithm. Self-advertisement uses zero-address IPv6 sentinel — receiver substitutes socket's remote address.
- **`Handouts` algorithm**: round-robin across multiple targets to ensure fair distribution. `move_back` after each acceptance rotates endpoints. Per-target dedup via `SlotImp::recent_t` (aged map; `filter()` uses `<=` hop comparison; `try_insert` writes both received and sent into recent — pessimistic update).
### `recent_t` Filter Semantics
`insert()` updates cached hop count only if new value ≤ existing. `filter()` suppresses sends when cached hop ≤ sending hop. The `<=` boundary is intentional — sending at a strictly lower hop than the peer knows is still useful; matching or higher is redundant.
## TLS Channel-Binding (Non-Standard)
`makeSharedValue` derives a 256-bit value from TLS finished messages:
```
sha512Half(SHA512(my_finished) XOR SHA512(peer_finished))
```
Rejects degenerate zero-XOR case. Non-standard (see OpenSSL #5509, XRPLF/rippled #2413). TLS cert verification is explicitly disabled (`verify_none`) — security comes from binding node-public-key signature to this shared value via `Session-Signature` HTTP header. MITM produces different finished values → signature mismatch → rejection.
## Handshake HTTP Headers
Built by `buildHandshake`, verified by `verifyHandshake`. Verify order is layered (cheap → expensive):
1. `Network-ID` mismatch
2. `Network-Time` ±20s tolerance
3. `Public-Key` parse, self-connection check
4. `Session-Signature` cryptographic verify
5. `Local-IP`/`Remote-IP` cross-check (NAT diagnostics)
Feature negotiation via `X-Protocol-Ctl`: `compr=lz4`, `vprr=1`, `txrr=1`, `ledgerreplay=1`. Responder echoes back only features locally configured AND requested (AND-gate). Initiator unconditionally advertises all locally supported features.
## ZeroCopy I/O Adapters
`ZeroCopyInputStream<Buffers>` wraps `ConstBufferSequence` for protobuf parsing without intermediate copy. `BackUp`/`Skip` support sub-buffer granularity via tracked `pos_` within current buffer. Empty buffer sequence is safe (null `pos_` initialized in constructor).
`ZeroCopyOutputStream<Streambuf>` uses deferred commit pattern: `commit_` tracks bytes promised but not yet committed. Destructor MUST flush trailing `commit_` — protobuf doesn't guarantee terminal `BackUp` or `Next` call. `BackUp(n)` asserts `n <= commit_` and prevents double-commit.
## Traffic Categorization
`TrafficCount::categorize()` is called once at `Message` construction (outbound) and per inbound message. Two-stage: static `unordered_map<MessageType, category>` for simple types, then `dynamic_cast` for protobuf inspection of `TMLedgerData`/`TMGetLedger` (`requestcookie` distinguishes forwarded vs originated) and `TMGetObjectByHash` (`query()` flag determines get/share). `unknown` is NOT rolled into `total`. `squelch_suppressed` records bytes NOT transmitted due to squelch; `squelch_ignored` records bytes from peers ignoring squelch.
## Compression Eligibility (`Message::compress`)
Skip if ≤70 bytes. Whitelist of eligible types: `mtMANIFESTS`, `mtENDPOINTS`, `mtTRANSACTION`, `mtGET_LEDGER`, `mtLEDGER_DATA`, `mtGET_OBJECTS`, `mtVALIDATOR_LIST`, `mtVALIDATOR_LIST_COLLECTION`, `mtREPLAY_DELTA_RESPONSE`, `mtTRANSACTIONS`. Excludes high-frequency control messages (`mtPING`, `mtVALIDATION`, `mtPROPOSE_LEDGER`, `mtSTATUS_CHANGE`). If compressed size doesn't beat uncompressed minus 4-byte header overhead, fall back to uncompressed (`bufferCompressed_` cleared, `getBuffer()` returns uncompressed).
## HTTP Endpoints (served by `OverlayImpl::processRequest`)
- `/crawl` — JSON topology, gated by bitmask config (`CrawlOptions::Overlay|ServerInfo|ServerCounts|Unl`)
- `/health` — three-tier status (200/503/500) — HTTP status encodes result so LBs need no JSON parsing
- `/vl/<key>` or `/vl/<version>/<key>` — signed validator list
## Concurrency Notes
- `OverlayImpl::mutex_` is `std::recursive_mutex` (acknowledged tech debt: `// VFALCO use std::mutex`). Recursion stems from `run()` triggering callbacks back into overlay.
- `cond_` is `condition_variable_any` (needs Lockable, not BasicLockable) for shutdown drain
- `work_` (`executor_work_guard` as `std::optional`) keeps `io_context` alive; `reset()` during `stop()` lets queue drain
- Strand vs mutex: peer registry mutations use `mutex_`; timer/squelch/tx-metrics work uses strand
- `OverlayImpl::Child` registration: destructor auto-removes from `list_`; `stopChildren()` copies pointers before iterating to avoid invalidation
- `PeerImp` field locks: `recentLock_` (ledger state, latency), `nameMutex_` (`shared_mutex` for `name_`); strand-confined fields need no lock
- `TxMetrics` has its own `std::mutex`; writers additionally serialize via overlay strand; RPC readers call `json()` directly without going through strand
- `Cluster::mutex_` is non-recursive — `for_each` callback must not call `update()`
- `PeerReservationTable`: `list()` deliberately releases lock before `std::sort` to minimize hold time (snapshot sort pattern)
## Cluster Registry
`Cluster` owns a `std::set<ClusterNode, Comparator>`. The `Comparator` is `is_transparent` — enables `find(PublicKey)` without constructing a dummy `ClusterNode`. `update()` enforces monotonic time (rejects stale reports), preserves names across nameless gossip updates, and uses erase+`emplace_hint` for O(1) amortized reinsert. `load()` is fail-fast on malformed lines (returns `false`) but tolerates duplicates with a warning (first entry wins).
## PeerSet (Data Acquisition)
`PeerSet` / `PeerSetImpl` manages the working set of peers queried for a single in-flight data acquisition (ledger, tx-set, etc.). Uses scored peer selection (`Peer::getScore(hasItem)`) sorted descending. `peers_` set of `Peer::id_t` acts as exclusion list — same peer is never re-added across retries. `DummyPeerSet` via `make_DummyPeerSet()` is the null-object used when `loadOldLedger()` needs `InboundLedger` without live peers.
## Key Files
### Overlay core
- `src/xrpld/overlay/Overlay.h` / `detail/OverlayImpl.{h,cpp}` — main manager
- `src/xrpld/overlay/Peer.h` / `detail/PeerImp.{h,cpp}` — per-peer logic
- `src/xrpld/overlay/Message.h` / `detail/Message.cpp` — wire envelope, lazy compression
- `src/xrpld/overlay/detail/ConnectAttempt.{h,cpp}` — outbound connection state machine
- `src/xrpld/overlay/detail/Handshake.{h,cpp}` — handshake crypto, feature negotiation
- `src/xrpld/overlay/detail/ProtocolMessage.h` — wire framing, dispatch
- `src/xrpld/overlay/detail/ProtocolVersion.{h,cpp}``XRPL/x.y` negotiation
- `src/xrpld/overlay/detail/ZeroCopyStream.h` — protobuf/Asio buffer adapters
- `src/xrpld/overlay/detail/Tuning.h` — all overlay magic numbers
- `src/xrpld/overlay/make_Overlay.h` — factory + `setup_Overlay` (parses `[overlay]`, `[crawl]`, `[vl]`, `[network_id]`)
- `src/xrpld/overlay/predicates.h` — composable peer-selection/dispatch functors for `Overlay::foreach`
### Reduce-relay
- `src/xrpld/overlay/Slot.h` — per-validator state machine + selection algorithm
- `src/xrpld/overlay/Squelch.h` — per-peer suppression enforcement
- `src/xrpld/overlay/ReduceRelayCommon.h` — all reduce-relay constants
### Telemetry
- `src/xrpld/overlay/detail/TrafficCount.{h,cpp}` — per-category byte/message counters
- `src/xrpld/overlay/detail/TxMetrics.{h,cpp}` — rolling averages for tx reduce-relay
### Cluster
- `src/xrpld/overlay/Cluster.h` / `ClusterNode.h` / `detail/Cluster.cpp` — trusted-node registry with heterogeneous lookup
### PeerSet (data acquisition)
- `src/xrpld/overlay/PeerSet.h` / `detail/PeerSet.cpp` — scored peer selection for InboundLedger etc.
### Reservations
- `src/xrpld/overlay/detail/PeerReservationTable.cpp` — persistent allowlist via SQLite
### Compression
- `src/xrpld/overlay/Compression.h``Algorithm` enum, dispatch wrappers, wire-format constants
### PeerFinder
- `src/xrpld/peerfinder/PeerfinderManager.h` / `Slot.h` / `make_Manager.h` — public interface
- `src/xrpld/peerfinder/detail/Logic.h` — central decision engine
- `src/xrpld/peerfinder/detail/Livecache.h` / `Bootcache.{h,cpp}` — address caches
- `src/xrpld/peerfinder/detail/Checker.h` — async reachability prober
- `src/xrpld/peerfinder/detail/SlotImp.{h,cpp}` — slot state machine
- `src/xrpld/peerfinder/detail/Counts.h` — slot bookkeeping
- `src/xrpld/peerfinder/detail/Fixed.h` — Fibonacci backoff
- `src/xrpld/peerfinder/detail/Handouts.h` — fair distribution algorithm
- `src/xrpld/peerfinder/detail/StoreSqdb.h` — SQLite persistence (schema v4, migration handles `DROP COLUMN` via table rename)
- `src/xrpld/peerfinder/detail/Source.h` / `SourceStrings.{h,cpp}` — abstract + static-string address source
- `src/xrpld/peerfinder/detail/Tuning.h` — peerfinder magic numbers
- `src/xrpld/peerfinder/detail/iosformat.h``leftw` stream manipulator for log alignment

429
docs/skills/protocol.md Normal file
View File

@@ -0,0 +1,429 @@
# Protocol and Serialization
The protocol layer defines XRPL's wire format, type system, and validation rules. It owns the canonical binary encoding required for signatures and consensus, the macro-driven registries for features/transactions/ledger entries/sfields/permissions, the typed object model (`STBase` hierarchy) that every transaction and ledger object inhabits, the cryptographic primitives, and the JSON/RPC boundary.
## Layered Type System
```
Asset = std::variant<Issue, MPTIssue> ← unified asset identity (XRP/IOU/MPT)
Issue = (Currency, AccountID) ← XRP iff currency==zero
MPTIssue = wraps MPTID (192-bit: seq32 || account160)
Amount types (lean, runtime polymorphic via Asset):
XRPAmount = int64 drops ← integral, no asset
IOUAmount = (mantissa, exponent) ← 15-digit decimal floating point
MPTAmount = int64 ← integral, no asset
STAmount = unified wire/serialized form holding Asset + value
- holds<Issue>(), holds<MPTIssue>(), native(), integral()
- canonicalize() normalizes (mantissa, exponent) per asset rules
- Internal: mAsset + mValue(uint64) + mOffset(int) + mIsNegative(bool)
```
`PathAsset` = `std::variant<Currency, MPTID>` — pathfinding-only asset reference (no issuer); used inside `STPathElement`.
Conversion utilities in `AmountConversions.h`: `toSTAmount`, `toAmount<T>`, `getAsset<T>`. Lean→STAmount is implicit-friendly; STAmount→lean is explicit (`get<>` throws on type mismatch).
`Units.h` provides phantom-typed `ValueUnit<TAG, T>` (`Drops`, `FeeLevel`, `FeeLevelDouble`) with `unit_cast<>` for explicit conversion; prevents drop/fee-level mixups at compile time.
## Key Invariants
- **Canonical field ordering:** sort by `(SerializedTypeID << 16) | fieldValue`, NOT by raw Field ID bytes — wrong sort breaks signatures
- **Field ID encoding:** 13 bytes; both type and field codes <16 single byte `(type<<4)|name`
- **Hash domain separation:** every signable payload prepends a 4-byte `HashPrefix` (`STX\0`, `SMT\0`, `VAL\0`, `BCH\0`, `CLM\0`, `LWR\0`, `MAN`, `TXN`, etc.) never share hashes across domains. Helper `make_hash_prefix(a,b,c)` is constexpr `uint32_t` builder.
- **STObject access semantics:** `obj[sfFoo]` throws `FieldErr` if absent; `obj[~sfFoo]` returns `std::optional`. `getOrThrow<T>(name)` family in `json_get_or_throw.h` enforces presence + type for raw JSON inputs.
- **Amendment IDs are deterministic:** `featureFoo == sha512Half(Slice("Foo"))` never change a feature name. Feature names must satisfy `isFeatureName()` at compile time (`UpperCamel` regex). Names exactly 32 bytes long are forbidden (reserved for raw hash collision prevention).
- **`numFeatures` is a ceiling, NOT an exact count.** Counting includes `XRPL_RETIRE_*` and any inactive macros; never use it as a length.
- **Feature registry frozen at startup:** `registerFeature` checks `numFeatures` and aborts via static-init `LogicError` if exceeded. The `readOnly` atomic fence flips after all file-scope variables are initialized any query before then asserts.
- **Singletons everywhere:** `SField`, `LedgerFormats`, `TxFormats`, `InnerObjectFormats`, `Permission`, `Feature` registry all use Meyer's singletons; registration completes before `main()` via static init.
- **Multi-sign signers MUST be sorted ascending by AccountID** (no duplicates, count in [1,32], cannot include tx account). The signer's AccountID is appended to the multi-sign blob to prevent shared-RegularKey replay attacks.
- **`vfFullyCanonicalSig` always set** by signer; verifiers normalize ECDSA S to low form via libsecp256k1.
- **Amendment-gated arithmetic:** `getSTNumberSwitchover()` is a `LocalValue<bool>` (per-coroutine) selecting legacy vs `Number`-based normalization in `IOUAmount`/`STAmount`.
- **TxMeta `AffectedNodes` must be sorted by index** for canonical serialization (consensus-critical). `addRaw()` performs this sort; failure is a consensus fork risk.
- **STObject debug-only field-uniqueness checks** (`isFieldAllowed`): silent duplicate fields in production are possible bugs but no runtime check.
- **STLedgerEntry construction fails loudly** if the type is unrecognized no silent fallback.
- **STValidation only accepts secp256k1 keys**; Ed25519 keys throw at construction time.
- **STNumber two-phase rounding contract:** `associateAsset()` must be called before `add()`. The assertion in `add()` checks idempotency calling `setValue()` after `associateAsset()` without re-associating is a programming error.
## Macro-Driven Registries (X-Macros)
Single source of truth for each registry; `.macro` files included multiple times with redefined macros to generate enum, declarations, and definitions.
| Macro file | Used for | Add requires |
|---|---|---|
| `features.macro` | `XRPL_FEATURE`, `XRPL_FIX`, `XRPL_RETIRE_*` | Bump `numFeatures` in `Feature.h` |
| `transactions.macro` | `TRANSACTION(tag, value, name, delegable, amendment, privileges, fields)` | nothing count derived |
| `ledger_entries.macro` | `LEDGER_ENTRY(tag, value, name, rpcName, fields)` + `LEDGER_ENTRY_DUPLICATE` for name collisions | nothing |
| `sfields.macro` | `TYPED_SFIELD(name, TYPE, code)`, `UNTYPED_SFIELD` | nothing |
| `permissions.macro` | `PERMISSION(name, txType, value)` (granular permissions 65537) | nothing |
Pattern uses `#pragma push_macro/pop_macro` to protect macro names. `UNWRAP(...)` strips outer parens around field-list initializers so commas don't confuse the preprocessor. `LEDGER_ENTRY_DUPLICATE` exists because `DepositPreauth` is both a transaction type and ledger entry type `JSS()` can't emit the same string twice.
Feature name validation is `constexpr` (compile-time `static_assert` on the literal); typos like lower-case first letter fail to build.
The `FeatureCollections` internal singleton uses `boost::multi_index_container` with three simultaneous indexes: random-access by insertion order (`byIndex` for bitset mapping), hash-unique by `uint256`, and hash-unique by name. A simple `unordered_map` cannot provide the stable integer index that `FeatureBitset` requires.
## Field Identity (`SField`)
- **Field code** = `(SerializedTypeID << 16) | fieldValue` packs type family and per-type index; canonical sort key
- `SField` instances are immutable singletons created at static init via `private_access_tag_t` (only definable inside `SField.cpp`)
- `TypedField<T>` adds compile-time payload type; `OptionaledField<T>` via `operator~(sfField)`
- Metadata flags (`fieldMeta`): `sMD_ChangeOrig`, `sMD_ChangeNew`, `sMD_DeleteFinal`, `sMD_Create`, `sMD_Always`, `sMD_BaseTen` (decimal display), `sMD_PseudoAccount`, `sMD_NeedsAsset` (drives `STTakesAsset` association), `sMD_Default` (field absent when zero)
- `IsSigning::no` excludes fields from signing hash (`sfTxnSignature`, `sfSigners`, `sfSignature`, etc.)
- `isBinary()` `fieldValue<256` (wire-representable); `isDiscardable()` `fieldValue>256` (JSON-only, e.g., `sfHash`, `sfIndex`)
- **Debug-only uniqueness check** during static init; release builds will silently mis-register on collision
## Wire Format Reference
| Item | Encoding |
|---|---|
| XRP STAmount | 8 bytes; bit63=0, bit62=sign(1=pos), 62-bit value |
| MPT STAmount | 8 bytes header (bit63=0, bit61=1, 56-bit value) + 192-bit MPTID |
| IOU STAmount | bit63=1, bit62=sign, 8-bit (offset+97), 54-bit mantissa, +20B currency, +20B issuer |
| AccountID | 20 bytes, VL-prefixed when standalone (`STAccount` mimics `STBlob` wire format) |
| MPTID | 192 bits = 32-bit big-endian sequence 160-bit issuer |
| STArray | elements between markers; ends with `STI_ARRAY,1` (`0xf1`) |
| STObject | fields in canonical order; ends with `STI_OBJECT,1` (`0xe1`) |
| VL prefix | 1 byte (0192), 2 bytes (19312480), 3 bytes (12481918744); else `std::overflow_error` |
| STIssue | 160-bit currency; if zero XRP; if next 160 = `noAccount()` MPT (then 32-bit seq); else IOU issuer |
| STNumber | 12 bytes: int64 signed mantissa + int32 exponent (two separate statements order must be explicit) |
| LP token currency | byte0 = `0x03`; bytes 1-19 = `sha512Half(min(asset1,asset2), max(asset1,asset2))` low bits |
| Order book quality | `(exponent+100) << 56 \| mantissa`; embedded in last 8 bytes of directory key (big-endian) so SHAMap order = price order |
| NFTokenID (256-bit) | flags(2) + transferFee(2) + issuer(20) + cipheredTaxon(4) + serial(4); low 96 bits = page sort key |
| LedgerHeader | 118 bytes fixed layout (seq, drops, parentHash, txHash, accountHash, parentClose/closeTime/closeFlags, closeTimeResolution) |
| Payment channel claim | `HashPrefix::paymentChannelClaim` channelID(32) amount(8) |
| Batch signing payload | `HashPrefix::batch` outer flags(4) inner-tx count(4) inner-tx hash list |
## Canonical Hashes
```
TXN → transactionID SND → txNode (with metadata)
MLN → leafNode MIN → innerNode
LWR → ledgerMaster STX → txSign (single-sig)
SMT → txMultiSign VAL → validation
PRP → proposal MAN → manifest
CLM → paymentChannelClaim BCH → batch
```
All hashes use `sha512Half` (first 256 bits of SHA-512). `HashPrefix` constants are protocol-immutable; the `make_hash_prefix(a,b,c)` constexpr packer in `HashPrefix.h` is the canonical way to declare new prefixes.
## STObject and STVar
- `STObject` stores `std::vector<detail::STVar>`; iterators expose `STBase const&` via transform iterator
- `STVar` is type-erased with 72-byte inline buffer (small-object optimization); `on_heap()` reports whether a value spilled; larger ST types heap-allocate
- `STVar` is movable; moving an on-heap STVar steals the pointer, while inline ones must invoke each ST type's move ctor through the v-table
- `copy()`/`move()` virtuals on every ST type delegate to `STBase::emplace()` for placement-new into `STVar`'s buffer
- **Two modes:**
- **Free** (`mType==nullptr`): linear field scan via `getFieldIndex()`; accepts any field; insertion order preserved
- **Templated** (`mType` set): O(1) field lookup via `SOTemplate::indices_`; `v_` laid out in template order with every slot pre-populated; unknown fields rejected
- `applyTemplate()` validates after deserialization; `set(SOTemplate)` initializes empty object with template
- Deserialization depth capped at 10 to prevent stack exhaustion
- `operator==` compares only `isBinary()==true` fields (O(n²) by design); `isEquivalent()` fast-paths when same `mType` pointer (positional comparison)
- `makeInnerObject()` applies templates conditionally on `fixInnerObjTemplate` / `fixInnerObjTemplate2` amendments historical ledger entries without template structure must not be rejected on replay
### Proxy Access Pattern
```cpp
auto amt = tx[sfAmount]; // ValueProxy: throws FieldErr if absent
auto dst = tx[~sfDestination]; // OptionalProxy: std::optional
tx[sfFlags] = 0; // proxy.assign() — soeDEFAULT zero is silently removed
tx[~sfDestTag] = std::nullopt; // remove field (only valid for soeOPTIONAL)
```
Proxies forbid removing `soeREQUIRED` or `soeDEFAULT` fields.
## SOEStyle (Field Presence)
| Style | Meaning |
|---|---|
| `soeREQUIRED` | must be present |
| `soeOPTIONAL` | may be absent; if present, may carry default value |
| `soeDEFAULT` | may be absent; if present, must NOT equal default auto-removed when assigned default |
`SOETxMPTIssue` flag on amount/issue fields: `soeMPTSupported`, `soeMPTNotSupported`, `soeMPTNone`. Omitting `soeMPTSupported` silently rejects MPT amounts in that field.
## Common Bug Patterns
- Adding to `transactions.macro` without adding to `sfields.macro` silent serialization failures
- Forgetting to bump `numFeatures` after `XRPL_FEATURE` static-init `LogicError` (registry overflow) caught at startup
- Hand-built binary blobs in non-canonical field order signature verification failures
- Omitting `soeMPTSupported` on amount field MPT payments silently rejected
- Mutating `sfTransactionType` inside `STTx` assembler callback `LogicError` (caught at startup)
- Storing `STBase` subclasses directly in `std::vector` field names lost on copy-assignment slide; use `STArray`/`STObject` instead
- Storing `Currency` as `"XRP"` ISO code (`badCurrency()`) instead of zero silently rejected; `to_currency()` legacy returns `badCurrency()` rather than failing
- Forgetting to call `associateAsset(sle, asset)` near end of `doApply()` for vault/loan transactors unrounded `STNumber` values
- Returning `tec*` from `preflight()` `NotTEC` type prevents this at compile time (would allow fee theft on unsigned tx)
- `TxMeta::AffectedNodes` left unsorted before serialization consensus-fork risk
- Comparing `Issue` instances when one side is MPT-wrapped `Issue::operator==` only compares currency+account; use `Asset` equality
- Relying on debug-only `assert` inside `STObject::isFieldAllowed` to catch duplicate fields in release
- Treating `numFeatures` as a length / iteration bound (it includes retired slots)
- Calling `setValue()` on an `STNumber` field after `associateAsset()` without re-associating idempotency assertion fires in `add()`
- Using Ed25519 key with `STValidation` throws at construction time (only secp256k1 allowed)
- Batch inner transaction `sfRawTransactions` array exceeds `maxBatchTxCount` (8) or contains nested `ttBATCH` rejected by `passesLocalChecks()`
- `getNFTokenIDFromPage()` without the page-split guard: a `sfModifiedNode` for a third page may lack `sfNFTokens` in `sfPreviousFields`; skip silently
- `STNumber` JSON string parsing asserting `!getCurrentTransactionRules()` string-format numbers not allowed inside active transaction processing
## Key Patterns
### Amendment Registration
```cpp
// In features.macro:
XRPL_FEATURE(MyFeature, Supported::yes, VoteBehavior::DefaultNo)
XRPL_FIX (MyBugFix, Supported::yes, VoteBehavior::DefaultNo)
XRPL_RETIRE_FEATURE(OldFeature) // code removed; remains registered for ledger compat
```
Lifecycle: `Supported::no/DefaultNo` `Supported::yes/DefaultNo` (rare) `DefaultYes` for critical fixes. **Never** revert `Supported::yes` to `no` (would amendment-block existing nodes).
`setCurrentTransactionRules()` has a non-obvious side effect: it calls `Number::setMantissaScale()` to push the precision mode (`small` vs `large`) without requiring `Number` to query rules on every arithmetic call.
### NotTEC vs TER
```cpp
NotTEC preflight(...); // can only return tel/tem/tef/ter/tes (no tec)
TER doApply(...); // can return any code including tec*
```
`TERSubset<Trait>` enforces this at compile time via `enable_if`. `TERtoInt(v)` is the authorized free-function conversion (member `explicit operator` would be too permissive in initializer contexts).
### Signing / Verifying
```cpp
sign(st, HashPrefix::txSign, KeyType::secp256k1, sk); // writes sfSignature
verify(st, HashPrefix::txSign, pubKey); // returns bool
// Multi-sign optimization (shared body, per-signer suffix):
auto s = startMultiSigningData(obj);
finishMultiSigningData(signerAccountID, s); // append signer ID to shared payload
```
`addWithoutSigningFields()` excludes signature fields from the signed payload. Both `sign()` and `verify()` share the same serialization path (serialize once, check/set the field), ensuring they cannot diverge.
### Batch Signing
`serializeBatch()` (in `Batch.h`, inline) produces: `HashPrefix::batch ‖ outer flags(4) ‖ inner-tx count(4) ‖ inner-tx hash list`. Both `checkBatchSingleSign()` and `checkBatchMultiSign()` call this once; multi-sign appends the per-signer AccountID suffix via `finishMultiSigningData`. `passesLocalChecks()` rejects nested batches.
### STNumber + STTakesAsset
```cpp
// Vault/Loan/LoanBroker fields use STNumber (no asset embedded).
// In doApply(), after all mutations:
associateAsset(*sle, vaultAsset); // rounds all sMD_NeedsAsset fields, removes zero-defaults
```
`STNumber` serializes a `Number` (signed mantissa+exponent, 12 bytes); rounding is asset-dependent and resolved by `associateAsset` walking fields flagged `sMD_NeedsAsset`. Fields with `sMD_Default` are removed from the SLE after rounding if the value became zero. `associateAsset()` is offset-based (the only path that yields mutable `STBase&`).
### LP Token Currency Derivation
```cpp
Currency lpc = ammLPTCurrency(asset1, asset2); // canonical std::minmax
// byte 0 = 0x03 (LP marker), bytes 1..19 = low 152 bits of sha512Half(min, max)
```
### Pseudo-Account Synthesis
Pseudo-accounts (AMM, Vault, LoanBroker) carry a 256-bit synthesized ID in fields flagged `sMD_PseudoAccount` (`sfAMMID`, `sfVaultID`, `sfLoanBrokerID`). These identify a stateless account address derived from the owner ledger entry.
### NFT Token ID Recovery from Metadata
`getNFTokenIDFromPage()` uses set-difference: collect all token IDs from `sfPreviousFields` and `sfFinalFields` across all metadata nodes; assert `finalIDs.size() == prevIDs.size() + 1`; use `std::mismatch` to find the inserted entry. Guard: when a mint causes a page split, the third page's `sfModifiedNode` may have `sfPreviousFields` without `sfNFTokens` check presence before extracting.
## STAmount Arithmetic Details
- **IOU canonical range:** mantissa [10^15, 10^16), exponent [-96, +80]; zero = (mantissa=0, exponent=-100)
- **Two rounding modes:** `mulRound`/`divRound` (legacy, rounds up when fractional 0.1) vs `mulRoundStrict`/`divRoundStrict` (correct remainder tracking, propagates `NumberRoundModeGuard`)
- **Overflow guard in multiply:** if `min(a,b) > sqrt(cMaxNative)`, product overflows checked before 128-bit intermediate
- **`canAdd`/`canSubtract`:** for IOU, uses round-trip relative error test with 10^-4 tolerance
- **`areComparable()`:** uses `std::visit` over `Asset` variant; incompatible asset types throw immediately
- Feature-gated: `featureSingleAssetVault` / `featureLendingProtocol` gate the `fromNumber()` path in `operator=(Number const&)`
## QualityFunction (AMM Path Optimization)
`q(out) = m * out + b` where `b` = reciprocal-rate intercept, `m` = AMM price-impact slope.
- **`AMMTag`:** `m_ = -fee/poolIn`, `b_ = poolOut*fee/poolIn` derived from single-path AMM swap formula
- **`CLOBLikeTag`:** `m_ = 0`, `b_ = 1/quality.rate()` also used for multi-path AMM (fixed allocation = constant quality)
- **`combine()`:** `m_ += b_ * qf.m_; b_ *= qf.b_` linear function composition; clears `quality_` cache when slope becomes nonzero
- **`outFromAvgQ()`:** solves `out = (1/rate - b_) / m_`; rounding mode `upward` to conservatively bound output; returns `nullopt` if `m_==0`, rate==0, or `out<=0`
- `saveNumberRoundMode` RAII guard scopes the upward-rounding to just this computation
## AccountID Cache
Direct-mapped cache in `AccountID.cpp`. Indexed by `hardened_hash<>` (xxHash + random seed = DoS-resistant). Lock sharding: single `atomic<uint64_t> locks_` encodes 64 independent spinlocks via `packed_spinlock` (one per `index % 64`). Edge case: `encoding[0] != 0` guard distinguishes an uninitialized slot from a legitimate cache hit for the all-zero `xrpAccount()`. Cache is optional; `initAccountIdCache(0)` disables it entirely.
## Cross-Chain Bridge Attestations
Two parallel hierarchies: `Attestations::AttestationClaim` / `AttestationCreateAccount` (full, with signature what witnesses submit) vs `XChainClaimAttestation` / `XChainCreateAccountAttestation` (ledger-stored, signature stripped). Conversion constructors project signingstorage in one step.
`AttestationMatch` three-state enum: `match`, `matchExceptDst`, `nonDstMismatch`. `XChainAddClaimAttestation` requires `match`; `XChainClaim` (user-specified dst) accepts `matchExceptDst`. `sameEvent()` ignores signer identity fields; full `operator==` requires all fields.
Max attestations per container: 256 (far above any real witness set; guards memory allocation).
## Critical Files
### Foundations
- `include/xrpl/protocol/SField.h`, `src/libxrpl/protocol/SField.cpp` field registry, X-macro expansion, code packing
- `include/xrpl/protocol/Feature.h`, `src/libxrpl/protocol/Feature.cpp` `numFeatures` (ceiling!), `FeatureBitset`, `registerFeature` with compile-time name validation; `readOnly` atomic fence
- `include/xrpl/protocol/Rules.h`, `src/libxrpl/protocol/Rules.cpp` `Rules` snapshot of enabled amendments; `CurrentTransactionRules` is a `LocalValue<Rules const*>` (per-coroutine); `isFeatureEnabled()` queries thread-local; `setCurrentTransactionRules` pushes `Number` mantissa scale
- `include/xrpl/protocol/HashPrefix.h` protocol-immutable domain separators; `make_hash_prefix` constexpr packer
### Macro Tables (single sources of truth)
- `include/xrpl/protocol/detail/features.macro`
- `include/xrpl/protocol/detail/transactions.macro`
- `include/xrpl/protocol/detail/ledger_entries.macro`
- `include/xrpl/protocol/detail/sfields.macro`
- `include/xrpl/protocol/detail/permissions.macro`
### Type System Roots
- `STBase.h/cpp` polymorphic root; `emplace()` SOO helper; `JsonOptions`; `STExchange` traits glue
- `STObject.h/cpp` heterogeneous container, proxy system, template enforcement, debug uniqueness asserts
- `STVar` (`detail/STVar.h`) 72-byte inline variant; `on_heap()`; move steals pointer when heap-allocated; depth guard at 10
- `SOTemplate.h/cpp` schema with O(1) field index; move-only; carries `SOEStyle` + `SOETxMPTIssue`
### Format Registries
- `TxFormats.h/cpp`, `LedgerFormats.h/cpp`, `InnerObjectFormats.h/cpp` all inherit `KnownFormats<Key, Derived>` with `forward_list<Item>` (pointer-stable) + dual flat_maps
- `LedgerEntry` rpcName vs name distinction enables `DepositPreauth` collision handling
### Amount / Asset Stack
- `Asset.h/cpp` variant of Issue/MPTIssue; `visit()`, `equalTokens()`, `BadAsset` sentinel
- `Issue.h/cpp`, `MPTIssue.h/cpp` XRP/IOU and MPT identity; **note** `Issue::operator==` ignores MPT-ness always go through `Asset`
- `STAmount.h/cpp` unified serialized amount; `canMul`/`canAdd`/`canSubtract` safety checks; `mulRound`/`mulRoundStrict` (legacy vs precise rounding); `roundToScale`
- `STNumber.h/cpp` `Number`-typed field; pairs with `STTakesAsset` infrastructure; 12-byte wire: int64 mantissa + int32 exponent
- `STIssue.h/cpp`, `STCurrency.h/cpp` asset-only fields
- `STTakesAsset.h/cpp` `associateAsset` walks `sMD_NeedsAsset` fields, rounds + strips zero-defaults; include order: `STTakesAsset.h` before `STLedgerEntry.h`
- `IOUAmount.h/cpp`, `XRPAmount.h`, `MPTAmount.h/cpp` lean representations
- `Rate2.h` `Rate` newtype with `parityRate = 1_000_000_000`; transfer-rate math
- `Units.h` phantom-typed `Drops`/`FeeLevel`
- `Number` (in `xrpl/basics/`) high-precision arithmetic; `MantissaRange::large` enabled by SingleAssetVault/LendingProtocol amendments
- `AmountConversions.h` typed coercions
### Cryptography
- `PublicKey.h/cpp` 33-byte unified format (0xED prefix for Ed25519); `ECDSACanonicality` enum (canonical vs fullyCanonical); libsecp256k1 normalization
- `SecretKey.h/cpp` `secure_erase` in dtor; deleted `==`/`<<`; XRPL-specific secp256k1 derivation via `Generator`
- `Seed.h/cpp` 128-bit; `parseGenericSeed()` cascades hexbase58RFC1751passphrase, rejecting other key types first
- `detail/secp256k1.h` libsecp256k1 context singleton via template-with-default-param trick (ODR-safe header-only); created with `SIGN|VERIFY` flags combined
- `digest.h` `sha512Half`, `sha512_half_hasher_s` (secure erase variant)
- `tokens.h/cpp` (+ `b58_utils.h`, `token_errors.h`) Base58Check; fast path uses base 58^10 intermediate (1015× speedup via `unsigned __int128`, gated on non-MSVC); `TokenType` enum is protocol-stable; `alphabetReverse` is `constexpr` 256-element array; leading-zero bytes each map to `'r'`
### Wire I/O
- `Serializer.h/cpp` accumulator; `addVL`, `addFieldID`, big-endian integers, `getSHA512Half()`
- `SerialIter` non-owning forward cursor over a byte buffer; throws on underrun
- `Sign.h/cpp` `sign`/`verify` with HashPrefix prepended to `addWithoutSigningFields()` output; `startMultiSigningData`/`finishMultiSigningData` split for batch-signer optimization; `signingForID` helper for arbitrary payload bytes
- `Batch.h` inline `serializeBatch()`: `HashPrefix::batch ‖ flags(4) ‖ count(4) ‖ txids`
- `serialize.h` top-level convenience helpers
- `messages.h` protobuf message tag constants (`TYPE_BOOL` undef guard documented)
### Higher-Level Objects
- `STTx.h/cpp` caches `tid_` and `tx_type_`; `passesLocalChecks` (memos, pseudo-tx, MPT support, batch nesting, max 8 inner txs); `sterilize()` round-trip; `getBatchTransactionIDs()` lazy + immutable after first call; `getFeePayer()` returns `sfDelegate` or `sfAccount`; `checkSign()` dispatches single/multi/batch/counterparty; SQL helpers (`getMetaSQL`)
- `STLedgerEntry.h/cpp` (alias `SLE`) typed ledger object; `thread()` updates `sfPreviousTxnID`; `isThreadedType()` gated by `fixPreviousTxnID`; `getJson()` injects `jss::index` and synthetic `mpt_issuance_id` for MPT issuances
- `STValidation.h/cpp` lazy `valid_` cache; `mTrusted` separate from validity; `lookupNodeID` callback decouples manifest system; `validationFormat()` is function-local static (SField init order safety); `sfCookie` is `soeDEFAULT` to prevent fingerprinting
- `STArray.h/cpp`, `STVector256.h/cpp`, `STBitString.h/cpp`, `STInteger.h/cpp`, `STBlob.h/cpp`, `STAccount.h/cpp`, `STPathSet.h/cpp`, `STXChainBridge.h/cpp`
### Transaction Meta
- `TxMeta.h/cpp` `AffectedNodes` (sorted by index in `addRaw()`!), `DeliveredAmount`, `sfParentBatchID`; linear scan for node lookup (bounded by 32-slot reservation); `getAffectedAccounts()` must match JS `Meta#getAffectedAccounts`
- `LedgerHeader.h/cpp` 118-byte fixed serialization; close-time-resolution fudging
### Indexes and Keys
- `Indexes.h/cpp` `keylet::*` factories with `LedgerNameSpace` tagged hashing; `keylet::quality()` embeds 64-bit quality in last 8 bytes (big-endian); `keylet::amm()` uses `std::minmax` + `if constexpr` for heterogeneous token types; `nftpage` = owner(160 bits) masked token(96 bits) range scan, no hash
- `Keylet.h/cpp` type-tagged `(uint256, LedgerEntryType)`; `ltANY` wildcard, `ltCHILD` rejects directories
- `Protocol.h` protocol-wide constants (`FLAG_LEDGER_INTERVAL`, etc.)
- `nftPageMask.h`, `nft.h` NFT page boundary (low 96 bits); composite keys (high 160 = owner AccountID)
- `NFTokenID.h/cpp` flags(2)+fee(2)+issuer(20)+cipheredTaxon(4)+serial(4); LCG `384160001 * seq + 2459` ciphers taxon; `getNFTokenIDFromPage()` and `getNFTokenIDFromDeletedOffer()` for metadata enrichment
- `NFTokenOfferID.h/cpp`, `NFTSyntheticSerializer.h/cpp` derived/synthetic NFT entries; consumed by Clio as public API
- `Book.h` `(in_asset, out_asset)` order-book identity
- `SeqProxy.h/cpp` sequence vs ticket abstraction; sequence-type values sort before ticket-type values
### Validation Helpers (return NotTEC, preflight-time)
- `AMMCore.h/cpp` `invalidAMMAsset`, `invalidAMMAssetPair`, `invalidAMMAmount`; `ammLPTCurrency()` uses canonical `std::minmax`
- `Permissions.h/cpp` singleton; `isDelegable()` checks granular vs transaction-level (`<UINT16_MAX` boundary), amendment, delegable flag
### RPC / JSON Boundary
- `STParsedJSON.h/cpp` depth cap 64; field-path-qualified errors via `make_name`; recognizes `"Payment"`, `"tesSUCCESS"`, etc.
- `ErrorCodes.h/cpp` append-only enum; `sortedErrorInfos` validated at compile time; `warning_code_i` distinct from `error_code_i`
- `RPCErr.h/cpp` `RPC::Status`/`make_error` helpers
- `ApiVersion.h` `apiMinimumSupportedVersion`(1), `apiMaximumSupportedVersion`(2), `apiBetaVersion`(3); `apiVersionIfUnspecified`(1); `forAllApiVersions` / `forApiVersions` templates pass version as `integral_constant` for compile-time branching; `getAPIVersionNumber()` returns `apiInvalidVersion`(0) on parse failure; `setVersion()` has v1 legacy semver-string shim
- `MultiApiJson.h` per-API-version `Json::Value` array indexed `[version - RPC::apiMinimumVersion]`; composes with `forAllApiVersions` from `ApiVersion.h`; preserves wire compatibility across versions
- `jss.h` every JSON key as `Json::StaticString` via `JSS(name)` macro; PascalCase = protocol fields, snake_case = RPC
- `json_get_or_throw.h` `getOrThrow<T>(jv, name)` specializations enforce presence + type; standard idiom for parsing untrusted JSON
- `st.h` convenience aggregate header for all ST types
### Specialized Types
- `STIssue`, `STAccount` (160-bit, VL-encoded), `STBitString<Bits>`, `STInteger<T>`, `STBlob`, `STArray`, `STVector256`, `STCurrency`, `STPathSet`, `STXChainBridge`, `STNumber` (asset-contextual)
- `Quality.h/cpp` inverted encoding (lower uint64 = higher quality); `ceil_in`/`ceil_out` proportional scaling; `_strict` variants honor Number rounding mode
- `QualityFunction.h/cpp` linear `q(out)=m*out+b`; AMMTag (slope from pool) vs CLOBLikeTag (m=0); `combine()` for multi-step strands; `outFromAvgQ()` solves for capped output
- `XChainAttestations.h/cpp` `Attestations::` namespace (full, with signature) vs `xrpl::` (stored); `match()` returns three-state `AttestationMatch`
### Pseudo-Account Fields (`sMD_PseudoAccount`)
- `sfAMMID`, `sfVaultID`, `sfLoanBrokerID` 256-bit hash representing a synthesized account address
### Misc / System
- `BuildInfo.h/cpp` version string, `getVersionString()` consumed by manifest/handshake
- `SystemParameters.h` drops-per-XRP, `INITIAL_XRP`, ledger-related constants; validated by `static_assert`
- `UintTypes.h` `uint256`/`uint160`/`uint128` aliases and tagged variants (`Currency`, `NodeID`, etc.)
- `TER.h/cpp` error code enum families + `TERSubset`
- `TxFlags.h` X-macro driven flag tables (`tf*`); see TxFlags Architecture below
- `TxFormats.h/cpp` transaction-type field schema
- `AccountID.h/cpp` `calcAccountID()` = SHA-256 then RIPEMD-160 (matches Bitcoin for security argument); `AccountIdCache` direct-mapped with spinlock sharding
## Numeric Encoding Reference
```
IOU canonical: mantissa ∈ [10^15, 10^16), exponent ∈ [-96, +80]
zero = (mantissa=0, exponent=-100) — sorts below smallest positive
XRP max: cMaxNativeN = 10^17 drops (100 billion XRP)
MPT max: maxMPTokenAmount = INT64_MAX = 0x7FFFFFFFFFFFFFFF
Transfer rate: Rate{value} where value/1_000_000_000 = 1.0 (parityRate = 1:1)
NFT transfer fee: uint16 basis points (050000), convert via nft::transferFeeAsRate (×10000)
AMM auction fee: basis points; trading fee in tenths of basis points (10000 = 1%)
STNumber mantissa: int64 signed; when MantissaRange::large active, accessor divides by 10 to fit wire format
```
## Protocol-Stable Constants (NEVER CHANGE)
- `LedgerEntryType` numeric values (in ledger objects)
- `TxType` numeric values (in signed transactions)
- `SerializedTypeID` and `SField` codes (in serialized fields)
- `LedgerNameSpace` discriminator characters (in keylet derivation) legacy `CONTRACT`, `GENERATOR`, `NICKNAME` reserved even though deprecated
- `HashPrefix` enum values (in signature/hash domain separation)
- `error_code_i` and `warning_code_i` numeric values (clients depend on them; append-only)
- `TECcodes` (and other `TER` family numeric values) recorded in transaction metadata
- `TokenType` (Base58Check prefix bytes for accounts/seeds/nodes)
- LP token currency prefix byte (`0x03`)
- Universal transaction flags (`tfFullyCanonicalSig`, `tfInnerBatchTxn`)
- `FLAG_LEDGER_INTERVAL = 256` (drives consensus timing)
- `INITIAL_XRP = 100B × 10^6 drops` (validated by `static_assert` against `Number::maxRep`)
- NFT taxon LCG constants (`384160001 * seq + 2459`)
- All flag bit values (`tf*`, `lsf*`, `asf*`)
- XRPL Base58 alphabet (first char `'r'` for `AccountID=0` is cosmetically significant)
- `maxBatchTxCount = 8` (inner transactions per batch)
Changing any requires an amendment with explicit detection logic for old/new behavior.
## TxFlags Architecture
`TxFlags.h` is itself X-macro driven. Per-transaction flag groups are declared so that:
- Each group has `tf*` named bit constants
- `tfUniversalMask` is the union of universal flags (`tfFullyCanonicalSig`, `tfInnerBatchTxn`)
- Per-transaction `tf*Mask` constants are auto-computed via `MASK_ADJ` so that mask matches the declared flags exactly adding a flag automatically updates the mask
- `TF_FLAG2` marks flags whose meaning was changed by an amendment; old/new bits coexist with disjoint enable conditions
- Inner-batch flag `tfInnerBatchTxn` is special: marks a tx as a member of a batch (skipped by ordinary preflight signature checks)
Pattern: when adding a new flag, define it in `TxFlags.h` in the appropriate group; do NOT manually adjust the mask the macro derives it.
## STTx Construction Paths
Three constructors, all terminate with `tid_ = getHash(HashPrefix::transactionID)`:
1. **Wire** (`SerialIter&`) hottest path; enforces `txMinSizeBytes` (32) and `txMaxSizeBytes` (1 MB) before field parsing; `set(sit)` returning `true` (inner object terminator found at top level) throws
2. **Object promotion** (`STObject&&`) no size check; `applyTemplate` enforces conformance
3. **Programmatic** (`TxType, assembler`) installs template first; asserts `sfTransactionType` unchanged after assembler runs; `LogicError` (not `std::runtime_error`) on mutation
`getSeqProxy()` unifies `sfSequence` (classic) and `sfTicketSequence` (ticket); when `sfSequence==0` and `sfTicketSequence` present ticket mode. Sequence-type always sorts before ticket-type.
`getFeePayer()` returns `sfDelegate` if present, else `sfAccount`. Authorization is enforced in `Transactor::checkPermission`, not here.
## Counterparty Signing (`sfCounterpartySignature`)
Used by `LoanSet` allows a second party to sign the same transaction. `checkSign(Rules const&)` checks primary then counterparty (if field present). Errors from counterparty check are prefixed `"Counterparty: "`. `sign()` accepts optional `signatureTarget` reference to write into a sub-object.

212
docs/skills/rpc.md Normal file
View File

@@ -0,0 +1,212 @@
# RPC
JSON-RPC over HTTP/WebSocket and gRPC. Central handler table dispatches by method name + API version. Roles: ADMIN, USER, IDENTIFIED, PROXY, FORBID, GUEST.
## Key Invariants
- Handler table in `Handler.cpp`: each entry = `{name, function, role, condition, minApiVer, maxApiVer}` as `std::multimap<string, Handler>`. Same method name can have multiple entries with **non-overlapping** version ranges; overlap is a fatal `LogicError()` at startup.
- `conditionMet` checks amendment-blocked, UNL expired, operating mode ≥ SYNCING, validated ledger age < `Tuning::maxValidatedLedgerAge` (2 min), and validated/current gap ≤ 10 ledgers. Standalone mode bypasses age checks.
- API v1 vs v2 error code split: v1 emits `rpcNO_NETWORK`/`rpcNO_CURRENT`/`rpcNO_CLOSED`; v2+ collapses to `rpcNOT_SYNCED`.
- Sensitive fields (`passphrase`, `secret`, `seed`, `seed_hex`) are masked as `<masked>` in error response echoes.
- Batch requests: top-level `"method": "batch"` with `params` array; each sub-request processed independently and accumulated into JSON array. Batch is rejected entirely if any sub-request is itself a batch (no nesting).
- API v2 enforces strict JSON typing on previously-permissive boolean fields (e.g., `signer_lists`, `binary`, `forward`, `transactions`).
- Two handler registration styles exist but only two handlers use the new-style (class-based): `LedgerHandler` and `VersionHandler`. All ~67 others use old-style free functions in `handlerArray`.
## Common Bug Patterns
- New handler without entry in `Handler.cpp` static array = handler silently unreachable.
- Wrong `role_` on handler: USER-level with admin data leaks; ADMIN handler accessible to users = security hole.
- `conditionMet` returning false: ensure new conditions are documented and version-coded errors are paired.
- Resource charging: each request gets a fee via `Resource::Consumer`; missing charge allows DoS.
- `maxRequestSize` (1 MB) rejected before JSON parsing; oversized requests get no error detail.
- Marker pagination: callers can forge markers pointing into other accounts' directories — always call `RPC::isRelatedToAccount` before resuming.
- `parse<T>()` returning `std::nullopt` is a programming-error sentinel for type system; user-facing errors go through `required<T>` / `Expected<T, Json::Value>`.
- `loadType` must be set early in handler — escalates to `feeExceptionRPC` automatically on exception if still `feeReferenceRPC`.
- `WSInfoSub` only trusts `X-User`/`X-Forwarded-For` headers when the remote IP is in `secure_gateway_nets`; outside that list, those headers are stripped. Misconfiguring `secure_gateway` lets untrusted clients spoof identity.
- `RPCSub::sendThread` reads `mUsername`/`mPassword` outside `mLock` — minor data-race noted in source with `XXX` comment.
- `PathRequestManager::getAssetCache` assigns the `weak_ptr<AssetCache>` lock result to a local `shared_ptr` before returning — assigning directly to the weak member would cause immediate expiry.
- `account_objects` marker encodes phase (NFT page vs directory); resuming with a wrong-phase marker can traverse the wrong list. Both `nftPageStart` and directory marker use different sentinel shapes.
## Adding New RPC Handler
1. Declare in `Handlers.h`: `Json::Value doMyCommand(RPC::JsonContext&);`
2. Implement in new file under `src/xrpld/rpc/handlers/<category>/`.
3. Register in `Handler.cpp` `handlerArray` with role, condition, version range.
4. For class-based new-style handler (rare; only `LedgerHandler`, `VersionHandler`): expose static `name`, `role`, `condition`, `minApiVer`, `maxApiVer`; implement `check()` / `writeResult()`; register via `addHandler<T>()`.
5. For gRPC: define in `xrp_ledger.proto`, add `CallData` in `GRPCServerImpl::setupListeners()`, write `doXxxGrpc(RPC::GRPCContext<Request>&)` returning `std::pair<Response, grpc::Status>`.
## Handler Patterns
### Old-style registration (typical)
```cpp
// In Handler.cpp handlerArray[] — REQUIRED for every new handler:
{"my_command", byRef(&doMyCommand), Role::USER, NO_CONDITION},
// role: ADMIN for internal/sensitive, USER for public
// condition: NO_CONDITION, NEEDS_NETWORK_CONNECTION, NEEDS_CURRENT_LEDGER, NEEDS_CLOSED_LEDGER
// version range defaults to [apiMinimumSupportedVersion, apiMaximumValidVersion]
// To version-bound: `{"ledger_header", byRef(&doLedgerHeader), Role::USER, NO_CONDITION, 1, 1}`
```
### Version-Ranged Class Handler
```cpp
// Class with static metadata; registered in HandlerTable ctor via addHandler<T>()
template <> Handler handlerFrom<MyCommandHandler>() {
return {MyCommandHandler::name, &handle<Json::Value, MyCommandHandler>,
MyCommandHandler::role, MyCommandHandler::condition,
MyCommandHandler::minApiVer, MyCommandHandler::maxApiVer};
}
```
### Ledger Resolution
- `RPC::lookupLedger(ledger, context)` for JSON path — handles `ledger_hash`/`ledger_index`/legacy `ledger`/shortcut strings.
- `RPC::ledgerFromRequest<T>(ledger, context)` for gRPC.
- `RPC::getOrAcquireLedger(context)` returns `Expected<shared_ptr<Ledger const>, Json::Value>` and triggers `InboundLedgers::acquire()` for missing ledgers (used only by `ledger_request` admin command).
### Pagination Idiom
- Marker format: `"<uint256_hex>,<uint64_pageHint>"` for owner-directory handlers; raw hex for NFT page chains.
- Request `limit + 1` from `forEachItemAfter`; if `count == limit + 1`, emit marker from limit-th item.
- Always validate marker SLE belongs to requesting account before resuming.
- Limits from `RPC::Tuning::<command>` clamped via `readLimitField()`; admin/unlimited roles bypass clamp.
## Key Files
### Top-level
- `src/xrpld/rpc/handlers/Handlers.h` — authoritative declarations of all old-style handler functions (~67 entries).
- `src/xrpld/rpc/detail/Handler.cpp` — handler table, `getHandler()`, `HandlerTable` singleton, version overlap enforcement.
- `src/xrpld/rpc/detail/Handler.h``Handler` struct, `Condition` enum, `conditionMet()` template.
- `src/xrpld/rpc/detail/RPCHandler.cpp``doCommand()` pipeline: load-shed, role check, condition check, perf-log instrumented dispatch.
- `src/xrpld/rpc/detail/ServerHandler.cpp` — HTTP/WS server entry; auth, batch handling, version-aware error formatting, secret masking, HTTP status from RPC error codes (ripplerpc ≥ 3.0).
- `src/xrpld/rpc/RPCHandler.h``doCommand`, `roleRequired` declarations.
- `src/xrpld/rpc/Context.h``Context`, `JsonContext`, `GRPCContext<T>` aggregate dispatch envelopes.
- `src/xrpld/rpc/Request.h` — simpler `Request` envelope (less used; lives alongside `Context`).
- `src/xrpld/rpc/Status.h` — unified error type bridging `TER`, `error_code_i`, and bare int with `inject()` to JSON.
- `src/xrpld/rpc/Role.h``Role` enum, `isUnlimited`, `requestRole`, `ipAllowed`, `forwardedFor`.
- `src/xrpld/rpc/detail/Role.cpp` — IP subnet matching, secure_gateway resolution, RFC 7239 / `X-Forwarded-For` parsing.
- `src/xrpld/rpc/detail/RPCHelpers.cpp` / `.h` — pagination, seed parsing, keypair derivation, ledger-entry type selection, MPT/IOU asset parsing.
- `src/xrpld/rpc/detail/RPCLedgerHelpers.cpp` / `.h``lookupLedger`, `getLedger`, `getOrAcquireLedger`; staleness checks; gRPC `ledgerFromSpecifier`.
- `src/xrpld/rpc/detail/Tuning.h` — all numeric tunables (limits, ranges, throttles).
- `include/xrpl/protocol/ErrorCodes.h``error_code_i`, `inject_error`, `ErrorInfo` table, HTTP status mapping.
### Subscriptions
- `src/xrpld/rpc/detail/WSInfoSub.h` — WebSocket `InfoSub` subclass; `Json::stream`-based zero-intermediate serialization into `multi_buffer`. Only trusts `X-User`/`X-Forwarded-For` when remote IP is in `secure_gateway_nets`.
- `src/xrpld/rpc/RPCSub.h` / `detail/RPCSub.cpp` — outbound HTTP/HTTPS push subscription ("webhook"); legacy feature maintained for one specific partner; carries `VFALCO TODO` markers. `sendThread` reads `mUsername`/`mPassword` outside `mLock` (data-race risk).
- Streams: `server`, `ledger`, `book_changes`, `transactions`, `transactions_proposed` (`rt_transactions` deprecated alias), `validations`, `manifests`, `peer_status` (admin), `consensus`.
- `book_changes` stream can be subscribed but **cannot be unsubscribed** — there is no unsubscribe path for it.
- `account_history_tx_stream` is experimental; gated on `useTxTables()`; supports `stop_history_tx_only` in unsubscribe.
### Pathfinding
- `src/xrpld/rpc/detail/Pathfinder.cpp` / `.h` — three-phase engine: `findPaths()` (template expansion via static `mPathTable`), `computePathRanks()` (RippleCalc simulation), `getBestPaths()` (selection with covering-path).
- `src/xrpld/rpc/detail/PathRequest.cpp` / `.h` — per-request state machine; two constructors for `path_find` (subscription) vs `ripple_path_find` (legacy callback); adaptive `iLevel`.
- `src/xrpld/rpc/detail/PathRequestManager.cpp` / `.h` — collection of `wptr<PathRequest>`; re-entrant `updateAll()` loop; shared `AssetCache` via `weak_ptr` (intentional — cache lives only as long as a request holds it).
- `src/xrpld/rpc/detail/AssetCache.cpp` / `.h` — per-ledger thread-safe trust line + MPT cache; **direction-superset optimization**: caches lines in both directions (AB and BA) so a single fetch covers both source and destination queries; `shared_ptr<vector<>>` null sentinels for empty accounts.
- `src/xrpld/rpc/detail/AccountAssets.cpp` / `.h``accountSourceAssets` / `accountDestAssets` for path source/dest currency enumeration.
- `src/xrpld/rpc/detail/TrustLine.cpp` / `.h``TrustLineBase` + `PathFindTrustLine` (memory-minimal) + `RPCTrustLine` (adds quality rates).
- `src/xrpld/rpc/detail/MPT.h``PathFindMPT` (MPTID + zeroBalance + maxedOut); implicitly converts from `MPTIssue` for uniform interface with `PathFindTrustLine`.
- `src/xrpld/rpc/detail/PathfinderUtils.h``largestAmount`, `convertAmount`, `convertAllCheck`; XRP sentinel = `STAmount::cMaxNative`, IOU sentinel = `STAmount::cMaxValue / cMaxOffset`, MPT sentinel = `maxMPTokenAmount`. Pass these to signal "send all".
- `src/xrpld/rpc/detail/LegacyPathFind.cpp` / `.h` — RAII concurrency guard for synchronous `ripple_path_find`; lock-free CAS on `inProgress` counter; admin bypass. Destructor skips decrement if construction failed (`m_isOk` flag).
- `src/xrpld/rpc/detail/RippleLineCache.cpp` / `.h`**empty stubs**; functionality replaced by `AssetCache`. Still `#include`d in two files for inert compatibility.
### Transaction Signing / Submission
- `src/xrpld/rpc/detail/TransactionSign.cpp` / `.h``transactionSign`, `transactionSubmit`, `transactionSignFor`, `transactionSubmitMultiSigned`; `SigningForParams` mode discriminator; round-trip "sterilization" via `transactionConstructImpl`.
- Fee pipeline: `checkFee``getCurrentNetworkFee` (max of load-scaled base fee and TxQ-escalated open ledger fee, capped by `fee_mult_max`/`fee_div_max`).
- `ProcessTransactionFn` dependency injection via `getProcessTxnFn(NetworkOPs&)` for testability.
- `acctMatchesPubKey` handles three account states: unactivated (master-only), master+regular both valid, master disabled (regular only).
- `doSubmit`: pre-seeds `HashRouter` with the transaction hash before forwarding, so the node does not rebroadcast its own submission.
### Utility / Enrichment
- `src/xrpld/rpc/BookChanges.h` — header-only template `computeBookChanges<L>(ledger)`; produces OHLCV per pair; reused by RPC handler and `book_changes` subscription stream.
- `src/xrpld/rpc/CTID.h` — Concise Transaction ID (XLS-15d): 16-hex `C` + 28-bit ledgerSeq + 16-bit txnIdx + 16-bit netID. Uses `boost::regex`, not `std::regex`.
- `src/xrpld/rpc/DeliveredAmount.h` / `detail/DeliveredAmount.cpp``insertDeliveredAmount`; three-tier resolution; threshold = ledger 4594095 (Jan 2014) or close time 446000000s; `"unavailable"` string sentinel for pre-threshold ledgers; lazy lambdas avoid `LedgerMaster` calls when meta has `sfDeliveredAmount`.
- `src/xrpld/rpc/MPTokenIssuanceID.h` / `detail/MPTokenIssuanceID.cpp``insertMPTokenIssuanceID`; mirrors `DeliveredAmount` three-function pattern (eligibility / extraction / injection).
- `src/xrpld/rpc/handlers/server_info/ServerDefinitions.cpp` — built once at startup via Meyers singleton; SHA-512-half hash for client cache invalidation; X-macrodriven from protocol headers.
- `src/xrpld/rpc/GRPCHandlers.h` — declarations for 4 gRPC handlers (`doLedgerGrpc`, `doLedgerEntryGrpc`, `doLedgerDataGrpc`, `doLedgerDiffGrpc`). Contract: non-OK `grpc::Status` discards the response object.
- `src/xrpld/rpc/Output.h``boost::utility/string_ref`-based output sink. Vestigial; not used by current codebase (canonical sink is `Json::Output`).
- `src/xrpld/rpc/json_body.h` — Boost.Beast `Body` type for JSON HTTP responses; both `reader` and `writer` implement BodyReader concept (eager, one-shot).
### Client-side
- `src/xrpld/rpc/RPCCall.h` / `detail/RPCCall.cpp``xrpld` CLI dispatch; `RPCParser` with static `Command[]` table mapping method → parse function; "trusted interface" — minimal validation by design.
## Enrichment Pipeline
Three functions are called together wherever transaction metadata is serialized to JSON. **Always apply all three** at the same call sites to keep responses consistent:
1. `insertDeliveredAmount()` — actual delivered amount for payments/check-cash/account-delete.
2. `RPC::insertNFTSyntheticInJson()` — synthetic NFT fields (`nft_offer_index`, `nftoken_id`, etc.) extracted from metadata.
3. `RPC::insertMPTokenIssuanceID()``mpt_issuance_id` for successful `MPTokenIssuanceCreate` transactions.
Call sites: `Tx.cpp`, `AccountTx.cpp`, `NetworkOPs.cpp`, `LedgerToJson.cpp`, `Simulate.cpp`.
## Resource Cost Tiers
Set `context.loadType` early. Tiers (from `Fees.h`):
- `feeReferenceRPC` — default; auto-escalates to `feeExceptionRPC` on uncaught exception.
- `feeMediumBurdenRPC` — directory walks, account_lines, account_offers, simulate, history paging, tx_reduce_relay-class ops.
- `feeHeavyBurdenRPC` — pathfinding, signing, gateway_balances, ledger_request, submit_multisigned.
## Tuning Constants (in `Tuning.h`)
- `maxRequestSize = 1_000_000` — rejected pre-parse in `ServerHandler`.
- `maxJobQueueClients = 500``RPCHandler::fillHandler` returns `rpcTOO_BUSY`; admin/unlimited bypass.
- `maxValidatedLedgerAge = 2 min`.
- `maxPathfindsInProgress = 2`, `maxPathfindJobCount = 50`, `max_src_cur = 18`, `max_auto_src_cur = 88`.
- `binaryPageLength = 2048`, `jsonPageLength = 256` — selected via `pageLength(isBinary)` in `ledger_data`.
- Per-command `LimitRange`: most account queries `{10, 200, 400}`; `bookOffers {0, 60, 100}`; `nftOffers {50, 250, 500}`; `noRippleCheck {10, 300, 400}`; `accountNFTokens {20, 100, 400}`.
- `defaultAutoFillFeeMultiplier = 10`, `defaultAutoFillFeeDivisor = 1`.
## Two-Tier Signing Access Gate
Sign-related handlers (`sign`, `sign_for`, `submit` with `tx_json`, `channel_authorize`) enforce:
```cpp
if (context.role != Role::ADMIN && !context.app.config().canSign())
return rpcNOT_SUPPORTED;
```
`canSign()` reflects `[signing_support]` config; defaults false. Public nodes refuse to sign by default. All `sign`/`sign_for` responses include a `deprecated` warning steering clients to local/offline signing.
## API Version Behavioral Differences
- `apiCommandLineVersion` is used by the CLI; defaults differ from inbound.
- v2 promotes fields from inside transaction objects to top-level: `hash`, `ledger_index`, `ledger_hash`, `close_time_iso`.
- v2 renames metadata keys: `tx``tx_json`, `meta``meta_blob` for binary.
- v2 renames Payment `Amount``DeliverMax` (via `RPC::insertDeliverMax`).
- v2 strict boolean typing; v1 silently coerces.
- v2 rejects mixing `ledger_index_min`/`max` with `ledger_index`/`ledger_hash` in `account_tx`; v1 tolerates.
- v2 enforces precise marker objects in `account_tx` (`{ledger, seq}` integers).
- v3 (beta) adds human-readable singleton aliases in `ledger_entry` index lookup. `VersionHandler::maxApiVer` tracks the highest beta version; the width of the beta range (currently 1) matters for the version negotiation boundary.
## Handler Subdirectory Map
- `handlers/account/``AccountInfo`, `AccountLines`, `AccountChannels`, `AccountCurrencies`, `AccountNFTs`, `AccountObjects`, `AccountOffers`, `AccountTx`, `GatewayBalances`, `NoRippleCheck`, `OwnerInfo` (legacy).
- `handlers/admin/``BlackList`, `UnlList`, plus subdirectories for `data/` (CanDelete, LedgerCleaner, LedgerRequest), `keygen/` (WalletPropose, ValidationCreate), `log/`, `peer/`, `server_control/` (Stop, LedgerAccept — standalone-only), `signing/` (ChannelAuthorize, Sign, SignFor), `status/` (ConsensusInfo, FetchInfo, GetCounts, Print, ValidatorInfo, Validators, ValidatorListSites).
- `handlers/ledger/``Ledger` (class-based), `LedgerClosed`, `LedgerCurrent`, `LedgerData`, `LedgerDiff` (gRPC-only), `LedgerEntry` (parser table from `ledger_entries.macro`), `LedgerHeader`.
- `handlers/orderbook/``AMMInfo`, `BookChanges`, `BookOffers`, `DepositAuthorized`, `GetAggregatePrice`, `NFTBuyOffers` / `NFTSellOffers` / `NFTOffersHelpers.h`, `PathFind` (subscription), `RipplePathFind` (one-shot).
- `handlers/server_info/``Fee`, `Feature`, `Manifest`, `ServerDefinitions`, `ServerInfo`, `ServerState`, `Version.h` (class-based).
- `handlers/subscribe/``Subscribe`, `Unsubscribe`.
- `handlers/transaction/``Simulate` (dry-run via `tapDRY_RUN`; batch rejected), `Submit`, `SubmitMultiSigned`, `Tx`, `TransactionEntry` (ledger-pinned), `TxHistory` (paginated, `useTxTables()`-gated, deep-page cap 10000 for non-admin), `TxReduceRelay`.
- `handlers/utility/``Ping` (role-conditional response), `Random`.
- Top-level `handlers/`: `ChannelVerify` (no admin restriction, stateless), `VaultInfo` (XLS-66, vault + MPT issuance lookup).
## gRPC Specifics
- Four handlers: `Ledger`, `LedgerEntry`, `LedgerData` (binary-only, fixed page=2048, supports `marker`+`end_marker` for range parallelism), `LedgerDiff` (SHAMap delta; downcast `ReadView``Ledger` is the validation gate).
- `doLedgerGrpc` adds `get_objects` (state diff via `SHAMap::compare`) and `get_object_neighbors` (DEX best-offer tracking via `keylet::quality`).
- Handlers return `std::pair<Response, grpc::Status>`. Non-OK status discards response.
- Error mapping: `rpcINVALID_PARAMS``INVALID_ARGUMENT`; ledger missing → `NOT_FOUND`; diff overflow → `RESOURCE_EXHAUSTED`.
## Key Gotchas
- `noEvents` (`rpcNO_EVENTS`) is returned by `path_find` and `subscribe`/`unsubscribe` for non-WebSocket transports — HTTP has no push channel.
- `LegacyPathFind` admit-failure means destructor must not decrement; uses `m_isOk` flag.
- `getMasterKey` returns the input key unchanged when no manifest exists — used in `doManifest`/`doValidatorInfo` to distinguish "is master key" from "ephemeral with no manifest".
- `ledger_accept` only works in standalone mode; takes master mutex; drives `Consensus::simulate`.
- `ChannelAuthorize` / `ChannelVerify` use `HashPrefix::paymentChannelClaim` ('CLM') as domain separator; canonical message = prefix + 32-byte channelID + 8-byte drops.
- `deposit_authorized` with credentials: must sort `(issuer, type)` pairs canonically via `credentials::makeSorted` before computing keylet; `lifeExtender` vector keeps SLEs alive so `Slice` views into `sfCredentialType` remain valid.
- `LedgerEntry` uses `Expected<uint256, Json::Value>` parser return type rather than exceptions; v1 still re-throws `Json::error` for compatibility.
- `nft_buy_offers` / `nft_sell_offers` differ only by `keylet::nft_buys` vs `keylet::nft_sells` — both delegate to `enumerateNFTOffers` in `NFTOffersHelpers.h`.
- `getCountsJson` (in `GetCounts.h`) is callable from non-RPC contexts (e.g., `OverlayImpl::getCountsJson`).
- `wallet_propose` entropy warning: <80 bits strong warning; passphrase that already encodes the seed (1751/Base58/hex) suppresses warning.
- Account marker security: always verify `RPC::isRelatedToAccount(*ledger, sle, accountID)` on resumed pagination prevents cross-account directory traversal.
- `AMMInfo` has two distinct parsing paths: one for `amm_account` (direct lookup) and one for `asset`+`asset2` pair (derives AMM account via `keylet::amm`). Both resolve to the same AMM SLE but through different code paths changes must update both.
- `book_offers` applies an inline load-shedder: if `checkFee` determines the consumer is at warning/drop tier, it cuts the offer limit in half before iterating.
- `Simulate` rejects batch transactions (returns `rpcINVALID_PARAMS`) `tapDRY_RUN` does not support the batch transaction type.
- `autofillSignature()` in `Simulate.cpp` removes `SigningPubKey` and `TxnSignature` fields before applying dry-run, then restores them; callers must not pre-sign before calling Simulate.
- `RPCSub` is a legacy feature retained for one specific partner. It is **not** a general-purpose webhook system. New subscribers should use WebSocket instead.

317
docs/skills/shamap.md Normal file
View File

@@ -0,0 +1,317 @@
# SHAMap
Authenticated 16-way radix Merkle trie. Every ledger has two: a TRANSACTION tree (txid → tx, with or without metadata) and a STATE tree (object key → serialized object). Root hash is what validators sign — two nodes agree on ledger state iff their root hashes match. Tree depth is fixed at 64 (256-bit keys consumed 4 bits per level, 65 levels total: root at depth 0, leaves at depth 64).
Root is always a `SHAMapInnerNode`. Leaves only appear at depth 64.
## Core Invariants
- **Tree shape**: `branchFactor = 16`, `leafDepth = 64`, max 65 levels (root at depth 0). One nibble per level.
- **CoW ownership**: each node has a `cowid_`. Non-zero → exclusively owned by that map and mutable. Zero → shared/canonicalized, must not be mutated. `setItem()`, `setChild()`, `dirtyUp()` all assert `cowid_ != 0`.
- **`unshareNode` before any mutation** of a shared node — #1 bug class. Skipping it corrupts every snapshot sharing the node.
- **`canonicalize`** ensures one in-memory instance per hash via `Family::getTreeNodeCache()`. Asserts `cowid == 0` on entry AND on values returned by `cacheLookup()`.
- **`SHAMapNodeID` masking**: `id_ == (id_ & depthMask(depth_))` is enforced by constructor. Two nodes at the same depth on the same path always have identical `SHAMapNodeID`.
- **Inner node concurrency**: per-child bit spinlock in `lock_` (`std::atomic<uint16_t>`, one bit per branch). Allows concurrent reads of different children. `setChild`/`shareChild` skip locking because they require CoW ownership.
- **Leaf size floor**: `SHAMapItem::size() >= 12` asserted at leaf construction.
## State Machine
```cpp
enum class SHAMapState {
Modifying = 0, // open ledger — can add/remove/update
Immutable = 1, // frozen — no writes; asserts guard
Synching = 2, // root hash known; missing nodes can be added
Invalid = 3, // corrupt; consensus must discard
};
```
`setImmutable()` asserts state ≠ `Invalid`. Hash-mismatch or position-mismatch during `addKnownNode` transitions to `Invalid` (not a crash).
## Copy-on-Write Discipline (#1 Bug Class)
```cpp
// REQUIRED before mutating any shared node:
auto node = unshareNode(branch, key); // clones if shared
node->setChild(index, child); // safe to modify
```
`snapShot(isMutable)` does NOT copy nodes. It bumps the original's `cowid_` and shares the `root_` pointer. Subsequent writes call `unshareNode()` which clones on first touch. Immutable snapshots of immutable maps share everything with zero clone cost.
The copy constructor increments `cowid_` (`cowid_(other.cowid_ + 1)`) and calls `unshare()` if either side is mutable, preventing later mutations from racing. An immutable copy of an immutable map shares all nodes with zero clone cost.
`getHash()` does `const_cast<SHAMap&>(*this).unshare()` when the root hash is zero — acknowledged design compromise (logical read, physical mutate).
## Key Files
- `include/xrpl/shamap/SHAMap.h` — main class, state machine, `MissingNodes` struct
- `include/xrpl/shamap/SHAMapTreeNode.h` — base; `cowid_`, `hash_`, `IntrusiveRefCounts`, wire-type constants
- `include/xrpl/shamap/SHAMapInnerNode.h` — 16-way branch, sparse `TaggedPointer`, per-bit spinlocks, `fullBelowGen_`
- `include/xrpl/shamap/SHAMapLeafNode.h` — abstract leaf base, `item_` slot, `setItem` returns hash-changed bool
- `include/xrpl/shamap/SHAMapTxLeafNode.h` — tx without metadata (`tnTRANSACTION_NM`)
- `include/xrpl/shamap/SHAMapTxPlusMetaLeafNode.h` — tx with metadata (`tnTRANSACTION_MD`)
- `include/xrpl/shamap/SHAMapAccountStateLeafNode.h` — account state leaf (`tnACCOUNT_STATE`)
- `include/xrpl/shamap/SHAMapItem.h` — slab-allocated, struct-hack payload, intrusive refcount
- `include/xrpl/shamap/SHAMapNodeID.h` — (depth, masked-prefix) tree address
- `include/xrpl/shamap/SHAMapMissingNode.h` — exception + `SHAMapType` enum (TX/STATE/FREE)
- `include/xrpl/shamap/SHAMapAddNode.h` — useful/duplicate/invalid accumulator for sync results
- `include/xrpl/shamap/SHAMapSyncFilter.h` — pull/notify interface for fetch packs and ephemeral caches
- `include/xrpl/shamap/Family.h` — bundles NodeStore, two caches, missing-node recovery
- `include/xrpl/shamap/FullBelowCache.h` — "subtree complete locally" memo with generation counter
- `include/xrpl/shamap/TreeNodeCache.h``TaggedCache<uint256, SHAMapTreeNode>` with intrusive ptrs
- `include/xrpl/shamap/detail/TaggedPointer.h` / `.ipp` — sparse 16-child storage with 2-bit tag
- `src/libxrpl/shamap/SHAMap.cpp` — mutation (add/del/update), traversal, fetch, flush
- `src/libxrpl/shamap/SHAMapSync.cpp` — getMissingNodes, getNodeFat, addRootNode/addKnownNode, proofs
- `src/libxrpl/shamap/SHAMapDelta.cpp` — compare, walkMap, walkMapParallel
- `src/libxrpl/shamap/SHAMapInnerNode.cpp` — sparse storage mechanics, hash, serialization
- `src/libxrpl/shamap/SHAMapLeafNode.cpp` — abstract leaf shared behavior
- `src/libxrpl/shamap/SHAMapTreeNode.cpp` — deserialization factories
- `src/libxrpl/shamap/SHAMapNodeID.cpp` — depth mask table, branch nav, wire format
## Three Concrete Leaf Types
All inherit `SHAMapLeafNode` (which inherits `SHAMapTreeNode`). All `final`. Differ only in hash prefix, wire-type byte, and whether the key is fed into the hash.
| Class | Hash prefix (bytes) | Key in hash? | Key in wire? | Wire-type byte |
|---|---|---|---|---|
| `SHAMapTxLeafNode` | `HashPrefix::transactionID` (`'T','X','N'`) | no | no | `wireTypeTransaction = 0` |
| `SHAMapTxPlusMetaLeafNode` | `HashPrefix::txNode` (`'S','N','D'`) | yes | yes | `wireTypeTransactionWithMeta = 4` |
| `SHAMapAccountStateLeafNode` | `HashPrefix::leafNode` (`'M','L','N'`) | yes | yes | `wireTypeAccountState = 1` |
**Why the asymmetry**: a transaction's ID *is* `sha512Half(prefix, blob)`, so the key is already implied by the data. Account state and tx+meta keys (account address, offer index, etc.) do NOT appear in the blob and must be hashed in — otherwise two distinct objects with identical payloads would collide.
Open ledgers hold transactions as `tnTRANSACTION_NM`; closed ledgers rebuild the tx tree as `tnTRANSACTION_MD` after metadata is attached. Hash prefix difference makes the two roots structurally incompatible — by design.
Each concrete leaf has two constructors: hash-recompute (used by initial `make_*`) and hash-supplied (used by `clone()` and deserialization with `hashValid = true`).
**`SHAMapLeafNode`** is the intermediate abstract base. Both constructors are `protected` — only concrete subclasses call them. `isLeaf()`, `isInner()`, and `invariants()` are sealed `final override` here. `invariants()` asserts hash non-zero and item non-null. `item_` is `protected` (not `private`) so concrete subclasses can access it directly in their inline `updateHash()` implementations, avoiding virtual dispatch overhead in the hash path.
## SHAMapItem
Single allocation: struct fields + payload bytes via struct hack (placement-new'd after `sizeof(*this)`). Constructor is `private`, all copy/move deleted; only path is `make_shamapitem()`.
Backed by `detail::slabber` — a `SlabAllocatorSet<SHAMapItem>` with seven tiers (128/192/272/384/564/772/1052 extra bytes, 4060 MiB pools each, 2 MiB block alignment for THP). Falls back to `new uint8_t[]` for oversize. Max payload 16 MiB (asserted).
`refcount_` is `mutable std::atomic<uint32_t>`. `intrusive_ptr_add_ref` calls `LogicError` if count was already zero (resurrection guard). `intrusive_ptr_release` runs `std::destroy_at` then returns memory to `slabber.deallocate()`, falling through to `delete[]` when the pointer didn't come from a slab.
`SHAMapLeafNode::item_` is `boost::intrusive_ptr<SHAMapItem const>` — items are immutable; mutation produces a new item. The `const` through the pointer allows the same `SHAMapItem` to be shared across CoW snapshots without copying.
## Inner Node: Sparse Storage via TaggedPointer
`SHAMapInnerNode::hashesAndChildren_` is a single `TaggedPointer` holding two co-located arrays (`SHAMapHash[N]` followed by `SharedPtr<SHAMapTreeNode>[N]`). N comes from `boundaries = {2, 4, 6, 16}` indexed by the 2-bit tag stored in the pointer's low bits.
- `SHAMapHash` is `static_assert`'d to have `alignof >= 4`, freeing the low 2 bits.
- Tag 3 (N=16) is the **dense** case; tags 02 are sparse, with non-empty children packed in branch-number order.
- `isBranch_` (`uint16_t`) is the authoritative occupancy bitset. Translation: `getChildIndex(i) = popcnt16(isBranch_ & ((1<<i) - 1))`.
- Four `boost::singleton_pool`s (one per size class, 512 KiB blocks) back allocations. Dispatch is via `std::array<std::function<...>, 4>` indexed by tag — O(1), no virtual calls.
This typically cuts inner-node memory to ~25% of a dense layout.
**Resize**: `resizeChildArrays()` uses `TaggedPointer`'s move-restructuring constructors. The 4-arg form (`srcBranches`, `dstBranches`, `toAllocate`) handles simultaneous reshape — in-place if size class is unchanged (shifts within the existing allocation), otherwise allocates new and placement-copies.
**`RawAllocateTag` constructor**: allocates without running element constructors. Used internally only, always paired with explicit placement-new loops; destructor unconditionally runs explicit destructors.
`iterChildren(F)` exposes all 16 logical branches (zero-hash for empties — needed for `updateHash`). `iterNonEmptyChildIndexes(F)` gives `(branchNum, arrayIdx)` — used for mutation and serialization.
**Two compile-time constraints** in `TaggedPointer.ipp`: `boundaries.size() <= 4` (tag is exactly 2 bits) and `boundaries.back() == branchFactor`. Adding a 5th boundary fails the build.
## SHAMapNodeID
Two fields: `uint256 id_` (path prefix, masked to depth) and `unsigned depth_` (064). The static `depthMask()` table has 65 entries: nibble `d/2` gets `0xF0` at odd depths, `0xFF` at even, accumulating top-down.
- Constructor **rejects** unmasked input (asserts `id == id & depthMask`).
- `createID(depth, key)` is the factory that **applies** the mask for you. Asymmetric API by design.
- `getChildNodeID(m)` throws (not just asserts) at `leafDepth` — corrupted data may trigger this in release builds.
- `selectBranch(id, hash)` reads the nibble at `id.depth` from `hash`. The traversal primitive used everywhere.
- Wire format: 33 bytes (32 id + 1 depth). `deserializeSHAMapNodeID` returns `std::optional` and validates size, depth ≤ 64, and the mask invariant.
`operator<` sorts by `std::tie(depth_, id_)` — shallower before deeper, then by prefix value. Used as map/set key for traversal frontiers.
## Hash Computation
Inner: `sha512Half(HashPrefix::innerNode, hashes[0..15])` — always feed all 16, zeros for empties. Hash is identical regardless of dense/sparse storage.
`updateHash()` reads from `hashes` array directly. `updateHashDeep()` pulls hashes from child pointers first, then computes — used after batch mutations where in-memory child hashes were updated but the local hashes array wasn't synced.
## Wire Serialization Formats
**Inner nodes** — two formats chosen by occupancy in `serializeForWire`:
- **Compressed** (< 12 children): per non-empty branch, 32-byte hash + 1-byte branch index, total 33·n bytes.
- **Full** (≥ 12 children): all 16 hashes in order, 512 bytes.
Type byte appended at the end. `makeFullInner()` / `makeCompressedInner()` are the matching factories, each validating exact size (throws `std::runtime_error` on mismatch not silent corruption).
`serializeWithPrefix` always emits the full 16-hash form prefixed with `HashPrefix::innerNode` (used for hashing).
**Leaves** wire format trails with the single-byte wire-type tag at the END (not start). `makeFromWire` reads `rawNode[size-1]` to dispatch.
`makeFromPrefix(slice, hash)` uses the leading 4-byte `HashPrefix` and is the trusted (hash-known) path propagates `hashValid = true` to skip recompute in leaf constructors.
**Tag extraction asymmetry**: `makeTransaction` recomputes the key as `sha512Half(HashPrefix::transactionID, data)`. `makeTransactionWithMeta` and `makeAccountState` read the 32-byte key from the *tail* of the payload (via `getBitString`), then `chop` it before creating the `SHAMapItem`. A manual pre-check guards against zero-size reads before calling `getBitString` see `SHAMapTreeNode.cpp` comment `// FIXME: improve this interface`.
## Mutation Mechanics
All three mutations follow: walk-with-stack local change `dirtyUp()`.
- **`addGiveItem`**: empty branch create leaf there. Collision with existing leaf loop creating inner nodes deeper until keys diverge (respects merge property inner nodes only where 2 items coexist below).
- **`delItem`**: drop the leaf, walk up reducing child counts. Zero children null out. One child + `onlyBelow()` confirms a single leaf below collapse inner, hoist leaf upward via `makeTypedLeaf` with the original leaf type.
- **`updateGiveItem`**: locate, unshare, swap payload; call `dirtyUp` only if `setItem()` returns `true` (hash actually changed). Avoids spurious rehashing.
- **`dirtyUp`**: consume stack bottom-up, `unshareNode` each, `setChild` to link in the updated subtree. Produces a CoW-owned chain from mutation point to root.
`makeTypedLeaf()` maps `SHAMapNodeType` to one of three concrete leaf classes. Unrecognized types throw `LogicError` immediately.
## Node Fetching (Backed vs Unbacked)
`backed_ = true` integrates with `Family::db()`; `backed_ = false` (set via `setUnbacked()`) is in-memory only (e.g., transient tx-processing trees).
`fetchNodeNT` tiered lookup:
1. `cacheLookup()` `Family::getTreeNodeCache()`
2. `fetchNodeFromDB()` `Family::db().fetchNodeObject()`
3. `checkFilter()` `SHAMapSyncFilter::getNode()`, then notifies via `gotNode(true, ...)`
Misses return `nullptr` (`fetchNodeNT`) or throw `SHAMapMissingNode` (`fetchNode`, `descendThrow`). `finishFetch` catches `std::runtime_error` from deserialization, logs, and suppresses (doesn't crash).
On miss, `full_` is cleared and `Family::missingNodeAcquireBySeq(seq, hash)` is called links to inbound-ledger acquisition pipeline.
`descendAsync` is the non-blocking variant: posts `Family::db().asyncFetch()`, sets `pending = true`, invokes user callback on completion.
## Missing-Node Discovery (`getMissingNodes`)
`MissingNodes` inner struct holds traversal state. Several details matter:
- **`stack_` is `std::stack<StackEntry, std::deque<StackEntry>>`** NOT vector. Raw `SHAMapInnerNode*` pointers held in entries must remain valid across pushes; vector reallocation would invalidate them.
- **Random start nibble**: `firstChild = rand_int(255)` per stack entry. Concurrent callers on the same map produce different request sets maximizes coverage when many peers ask in parallel.
- **`maxDefer_ = 512`** in-flight async reads. When reached, `gmn_ProcessDeferredReads` blocks on a CV draining the batch. Completed nodes are canonicalized and the parent is revisited via `resumes_`.
- **FullBelow short-circuit**: before descending, `touch_if_exists(hash)` on the cache; on success, skip the subtree. After confirming complete, `insert(hash)` and `setFullBelowGen(generation)` on the in-memory node.
- Two helpers: `gmn_ProcessNodes` (per-node descent + bookkeeping), `gmn_ProcessDeferredReads` (I/O completion handler).
**Gotcha**: processing async completions out of order would mark "full below" incorrectly. The `resumes_` map + deferred-batch barrier prevent this.
## Serving Nodes to Peers: `getNodeFat`
Bundles a target node plus a bounded-depth subtree in one response to amortize sync latency. Depth budget only decrements when an inner node has > 1 child — single-child chains (compressed radix paths) traverse for free. `fatLeaves=true` includes adjacent leaves; otherwise inner-only.
`visitNodes` (used by `visitDifferences` and traversal helpers) uses an explicit `std::stack` to avoid recursion on potentially 64-level trees.
`visitDifferences` short-circuits at hash equality — O(1) when subtrees match. The `have` pointer is nullable: null means "report everything in `this`."
## Ingesting Nodes: `addRootNode` / `addKnownNode`
Returns `SHAMapAddNode` (tri-state: useful / duplicate / invalid). Aggregated via `+=` across batches in `InboundLedger`.
`addKnownNode` performs **two integrity checks**:
1. Deserialized node's hash matches the parent-branch hash.
2. For leaves at `leafDepth`: reconstructed `SHAMapNodeID` from the leaf's actual key matches the claimed `SHAMapNodeID`. This closes a theoretical hash-collision-at-wrong-position attack.
Mismatch transitions the map to `Invalid` — graceful, not a crash. Skips descent into FullBelow subtrees.
**Gotcha**: callers must handle `invalid` gracefully — empty branch or hash mismatch on traversal is legitimate when peer data is stale.
`isGood()` returns `(good + duplicate) > bad` — duplicates count positively (benign), only `bad` is evidence of misbehavior. `isUseful()` is stricter: did we actually make progress?
## Merkle Proofs
`getProofPath(key)` collects nodes from leaf to root (via `walkTowardsKey` with stack), serialized leaf-first.
`verifyProofPath(rootHash, key, path)` is **static** — no live tree needed. Walks root-to-leaf, verifying each hash and using `selectBranch` to pick the next expected hash. Length bounded at 65. Deserialization wrapped in try/catch (network input).
**Gotcha**: wrong key at any level causes false negative — verifier walks down using the *supplied* key's nibbles, not anything inside the path data.
## Comparison / Delta (`SHAMapDelta.cpp`)
`compare` short-circuits at root: `if (getHash() == other.getHash()) return true`. O(d) in the number of differences, not O(n) in total items.
Returns `Delta = std::map<uint256, DeltaItem>` where `DeltaItem = pair<SHAMapItem ptr, SHAMapItem ptr>`. Null first → added; null second → deleted; both → modified.
Four dispatch cases at each pair (leaf/leaf, inner/leaf, leaf/inner, inner/inner). Asymmetric cases delegate to `walkBranch`, which uses an `isFirstMap` bool to preserve (first-map version, second-map version) ordering in the pair regardless of which side is the subtree.
**`maxCount` defense**: passed by reference into `walkBranch`, decremented per insertion. Returns false on truncation. The ledger-diff RPC passes `INT_MAX` (unlimited); the sync RPC passes 256 (bounded exposure to malicious or fabricated diffs).
## walkMap / walkMapParallel
Completeness check: traverses, recording any node hash referenced but absent (via `descendNoStore`, which returns null instead of throwing) into a `vector<SHAMapMissingNode>`.
**`walkMapParallel`** partitions at depth 1 — one `std::thread` per non-empty, non-leaf top-level child (up to 16-way). Each thread has its own `nodeStack`; `missingNodes` and an `exceptions` vector are mutex-shared.
**Critical**: an uncaught exception in a `std::thread` calls `std::terminate`. Worker lambda catches `SHAMapMissingNode` and records to `exceptions` instead — must inspect on return. Return value `true` ⇔ no thrown exceptions, NOT ⇔ no missing nodes (those are always in the vector).
## Family Interface
`Family` is the abstract collaborator bundle: `db()`, `getFullBelowCache()`, `getTreeNodeCache()`, `missingNodeAcquireBy{Seq,Hash}()`, `journal()`, `sweep()`, `reset()`.
Non-copyable, non-movable (SHAMap stores `Family&`; moving would dangle references).
Production impl: `NodeFamily` (in `src/xrpld/shamap/`). Tests use lighter-weight in-memory versions in `src/test/shamap/common.h`.
`NodeFamily::missingNodeAcquireBySeq` maintains a `maxSeq_` high-water under `maxSeqMutex_` to avoid redundant acquisition requests when many SHAMaps simultaneously hit missing nodes.
## FullBelowCache
`KeyCache<uint256>` — stores only hashes (no values), thread-safe, time-expiring (default 2 minutes). Wrapped in `BasicFullBelowCache` to add a generation counter.
**Two-layer invalidation**:
- Per-node `fullBelowGen_` on `SHAMapInnerNode` (compared against current cache generation in `isFullBelow(gen)`).
- The cache itself (queried via `touch_if_exists`).
`clear()` purges entries AND increments `m_gen` — zero-cost global invalidation of every in-memory marker (mismatched generation → `isFullBelow` returns false). `reset()` purges and sets `m_gen = 1` (used at startup / hard reset). The distinction matters: any node carrying `fullBelowGen_ > 1` won't match the reset-to-1 state — correct, because those nodes are expected to be recreated fresh.
Only `backed_ = true` maps participate.
## TreeNodeCache
```cpp
using TreeNodeCache = TaggedCache<
uint256, SHAMapTreeNode, false,
intr_ptr::SharedWeakUnionPtr<SHAMapTreeNode>,
intr_ptr::SharedPtr<SHAMapTreeNode>>;
```
Two reasons for intrusive (not `std::shared_ptr`):
1. **Earlier memory reclamation**: `SHAMapInnerNode::partialDestructor()` releases the 16-way child array as soon as the strong count hits zero, even while weak references in the cache are still live. `std::make_shared` co-allocates control block + object, blocking reclamation until all weaks expire.
2. **Single-word strong/weak**: `SharedWeakUnion<T>` uses one pointer-sized word with a low-bit tag (alignment guarantees the bit is free). Demoting hot → cold flips one bit in place instead of swapping `shared_ptr``weak_ptr`.
## Canonicalization
`canonicalize(hash, nodePtr)` deduplicates: if the cache already holds this hash, replace the local pointer with the cached one; otherwise insert. Asserts `cowid == 0` (only shareable nodes can be canonical). `cacheLookup()` asserts the same on returned nodes.
`canonicalizeChild()` on inner nodes: when two threads concurrently fetch the same child from disk, the per-child spinlock serializes; first writer wins, late writer's freshly-deserialized node is discarded. The incumbent hash is verified to match before installation.
## SyncFilter
Two-method interface bridging SHAMap to peer fetch packs and consensus tx caches.
- `getNode(hash) -> optional<Blob>`: filter's chance to supply a node from a transient source (fetch pack, consensus cache).
- `gotNode(fromFilter, hash, ledgerSeq, Blob&&, type)`: notification of node receipt. **`Blob&&` may be moved/destroyed — do not reuse**. `fromFilter=true` means data came from this filter's own `getNode` (no need to re-store); `false` means it came from the network and should be persisted.
Implementations:
- `AccountStateSF`, `TransactionStateSF` — write to NodeStore + fetch pack. Only used on add paths.
- `ConsensusTransSetSF` — backed by `TaggedCache<SHAMapHash, Blob>`. Used on both add AND check (since the backing store is purely transient).
## Flushing
`walkSubTree(doWrite, type)` is post-order DFS with **explicit stack** (tree may be 64 deep — recursion risks stack overflow). Per node: `preFlushNode()` clones if needed (protects other maps sharing it), recompute hash, `unshare()` (set `cowid = 0`), and if `doWrite`, serialize and persist via `Family::db()`.
- `flushDirty()``walkSubTree(backed_, type)`.
- `unshare()``walkSubTree(false, ...)`. Used to make everything shareable without writing.
## Common Bug Patterns
- Modifying a node without `unshareNode` first → corrupts every snapshot sharing it.
- Using `std::vector` (instead of `std::deque`) as backing for the `MissingNodes` stack → raw inner-node pointers invalidated on push.
- Processing async fetch completions out of order in `getMissingNodes` → incorrect `setFullBelowGen`, subsequent walks skip incomplete subtrees.
- Inner serialization format mismatch (compressed/full) — branch-count cutoff is 12; deserializer enforces exact sizes and throws `std::runtime_error`.
- `addKnownNode` returning `invalid` is normal (empty branch, hash mismatch) — callers must handle, not assume valid.
- Proof verification with wrong key at any level → false negative; the verifier uses the supplied key's nibbles to pick branches.
- Failing to inspect the `exceptions` vector after `walkMapParallel` — workers swallow `SHAMapMissingNode` to avoid `std::terminate`; missing nodes go into the result vector but the return value reflects exceptions only.
- Mutating a node returned from the `TreeNodeCache` — they have `cowid == 0` by invariant.
- Adding a 5th entry to `TaggedPointer::boundaries``static_assert` fails; the 2-bit tag supports exactly 4 values.
- Calling `clone()` with the hash-supplied constructor when the item is also changing — the two-constructor split assumes the item and hash are consistent; pass `hashValid = false` to recompute.
## SHAMapMissingNode Catch Policy
The exception flows up out of `descendThrow` and friends. Catch handlers split into:
- **Recovery** (`LedgerCleaner`, `LedgerMaster`): catch, log at warn, schedule `getInboundLedgers().acquire()`.
- **Fatal/abort** (`RCLConsensus` consensus timer): catch, log at error, `Rethrow()` — crashes the consensus round.
- **Silent failure** (`Ledger.cpp`): catch, return failure — incomplete state tree is treated as invalid ledger.
`SHAMapType` enum values (`TRANSACTION = 1`, `STATE = 2`, `FREE = 3`) are part of the wire protocol — do NOT change.

143
docs/skills/sql.md Normal file
View File

@@ -0,0 +1,143 @@
# SQL Database
SQLite via SOCI for ledger/transaction history. Only SQLite is supported; any non-`sqlite` backend value in config throws at parse time (`detail::getSociInit` in `SociDB.cpp`).
## Key Invariants
- Two main databases: `lgrdb_` (ledger) and `txdb_` (transactions, optional via `useTxTables` config)
- Transaction tables are optional; disabling them disables transaction history and `account_tx` queries
- WAL checkpointing offloads to `JobQueue` (`jtWAL`); at most one checkpoint job in flight per `DatabaseCon` (guarded by `running_` mutex in `WALCheckpointer`)
- Database init failure is fatal (throws exception, prevents construction)
- Free disk space < 512 MB triggers fatal error on write operations
- File extension inconsistency: `validators` and `peerfinder` use `.sqlite`; all other DBs use `.db`. Historical artifact enforced in `detail::getSociInit`
## Schema
- `Ledgers`: seq, hash, parent hash, total coins, close time, etc. Indexed by `LedgerSeq`
- `Transactions`: TransID, TransType, FromAcct, FromSeq, LedgerSeq, Status, RawTxn, TxnMeta. Indexed by `LedgerSeq`
- `AccountTransactions`: TransID, Account, LedgerSeq, TxnSeq. Triple-indexed for `account_tx` queries
- Secondary DBs: Wallet (node identity, manifests), PeerFinder (bootstrap cache), State (deletion tracking)
- Schema defined in `src/xrpld/app/main/DBInit.h`
- No schema migration system; `CREATE TABLE IF NOT EXISTS` silently preserves old schemas with missing columns. **Exception**: PeerFinder has schema versioning via a `SchemaVersion` table.
## Configuration
| Option | Section | Values | Default |
|--------|---------|--------|---------|
| `backend` | `[sqdb]` / `[relational_db]` | `sqlite` only | sqlite |
| `page_size` | `[sqlite]` | 51265536, power of 2 | 4096 |
| `safety_level` | `[sqlite]` | high, medium, low | high |
| `journal_size_limit` | `[sqlite]` | integer >= 0 | 1582080 |
`safety_level: low` changes `journal_mode` and `synchronous` settings — can lose data on crash.
## WAL Checkpointing Architecture
The checkpointer subsystem is the trickiest part of this module. SQLite's WAL hook is a C callback registered on the native `sqlite3*` connection, but the work runs on a `JobQueue` thread that may still be executing when the owning `DatabaseCon` is destroyed.
### Two-file split
- **`SociDB.cpp`**: `WALCheckpointer` class (anonymous namespace) — installs the hook, implements `schedule()` and `checkpoint()`, holds the `weak_ptr<soci::session>`.
- **`DatabaseCon.cpp`**: `CheckpointersCollection` class — process-wide singleton registry (`checkpointers`, namespace-scope variable) mapping monotonically-incrementing integer IDs to `shared_ptr<Checkpointer>`; exposes `create`, `fromId`, `erase`. All `DatabaseCon` instances share this one registry.
`DatabaseCon.cpp` has no direct SQLite dependency; it only manages the `Checkpointer` abstract interface.
### ID-based hook indirection
- `WALCheckpointer` is registered with `sqlite3_wal_hook` using its `std::uintptr_t id_` cast to `void*`, **not** a raw `this` pointer.
- The C hook calls `checkpointerFromId()``CheckpointersCollection::fromId()` (process-wide singleton). If lookup returns null (connection torn down), the hook deregisters itself via `sqlite3_wal_hook(conn, nullptr, nullptr)`.
- Prevents use-after-free: the hook may fire on a writer thread after `DatabaseCon` begins destruction.
### Session ownership split
- `DatabaseCon` holds `std::shared_ptr<soci::session>`; `WALCheckpointer` holds only `std::weak_ptr<soci::session>`.
- If the checkpointer held a `shared_ptr`, an in-flight job would keep the WAL lock alive, blocking a freshly-opened replacement `DatabaseCon` on the same file.
- `WALCheckpointer::checkpoint()` calls `session_.lock()` and bails silently if expired.
### Destructor sequence (`DatabaseCon::~DatabaseCon`)
Order matters:
1. `checkpointers.erase(checkpointer_->id())` — future hook invocations now no-op and self-deregister.
2. Take `weak_ptr<Checkpointer> wk(checkpointer_)`, then `checkpointer_.reset()`.
3. Busy-poll `wk.use_count() != 0` with 100 ms sleeps until all in-flight job lambdas release their `shared_ptr<Checkpointer>`.
The 100 ms poll is deliberate (rare event; simpler than a condvar). Without this wait, reopening the same SQLite file immediately after destruction can fail because the old checkpoint job may still hold the WAL lock.
### `setupCheckpointing()` — deferred wiring
- Separated from constructors so checkpointing is opt-in.
- Constructors accepting `CheckpointerSetup` open the DB first, then call `setupCheckpointing(JobQueue*, ServiceRegistry&)`.
- Null `JobQueue*` throws `std::logic_error` (programming error, not runtime).
- The checkpointer must be inserted into `CheckpointersCollection` **before** `setupCheckpointing` returns, because the WAL hook is armed inside the `WALCheckpointer` constructor and writes can fire it immediately.
### Checkpoint job behavior
- Triggered by `sqlite3_wal_hook` after every WAL write; `static checkpointPageCount = 1000` mirrors SQLite's auto-checkpoint threshold.
- `schedule()` uses `running_` bool under mutex to enforce single in-flight job; if `JobQueue` rejects the job, `running_` is reset.
- Enqueued lambda captures `std::weak_ptr<Checkpointer>`; destroyed `DatabaseCon` causes the job to exit without touching the session.
- `checkpoint()` calls `sqlite3_wal_checkpoint_v2` with `SQLITE_CHECKPOINT_PASSIVE`. `SQLITE_LOCKED` logged at trace (expected under reader contention); other errors logged as warnings. `running_` reset under mutex after each attempt.
- Net effect: routes checkpoint work off the writer thread onto `jtWAL`. Without this, SQLite does it synchronously on whichever thread crosses the page threshold.
## SOCI Adapter Notes (`SociDB.cpp`)
- `DBConfig` is two-phase: parse params, open later. `detail::getSociInit` and `detail::getSociSqliteInit` resolve backend + path; the `.sqlite` vs `.db` extension fork lives in `getSociInit`. `getSociSqliteInit` throws `std::runtime_error` if the database name is empty.
- Two free-function `open()` overloads: config-based (delegates through `DBConfig`) and explicit-string (enforces same "sqlite only" constraint). Both paths call `s.open(soci::sqlite3, connectionString)`.
- `getConnection(session&)` recovers the raw `sqlite3*` via `dynamic_cast<soci::sqlite3_session_backend*>` — the only intentional break in the SOCI abstraction. Throws `std::logic_error` if the cast fails. Required for WAL hooks and `sqlite3_db_status`.
- `getKBUsedAll()``sqlite3_memory_used()` (process-global). `getKBUsedDB()``SQLITE_DBSTATUS_CACHE_USED` (per-connection).
- Four `convert()` overloads bridge `soci::blob``std::vector<uint8_t>` / `std::string`. Empty blobs require `blob.trim(0)` rather than `blob.write(nullptr, 0)`.
- `SociDB.cpp` opens with `#pragma clang diagnostic ignored "-Wdeprecated"` because SOCI headers use deprecated constructs; scoped to this TU only.
## Common Bug Patterns
- No schema migration system; `CREATE TABLE IF NOT EXISTS` silently preserves old schemas with missing columns. New columns on existing deployments require manual `ALTER TABLE` or explicit documentation that the column may be absent.
- `page_size` must be power of 2 between 51265536; invalid values cause init failure.
- Online deletion coordinates between NodeStore rotation and SQL table pruning; race conditions here lose history.
- Empty database name passed to `detail::getSociSqliteInit` throws — no silent fallback.
- A `WALCheckpointer` registered with `sqlite3_wal_hook` can outlive its `DatabaseCon` if a checkpoint job is in flight; teardown must wait for the job to drain (see Destructor sequence above).
- Opening a new `DatabaseCon` to the same file immediately after destroying the old one can fail if the destructor busy-poll is skipped or shortened — the old checkpoint job may still hold the WAL lock.
## Key Patterns
### Schema Evolution Caveat
```cpp
// No migration system — old databases keep old schemas.
// CREATE TABLE IF NOT EXISTS silently skips if table exists with old columns.
// New columns require manual ALTER TABLE or must be treated as optional/absent.
// PeerFinder is the exception: it has a SchemaVersion table.
```
### Disk Space Guard
```cpp
// Required on all write paths.
if (freeDiskSpace < minDiskFree)
Throw<std::runtime_error>("Not enough disk space for database write");
```
### WAL Hook Cookie
```cpp
// Always pass an integer ID, never `this`.
// DatabaseCon may be destroyed while a hook invocation is mid-flight on a writer thread.
sqlite3_wal_hook(conn, &walHookCallback,
reinterpret_cast<void*>(checkpointer->id()));
```
### Penetrating the SOCI Abstraction
```cpp
// getConnection() is the only intentional SOCI abstraction break.
// Required for sqlite3_wal_hook and sqlite3_db_status APIs.
auto* be = dynamic_cast<soci::sqlite3_session_backend*>(s.get_backend());
if (!be || !be->conn_) throw std::logic_error("Not a sqlite3 session");
sqlite3* conn = be->conn_;
```
## Key Files
| File | Purpose |
|------|---------|
| `src/libxrpl/rdb/SociDB.cpp` | SOCI/SQLite adapter, `WALCheckpointer`, blob conversion, memory stats |
| `src/libxrpl/rdb/DatabaseCon.cpp` | Connection lifecycle, `CheckpointersCollection`, destructor drain |
| `src/xrpld/app/main/DBInit.h` | Schema definitions (CREATE TABLE statements) |
| `src/xrpld/app/rdb/backend/detail/SQLiteDatabase.cpp` | Main `SQLiteDatabase` implementation |
| `src/xrpld/app/rdb/backend/detail/Node.cpp` | Ledger/tx read-write operations |
| `src/xrpld/app/rdb/detail/State.cpp` | Deletion state tracking |
| `src/xrpld/core/detail/DatabaseCon.cpp` | Legacy reference; lifecycle now in `libxrpl` |

75
docs/skills/test.md Normal file
View File

@@ -0,0 +1,75 @@
# Testing
JTx framework for in-memory ledger testing. Tests live in `src/test/`, derive from `beast::unit_test::suite`, and register with `BEAST_DEFINE_TESTSUITE`.
## Key Patterns
### Test File Structure
```cpp
class MyFeature_test : public beast::unit_test::suite {
void testBasic() {
testcase("basic");
using namespace jtx;
Env env{*this};
// ... test logic ...
}
void run() override { testBasic(); }
};
BEAST_DEFINE_TESTSUITE(MyFeature, app, ripple);
```
### Amendment Gating
```cpp
// REQUIRED: test with AND without the feature amendment
Env env{*this}; // all amendments on
Env env{*this, testable_amendments() - featureMyFeature}; // feature disabled
// With custom config:
Env env{*this, envconfig([](std::unique_ptr<Config> cfg) {
/* modify config */ return cfg;
})};
```
### Transaction Submission
```cpp
auto const alice = Account{"alice"};
auto const bob = Account{"bob"};
env.fund(XRP(10000), alice, bob);
env.close(); // REQUIRED between setup and test transactions
env(pay(alice, bob, XRP(100))); // expects tesSUCCESS
env(pay(alice, bob, XRP(999999)), ter(tecUNFUNDED_PAYMENT)); // specific TER
env(pay(alice, bob, XRP(100)), txflags(tfPartialPayment)); // with flags
```
### State Verification
```cpp
env.require(balance(alice, XRP(9900)));
env.require(balance(alice, USD(1000)));
env.require(owners(alice, 2));
auto const sle = env.le(keylet::account(alice.id()));
BEAST_EXPECT(sle);
BEAST_EXPECT((*sle)[sfBalance] == XRP(9900));
```
## Review Checklist
- New transaction types MUST have tests with AND without the feature amendment
- Every error path (tem*, tec*, tef*) must have a test exercising it
- `env.close()` required between transactions that need ledger boundaries
- Verify state after transaction, not just TER code
- Test class naming: `FeatureName_test`, registered with `BEAST_DEFINE_TESTSUITE`
## Common Pitfalls
- Forgetting `*this` as first Env arg disconnects assertion reporting
- Comparing XRPAmount with raw integers instead of `XRP()` or `drops()`
- Trust lines must be established before IOU payments
- Each Env creates a full Application — keep test methods focused
## Key Files
- `src/test/jtx/Env.h` — test environment class
- `src/test/jtx/jtx.h` — convenience header (includes all helpers)
- `src/test/jtx/Account.h` — test accounts
- `src/test/jtx/amount.h` — XRP(), drops(), IOU helpers

560
docs/skills/transactors.md Normal file
View File

@@ -0,0 +1,560 @@
# Transactors
Transaction processing pipeline: preflight (static validation) → preclaim (ledger state checks) → doApply (state mutation). Base class `Transactor` in `src/libxrpl/tx/`. Every transaction type inherits from it; only `doApply()` is virtual — all other dispatch is compile-time.
## Pipeline Architecture
### Three Phases
1. **`preflight`** — stateless, no ledger access. Validates fields, flags, signatures (cached via HashRouter). Cheap, parallelizable. Returns `NotTEC`. Results carry a `TxConsequences` summary used by the transaction queue.
2. **`preclaim`** — read-only `ReadView` access. Checks account exists, fee sufficient, sequence valid, signature valid. Returns `TER`. Sets `likelyToClaimFee` for relay decisions.
3. **`doApply`** — mutable `ApplyView` access. Only runs if preclaim returned `tesSUCCESS` and `likelyToClaimFee` is true.
`apply()` in `apply.cpp` composes all three. It accepts a preflight callable so the same `preclaim`+`doApply` machinery serves normal and batch-inner transactions. `applyTransaction()` adds `tapRETRY` semantics and dispatches to `applyBatchTransactions()` after a successful `ttBATCH`.
**Important preclaim security invariant** (documented in `applySteps.cpp`): every check through and including `checkSign` must return `NotTEC` (not a `tec` code). A `tec` before signature verification would charge a fee without authentication — a critical security property.
### Layered Preflight: `preflight0` → `preflight1` → `T::preflight` → `preflight2` → `T::preflightSigValidated`
`Transactor::invokePreflight<T>` calls (in order): `T::checkExtraFeatures`, `preflight1(ctx, T::getFlagsMask(ctx))`, `T::preflight`, `preflight2`, `T::preflightSigValidated`. Each is a static method; derived classes participate via name hiding — never virtual.
- **`preflight0`** (called from `preflight1`): gates on `sfNetworkID` presence/absence, zero-hash tx ID, valid flag bits, and pseudo-tx/batch-inner exclusivity.
- **`preflight1`**: account is non-zero, `sfFee` is non-negative native XRP, signing key format valid, tickets and `sfAccountTxnID` are mutually exclusive.
- **`preflight2`**: simulation mode (`tapDRY_RUN`), cryptographic signature check via hash router. Skipped entirely for `tfInnerBatchTxn` (outer batch authorizes).
**Rule**: derived `preflight` runs *between* `preflight1` and `preflight2`. Never call `preflight1`/`preflight2` directly.
### Compile-time Polymorphism (Name Hiding, Not Virtual)
`with_txn_type()` in `applySteps.cpp` uses an X-macro over `transactions.macro` to convert runtime `TxType` to a compile-time template parameter via a switch dispatch — no virtual dispatch, no transactor headers included in `applySteps.cpp` (explicitly forbidden).
### `ConsequencesFactoryType`
Each transactor declares `static constexpr ConsequencesFactoryType ConsequencesFactory{...}`:
- **`Normal`** — standard fee/sequence consequences. Most transactors.
- **`Blocker`** — queues block later transactions from same account. Examples: `SetRegularKey`, `AccountDelete`, `SignerListSet`, `XChainAddClaimAttestation`.
- **`Custom`** — derived class implements `makeTxConsequences(PreflightContext const&)`. Examples: `Payment` (XRP spend via `sfSendMax`), `OfferCreate` (XRP TakerGets), `TicketCreate` (multi-sequence), `AccountSet` (conditional blocker on auth/master flags), `LoanSet` (counterparty signers).
C++20 `requires` clauses in `applySteps.cpp` pick the factory at compile time.
### Numeric Precision Guards
`with_txn_type()` installs RAII guards before dispatch:
- When `featureSingleAssetVault` or `featureLendingProtocol` is active: `CurrentTransactionRulesGuard` (thread-local rules access) + `NumberSO` (floating-point-style number arithmetic per `fixUniversalNumber`).
- Otherwise: `NumberMantissaScaleGuard` (legacy small-mantissa mode).
Ideally these would apply everywhere from the start; they were retrofitted into `with_txn_type` for `preflight`/`preclaim` when vault/lending features needed correct numeric rules in read-only phases.
## Key Invariants
- Pipeline is strict: preflight runs WITHOUT ledger state, preclaim WITH read-only view, doApply with mutable view.
- `preflight` validates all fields exist and are well-formed; this is the ONLY place to reject malformed transactions cheaply.
- Fee is always deducted on `tecCLAIM`; `payFee` runs before `doApply`.
- Sequence/ticket consumption happens in `consumeSeqProxy`; must succeed before any state changes.
- Invariant checkers run after `doApply`; they can veto the transaction post-execution.
- Amendment gating belongs in `checkExtraFeatures`, NOT in `preflight`. The framework guards on the central permission registry first.
- `tem*`/`tef*`/`tel*` results: fee NOT charged, transaction not included. `tec*` results: fee charged, transaction included.
## State Commitment & tec* Rollback (CRITICAL for review)
**`doApply` mutations are NOT committed until `ctx_.apply()` is called at the end of `operator()`.** All peek/insert/update/erase during `doApply` go into an `ApplyContext` view (`view_`) layered on top of `base_`. Whether that view gets flushed to `base_` depends entirely on the TER that `doApply` returns.
`ApplyContext::discard()` replaces `view_` with a fresh view on `base_`**every doApply mutation is thrown away**:
```cpp
void ApplyContext::discard() { view_.emplace(&base_, flags_); }
```
### Return-code decision table (in `Transactor::operator()`)
| doApply returns | What commits to the ledger |
|---|---|
| `tesSUCCESS` | All doApply mutations + fee + seq (via `ctx_.apply`) |
| `tec*` (normal, `!tapRETRY`) | `reset(fee)` calls `discard()`, then re-applies fee + seq only. **All doApply mutations reverted.** |
| `tec*` with `tapFAIL_HARD` | `discard()` called directly, nothing committed (not even fee) |
| `tec*` with `tapRETRY` | `applied=false`, `ctx_.apply` never called, tx re-queued |
| `tef*` / `tem*` / `ter*` | `applied=false`, `ctx_.apply` never called |
| `tecINVARIANT_FAILED` after invariants | reset again, commit fee only |
`isTecClaimHardFail(ter, flags) = isTecClaim(ter) && !(flags & tapRETRY)` — drives the reset path.
### What this means
- **A `tec*` return from doApply acts as a full-transaction rollback.** You do NOT need to order mutations defensively. If a helper called late in doApply returns `tec*`, everything mutated earlier is discarded.
- **Orphan-state bugs "we mutated X then returned tec* so X is now inconsistent" are not possible at the transactor boundary.**
- **The real failure mode**: stale SLE pointers, missing `view().update(sle)` after mutation, mutating values read by value. These are in-memory bugs, not state-commit bugs.
- **Sandboxes inside `doApply` add nesting, not safety.** `PaymentSandbox`/nested `ApplyView` are for conditionally committing a *subset* of changes within a single doApply.
- **Only `ctx_.apply(result)` publishes to `base_`**; a doApply that returns early, throws, or crashes never reaches that call.
### `reset()` Fee Clamping
`reset()` discards all ledger mutations via `ctx_.discard()` then re-deducts the fee, clamping if necessary:
```cpp
if (fee > balance) fee = balance;
```
This ensures a failing transaction can still claim its fee even when the account is over-committed.
### Verifying a suspected orphan-state bug
1. Read the caller of `doApply` — confirm the TER path (`operator()` in Transactor.cpp).
2. Check whether `discard()` is reached via `reset()` or the `tapFAIL_HARD` branch.
3. If both paths call `discard()`, the mutations cannot persist on tec*.
4. Look instead for: missing `view().update(sle)`, stale SLE pointers, or genuine non-atomic side effects (e.g., hash router flags — NOT in ApplyContext view).
## Apply Loop Details (Transactor::operator()())
1. RAII guards: `NumberSO`, `CurrentTransactionRulesGuard` (for `fixUniversalNumber`, `featureSingleAssetVault`, `featureLendingProtocol`)
2. Debug builds: serialize/re-parse round-trip catches serdes mismatches; `trapTransaction()` provides a named breakpoint for replaying specific transactions
3. `apply()` runs `preCompute()` → captures `preFeeBalance_``consumeSeqProxy()``payFee()` → updates `sfAccountTxnID``doApply()`
4. Enforces `tecOVERSIZE` if metadata exceeds `oversizeMetaDataCap`
5. Special `tec` codes (`tecOVERSIZE`, `tecKILLED`, `tecINCOMPLETE`, `tecEXPIRED`) trigger context-diff visitation then targeted cleanup: `removeUnfundedOffers`, `removeExpiredNFTokenOffers`, `removeDeletedTrustLines`, `removeDeletedMPTs`, `removeExpiredCredentials`
6. `ctx_.checkInvariants()` runs all 25+ invariants; failure causes second reset + re-check; second failure escalates to `tefINVARIANT_FAILED` (not included in ledger)
7. `tapDRY_RUN` forces `applied=false` unconditionally
## Common Bug Patterns
- New transaction type missing preflight validation for new fields = malformed transactions reach doApply and corrupt state
- Forgetting to handle `tecCLAIM` in doApply: fee is deducted but no other state changes should occur
- Batch transactions have their own signing path (`checkBatchSign`); changes to signing must cover both paths
- `calculateBaseFee` override without updating `minimumFee` causes fee calculation divergence between nodes
- Missing invariant checker update for new ledger entry types = silent constraint violations
- Forgetting amendment gating: place feature checks in `checkExtraFeatures`, NOT `preflight`
- Using `view().update()` on a stale SLE pointer after another mutation
- Computing reserve against `view().peek(account)->getFieldAmount(sfBalance)` AFTER fee deduction instead of `preFeeBalance_`
- Missing `associateAsset(*sle, asset)` call at end of `doApply` for SLEs with `STNumber` or `STTakesAsset` fields (lending/vault transactors)
- preclaim `Rules` race: if ledger advanced between preflight and preclaim, `applySteps.cpp` silently re-runs preflight with new rules before constructing `PreclaimContext`
- Calling `ter*` codes before signature verification in preclaim (see security invariant above)
## Transactor Template
### Header (`include/xrpl/tx/transactors/.../MyTx.h`)
```cpp
#pragma once
#include <xrpl/tx/Transactor.h>
namespace xrpl {
class MyTransaction : public Transactor {
public:
static constexpr ConsequencesFactoryType ConsequencesFactory{Normal};
explicit MyTransaction(ApplyContext& ctx) : Transactor(ctx) {}
static bool checkExtraFeatures(PreflightContext const& ctx); // amendment gating
static std::uint32_t getFlagsMask(PreflightContext const& ctx);
static NotTEC preflight(PreflightContext const& ctx); // NO ledger
static TER preclaim(PreclaimContext const& ctx); // read-only
TER doApply() override; // read-write
};
}
```
### Implementation
```cpp
bool MyTransaction::checkExtraFeatures(PreflightContext const& ctx)
{ // PREFERRED location for amendment checks
return ctx.rules.enabled(featureMyFeature);
}
NotTEC MyTransaction::preflight(PreflightContext const& ctx)
{ // Static validation — NO ctx.view, NO ledger access
if (ctx.tx[sfAmount] <= beast::zero)
return temBAD_AMOUNT;
return tesSUCCESS;
}
TER MyTransaction::preclaim(PreclaimContext const& ctx)
{ // Read-only — ctx.view.read() only, NO peek/insert/erase
if (!ctx.view.exists(keylet::account(ctx.tx[sfAccount])))
return terNO_ACCOUNT;
return tesSUCCESS;
}
TER MyTransaction::doApply()
{ // Mutable — view().peek(), view().insert(), view().update(), view().erase()
auto sle = view().peek(keylet::account(account_));
sle->setFieldAmount(sfBalance, newBal);
view().update(sle); // REQUIRED after mutation
return tesSUCCESS;
}
```
### Registration Checklist
```cpp
// ALL of these are REQUIRED for a new transaction type:
// 1. transactions.macro: TRANSACTION(ttMY_TYPE, N, MyTx, delegation, fields, privileges)
// 2. applySteps.cpp: case ttMY_TYPE: dispatched via X-macro automatically
// 3. features.macro: XRPL_FEATURE(MyFeature, Supported::yes, DefaultNo)
// 4. Feature.h: increment numFeatures
// 5. InvariantCheck.cpp: update privilege mask + checkers if new ledger objects
// 6. Batch.cpp: add to disabledTxTypes if not batch-compatible
// 7. Permission table: add granular permissions if delegable
```
### Common Field Constraints (constants in `Protocol.h`)
- `maxCredentialURILength` = 256, `maxCredentialTypeLength` = 64
- `maxTokenURILength` = 256 (NFT URI), `dirMaxTokensPerPage` = 32
- `maxMultiSigners` = 32, `MaxPathSize` = 6, `MaxPathLength` = 8
- `maxBatchTxCount` = 8, `maxOracleDataSeries` = 10
- `maxPermissionedDomainCredentialsArraySize` = 10
- `maxDeletableTokenOfferEntries` = 500, `maxDeletableDirEntries` = 1000
- `maxDeletableAMMTrustLines` = 512, `maxMPTokenAmount` = 0x7FFF_FFFF_FFFF_FFFF
- `maxDataPayloadLength`, `maxMPTokenMetadataLength` = 1024
## The Big Patterns
### Sandbox Pattern (Atomic Sub-operation)
Used when multiple mutations must all succeed or all be discarded *within* a single `doApply`:
```cpp
TER doApply() override {
Sandbox sb(&view());
auto const result = applyGuts(sb, ...);
if (isTesSuccess(result))
sb.apply(ctx_.rawView());
return result;
}
```
Variants:
- `PaymentSandbox` — for `flow()` calls (used by `Payment`, `CheckCash`, `OfferCreate` crossing). Required because `flow()` uses deferred-credit accounting.
- `RippleCalc::rippleCalculate()` wraps its own `PaymentSandbox` inside the caller's sandbox (double-sandbox pattern) for exception safety — if `flow()` throws, the caller's sandbox remains unmodified.
- Dual sandbox in `OfferCreate`: `sb` (main) + `sbCancel` (offer cleanup); commit one or the other based on `tfFillOrKill` outcome.
- Nested sandboxes: `applyBatchTransactions` uses `wholeBatchView` (over outer view) + `perTxBatchView` (per inner tx).
### Reserve Check Convention
ALWAYS check against `preFeeBalance_` (snapshot before fee deduction), not the current post-fee balance. This deliberately allows accounts to dip into reserve to pay the fee while still requiring full reserve coverage for new owned objects.
```cpp
auto const reserve = view().fees().accountReserve(ownerCount + 1);
if (preFeeBalance_ < reserve)
return tecINSUFFICIENT_RESERVE;
```
### Owner Directory + Owner Count Pattern
Creating an owned object:
1. `view().dirInsert(keylet::ownerDir(owner), key, ...)` → returns page index
2. Store page index in SLE's `sfOwnerNode` (and `sfDestinationNode`, `sfIssuerNode`, etc., for multi-party objects)
3. `adjustOwnerCount(view, sleOwner, +N, j)` where N is the reserve cost
4. `view().insert(sle)`
Deleting an owned object:
1. Read `sfOwnerNode` (etc.) from SLE
2. `view().dirRemove(keylet::ownerDir(owner), pageIndex, key, false)` — O(1) using cached page
3. `adjustOwnerCount(view, sleOwner, -N, j)`
4. `view().erase(sle)`
Reserve cost is usually 1 unit per object, but:
- `AccountDelete`, `LedgerStateFix`, `AMMCreate` charge a full reserve via `calculateOwnerReserveFee` instead of base fee
- Two-object structures (`Vault`, `LoanBroker`) charge +2 for object + pseudo-account (incremented before reserve check so check reflects true post-creation state)
- `SignerListSet` post-amendment uses `lsfOneOwnerCount` flag (1 unit regardless of N signers); pre-amendment charges 2+N
- `OracleSet` uses tiered count: 1 unit for ≤5 price pairs, 2 units for more
### Pseudo-Account Pattern
Synthetic `AccountRoot` SLEs with disabled master key, used to hold protocol-managed assets on behalf of users. Created via `createPseudoAccount(view, ownerKey, sfDiscriminator)`. Examples and their discriminator fields:
| Construct | Discriminator | Owns |
|---|---|---|
| AMM | `sfAMMID` | LP token issuance, both pool asset trustlines/MPTokens |
| Vault | `sfVaultID` | Vault asset holding, share MPTokenIssuance |
| LoanBroker | `sfLoanBrokerID` | Cover capital holding |
Pseudo-account guard rules:
- `ValidPseudoAccounts` invariant: exactly one discriminator field, sequence never changes, required flags (`lsfDisableMaster | lsfDefaultRipple | lsfDepositAuth`), no `sfRegularKey`
- For pseudo-accounts, initial sequence must be 0 and flags must be exactly `lsfDisableMaster | lsfDefaultRipple | lsfDepositAuth`
- Many transactors explicitly reject pseudo-account destinations (`tecPSEUDO_ACCOUNT`): `Payment` direct, `CheckCreate`, `PaymentChannelCreate`, `VaultCreate` (asset issuer), `Clawback` (holder)
- `MPTokenAuthorize` issuer-path skips pseudo-account holders (they are implicitly always authorized)
- Pseudo-accounts cannot sign — when `featureLendingProtocol` active, `checkSign` rejects with `tefBAD_AUTH`
- Anti-nesting: AMM preclaim detects LP-token-issuer pseudo-accounts via `sfAMMID` on the issuer's `AccountRoot` and rejects using them as AMM assets
### `associateAsset` Convention
After mutating any SLE that contains `STNumber` or `STTakesAsset`-derived fields (loan, broker, vault objects), call `associateAsset(*sle, asset)` at the end of `doApply`. This re-rounds stored numeric values to the asset's precision. Per `STTakesAsset.h` contract, this must be the last operation after all writes are complete. Failing to call it produces silent precision corruption.
## Permission & Delegation System
### `checkPermission` (called from preclaim)
Validates the optional `sfDelegate` field. If absent, normal account signing applies. If present:
1. Read `DelegateObject` at `keylet::delegate(account, delegate)`; missing → `terNO_DELEGATE_PERMISSION`
2. Try full transaction-type permission via `checkTxPermission()` (uses `TxType + 1` encoding)
3. Fall back to granular permission via `loadGranularPermission()` + per-transactor logic
### Encoding Convention
Permission values store both forms in a single `uint32_t`:
- Transaction types: `TxType + 1` (always ≤ `UINT16_MAX`; `+1` avoids ambiguous zero since `ttPAYMENT == 0`)
- Granular permissions: values `> UINT16_MAX`, enumerated in `permissions.macro`
`Permission` singleton asserts this separation at construction time.
### Granular Permissions
`DelegateUtils.cpp` provides:
- `checkTxPermission()` — linear scan for `TxType + 1` match; returns `terNO_DELEGATE_PERMISSION` on null delegate or no match
- `loadGranularPermission()` — populates per-tx-type granular set via `Permission::getInstance().getGranularTxType()` reverse-map; returns silently with empty set on null delegate
Examples of granular permissions:
- `Payment` direct only: `PaymentMint` (issuer source), `PaymentBurn` (issuer destination) — blocked if `sfPaths` present or asset conversion
- `AccountSet`: field-level grants per metadata field (`AccountDomainSet`, `AccountTransferRateSet`, etc.); flag changes blocked entirely
- `TrustSet`: `TrustlineAuthorize`, `TrustlineFreeze`, `TrustlineUnfreeze`; cannot create new lines, cannot change limit
- `MPTokenIssuanceSet`: `MPTokenIssuanceLock`, `MPTokenIssuanceUnlock`
## Permission Model & Cross-Transactor Static Interfaces
Several transactors expose static deletion/creation methods on `ApplyView` so other transactors (especially `AccountDelete`) can clean up owned objects without constructing a fake transaction:
- `DepositPreauth::removeFromLedger(ApplyView&, uint256, Journal)`
- `DIDDelete::deleteSLE(ApplyView&, SLE, AccountID, Journal)`
- `OracleDelete::deleteOracle(ApplyView&, SLE, AccountID, Journal)`
- `DelegateSet::deleteDelegate(ApplyView&, SLE, AccountID, Journal)`
- `SignerListSet::removeFromLedger(ApplyView&, ServiceRegistry&, AccountID, Journal)`
- `MPTokenIssuanceCreate::create(ApplyView&, Journal, MPTCreateArgs)` — used by `VaultCreate` to mint share token
- `AMMWithdraw::withdraw`/`equalWithdrawTokens` — used by `AMMClawback`, `AMMDelete`
- `LoanManage::unimpairLoan/impairLoan/defaultLoan` — used by `LoanPay`
`AccountDelete` uses a `nonObligationDeleter()` switch over `LedgerEntryType` returning a `DeleterFuncPtr`. `nullptr` means "obligation, cannot delete". The same switch is used in both `preclaim` (to detect blockers) and `doApply` (to invoke deletions), keeping type classification in sync. Deletable types: offers, signer lists, tickets, deposit preauth, NFT offers, DIDs, oracles, credentials, delegates.
### AccountDelete-specific preclaim rules
- NFT obligations: `sfMintedNFTokens != sfBurnedNFTokens``tecHAS_OBLIGATIONS`; authorized-minting replay guard: `FirstNFTokenSequence + MintedNFTokens + 255` must not exceed current ledger sequence
- Sequence freshness: account seq must be ≥ 256 below current ledger index (`tecTOO_SOON`) — prevents replay after resurrection
- Owner directory: more than `maxDeletableDirEntries` (1000) deletable items → `tefTOO_BIG`
## Signature Verification
`checkSign()` (in preclaim) dispatches:
1. **Batch inner** (`tfInnerBatchTxn`): asserts no key/sig/signers; outer batch authorized them
2. **Dry-run** (`tapDRY_RUN`): skipped if no key/signers
3. **Multi-sign** (`sfSigners` present): delegates to `checkMultiSign()`
4. **Single sig**: derives signer from public key, calls `checkSingleSign()`
`checkSingleSign()` precedence: regular key → enabled master key → `tefMASTER_DISABLED`.
`checkMultiSign()` performs O(n) linear merge of sorted `sfSigners` against the sorted `SignerEntry` list from the account's signer list SLE. Terminates with `tefBAD_QUORUM` if accumulated weight < `sfSignerQuorum`.
`checkBatchSign()` validates the outer batch transaction's `sfBatchSigners` array. Outer account is excluded from `sfBatchSigners`; unsigned-account inner transactions (e.g., funding an account creation) are permitted if signed by their master key.
`LoanSet::checkSign()` overrides to verify both the primary signer AND the `sfCounterpartySignature` sub-object (which may itself be single or multisig). `calculateBaseFee` adds one `baseFee` per counterparty signer.
## Validation Helpers (in `Transactor`)
- `validNumericRange<T>(opt, min, max)` absent optional is valid
- `validNumericMinimum<T>(opt, min)` absent optional is valid
- Overloads for `unit::ValueUnit<Unit, T>` for type-safe units
These follow the convention that an absent optional field is valid; only present values are range-checked.
## Invariant Checker Framework
After every successful or fee-claiming transaction, every checker in the `InvariantChecks` tuple runs. Two-phase: `visitEntry(isDelete, before, after)` per modified SLE, then `finalize(tx, result, fee, view, journal)` once.
### Dispatch (in `ApplyContext::checkInvariantsHelper`)
Uses `std::index_sequence` + fold expression for variadic visit. Critically, `finalize()` results are collected into a `std::array<bool>` then checked with `std::all_of` NOT short-circuited with `&&` so every failing invariant logs its own diagnostic.
Invariant checkers run even on failed (`tec*`) transactions bugs or exploits could cause a failed transaction to mutate ledger state unexpectedly.
### `failInvariantCheck` Escalation
- First failure `tecINVARIANT_FAILED` (committed to ledger, fee charged)
- Repeated failure during retry (recognized because incoming result is already `tecINVARIANT_FAILED` or `tefINVARIANT_FAILED`) `tefINVARIANT_FAILED` (not included in ledger)
Rationale: if even the minimal fee-charge path breaks invariants, no ledger entry of any kind should be created.
### The `enforce` Pattern (Soft Rollout)
```cpp
bool const enforce = view.rules().enabled(featureX);
if (violation) {
JLOG(j.fatal()) << "...";
XRPL_ASSERT(enforce, "..."); // fires in debug builds regardless
return !enforce; // returns true (passes) if amendment off
}
```
This lets invariants ship before activation: violations log unconditionally (visible to operators), assertion fires in debug/test builds (catches dev mistakes), but only become consensus-breaking when the gating amendment activates.
### Privilege System (`InvariantCheckPrivilege.h`)
`Privilege` bitmask enum + `hasPrivilege(STTx, Privilege)` (implemented via `transactions.macro` X-macro). Used by checkers to know what each transaction type may legitimately do. `must` vs. `may` variants let invariants enforce cardinality (e.g., `AccountDelete` *must* delete exactly one account root; `AMMWithdraw` *may* delete one).
| Privilege | Granted to (examples) |
|---|---|
| `createAcct` | `Payment` (XRP funding) |
| `createPseudoAcct` | `AMMCreate`, `VaultCreate`, `LoanBrokerSet` |
| `mustDeleteAcct` | `AccountDelete`, `AMMDelete` |
| `mayDeleteAcct` | `AMMWithdraw`, `AMMClawback` |
| `overrideFreeze` | `AMMClawback` (only against AMM trust lines, not global freeze) |
| `changeNFTCounts` | `NFTokenMint`, `NFTokenBurn` |
| `createMPTIssuance` / `destroyMPTIssuance` | `MPTokenIssuanceCreate`/`Destroy`, also `VaultCreate`/`Delete` |
| `mustAuthorizeMPT` / `mayAuthorizeMPT` | `MPTokenAuthorize`, AMM withdraw/clawback |
| `mayCreateMPT` / `mayDeleteMPT` | `Payment`, `CheckCash`, `AMMCreate`, `AMMDelete` |
| `mustModifyVault` / `mayModifyVault` | Vault transactors, loan transactors |
### The 25+ Registered Invariants
| Checker | What it enforces |
|---|---|
| `TransactionFeeCheck` | Fee non-negative, < INITIAL_XRP, sfFee |
| `XRPNotCreated` | Net XRP delta across accounts/paychans/escrows = -fee (pay channels tracked as `sfAmount - sfBalance`) |
| `XRPBalanceChecks` | Every account balance is native XRP in [0, INITIAL_XRP] |
| `NoBadOffers` | No negative-amount, no XRP-for-XRP offers |
| `NoZeroEscrow` | Escrow/MPT amounts within bounds; MPT locked outstanding; also validates `ltMPTOKEN_ISSUANCE` and `ltMPTOKEN` entries |
| `AccountRootsNotDeleted` | Account deletion cardinality matches `must`/`may` privilege |
| `AccountRootsDeletedClean` | Deleted account had zero balance + zero owner count + no orphaned objects; uses `before` SLE for pseudo-account linked object keys (fields may be cleared during deletion) |
| `ValidNewAccountRoot` | New accounts only from `createAcct`/`createPseudoAcct`; correct initial seq + flags |
| `ValidPseudoAccounts` | Exactly one discriminator, sequence unchanged, required flags, no regular key; errors accumulated in `vector<string>` and all logged before returning |
| `ValidClawback` | At most one trust line/MPT modified, holder balance non-negative |
| `NoModifiedUnmodifiableFields` | `sfLedgerEntryType`/`sfLedgerIndex` immutable; loan/broker origination fields immutable; gated on `featureLendingProtocol` |
| `LedgerEntryTypesMatch` | Modified entries don't change type; new entries are recognized types |
| `NoXRPTrustLines` | No trust line uses XRP as currency |
| `NoDeepFreezeTrustLinesWithoutFreeze` | DeepFreeze flag requires regular Freeze flag |
| `TransfersNotFrozen` | Trust line transfers respect global/per-line/deep freeze (gated `featureDeepFreeze`) |
| `ValidNFTokenPage` | Page links coherent, size 1-32 tokens, sorted, valid URIs |
| `NFTokenCountTracking` | `sfMintedNFTokens`/`sfBurnedNFTokens` only change with `changeNFTCounts` privilege; strict monotonic increase on success |
| `ValidMPTIssuance` | MPT issuance/holder counts match transaction privileges |
| `ValidMPTPayment` | OutstandingAmount = sum(holder MPTAmount + LockedAmount); overflow detection |
| `ValidAMM` | Per-tx-type rules: create exact `sqrt(A*B)`, deposit/withdraw constant-product invariant `sqrt(x*y) ≥ LPSupply`, vote/bid leave pool unchanged |
| `ValidPermissionedDomain` | AcceptedCredentials non-empty, max size, unique, sorted |
| `ValidPermissionedDEX` | Domain-scoped tx only touches offers/dirs with matching domain; hybrid offers structurally valid |
| `ValidVault` | Per-tx-type rules: deposit/withdraw asset/share conservation, immutable fields unchanged, loss only via loan ops; XRP vault fee compensation for depositor/withdrawer balance check |
| `ValidLoan` | Payment completion bidirectional (paymentRemaining=0 all outstanding=0), `lsfLoanOverpayment` immutable, non-negative fees, positive `sfPeriodicPayment` |
| `ValidLoanBroker` | Sequence monotonic, non-negative cover/debt, vault exists, cover pseudo-account balance (== under `fixSecurity3_1_3` except at delete); no amendment gate (objects can't exist unless amendment is active) |
## doApply Order Convention (Cleanup)
When erasing an SLE that participates in directories, the order is **always**:
1. Remove from owner directory (and destination/issuer directory if applicable) via `dirRemove` with stored `sfOwnerNode`/etc.
2. `adjustOwnerCount(view, sleOwner, -N, j)`
3. `view().erase(sle)`
Erasing first would lose the page index needed for `dirRemove`. Many transactors guard `dirRemove` failure with `tefBAD_LEDGER` and `LCOV_EXCL` markers these branches represent ledger corruption rather than user error.
## Failure Modes Worth Special-Casing
- `tecOVERSIZE`: metadata too large. `operator()` re-runs `doApply` after `reset()` to collect cleanup targets only
- `tecINCOMPLETE`: progress was made but more work remains. `AMMDelete` and `VaultDelete` commit partial work on this code caller resubmits
- `tecPATH_DRY`: payment path exhausted. `Payment` converts retry codes from `RippleCalc` to this (forces fee deduction, prevents path-spam)
- `tecKILLED`: order/loan time-window expired or sequence overflow (`LoanSet` arithmetic overflow check)
- `tecEXPIRED`: legitimately expired object; some transactors (e.g., `NFTokenAcceptOffer` under `fixExpiredNFTokenOfferRemoval`) clean up before returning this
- `tecINSUFFICIENT_RESERVE`: reserve check failed against `preFeeBalance_`
- `tecINTERNAL` / `tefBAD_LEDGER`: ledger corruption sentinels. Often marked `LCOV_EXCL` because preclaim should have prevented them. `RippleCalc` converts exceptions to `tecINTERNAL` rather than rethrowing (deterministic fallback all validators agree on)
- `terNO_AMM`, `terNO_DELEGATE_PERMISSION`, `terNO_ACCOUNT`, `terNO_LINE`: retryable failures
## Hash Router Caching
Some expensive operations cache results in the `HashRouter` using private flag bits to avoid recomputation across multiple validation passes:
- **Signature verification** (`apply.cpp` `checkValidity`): `SF_SIGBAD`, `SF_SIGGOOD`, `SF_LOCALBAD`, `SF_LOCALGOOD` (PRIVATE1PRIVATE4)
- **Crypto-condition validation** (`EscrowFinish::preflightSigValidated`): `SF_CF_VALID`, `SF_CF_INVALID` (PRIVATE5PRIVATE6)
The `forceValidity()` API can promote cached state (using `[[fallthrough]]`) but cannot downgrade (never sets `SF_SIGBAD`) used to mark locally-submitted transactions as pre-verified. **Use with extreme care**: bypassing signature verification in the cache affects every subsequent `checkValidity` call on the same hash until cache expiry.
Validity enum P2P semantics: `SigBad` = don't forward; `SigGoodOnly` = relay but don't apply; `Valid` = relay and apply.
## Batch Transactions
`Batch` (in `system/Batch.cpp`) bundles 2-8 inner transactions with one of four execution policies (mutually exclusive, enforced via `std::popcount`):
- `tfAllOrNothing`: any failure aborts, full rollback (`applyBatchTransactions` returns false)
- `tfUntilFailure`: stop at first failure, keep prior successes (returns false if no inner tx was ever applied)
- `tfOnlyOne`: stop at first success
- `tfIndependent`: run all, commit successes
`Batch::doApply()` returns `tesSUCCESS` and does nothing `applyBatchTransactions()` in `apply.cpp` is called separately by `applyTransaction()` after the outer apply succeeds, executing inner txs in a nested `wholeBatchView`/`perTxBatchView` sandbox structure.
**Critical for new transactors:** Update `disabledTxTypes` in `Batch.cpp` if your type cannot run inside a batch. Currently disabled: all `ttVAULT_*` and `ttLOAN_*` types (multi-step state machines whose invariants are difficult to reason about under batch atomicity).
Inner transaction rules (enforced in `Batch::preflight`):
- `tfInnerBatchTxn` flag must be set
- Empty `sfSigningPubKey`, no `sfTxnSignature`, no `sfSigners`
- Fee = 0 XRP
- Exactly one of `sfSequence` (nonzero) or `sfTicketSequence`
- For `tfAllOrNothing`/`tfUntilFailure`: no duplicate sequence/ticket values across same-account inner txs (relaxed for `tfIndependent`/`tfOnlyOne`)
- Each inner tx has `xrpl::preflight` called on it with `tapBATCH` and `parentBatchId`; no nested `ttBATCH`
`Batch::preflightSigValidated` reconciles `sfBatchSigners` bidirectionally: each signer removed from `requiredSigners` as matched; any signer not in `requiredSigners` `temBAD_SIGNER`; outer account explicitly excluded. Then `ctx.tx.checkBatchSign(ctx.rules)` verifies the cryptographic batch signature payload.
`Batch::calculateBaseFee` = `baseFee + Σ(inner tx fees) + numSigners × baseFee`. Overflow guards everywhere (marked `LCOV_EXCL`).
**`fixBatchInnerSigs` amendment**: corrects a bug in the original Batch implementation where inner-batch transactions could be assigned `SF_SIGGOOD` cache entries (implying valid signatures on unsigned objects). After the fix, inner-batch transactions follow the `neverValid` path.
All Batch log messages use `BatchTrace[<parentBatchId>]` prefix for correlation.
## Key Files
- `src/libxrpl/tx/Transactor.cpp` - base class, three-phase pipeline, fee calculation, signature dispatch
- `src/libxrpl/tx/ApplyContext.cpp` - sandboxed view management, `discard()`, invariant orchestration
- `src/libxrpl/tx/apply.cpp` - top-level `apply()`, `checkValidity()` caching, `applyBatchTransactions()`
- `src/libxrpl/tx/applySteps.cpp` - X-macro dispatch via `with_txn_type`, `TxConsequences` factories
- `src/libxrpl/tx/SignerEntries.cpp` - multi-sig signer list deserialization (`SignerEntries::deserialize`)
- `include/xrpl/protocol/detail/transactions.macro` - canonical type definitions, privileges, features
- `src/libxrpl/tx/transactors/.../` - one subdirectory per feature family (account, dex, escrow, lending, vault, etc.)
- `src/libxrpl/tx/invariants/` - 25+ invariant checkers; add new ones to `InvariantChecks` tuple in `InvariantCheck.h`
- `src/libxrpl/tx/paths/` - payment flow engine (`Flow.cpp`, `StrandFlow.h`, `BookStep.cpp`, `RippleCalc.cpp`) used by `Payment`, `CheckCash`, `OfferCreate` crossing
## Payment Path Engine Notes
`Payment`, `OfferCreate` (crossing), and `CheckCash` (IOU/MPT) all route through `flow()` in `Flow.cpp` `StrandFlow.h`. Key concepts:
- A **strand** is a `std::vector<std::unique_ptr<Step>>`; each `Step` is one hop (`DirectStepI`, `BookStepXX`, `XRPEndpointStep`, `MPTEndpointStep`)
- `flow()` is templated on `(TIn, TOut)` pairs for the three asset types (6 combinations). `Flow.cpp` is the façade that resolves runtime `STAmount`/`Asset` values into compile-time template parameters via `std::visit`, then hands off to `StrandFlow.h`.
- Two-pass execution: reverse pass (compute required input for desired output) then forward pass (compute output for actual input)
- Limiting step detection: if reverse pass cannot satisfy desired output, that step is identified as the bottleneck and used as the anchor for forward pass
- Multi-strand flow uses `ActiveStrands` priority queue sorted by `qualityUpperBound`; one strand consumed per outer iteration (probe-and-push)
- Safety limits: `MaxOffersToConsume` = 1000 per book step, `maxTries` = 1000 outer iterations, `maxOffersToConsider` = 1500 cumulative, `AMMContext::MaxIterations` = 30
- `PaymentSandbox` (not regular `Sandbox`) is required because `flow()` uses deferred-credit accounting
- AMM offers are synthesized by `AMMLiquidity` to look like CLOB offers to `BookStep`; single-path uses `changeSpotPriceQuality`, multi-path uses Fibonacci-scaled offer sizes; `AMMContext` tracks whether multi-path is active (disables quality optimization)
- `RippleCalc::rippleCalculate()` creates a nested `PaymentSandbox` inside the caller's `PaymentSandbox` (exception safety); `flow()` applies its internal sandbox to `flowSB` only on success
- `ter*` retry codes from `RippleCalc` are converted to `tecPATH_DRY` in `Payment::doApply` (forces fee charge, prevents path-spam)
- `sfDeliverMin` + `tfPartialPayment`: if actual delivery < `sfDeliverMin` `tecPATH_PARTIAL`; `ctx_.deliver()` records actual delivered amount for metadata (critical for partial payment detection downstream)
- `std::optional<uint256> domainID` threads through `toStrands()` for permissioned payment domains
### `sendMax` semantics in `RippleCalc`
`sendMax` is `nullopt` when sending the same IOU that the destination receives with sender as issuer (no separate spending cap needed). Otherwise set to `saMaxAmountReq`. `limitQuality` is only constructed when `pInputs->limitQuality && saMaxAmountReq > beast::zero`.
## Asset Type Dispatch Pattern
Modern transactors that support both IOU (`Issue`) and MPT (`MPTIssue`) assets use template specialization + `std::visit` rather than runtime branching. The pattern:
```cpp
TER MyTx::preclaim(PreclaimContext const& ctx) {
return std::visit(
[&]<typename T>(T const&) { return preclaimHelper<T>(ctx); },
ctx.tx[sfAmount].asset().value());
}
template <ValidIssueType T>
static TER preclaimHelper(PreclaimContext const& ctx);
template <> TER preclaimHelper<Issue>(...);
template <> TER preclaimHelper<MPTIssue>(...);
```
Used by `Clawback`, `Escrow*`, `Vault*`, `AMM*Withdraw/Deposit`, `LoanBrokerCoverClawback`. Each specialization handles asset-type-specific permission flags (`lsfAllowTrustLineClawback`/`lsfNoFreeze` vs `lsfMPTCanClawback`), authorization (`StrongAuth` vs `WeakAuth`), and freeze checks (`tecFROZEN` vs `tecLOCKED`).
## Lending Protocol (XLS-66)
`LendingHelpers.cpp` is the numerical core. Key concepts:
- **Amortization math**: `loanPeriodicPayment()` uses standard `r(1+r)^n / ((1+r)^n - 1)` factor (Eq. 67 in XLS-66 Eq. Glossary). Zero-interest path uses equal principal slices (no division by zero).
- **Theoretical vs. rounded state**: `LoanProperties` holds both; `computeTheoreticalLoanState()` computes at full precision; `constructRoundedLoanState()` reflects actual ledger values. Rounding errors are carried forward during re-amortization.
- **Payment types**: regular, late (penalty via `loanLatePaymentInterest`), full/early-closure, overpayment (triggers re-amortization via `tryOverpayment()`).
- **`checkLoanGuards()`** enforces 4 precision invariants at creation/re-amortization: measurable interest, positive first-payment principal, non-zero rounded payment, payment count math. All return `tecPRECISION_LOSS`.
- **Template proxy pattern**: `doPayment<NumberProxy, UInt32Proxy, UInt32OptionalProxy>` runs against real SLE (via `ValueProxy`) or simulation values same code path for both.
- `loanMakePayment()` dispatches to the correct payment type and runs up to `loanMaximumPaymentsPerTransaction` installments per call.
## Vault Architecture
Six vault transactors: `VaultCreate`, `VaultDeposit`, `VaultWithdraw`, `VaultSet`, `VaultDelete`, `VaultClawback`. Key creation invariants (see `VaultCreate.cpp`):
- `sfWithdrawalPolicy` currently only accepts `vaultStrategyFirstComeFirstServe` (= 1)
- `sfDomainID` is only valid when `tfVaultPrivate` is set
- `sfScale` restricted: meaningless for XRP/MPT assets; bounded above by `vaultMaximumIOUScale` (18)
- Vault pseudo-account asset issuer cannot be another pseudo-account (`tecWRONG_ASSET`) those assets have no clawback path
- `adjustOwnerCount` increments by **2** (vault + pseudo-account) before reserve check
- MPT share issuance flags derived from transaction flags: tradeable unless `tfVaultShareNonTransferable`; `lsfMPTRequireAuth` added for private vaults
- `associateAsset` is the final call in `doApply`
`ValidVault` invariant uses a delta-map (`uint256 → Number`) with sign conventions per entry type (+1 for share issuance outstanding amount, -1 for asset balances). Entries captured even at zero delta for accounting completeness. Fee compensation applied for XRP vault balance deltas (skipped for delegated transactions).

62
docs/skills/websockets.md Normal file
View File

@@ -0,0 +1,62 @@
# WebSockets
Async WebSocket support for client RPC and real-time subscriptions. Both plain and SSL. Built on Boost.Beast + Boost.Asio.
## Key Invariants
- All async operations run on a per-session Boost.Asio strand for thread safety
- Outgoing message queue (`wq_`) has a per-session limit (`port().ws_queue_limit`); exceeding it closes the connection with policy error "client is too slow"
- `complete()` must be called after processing each message to resume reading; forgetting it stalls the session
- `WSInfoSub` holds a weak pointer to `WSSession`; dead sessions are automatically skipped during event delivery
- Message size limit: `RPC::Tuning::maxRequestSize`; oversized messages get a `jsonInvalid` error response
## Connection Flow
1. `Door` accepts TCP -> `Detector` probes for SSL vs plain -> creates `SSLHTTPPeer` or `PlainHTTPPeer`
2. HTTP request with WebSocket upgrade -> `ServerHandler::onHandoff` -> `session.websocketUpgrade()` -> creates `PlainWSPeer` or `SSLWSPeer`
3. `BaseWSPeer::run()` -> set permessage-deflate options -> `async_accept` handshake -> `on_ws_handshake` -> `do_read` loop
4. `on_read` -> `ServerHandler::onWSMessage` -> validate JSON -> post to job queue -> `processSession` -> send response -> `complete()`
## Common Bug Patterns
- `on_read` consumes the buffer AFTER calling `onWSMessage`; if the handler accesses the buffer asynchronously after `onWSMessage` returns, it reads garbage
- `close()` with pending messages defers the actual close; calling `send()` after `close()` but before actual close queues more messages
- Missing `complete()` call after sending response = session never reads again = appears hung
- Job queue shutdown: if `postCoro` returns nullptr, session must close with `going_away`; dropping this silently leaks sessions
## Review Checklist
- Verify strand execution for all socket operations (read, write, close, timer)
- New subscription streams: ensure `WSInfoSub::send` handles the new event type
- Flow control: test with slow clients to verify queue limit enforcement
## Key Patterns
### complete() Resumes Read Loop
```cpp
// REQUIRED after sending response — missing this = session hangs forever
void BaseWSPeer::complete()
{
if (!strand_.running_in_this_thread())
return post(strand_, std::bind(
&BaseWSPeer::complete, impl().shared_from_this()));
do_read(); // resume reading next message
}
```
### Queue Limit Enforcement
```cpp
// REQUIRED: close slow clients to prevent memory exhaustion
if (wq_.size() >= port().ws_queue_limit)
{
close(boost::beast::websocket::close_code::policy_error);
return; // do NOT queue unboundedly
}
```
## Key Files
- `include/xrpl/server/detail/BaseWSPeer.h` - session lifecycle and message queue
- `include/xrpl/server/detail/Door.h` - connection acceptance
- `src/xrpld/rpc/detail/ServerHandler.cpp` - `onWSMessage` and `onHandoff`
- `src/xrpld/rpc/detail/WSInfoSub.h` - subscription delivery

4
flake.lock generated
View File

@@ -15,7 +15,7 @@
"type": "indirect"
}
},
"nixpkgs-custom-glibc": {
"nixpkgs-glibc231": {
"flake": false,
"locked": {
"lastModified": 1593520194,
@@ -35,7 +35,7 @@
"root": {
"inputs": {
"nixpkgs": "nixpkgs",
"nixpkgs-custom-glibc": "nixpkgs-custom-glibc"
"nixpkgs-glibc231": "nixpkgs-glibc231"
}
}
},

View File

@@ -6,16 +6,16 @@
# version — matches the system libc on Ubuntu 20.04 LTS. Imported
# manually (flake = false) because this revision predates nixpkgs'
# own flake.nix.
nixpkgs-custom-glibc = {
nixpkgs-glibc231 = {
url = "github:NixOS/nixpkgs/9cd98386a38891d1074fc18036b842dc4416f562";
flake = false;
};
};
outputs =
{ nixpkgs, nixpkgs-custom-glibc, ... }:
{ nixpkgs, nixpkgs-glibc231, ... }:
let
forEachSystem = import ./nix/utils.nix { inherit nixpkgs nixpkgs-custom-glibc; };
forEachSystem = import ./nix/utils.nix { inherit nixpkgs nixpkgs-glibc231; };
in
{
devShells = forEachSystem (import ./nix/devshell.nix);

View File

@@ -7,12 +7,48 @@
namespace xrpl {
/** Read the entire contents of a file into a string.
*
* Resolves `sourcePath` to its canonical (absolute, symlink-free) form before
* opening it, which prevents TOCTOU races between path resolution and the open.
* When `maxSize` is supplied and the file exceeds that byte count, `ec` is set
* to `boost::system::errc::file_too_large` and an empty string is returned
* without reading any data.
*
* All errors — non-existent path, permission denial, size exceeded, open
* failure, and mid-read I/O error — are reported through `ec`. The function
* never throws.
*
* @param ec Output error code; set on any failure, left unchanged on success.
* @param sourcePath Path to the file to read; must exist and be resolvable.
* @param maxSize Optional upper bound on file size in bytes. If the file is
* larger, `ec` is set to `errc::file_too_large` and `{}` is returned.
* @return The full file contents on success, or an empty string on any error.
* @note EOF during the single-pass read is not an error; only `bad()` (hardware
* or stream-corruption failure) triggers an error code after the read.
*/
std::string
getFileContents(
boost::system::error_code& ec,
boost::filesystem::path const& sourcePath,
std::optional<std::size_t> maxSize = std::nullopt);
/** Write a string to a file, creating or truncating it as necessary.
*
* Opens `destPath` with `std::ios::out | std::ios::trunc`, so any existing
* content is discarded and the file is created if it does not yet exist.
* This is a full replacement, not an atomic rename-and-swap; callers that
* require crash-safe writes must implement that at a higher level.
*
* All errors — open failure and mid-write I/O error — are reported through
* `ec`. The function never throws.
*
* @param ec Output error code; set on any failure, left unchanged on success.
* @param destPath Path to the destination file; parent directory must exist.
* @param contents Data to write; written in a single `<<` operation.
* @note Unlike `getFileContents`, this function does not call `canonical()`
* because the destination file may not yet exist.
*/
void
writeFileContents(
boost::system::error_code& ec,

View File

@@ -406,8 +406,8 @@ private:
// pointer. The low bit must be masked to zero when converting back to a
// pointer. If the low bit is '1', this is a weak pointer.
std::uintptr_t tp_{0};
static constexpr std::uintptr_t kTagMask = 1;
static constexpr std::uintptr_t kPtrMask = ~kTagMask;
static constexpr std::uintptr_t kTAG_MASK = 1;
static constexpr std::uintptr_t kPTR_MASK = ~kTAG_MASK;
private:
/** Return the raw pointer held by this object.

View File

@@ -567,14 +567,14 @@ template <class T>
bool
SharedWeakUnion<T>::isStrong() const
{
return (tp_ & kTagMask) == 0u;
return (tp_ & kTAG_MASK) == 0u;
}
template <class T>
bool
SharedWeakUnion<T>::isWeak() const
{
return (tp_ & kTagMask) != 0u;
return (tp_ & kTAG_MASK) != 0u;
}
template <class T>
@@ -641,7 +641,7 @@ template <class T>
T*
SharedWeakUnion<T>::unsafeGetRawPtr() const
{
return reinterpret_cast<T*>(tp_ & kPtrMask);
return reinterpret_cast<T*>(tp_ & kPTR_MASK);
}
template <class T>
@@ -650,7 +650,7 @@ SharedWeakUnion<T>::unsafeSetRawPtr(T* p, RefStrength rs)
{
tp_ = reinterpret_cast<std::uintptr_t>(p);
if (tp_ && rs == RefStrength::Weak)
tp_ |= kTagMask;
tp_ |= kTAG_MASK;
}
template <class T>

View File

@@ -98,11 +98,11 @@ private:
// enough for strong pointers and 14 bit counts are enough for weak
// pointers. Use type aliases to make it easy to switch types.
using CountType = std::uint16_t;
static constexpr size_t kStrongCountNumBits = sizeof(CountType) * 8;
static constexpr size_t kWeakCountNumBits = kStrongCountNumBits - 2;
static constexpr size_t kSTRONG_COUNT_NUM_BITS = sizeof(CountType) * 8;
static constexpr size_t kWEAK_COUNT_NUM_BITS = kSTRONG_COUNT_NUM_BITS - 2;
using FieldType = std::uint32_t;
static constexpr size_t kFieldTypeBits = sizeof(FieldType) * 8;
static constexpr FieldType kOne = 1;
static constexpr size_t kFIELD_TYPE_BITS = sizeof(FieldType) * 8;
static constexpr FieldType kONE = 1;
/** `refCounts` consists of four fields that are treated atomically:
@@ -137,21 +137,21 @@ private:
*/
mutable std::atomic<FieldType> refCounts_{kStrongDelta};
mutable std::atomic<FieldType> refCounts_{kSTRONG_DELTA};
/** Amount to change the strong count when adding or releasing a reference
Note: The strong count is stored in the low `StrongCountNumBits` bits
of refCounts
*/
static constexpr FieldType kStrongDelta = 1;
static constexpr FieldType kSTRONG_DELTA = 1;
/** Amount to change the weak count when adding or releasing a reference
Note: The weak count is stored in the high `WeakCountNumBits` bits of
refCounts
*/
static constexpr FieldType kWeakDelta = (kOne << kStrongCountNumBits);
static constexpr FieldType kWEAK_DELTA = (kONE << kSTRONG_COUNT_NUM_BITS);
/** Flag that is set when the partialDestroy function has started running
(or is about to start running).
@@ -159,33 +159,34 @@ private:
See description of the `refCounts` field for a fuller description of
this field.
*/
static constexpr FieldType kPartialDestroyStartedMask = (kOne << (kFieldTypeBits - 1));
static constexpr FieldType kPARTIAL_DESTROY_STARTED_MASK = (kONE << (kFIELD_TYPE_BITS - 1));
/** Flag that is set when the partialDestroy function has finished running
See description of the `refCounts` field for a fuller description of
this field.
*/
static constexpr FieldType kPartialDestroyFinishedMask = (kOne << (kFieldTypeBits - 2));
static constexpr FieldType kPARTIAL_DESTROY_FINISHED_MASK = (kONE << (kFIELD_TYPE_BITS - 2));
/** Mask that will zero out all the `count` bits and leave the tag bits
unchanged.
*/
static constexpr FieldType kTagMask = kPartialDestroyStartedMask | kPartialDestroyFinishedMask;
static constexpr FieldType kTAG_MASK =
kPARTIAL_DESTROY_STARTED_MASK | kPARTIAL_DESTROY_FINISHED_MASK;
/** Mask that will zero out the `tag` bits and leave the count bits
unchanged.
*/
static constexpr FieldType kValueMask = ~kTagMask;
static constexpr FieldType kVALUE_MASK = ~kTAG_MASK;
/** Mask that will zero out everything except the strong count.
*/
static constexpr FieldType kStrongMask = ((kOne << kStrongCountNumBits) - 1) & kValueMask;
static constexpr FieldType kSTRONG_MASK = ((kONE << kSTRONG_COUNT_NUM_BITS) - 1) & kVALUE_MASK;
/** Mask that will zero out everything except the weak count.
*/
static constexpr FieldType kWeakMask =
(((kOne << kWeakCountNumBits) - 1) << kStrongCountNumBits) & kValueMask;
static constexpr FieldType kWEAK_MASK =
(((kONE << kWEAK_COUNT_NUM_BITS) - 1) << kSTRONG_COUNT_NUM_BITS) & kVALUE_MASK;
/** Unpack the count and tag fields from the packed atomic integer form. */
struct RefCountPair
@@ -210,29 +211,29 @@ private:
[[nodiscard]] FieldType
combinedValue() const noexcept;
static constexpr CountType kMaxStrongValue =
static_cast<CountType>((kOne << kStrongCountNumBits) - 1);
static constexpr CountType kMaxWeakValue =
static_cast<CountType>((kOne << kWeakCountNumBits) - 1);
static constexpr CountType kMAX_STRONG_VALUE =
static_cast<CountType>((kONE << kSTRONG_COUNT_NUM_BITS) - 1);
static constexpr CountType kMAX_WEAK_VALUE =
static_cast<CountType>((kONE << kWEAK_COUNT_NUM_BITS) - 1);
/** Put an extra margin to detect when running up against limits.
This is only used in debug code, and is useful if we reduce the
number of bits in the strong and weak counts (to 16 and 14 bits).
*/
static constexpr CountType kCheckStrongMaxValue = kMaxStrongValue - 32;
static constexpr CountType kCheckWeakMaxValue = kMaxWeakValue - 32;
static constexpr CountType kCHECK_STRONG_MAX_VALUE = kMAX_STRONG_VALUE - 32;
static constexpr CountType kCHECK_WEAK_MAX_VALUE = kMAX_WEAK_VALUE - 32;
};
};
inline void
IntrusiveRefCounts::addStrongRef() const noexcept
{
refCounts_.fetch_add(kStrongDelta, std::memory_order_acq_rel);
refCounts_.fetch_add(kSTRONG_DELTA, std::memory_order_acq_rel);
}
inline void
IntrusiveRefCounts::addWeakRef() const noexcept
{
refCounts_.fetch_add(kWeakDelta, std::memory_order_acq_rel);
refCounts_.fetch_add(kWEAK_DELTA, std::memory_order_acq_rel);
}
inline ReleaseStrongRefAction
@@ -251,10 +252,10 @@ IntrusiveRefCounts::releaseStrongRef() const
{
RefCountPair const prevVal{prevIntVal};
XRPL_ASSERT(
(prevVal.strong >= kStrongDelta),
(prevVal.strong >= kSTRONG_DELTA),
"xrpl::IntrusiveRefCounts::releaseStrongRef : previous ref "
"higher than new");
auto nextIntVal = prevIntVal - kStrongDelta;
auto nextIntVal = prevIntVal - kSTRONG_DELTA;
ReleaseStrongRefAction action = NoOp;
if (prevVal.strong == 1)
{
@@ -264,7 +265,7 @@ IntrusiveRefCounts::releaseStrongRef() const
}
else
{
nextIntVal |= kPartialDestroyStartedMask;
nextIntVal |= kPARTIAL_DESTROY_STARTED_MASK;
action = PartialDestroy;
}
}
@@ -275,7 +276,7 @@ IntrusiveRefCounts::releaseStrongRef() const
// count to zero can start a partial destroy, and that can't happen
// twice.
XRPL_ASSERT(
(action == NoOp) || !(prevIntVal & kPartialDestroyStartedMask),
(action == NoOp) || !(prevIntVal & kPARTIAL_DESTROY_STARTED_MASK),
"xrpl::IntrusiveRefCounts::releaseStrongRef : not in partial "
"destroy");
return action;
@@ -288,8 +289,8 @@ IntrusiveRefCounts::addWeakReleaseStrongRef() const
{
using enum ReleaseStrongRefAction;
static_assert(kWeakDelta > kStrongDelta);
static constexpr auto kDelta = kWeakDelta - kStrongDelta;
static_assert(kWEAK_DELTA > kSTRONG_DELTA);
auto constexpr kDELTA = kWEAK_DELTA - kSTRONG_DELTA;
auto prevIntVal = refCounts_.load(std::memory_order_acquire);
// This loop will almost always run once. The loop is needed to atomically
// change the counts and flags (the count could be atomically changed, but
@@ -311,7 +312,7 @@ IntrusiveRefCounts::addWeakReleaseStrongRef() const
"xrpl::IntrusiveRefCounts::addWeakReleaseStrongRef : not in "
"partial destroy");
auto nextIntVal = prevIntVal + kDelta;
auto nextIntVal = prevIntVal + kDELTA;
ReleaseStrongRefAction action = NoOp;
if (prevVal.strong == 1)
{
@@ -321,14 +322,14 @@ IntrusiveRefCounts::addWeakReleaseStrongRef() const
}
else
{
nextIntVal |= kPartialDestroyStartedMask;
nextIntVal |= kPARTIAL_DESTROY_STARTED_MASK;
action = PartialDestroy;
}
}
if (refCounts_.compare_exchange_weak(prevIntVal, nextIntVal, std::memory_order_acq_rel))
{
XRPL_ASSERT(
(!(prevIntVal & kPartialDestroyStartedMask)),
(!(prevIntVal & kPARTIAL_DESTROY_STARTED_MASK)),
"xrpl::IntrusiveRefCounts::addWeakReleaseStrongRef : not "
"started partial destroy");
return action;
@@ -339,7 +340,7 @@ IntrusiveRefCounts::addWeakReleaseStrongRef() const
inline ReleaseWeakRefAction
IntrusiveRefCounts::releaseWeakRef() const
{
auto prevIntVal = refCounts_.fetch_sub(kWeakDelta, std::memory_order_acq_rel);
auto prevIntVal = refCounts_.fetch_sub(kWEAK_DELTA, std::memory_order_acq_rel);
RefCountPair prev = prevIntVal;
if (prev.weak == 1 && prev.strong == 0)
{
@@ -356,7 +357,7 @@ IntrusiveRefCounts::releaseWeakRef() const
{
// partial destroy MUST finish before running a full destroy (when
// using weak pointers)
refCounts_.wait(prevIntVal - kWeakDelta, std::memory_order_acquire);
refCounts_.wait(prevIntVal - kWEAK_DELTA, std::memory_order_acquire);
}
return ReleaseWeakRefAction::Destroy;
}
@@ -375,7 +376,7 @@ IntrusiveRefCounts::checkoutStrongRefFromWeak() const noexcept
if (prev.strong == 0u)
return false;
desiredValue = curValue + kStrongDelta;
desiredValue = curValue + kSTRONG_DELTA;
}
return true;
}
@@ -399,22 +400,23 @@ inline IntrusiveRefCounts::~IntrusiveRefCounts() noexcept
#ifndef NDEBUG
auto v = refCounts_.load(std::memory_order_acquire);
XRPL_ASSERT(
(!(v & kValueMask)), "xrpl::IntrusiveRefCounts::~IntrusiveRefCounts : count must be zero");
auto t = v & kTagMask;
XRPL_ASSERT((!t || t == kTagMask), "xrpl::IntrusiveRefCounts::~IntrusiveRefCounts : valid tag");
(!(v & kVALUE_MASK)), "xrpl::IntrusiveRefCounts::~IntrusiveRefCounts : count must be zero");
auto t = v & kTAG_MASK;
XRPL_ASSERT(
(!t || t == kTAG_MASK), "xrpl::IntrusiveRefCounts::~IntrusiveRefCounts : valid tag");
#endif
}
//------------------------------------------------------------------------------
inline IntrusiveRefCounts::RefCountPair::RefCountPair(IntrusiveRefCounts::FieldType v) noexcept
: strong{static_cast<CountType>(v & kStrongMask)}
, weak{static_cast<CountType>((v & kWeakMask) >> kStrongCountNumBits)}
, partialDestroyStartedBit{v & kPartialDestroyStartedMask}
, partialDestroyFinishedBit{v & kPartialDestroyFinishedMask}
: strong{static_cast<CountType>(v & kSTRONG_MASK)}
, weak{static_cast<CountType>((v & kWEAK_MASK) >> kSTRONG_COUNT_NUM_BITS)}
, partialDestroyStartedBit{v & kPARTIAL_DESTROY_STARTED_MASK}
, partialDestroyFinishedBit{v & kPARTIAL_DESTROY_FINISHED_MASK}
{
XRPL_ASSERT(
(strong < kCheckStrongMaxValue && weak < kCheckWeakMaxValue),
(strong < kCHECK_STRONG_MAX_VALUE && weak < kCHECK_WEAK_MAX_VALUE),
"xrpl::IntrusiveRefCounts::RefCountPair(FieldType) : inputs inside "
"range");
}
@@ -425,7 +427,7 @@ inline IntrusiveRefCounts::RefCountPair::RefCountPair(
: strong{s}, weak{w}
{
XRPL_ASSERT(
(strong < kCheckStrongMaxValue && weak < kCheckWeakMaxValue),
(strong < kCHECK_STRONG_MAX_VALUE && weak < kCHECK_WEAK_MAX_VALUE),
"xrpl::IntrusiveRefCounts::RefCountPair(CountType, CountType) : "
"inputs inside range");
}
@@ -434,11 +436,11 @@ inline IntrusiveRefCounts::FieldType
IntrusiveRefCounts::RefCountPair::combinedValue() const noexcept
{
XRPL_ASSERT(
(strong < kCheckStrongMaxValue && weak < kCheckWeakMaxValue),
(strong < kCHECK_STRONG_MAX_VALUE && weak < kCHECK_WEAK_MAX_VALUE),
"xrpl::IntrusiveRefCounts::RefCountPair::combinedValue : inputs "
"inside range");
return (static_cast<IntrusiveRefCounts::FieldType>(weak)
<< IntrusiveRefCounts::kStrongCountNumBits) |
<< IntrusiveRefCounts::kSTRONG_COUNT_NUM_BITS) |
static_cast<IntrusiveRefCounts::FieldType>(strong) | partialDestroyStartedBit |
partialDestroyFinishedBit;
}
@@ -449,7 +451,7 @@ partialDestructorFinished(T** o)
{
T& self = **o;
IntrusiveRefCounts::RefCountPair const p =
self.refCounts_.fetch_or(IntrusiveRefCounts::kPartialDestroyFinishedMask);
self.refCounts_.fetch_or(IntrusiveRefCounts::kPARTIAL_DESTROY_FINISHED_MASK);
XRPL_ASSERT(
(!p.partialDestroyFinishedBit && p.partialDestroyStartedBit && !p.strong),
"xrpl::partialDestructorFinished : not a weak ref");

Some files were not shown because too many files have changed in this diff Show More