mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 09:16:47 +00:00
Compare commits
49 Commits
pratik/ote
...
ximinez/le
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4bf649e9ef | ||
|
|
4d7788ff4a | ||
|
|
6f93cbc011 | ||
|
|
fbed72cedb | ||
|
|
f1f4418d59 | ||
|
|
b64da840c8 | ||
|
|
9869d6e75d | ||
|
|
e079186a2f | ||
|
|
29d8f1d18b | ||
|
|
92225d2838 | ||
|
|
4f7188dc98 | ||
|
|
b12dd38d5c | ||
|
|
70cad8850d | ||
|
|
4a1e0cc7fe | ||
|
|
404be5b054 | ||
|
|
922885a4f8 | ||
|
|
012926ead9 | ||
|
|
9a0b70b45d | ||
|
|
6311292c2b | ||
|
|
4f98a09b65 | ||
|
|
eabfa77c77 | ||
|
|
d689047950 | ||
|
|
4d4ac414ae | ||
|
|
6e751091b1 | ||
|
|
d6459c8ac7 | ||
|
|
40cb63f423 | ||
|
|
98206e6514 | ||
|
|
a047d1bc9b | ||
|
|
2ca91e701e | ||
|
|
3db7e84e06 | ||
|
|
75d0960e1d | ||
|
|
091709e7d7 | ||
|
|
b5756c44bc | ||
|
|
613a94645d | ||
|
|
c0323540f9 | ||
|
|
e0fd480ae7 | ||
|
|
fe80f0e895 | ||
|
|
9988e596e9 | ||
|
|
3523c437a8 | ||
|
|
0f38b4b541 | ||
|
|
f84350c61c | ||
|
|
7a118245f7 | ||
|
|
47ddc34fda | ||
|
|
1f579efc2f | ||
|
|
e464e101be | ||
|
|
4dfa6db32a | ||
|
|
766124ed6d | ||
|
|
5c34a7b8fb | ||
|
|
f6f3542b7e |
@@ -191,14 +191,11 @@ CheckOptions:
|
||||
readability-identifier-naming.ParameterCase: camelBack
|
||||
readability-identifier-naming.FunctionCase: camelBack
|
||||
readability-identifier-naming.MemberCase: camelBack
|
||||
readability-identifier-naming.PrivateMemberCase: camelBack
|
||||
readability-identifier-naming.PrivateMemberSuffix: _
|
||||
readability-identifier-naming.ProtectedMemberCase: camelBack
|
||||
readability-identifier-naming.ProtectedMemberSuffix: _
|
||||
readability-identifier-naming.PublicMemberCase: camelBack
|
||||
readability-identifier-naming.PublicMemberSuffix: ""
|
||||
readability-identifier-naming.GlobalFunctionIgnoredRegexp: "^(to_string|hash_append|tuple_hash)$"
|
||||
|
||||
HeaderFilterRegex: '^.*/(tests?|xrpl|xrpld)/.*\.(h|hpp|ipp)$'
|
||||
HeaderFilterRegex: '^.*/(test|xrpl|xrpld)/.*\.(h|hpp|ipp)$'
|
||||
ExcludeHeaderFilterRegex: '^.*/protocol_autogen/.*\.(h|hpp)$'
|
||||
WarningsAsErrors: "*"
|
||||
|
||||
18
.github/actions/build-deps/action.yml
vendored
18
.github/actions/build-deps/action.yml
vendored
@@ -37,12 +37,12 @@ runs:
|
||||
run: |
|
||||
echo 'Installing dependencies.'
|
||||
conan install \
|
||||
--profile ci \
|
||||
--build="${BUILD_OPTION}" \
|
||||
--options:host='&:tests=True' \
|
||||
--options:host='&:xrpld=True' \
|
||||
--settings:all build_type="${BUILD_TYPE}" \
|
||||
--conf:all tools.build:jobs=${BUILD_NPROC} \
|
||||
--conf:all tools.build:verbosity="${LOG_VERBOSITY}" \
|
||||
--conf:all tools.compilation:verbosity="${LOG_VERBOSITY}" \
|
||||
.
|
||||
--profile ci \
|
||||
--build="${BUILD_OPTION}" \
|
||||
--options:host='&:tests=True' \
|
||||
--options:host='&:xrpld=True' \
|
||||
--settings:all build_type="${BUILD_TYPE}" \
|
||||
--conf:all tools.build:jobs=${BUILD_NPROC} \
|
||||
--conf:all tools.build:verbosity="${LOG_VERBOSITY}" \
|
||||
--conf:all tools.compilation:verbosity="${LOG_VERBOSITY}" \
|
||||
.
|
||||
|
||||
10
.github/actions/generate-version/action.yml
vendored
10
.github/actions/generate-version/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
||||
shell: bash
|
||||
env:
|
||||
VERSION: ${{ github.ref_name }}
|
||||
run: echo "VERSION=${VERSION}" >>"${GITHUB_ENV}"
|
||||
run: echo "VERSION=${VERSION}" >> "${GITHUB_ENV}"
|
||||
|
||||
# When a tag is not pushed, then the version (e.g. 1.2.3-b0) is extracted
|
||||
# from the BuildInfo.cpp file and the shortened commit hash appended to it.
|
||||
@@ -28,17 +28,17 @@ runs:
|
||||
echo 'Extracting version from BuildInfo.cpp.'
|
||||
VERSION="$(cat src/libxrpl/protocol/BuildInfo.cpp | grep "versionString =" | awk -F '"' '{print $2}')"
|
||||
if [[ -z "${VERSION}" ]]; then
|
||||
echo 'Unable to extract version from BuildInfo.cpp.'
|
||||
exit 1
|
||||
echo 'Unable to extract version from BuildInfo.cpp.'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo 'Appending shortened commit hash to version.'
|
||||
SHA='${{ github.sha }}'
|
||||
VERSION="${VERSION}+${SHA:0:7}"
|
||||
|
||||
echo "VERSION=${VERSION}" >>"${GITHUB_ENV}"
|
||||
echo "VERSION=${VERSION}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Output version
|
||||
id: version
|
||||
shell: bash
|
||||
run: echo "version=${VERSION}" >>"${GITHUB_OUTPUT}"
|
||||
run: echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
403
.github/scripts/format-inline-bash.py
vendored
403
.github/scripts/format-inline-bash.py
vendored
@@ -1,403 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Format embedded shell snippets using the shfmt hook configured in
|
||||
.pre-commit-config.yaml.
|
||||
|
||||
Two shapes are recognised:
|
||||
|
||||
* YAML workflow/action files: literal block-scalar runs (`run: |`) and
|
||||
single-line runs (`run: some command`). A single-line run is upgraded to
|
||||
a `run: |` block scalar if shfmt's output spans multiple lines.
|
||||
|
||||
* Markdown files: ``` ```bash ``` fenced code blocks.
|
||||
|
||||
Any block that shfmt cannot parse is skipped with a warning on stderr, so
|
||||
the file is left untouched and surrounding blocks still get formatted.
|
||||
|
||||
For each occurrence the body is dedented, written to a temp .sh file,
|
||||
formatted via `pre-commit run shfmt --files <temp>` (falling back to
|
||||
`prek`), then re-indented and written back in place.
|
||||
|
||||
When invoked without arguments, every .yml/.yaml under .github/ plus every
|
||||
.md file in the repo is scanned. When invoked with file arguments (the
|
||||
pre-commit case), only those files are processed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
REPO = Path(__file__).resolve().parents[2]
|
||||
|
||||
_HOOK_RUNNER = next((cmd for cmd in ("pre-commit", "prek") if shutil.which(cmd)), None)
|
||||
if _HOOK_RUNNER is None:
|
||||
sys.exit("error: neither `pre-commit` nor `prek` found on PATH")
|
||||
|
||||
RUN_BLOCK_RE = re.compile(r"^(?P<prefix>[ \t]*(?:- )?)run:[ \t]*\|[+-]?[ \t]*$")
|
||||
RUN_INLINE_RE = re.compile(
|
||||
r"^(?P<prefix>[ \t]*(?:- )?)run:[ \t]+" r"(?P<value>(?!\|[+-]?[ \t]*$)\S.*?)[ \t]*$"
|
||||
)
|
||||
MD_BASH_OPEN_RE = re.compile(r"^(?P<indent>[ ]{0,3})`{3}bash[ \t]*$")
|
||||
MD_FENCE_CLOSE_RE = re.compile(r"^[ ]{0,3}`{3,}[ \t]*$")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class BlockRun:
|
||||
"""A `run: |` block scalar; `body_start:body_end` slices into `lines`."""
|
||||
|
||||
body_start: int
|
||||
body_end: int
|
||||
body_indent: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InlineRun:
|
||||
"""A single-line `run: value` at `line_idx`."""
|
||||
|
||||
line_idx: int
|
||||
prefix: str
|
||||
value: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MdBashBlock:
|
||||
"""A markdown ``` ```bash ``` fenced code block.
|
||||
|
||||
`body_start:body_end` slices into the file's lines; `open_line_idx`
|
||||
points at the opening fence line.
|
||||
"""
|
||||
|
||||
open_line_idx: int
|
||||
body_start: int
|
||||
body_end: int
|
||||
body_indent: int
|
||||
|
||||
|
||||
RunItem = Union[BlockRun, InlineRun]
|
||||
|
||||
|
||||
def _scan_block_body(
|
||||
lines: list[str], body_start: int, run_col: int
|
||||
) -> tuple[int | None, int]:
|
||||
"""Locate the body of a `run: |` block scalar starting at `body_start`.
|
||||
|
||||
Returns `(body_indent, scan_end)`. `scan_end` is the line index where the
|
||||
outer scanner should resume. `body_indent` is `None` when no body is
|
||||
present (the scalar is empty, or the next non-blank line has indent
|
||||
`<= run_col`).
|
||||
"""
|
||||
body_indent: int | None = None
|
||||
scan_end = len(lines)
|
||||
for idx in range(body_start, len(lines)):
|
||||
line = lines[idx]
|
||||
if line.strip() == "":
|
||||
continue
|
||||
indent = len(line) - len(line.lstrip(" "))
|
||||
if body_indent is None:
|
||||
if indent > run_col:
|
||||
body_indent = indent
|
||||
else:
|
||||
scan_end = idx
|
||||
break
|
||||
elif indent < body_indent:
|
||||
scan_end = idx
|
||||
break
|
||||
if body_indent is not None:
|
||||
while scan_end > body_start and lines[scan_end - 1].strip() == "":
|
||||
scan_end -= 1
|
||||
if scan_end <= body_start:
|
||||
body_indent = None
|
||||
return body_indent, scan_end
|
||||
|
||||
|
||||
def find_run_blocks(lines: list[str]) -> list[RunItem]:
|
||||
"""Return run items in document order."""
|
||||
items: list[RunItem] = []
|
||||
line_idx = 0
|
||||
while line_idx < len(lines):
|
||||
line = lines[line_idx]
|
||||
if block_match := RUN_BLOCK_RE.match(line):
|
||||
run_col = len(block_match.group("prefix"))
|
||||
body_start = line_idx + 1
|
||||
body_indent, scan_end = _scan_block_body(lines, body_start, run_col)
|
||||
if body_indent is not None:
|
||||
items.append(
|
||||
BlockRun(
|
||||
body_start=body_start,
|
||||
body_end=scan_end,
|
||||
body_indent=body_indent,
|
||||
)
|
||||
)
|
||||
line_idx = scan_end
|
||||
continue
|
||||
if inline_match := RUN_INLINE_RE.match(line):
|
||||
items.append(
|
||||
InlineRun(
|
||||
line_idx=line_idx,
|
||||
prefix=inline_match.group("prefix"),
|
||||
value=inline_match.group("value"),
|
||||
)
|
||||
)
|
||||
line_idx += 1
|
||||
return items
|
||||
|
||||
|
||||
def find_md_bash_blocks(lines: list[str]) -> list[MdBashBlock]:
|
||||
"""Return ``` ```bash ``` fenced code blocks in document order."""
|
||||
blocks: list[MdBashBlock] = []
|
||||
line_idx = 0
|
||||
while line_idx < len(lines):
|
||||
open_match = MD_BASH_OPEN_RE.match(lines[line_idx])
|
||||
if not open_match:
|
||||
line_idx += 1
|
||||
continue
|
||||
body_start = line_idx + 1
|
||||
close_idx = next(
|
||||
(
|
||||
j
|
||||
for j in range(body_start, len(lines))
|
||||
if MD_FENCE_CLOSE_RE.match(lines[j])
|
||||
),
|
||||
None,
|
||||
)
|
||||
if close_idx is None:
|
||||
line_idx = body_start
|
||||
continue
|
||||
body = lines[body_start:close_idx]
|
||||
non_blank = [b for b in body if b.strip()]
|
||||
body_indent = (
|
||||
min(len(b) - len(b.lstrip(" ")) for b in non_blank)
|
||||
if non_blank
|
||||
else len(open_match.group("indent"))
|
||||
)
|
||||
blocks.append(
|
||||
MdBashBlock(
|
||||
open_line_idx=line_idx,
|
||||
body_start=body_start,
|
||||
body_end=close_idx,
|
||||
body_indent=body_indent,
|
||||
)
|
||||
)
|
||||
line_idx = close_idx + 1
|
||||
return blocks
|
||||
|
||||
|
||||
def dedent(lines: list[str], n: int) -> list[str]:
|
||||
pad = " " * n
|
||||
return [
|
||||
(
|
||||
""
|
||||
if line.strip() == ""
|
||||
else (line[n:] if line.startswith(pad) else line.lstrip(" "))
|
||||
)
|
||||
for line in lines
|
||||
]
|
||||
|
||||
|
||||
def reindent(lines: list[str], n: int) -> list[str]:
|
||||
pad = " " * n
|
||||
return [pad + line if line else "" for line in lines]
|
||||
|
||||
|
||||
_SHFMT_ERR_RE = re.compile(r"\.sh:\d+:\d+:\s")
|
||||
_GHA_EXPR_RE = re.compile(r"\$\{\{.*?\}\}", re.DOTALL)
|
||||
_GHA_PLACEHOLDER_RE = re.compile(r"__GHA_EXPR_(\d+)__")
|
||||
|
||||
|
||||
def _encode_gha_exprs(text: str) -> tuple[str, list[str]]:
|
||||
"""Replace `${{ ... }}` expressions with bash-safe placeholder identifiers."""
|
||||
exprs: list[str] = []
|
||||
|
||||
def repl(match: re.Match[str]) -> str:
|
||||
exprs.append(match.group(0))
|
||||
return f"__GHA_EXPR_{len(exprs) - 1}__"
|
||||
|
||||
return _GHA_EXPR_RE.sub(repl, text), exprs
|
||||
|
||||
|
||||
def _decode_gha_exprs(text: str, exprs: list[str]) -> str:
|
||||
"""Restore `${{ ... }}` expressions from placeholder identifiers."""
|
||||
return _GHA_PLACEHOLDER_RE.sub(lambda m: exprs[int(m.group(1))], text)
|
||||
|
||||
|
||||
def shfmt_via_hook(tmp_path: Path) -> tuple[bool, str]:
|
||||
# `${{ ... }}` is not valid shell, so swap it for a placeholder identifier
|
||||
# that shfmt can parse, then restore it after formatting.
|
||||
encoded, exprs = _encode_gha_exprs(tmp_path.read_text())
|
||||
if exprs:
|
||||
tmp_path.write_text(encoded)
|
||||
res = subprocess.run(
|
||||
[_HOOK_RUNNER, "run", "shfmt", "--files", str(tmp_path)],
|
||||
cwd=REPO,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
output = res.stdout + res.stderr
|
||||
# shfmt emits parse errors as "<path>:<line>:<col>: <message>".
|
||||
parse_err = bool(_SHFMT_ERR_RE.search(output))
|
||||
# A non-zero exit that is neither a parse error nor pre-commit's "I had
|
||||
# to modify files" signal means the hook itself failed to run (missing
|
||||
# binary, install failure, bad config, ...). Surface that loudly rather
|
||||
# than silently treating it as a no-op.
|
||||
if (
|
||||
res.returncode != 0
|
||||
and not parse_err
|
||||
and "files were modified by this hook" not in output
|
||||
):
|
||||
sys.exit(
|
||||
f"error: `{_HOOK_RUNNER} run shfmt` failed with exit {res.returncode}:\n{output}"
|
||||
)
|
||||
if exprs and not parse_err:
|
||||
tmp_path.write_text(_decode_gha_exprs(tmp_path.read_text(), exprs))
|
||||
return not parse_err, output
|
||||
|
||||
|
||||
def _skip(path: Path, where: int, kind: str, output: str) -> None:
|
||||
print(
|
||||
f" shfmt could not parse {kind} at {path}:{where + 1} — skipped",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(f" {output.strip()}", file=sys.stderr)
|
||||
|
||||
|
||||
def process_yaml_file(path: Path, tmp_path: Path) -> int:
|
||||
text = path.read_text()
|
||||
had_nl = text.endswith("\n")
|
||||
lines = text.split("\n")
|
||||
if had_nl:
|
||||
lines = lines[:-1]
|
||||
items = find_run_blocks(lines)
|
||||
if not items:
|
||||
return 0
|
||||
changed = 0
|
||||
# Process in reverse so earlier indices remain valid as we splice.
|
||||
for item in reversed(items):
|
||||
if isinstance(item, BlockRun):
|
||||
body = lines[item.body_start : item.body_end]
|
||||
tmp_path.write_text("\n".join(dedent(body, item.body_indent)) + "\n")
|
||||
ok, output = shfmt_via_hook(tmp_path)
|
||||
if not ok:
|
||||
_skip(path, item.body_start, "block", output)
|
||||
continue
|
||||
formatted = tmp_path.read_text().rstrip("\n")
|
||||
new_body = reindent(formatted.split("\n"), item.body_indent)
|
||||
if new_body != body:
|
||||
lines[item.body_start : item.body_end] = new_body
|
||||
changed += 1
|
||||
else:
|
||||
tmp_path.write_text(item.value + "\n")
|
||||
ok, output = shfmt_via_hook(tmp_path)
|
||||
if not ok:
|
||||
_skip(path, item.line_idx, "inline run", output)
|
||||
continue
|
||||
formatted = tmp_path.read_text().rstrip("\n")
|
||||
if formatted == item.value:
|
||||
continue
|
||||
formatted_lines = formatted.split("\n")
|
||||
if len(formatted_lines) == 1:
|
||||
lines[item.line_idx] = f"{item.prefix}run: {formatted}"
|
||||
else:
|
||||
body_indent = len(item.prefix) + 2
|
||||
lines[item.line_idx : item.line_idx + 1] = [
|
||||
f"{item.prefix}run: |",
|
||||
*reindent(formatted_lines, body_indent),
|
||||
]
|
||||
changed += 1
|
||||
new_text = "\n".join(lines) + ("\n" if had_nl else "")
|
||||
if new_text != text:
|
||||
path.write_text(new_text)
|
||||
return changed
|
||||
|
||||
|
||||
def process_md_file(path: Path, tmp_path: Path) -> int:
|
||||
text = path.read_text()
|
||||
had_nl = text.endswith("\n")
|
||||
lines = text.split("\n")
|
||||
if had_nl:
|
||||
lines = lines[:-1]
|
||||
blocks = find_md_bash_blocks(lines)
|
||||
if not blocks:
|
||||
return 0
|
||||
changed = 0
|
||||
for block in reversed(blocks):
|
||||
body = lines[block.body_start : block.body_end]
|
||||
tmp_path.write_text("\n".join(dedent(body, block.body_indent)) + "\n")
|
||||
ok, output = shfmt_via_hook(tmp_path)
|
||||
if not ok:
|
||||
_skip(path, block.open_line_idx, "```bash block", output)
|
||||
continue
|
||||
formatted = tmp_path.read_text().rstrip("\n")
|
||||
formatted_lines = formatted.split("\n") if formatted else []
|
||||
new_body = reindent(formatted_lines, block.body_indent)
|
||||
if new_body != body:
|
||||
lines[block.body_start : block.body_end] = new_body
|
||||
changed += 1
|
||||
new_text = "\n".join(lines) + ("\n" if had_nl else "")
|
||||
if new_text != text:
|
||||
path.write_text(new_text)
|
||||
return changed
|
||||
|
||||
|
||||
def process_file(path: Path, tmp_path: Path) -> int:
|
||||
if path.suffix in (".yml", ".yaml"):
|
||||
return process_yaml_file(path, tmp_path)
|
||||
if path.suffix == ".md":
|
||||
return process_md_file(path, tmp_path)
|
||||
return 0
|
||||
|
||||
|
||||
def gather_files(argv: list[str]) -> list[Path]:
|
||||
"""Return YAML workflow/action files and markdown files that we should
|
||||
process — either the paths in `argv` or, when `argv` is empty, every
|
||||
such file in the repo (skipping `external/`)."""
|
||||
if argv:
|
||||
candidates: list[Path] = [
|
||||
(REPO / a).resolve() if not Path(a).is_absolute() else Path(a) for a in argv
|
||||
]
|
||||
else:
|
||||
gh = REPO / ".github"
|
||||
candidates = [
|
||||
*gh.rglob("*.yml"),
|
||||
*gh.rglob("*.yaml"),
|
||||
*(
|
||||
p
|
||||
for p in REPO.rglob("*.md")
|
||||
if "external" not in p.relative_to(REPO).parts
|
||||
),
|
||||
]
|
||||
return sorted(
|
||||
p
|
||||
for p in candidates
|
||||
if p.exists()
|
||||
and (
|
||||
(p.suffix in (".yml", ".yaml") and ".github" in p.parts)
|
||||
or p.suffix == ".md"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
|
||||
files = gather_files(argv)
|
||||
if not files:
|
||||
return 0
|
||||
with tempfile.TemporaryDirectory(prefix="format-inline-bash-") as tmpdir:
|
||||
tmp_path = Path(tmpdir) / "shfmt.sh"
|
||||
total = 0
|
||||
for f in files:
|
||||
n = process_file(f, tmp_path)
|
||||
if n:
|
||||
print(f"{f.relative_to(REPO)}: reformatted {n} block(s)")
|
||||
total += n
|
||||
return 1 if total else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv[1:]))
|
||||
0
.github/scripts/levelization/generate.py
vendored
Executable file → Normal file
0
.github/scripts/levelization/generate.py
vendored
Executable file → Normal file
2
.github/scripts/rename/binary.sh
vendored
2
.github/scripts/rename/binary.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
12
.github/scripts/rename/cmake.sh
vendored
12
.github/scripts/rename/cmake.sh
vendored
@@ -8,12 +8,12 @@ set -e
|
||||
SED_COMMAND=sed
|
||||
HEAD_COMMAND=head
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
SED_COMMAND=gsed
|
||||
if ! command -v ghead &>/dev/null; then
|
||||
if ! command -v ghead &> /dev/null; then
|
||||
echo "Error: ghead is not installed. Please install it using 'brew install coreutils'."
|
||||
exit 1
|
||||
fi
|
||||
@@ -74,10 +74,10 @@ if grep -q '"xrpld"' cmake/XrplCore.cmake; then
|
||||
# The script has been rerun, so just restore the name of the binary.
|
||||
${SED_COMMAND} -i 's/"xrpld"/"rippled"/' cmake/XrplCore.cmake
|
||||
elif ! grep -q '"rippled"' cmake/XrplCore.cmake; then
|
||||
${HEAD_COMMAND} -n -1 cmake/XrplCore.cmake >cmake.tmp
|
||||
echo ' # For the time being, we will keep the name of the binary as it was.' >>cmake.tmp
|
||||
echo ' set_target_properties(xrpld PROPERTIES OUTPUT_NAME "rippled")' >>cmake.tmp
|
||||
tail -1 cmake/XrplCore.cmake >>cmake.tmp
|
||||
${HEAD_COMMAND} -n -1 cmake/XrplCore.cmake > cmake.tmp
|
||||
echo ' # For the time being, we will keep the name of the binary as it was.' >> cmake.tmp
|
||||
echo ' set_target_properties(xrpld PROPERTIES OUTPUT_NAME "rippled")' >> cmake.tmp
|
||||
tail -1 cmake/XrplCore.cmake >> cmake.tmp
|
||||
mv cmake.tmp cmake/XrplCore.cmake
|
||||
fi
|
||||
|
||||
|
||||
2
.github/scripts/rename/config.sh
vendored
2
.github/scripts/rename/config.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
24
.github/scripts/rename/copyright.sh
vendored
24
.github/scripts/rename/copyright.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
@@ -62,37 +62,37 @@ done
|
||||
# restoring the verbiage that is already present in LICENSE.md. Ensure that if
|
||||
# the script is run multiple times, duplicate notices are not added.
|
||||
if ! grep -q 'Raw Material Software' include/xrpl/beast/core/CurrentThreadName.h; then
|
||||
echo -e "// Portions of this file are from JUCE (http://www.juce.com).\n// Copyright (c) 2013 - Raw Material Software Ltd.\n// Please visit http://www.juce.com\n\n$(cat include/xrpl/beast/core/CurrentThreadName.h)" >include/xrpl/beast/core/CurrentThreadName.h
|
||||
echo -e "// Portions of this file are from JUCE (http://www.juce.com).\n// Copyright (c) 2013 - Raw Material Software Ltd.\n// Please visit http://www.juce.com\n\n$(cat include/xrpl/beast/core/CurrentThreadName.h)" > include/xrpl/beast/core/CurrentThreadName.h
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/test/app/NetworkID_test.cpp; then
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/NetworkID_test.cpp)" >src/test/app/NetworkID_test.cpp
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/NetworkID_test.cpp)" > src/test/app/NetworkID_test.cpp
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/test/app/tx/apply_test.cpp; then
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/tx/apply_test.cpp)" >src/test/app/tx/apply_test.cpp
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/app/tx/apply_test.cpp)" > src/test/app/tx/apply_test.cpp
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/test/rpc/ManifestRPC_test.cpp; then
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ManifestRPC_test.cpp)" >src/test/rpc/ManifestRPC_test.cpp
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ManifestRPC_test.cpp)" > src/test/rpc/ManifestRPC_test.cpp
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/test/rpc/ValidatorInfo_test.cpp; then
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ValidatorInfo_test.cpp)" >src/test/rpc/ValidatorInfo_test.cpp
|
||||
echo -e "// Copyright (c) 2020 Dev Null Productions\n\n$(cat src/test/rpc/ValidatorInfo_test.cpp)" > src/test/rpc/ValidatorInfo_test.cpp
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/xrpld/rpc/handlers/server_info/Manifest.cpp; then
|
||||
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/server_info/Manifest.cpp)" >src/xrpld/rpc/handlers/server_info/Manifest.cpp
|
||||
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/server_info/Manifest.cpp)" > src/xrpld/rpc/handlers/server_info/Manifest.cpp
|
||||
fi
|
||||
if ! grep -q 'Dev Null' src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp; then
|
||||
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp)" >src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp
|
||||
echo -e "// Copyright (c) 2019 Dev Null Productions\n\n$(cat src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp)" > src/xrpld/rpc/handlers/admin/status/ValidatorInfo.cpp
|
||||
fi
|
||||
if ! grep -q 'Bougalis' include/xrpl/basics/SlabAllocator.h; then
|
||||
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/SlabAllocator.h)" >include/xrpl/basics/SlabAllocator.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/SlabAllocator.h)" > include/xrpl/basics/SlabAllocator.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
fi
|
||||
if ! grep -q 'Bougalis' include/xrpl/basics/spinlock.h; then
|
||||
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/spinlock.h)" >include/xrpl/basics/spinlock.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
echo -e "// Copyright (c) 2022, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/spinlock.h)" > include/xrpl/basics/spinlock.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
fi
|
||||
if ! grep -q 'Bougalis' include/xrpl/basics/tagged_integer.h; then
|
||||
echo -e "// Copyright (c) 2014, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/tagged_integer.h)" >include/xrpl/basics/tagged_integer.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
echo -e "// Copyright (c) 2014, Nikolaos D. Bougalis <nikb@bougalis.net>\n\n$(cat include/xrpl/basics/tagged_integer.h)" > include/xrpl/basics/tagged_integer.h # cspell: ignore Nikolaos Bougalis nikb
|
||||
fi
|
||||
if ! grep -q 'Ritchford' include/xrpl/beast/utility/Zero.h; then
|
||||
echo -e "// Copyright (c) 2014, Tom Ritchford <tom@swirly.com>\n\n$(cat include/xrpl/beast/utility/Zero.h)" >include/xrpl/beast/utility/Zero.h # cspell: ignore Ritchford
|
||||
echo -e "// Copyright (c) 2014, Tom Ritchford <tom@swirly.com>\n\n$(cat include/xrpl/beast/utility/Zero.h)" > include/xrpl/beast/utility/Zero.h # cspell: ignore Ritchford
|
||||
fi
|
||||
|
||||
# Restore newlines and tabs in string literals in the affected file.
|
||||
|
||||
2
.github/scripts/rename/definitions.sh
vendored
2
.github/scripts/rename/definitions.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
2
.github/scripts/rename/docs.sh
vendored
2
.github/scripts/rename/docs.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
2
.github/scripts/rename/namespace.sh
vendored
2
.github/scripts/rename/namespace.sh
vendored
@@ -6,7 +6,7 @@ set -e
|
||||
# On MacOS, ensure that GNU sed is installed and available as `gsed`.
|
||||
SED_COMMAND=sed
|
||||
if [[ "${OSTYPE}" == 'darwin'* ]]; then
|
||||
if ! command -v gsed &>/dev/null; then
|
||||
if ! command -v gsed &> /dev/null; then
|
||||
echo "Error: gsed is not installed. Please install it using 'brew install gnu-sed'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
118
.github/workflows/build-nix-image.yml
vendored
118
.github/workflows/build-nix-image.yml
vendored
@@ -6,16 +6,14 @@ on:
|
||||
- develop
|
||||
paths:
|
||||
- ".github/workflows/build-nix-image.yml"
|
||||
- ".github/workflows/reusable-build-docker-image.yml"
|
||||
- "docker/**"
|
||||
- "docker/nix.Dockerfile"
|
||||
- "flake.nix"
|
||||
- "flake.lock"
|
||||
- "nix/**"
|
||||
pull_request:
|
||||
paths:
|
||||
- ".github/workflows/build-nix-image.yml"
|
||||
- ".github/workflows/reusable-build-docker-image.yml"
|
||||
- "docker/**"
|
||||
- "docker/nix.Dockerfile"
|
||||
- "flake.nix"
|
||||
- "flake.lock"
|
||||
- "nix/**"
|
||||
@@ -29,81 +27,75 @@ defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
env:
|
||||
UBUNTU_VERSION: "20.04"
|
||||
RHEL_VERSION: "9"
|
||||
DEBIAN_VERSION: "bookworm"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build ${{ matrix.distro.name }} (${{ matrix.target.platform }})
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# The base images are the oldest supported version of each distro
|
||||
# that we want to build images for.
|
||||
distro:
|
||||
- name: nixos
|
||||
base_image: nixos/nix:latest
|
||||
- name: ubuntu
|
||||
base_image: ubuntu:20.04
|
||||
- name: rhel
|
||||
base_image: registry.access.redhat.com/ubi9/ubi:latest
|
||||
- name: debian
|
||||
base_image: debian:bookworm
|
||||
target:
|
||||
- platform: linux/amd64
|
||||
runner: ubuntu-latest
|
||||
- platform: linux/arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
uses: ./.github/workflows/reusable-build-docker-image.yml
|
||||
with:
|
||||
image_name: ghcr.io/xrplf/xrpld/nix-${{ matrix.distro.name }}
|
||||
dockerfile: docker/nix.Dockerfile
|
||||
base_image: ${{ matrix.distro.base_image }}
|
||||
platform: ${{ matrix.target.platform }}
|
||||
runner: ${{ matrix.target.runner }}
|
||||
push: ${{ github.repository == 'XRPLF/rippled' && github.event_name == 'push' }}
|
||||
|
||||
merge:
|
||||
name: Merge ${{ matrix.distro }} manifest
|
||||
needs: build
|
||||
if: ${{ github.repository == 'XRPLF/rippled' && github.event_name == 'push' }}
|
||||
name: Build and push Nix image (${{ matrix.distro }})
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
distro: [nixos, ubuntu, rhel, debian]
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/xrplf/xrpld/nix-${{ matrix.distro }}
|
||||
include:
|
||||
- distro: nixos
|
||||
- distro: ubuntu
|
||||
- distro: rhel
|
||||
- distro: debian
|
||||
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0
|
||||
with:
|
||||
images: ${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=sha,prefix=sha-,format=short
|
||||
type=raw,value=latest
|
||||
- name: Determine base image
|
||||
id: vars
|
||||
run: |
|
||||
case "${{ matrix.distro }}" in
|
||||
nixos)
|
||||
echo "base_image=nixos/nix:latest" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
ubuntu)
|
||||
echo "base_image=ubuntu:${UBUNTU_VERSION}" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
rhel)
|
||||
echo "base_image=registry.access.redhat.com/ubi${RHEL_VERSION}/ubi:latest" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
debian)
|
||||
echo "base_image=debian:${DEBIAN_VERSION}" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
if: github.event_name == 'push'
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create multi-arch manifests
|
||||
run: |
|
||||
for tag in $(jq -cr '.tags[]' <<<"$DOCKER_METADATA_OUTPUT_JSON"); do
|
||||
docker buildx imagetools create -t "$tag" "${tag}-amd64" "${tag}-arm64"
|
||||
done
|
||||
- name: Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
|
||||
with:
|
||||
images: ghcr.io/xrplf/ci/nix-${{ matrix.distro }}
|
||||
tags: |
|
||||
type=sha,prefix=sha-,format=short
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${{ steps.meta.outputs.version }}"
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||
with:
|
||||
context: .
|
||||
file: docker/nix.Dockerfile
|
||||
platforms: linux/amd64
|
||||
push: ${{ github.event_name == 'push' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: BASE_IMAGE=${{ steps.vars.outputs.base_image }}
|
||||
|
||||
23
.github/workflows/check-pr-description.yml
vendored
23
.github/workflows/check-pr-description.yml
vendored
@@ -5,17 +5,8 @@ on:
|
||||
types:
|
||||
- checks_requested
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
- ready_for_review
|
||||
branches:
|
||||
- develop
|
||||
- "release-*"
|
||||
- "release/*"
|
||||
- "staging/*"
|
||||
types: [opened, edited, reopened, synchronize, ready_for_review]
|
||||
branches: [develop]
|
||||
|
||||
jobs:
|
||||
check_description:
|
||||
@@ -29,11 +20,11 @@ jobs:
|
||||
env:
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
run: printenv PR_BODY >pr_body.md
|
||||
run: printenv PR_BODY > pr_body.md
|
||||
|
||||
- name: Check PR description differs from template
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
python .github/scripts/check-pr-description.py \
|
||||
--template-file .github/pull_request_template.md \
|
||||
--pr-body-file pr_body.md
|
||||
run: >
|
||||
python .github/scripts/check-pr-description.py
|
||||
--template-file .github/pull_request_template.md
|
||||
--pr-body-file pr_body.md
|
||||
|
||||
15
.github/workflows/check-pr-title.yml
vendored
15
.github/workflows/check-pr-title.yml
vendored
@@ -5,19 +5,10 @@ on:
|
||||
types:
|
||||
- checks_requested
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- edited
|
||||
- reopened
|
||||
- synchronize
|
||||
- ready_for_review
|
||||
branches:
|
||||
- develop
|
||||
- "release-*"
|
||||
- "release/*"
|
||||
- "staging/*"
|
||||
types: [opened, edited, reopened, synchronize, ready_for_review]
|
||||
branches: [develop]
|
||||
|
||||
jobs:
|
||||
check_title:
|
||||
if: ${{ github.event.pull_request.draft != true }}
|
||||
uses: XRPLF/actions/.github/workflows/check-pr-title.yml@cba1f0891650baf1a9c88624dc2d72573be2eb81
|
||||
uses: XRPLF/actions/.github/workflows/check-pr-title.yml@291206777251b4d493641b5afbdf7c23009d2988
|
||||
|
||||
8
.github/workflows/on-pr.yml
vendored
8
.github/workflows/on-pr.yml
vendored
@@ -98,7 +98,7 @@ jobs:
|
||||
READY: ${{ contains(github.event.pull_request.labels.*.name, 'Ready to merge') }}
|
||||
MERGE: ${{ github.event_name == 'merge_group' }}
|
||||
run: |
|
||||
echo "go=${{ (env.DRAFT != 'true' && env.READY == 'true') || env.FILES == 'true' || env.MERGE == 'true' }}" >>"${GITHUB_OUTPUT}"
|
||||
echo "go=${{ (env.DRAFT != 'true' && env.READY == 'true') || env.FILES == 'true' || env.MERGE == 'true' }}" >> "${GITHUB_OUTPUT}"
|
||||
cat "${GITHUB_OUTPUT}"
|
||||
outputs:
|
||||
go: ${{ steps.go.outputs.go == 'true' }}
|
||||
@@ -168,9 +168,9 @@ jobs:
|
||||
PR_URL: ${{ github.event.pull_request.html_url }}
|
||||
run: |
|
||||
gh api --method POST -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
/repos/xrplf/clio/dispatches -f "event_type=check_libxrpl" \
|
||||
-F "client_payload[ref]=${{ needs.upload-recipe.outputs.recipe_ref }}" \
|
||||
-F "client_payload[pr_url]=${PR_URL}"
|
||||
/repos/xrplf/clio/dispatches -f "event_type=check_libxrpl" \
|
||||
-F "client_payload[ref]=${{ needs.upload-recipe.outputs.recipe_ref }}" \
|
||||
-F "client_payload[pr_url]=${PR_URL}"
|
||||
|
||||
passed:
|
||||
if: failure() || cancelled()
|
||||
|
||||
2
.github/workflows/pre-commit.yml
vendored
2
.github/workflows/pre-commit.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
jobs:
|
||||
# Call the workflow in the XRPLF/actions repo that runs the pre-commit hooks.
|
||||
run-hooks:
|
||||
uses: XRPLF/actions/.github/workflows/pre-commit.yml@cba1f0891650baf1a9c88624dc2d72573be2eb81
|
||||
uses: XRPLF/actions/.github/workflows/pre-commit.yml@5e942d61bf32f7557a7c159cfac4712a687b3e3a
|
||||
with:
|
||||
runs_on: ubuntu-latest
|
||||
container: '{ "image": "ghcr.io/xrplf/ci/tools-rippled-pre-commit:sha-41ec7c1" }'
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
# Build a single-platform Docker image. On push, the image is pushed to
|
||||
# GHCR with arch-suffixed tags (e.g. `:latest-amd64`, `:sha-abc-amd64`)
|
||||
# so the calling workflow can stitch per-arch builds into a multi-arch
|
||||
# manifest without needing to pass digests around.
|
||||
name: Reusable build Docker image (single platform)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
image_name:
|
||||
description: "Full image name without tag (e.g. 'ghcr.io/xrplf/xrpld/nix-ubuntu')"
|
||||
required: true
|
||||
type: string
|
||||
dockerfile:
|
||||
description: "Path to the Dockerfile, relative to the repository root"
|
||||
required: true
|
||||
type: string
|
||||
base_image:
|
||||
description: "Value passed to the Dockerfile as the BASE_IMAGE build arg"
|
||||
required: true
|
||||
type: string
|
||||
platform:
|
||||
description: "Docker platform string, e.g. linux/amd64"
|
||||
required: true
|
||||
type: string
|
||||
runner:
|
||||
description: "GitHub Actions runner label to build on"
|
||||
required: true
|
||||
type: string
|
||||
push:
|
||||
description: "Whether to push the image to GHCR"
|
||||
required: true
|
||||
type: boolean
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build (${{ inputs.platform }})
|
||||
runs-on: ${{ inputs.runner }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Determine arch
|
||||
id: vars
|
||||
env:
|
||||
PLATFORM: ${{ inputs.platform }}
|
||||
run: |
|
||||
echo "arch=${PLATFORM##*/}" >>$GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
if: inputs.push
|
||||
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0
|
||||
with:
|
||||
images: ${{ inputs.image_name }}
|
||||
tags: |
|
||||
type=sha,prefix=sha-,format=short
|
||||
type=raw,value=latest
|
||||
flavor: |
|
||||
suffix=-${{ steps.vars.outputs.arch }},onlatest=true
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
file: ${{ inputs.dockerfile }}
|
||||
platforms: ${{ inputs.platform }}
|
||||
push: ${{ inputs.push }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: BASE_IMAGE=${{ inputs.base_image }}
|
||||
80
.github/workflows/reusable-build-test-config.yml
vendored
80
.github/workflows/reusable-build-test-config.yml
vendored
@@ -113,7 +113,7 @@ jobs:
|
||||
|
||||
- name: Set ccache log file
|
||||
if: ${{ inputs.ccache_enabled && runner.debug == '1' }}
|
||||
run: echo "CCACHE_LOGFILE=${{ runner.temp }}/ccache.log" >>"${GITHUB_ENV}"
|
||||
run: echo "CCACHE_LOGFILE=${{ runner.temp }}/ccache.log" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Print build environment
|
||||
uses: XRPLF/actions/print-build-env@59dec886e4afb05a1724443af08baccbc045b574
|
||||
@@ -146,11 +146,11 @@ jobs:
|
||||
CMAKE_ARGS: ${{ inputs.cmake_args }}
|
||||
run: |
|
||||
cmake \
|
||||
-G '${{ runner.os == 'Windows' && 'Visual Studio 17 2022' || 'Ninja' }}' \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
|
||||
${CMAKE_ARGS} \
|
||||
..
|
||||
-G '${{ runner.os == 'Windows' && 'Visual Studio 17 2022' || 'Ninja' }}' \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
|
||||
${CMAKE_ARGS} \
|
||||
..
|
||||
|
||||
- name: Check protocol autogen files are up-to-date
|
||||
working-directory: ${{ env.BUILD_DIR }}
|
||||
@@ -172,32 +172,32 @@ jobs:
|
||||
cmake --build . --target code_gen
|
||||
DIFF=$(git -C .. status --porcelain -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen)
|
||||
if [ -n "${DIFF}" ]; then
|
||||
echo "::error::Generated protocol files are out of date"
|
||||
git -C .. diff -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
echo "::error::Generated protocol files are out of date"
|
||||
git -C .. diff -- include/xrpl/protocol_autogen src/tests/libxrpl/protocol_autogen
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build the binary
|
||||
working-directory: ${{ env.BUILD_DIR }}
|
||||
env:
|
||||
BUILD_NPROC: ${{ steps.nproc.outputs.nproc }}
|
||||
BUILD_NPROC: ${{ runner.os == 'Linux' && '16' || steps.nproc.outputs.nproc }}
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
CMAKE_TARGET: ${{ inputs.cmake_target }}
|
||||
run: |
|
||||
cmake \
|
||||
--build . \
|
||||
--config "${BUILD_TYPE}" \
|
||||
--parallel "${BUILD_NPROC}" \
|
||||
--target "${CMAKE_TARGET}"
|
||||
--build . \
|
||||
--config "${BUILD_TYPE}" \
|
||||
--parallel "${BUILD_NPROC}" \
|
||||
--target "${CMAKE_TARGET}"
|
||||
|
||||
- name: Show ccache statistics
|
||||
if: ${{ inputs.ccache_enabled }}
|
||||
run: |
|
||||
ccache --show-stats -vv
|
||||
if [ '${{ runner.debug }}' = '1' ]; then
|
||||
cat "${CCACHE_LOGFILE}"
|
||||
curl ${CCACHE_REMOTE_STORAGE%|*}/status || true
|
||||
cat "${CCACHE_LOGFILE}"
|
||||
curl ${CCACHE_REMOTE_STORAGE%|*}/status || true
|
||||
fi
|
||||
|
||||
- name: Upload the binary (Linux)
|
||||
@@ -214,7 +214,7 @@ jobs:
|
||||
working-directory: ${{ env.BUILD_DIR }}
|
||||
run: |
|
||||
set -o pipefail
|
||||
./xrpld --definitions | python3 -m json.tool >server_definitions.json
|
||||
./xrpld --definitions | python3 -m json.tool > server_definitions.json
|
||||
|
||||
- name: Upload server definitions
|
||||
if: ${{ github.event.repository.visibility == 'public' && inputs.config_name == 'debian-bookworm-gcc-13-amd64-release' }}
|
||||
@@ -231,10 +231,10 @@ jobs:
|
||||
run: |
|
||||
ldd ./xrpld
|
||||
if [ "$(ldd ./xrpld | grep -E '(libstdc\+\+|libgcc)' | wc -l)" -eq 0 ]; then
|
||||
echo 'The binary is statically linked.'
|
||||
echo 'The binary is statically linked.'
|
||||
else
|
||||
echo 'The binary is dynamically linked.'
|
||||
exit 1
|
||||
echo 'The binary is dynamically linked.'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify presence of instrumentation (Linux)
|
||||
@@ -250,12 +250,12 @@ jobs:
|
||||
run: |
|
||||
ASAN_OPTS="include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-asan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/asan.supp"
|
||||
if [[ "${CONFIG_NAME}" == *gcc* ]]; then
|
||||
ASAN_OPTS="${ASAN_OPTS}:alloc_dealloc_mismatch=0"
|
||||
ASAN_OPTS="${ASAN_OPTS}:alloc_dealloc_mismatch=0"
|
||||
fi
|
||||
echo "ASAN_OPTIONS=${ASAN_OPTS}" >>${GITHUB_ENV}
|
||||
echo "TSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-tsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/tsan.supp" >>${GITHUB_ENV}
|
||||
echo "UBSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-ubsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/ubsan.supp" >>${GITHUB_ENV}
|
||||
echo "LSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-lsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/lsan.supp" >>${GITHUB_ENV}
|
||||
echo "ASAN_OPTIONS=${ASAN_OPTS}" >> ${GITHUB_ENV}
|
||||
echo "TSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-tsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/tsan.supp" >> ${GITHUB_ENV}
|
||||
echo "UBSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-ubsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/ubsan.supp" >> ${GITHUB_ENV}
|
||||
echo "LSAN_OPTIONS=include=${GITHUB_WORKSPACE}/sanitizers/suppressions/runtime-lsan-options.txt:suppressions=${GITHUB_WORKSPACE}/sanitizers/suppressions/lsan.supp" >> ${GITHUB_ENV}
|
||||
|
||||
- name: Run the separate tests
|
||||
if: ${{ !inputs.build_only }}
|
||||
@@ -266,9 +266,9 @@ jobs:
|
||||
PARALLELISM: ${{ runner.os == 'Windows' && '1' || steps.nproc.outputs.nproc }}
|
||||
run: |
|
||||
ctest \
|
||||
--output-on-failure \
|
||||
-C "${BUILD_TYPE}" \
|
||||
-j "${PARALLELISM}"
|
||||
--output-on-failure \
|
||||
-C "${BUILD_TYPE}" \
|
||||
-j "${PARALLELISM}"
|
||||
|
||||
- name: Run the embedded tests
|
||||
if: ${{ !inputs.build_only }}
|
||||
@@ -278,7 +278,7 @@ jobs:
|
||||
run: |
|
||||
set -o pipefail
|
||||
# Coverage builds are slower due to instrumentation; use fewer parallel jobs to avoid flakiness
|
||||
[ "$COVERAGE_ENABLED" = "true" ] && BUILD_NPROC=$((BUILD_NPROC - 2))
|
||||
[ "$COVERAGE_ENABLED" = "true" ] && BUILD_NPROC=$(( BUILD_NPROC - 2 ))
|
||||
./xrpld --unittest --unittest-jobs "${BUILD_NPROC}" 2>&1 | tee unittest.log
|
||||
|
||||
- name: Show test failure summary
|
||||
@@ -287,19 +287,19 @@ jobs:
|
||||
WORKING_DIR: ${{ runner.os == 'Windows' && format('{0}\{1}', env.BUILD_DIR, inputs.build_type) || env.BUILD_DIR }}
|
||||
run: |
|
||||
if [ ! -d "${WORKING_DIR}" ]; then
|
||||
echo "Working directory '${WORKING_DIR}' does not exist."
|
||||
exit 0
|
||||
echo "Working directory '${WORKING_DIR}' does not exist."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cd "${WORKING_DIR}"
|
||||
|
||||
if [ ! -f unittest.log ]; then
|
||||
echo "unittest.log not found; embedded tests may not have run."
|
||||
exit 0
|
||||
echo "unittest.log not found; embedded tests may not have run."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if ! grep -E "failed" unittest.log; then
|
||||
echo "Log present but no failure lines found in unittest.log."
|
||||
echo "Log present but no failure lines found in unittest.log."
|
||||
fi
|
||||
- name: Debug failure (Linux)
|
||||
if: ${{ failure() && runner.os == 'Linux' && !inputs.build_only }}
|
||||
@@ -317,14 +317,14 @@ jobs:
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
run: |
|
||||
cmake \
|
||||
--build . \
|
||||
--config "${BUILD_TYPE}" \
|
||||
--parallel "${BUILD_NPROC}" \
|
||||
--target coverage
|
||||
--build . \
|
||||
--config "${BUILD_TYPE}" \
|
||||
--parallel "${BUILD_NPROC}" \
|
||||
--target coverage
|
||||
|
||||
- name: Upload coverage report
|
||||
if: ${{ github.repository == 'XRPLF/rippled' && !inputs.build_only && env.COVERAGE_ENABLED == 'true' }}
|
||||
uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
|
||||
with:
|
||||
disable_search: true
|
||||
disable_telem: true
|
||||
|
||||
@@ -38,9 +38,9 @@ jobs:
|
||||
run: |
|
||||
DIFF=$(git status --porcelain)
|
||||
if [ -n "${DIFF}" ]; then
|
||||
# Print the differences to give the contributor a hint about what to
|
||||
# expect when running levelization on their own machine.
|
||||
git diff
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
# Print the differences to give the contributor a hint about what to
|
||||
# expect when running levelization on their own machine.
|
||||
git diff
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
10
.github/workflows/reusable-check-rename.yml
vendored
10
.github/workflows/reusable-check-rename.yml
vendored
@@ -48,9 +48,9 @@ jobs:
|
||||
run: |
|
||||
DIFF=$(git status --porcelain)
|
||||
if [ -n "${DIFF}" ]; then
|
||||
# Print the differences to give the contributor a hint about what to
|
||||
# expect when running the renaming scripts on their own machine.
|
||||
git diff
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
# Print the differences to give the contributor a hint about what to
|
||||
# expect when running the renaming scripts on their own machine.
|
||||
git diff
|
||||
echo "${MESSAGE}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
48
.github/workflows/reusable-clang-tidy.yml
vendored
48
.github/workflows/reusable-clang-tidy.yml
vendored
@@ -70,13 +70,13 @@ jobs:
|
||||
working-directory: ${{ env.BUILD_DIR }}
|
||||
run: |
|
||||
cmake \
|
||||
-G 'Ninja' \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
|
||||
-Dtests=ON \
|
||||
-Dwerr=ON \
|
||||
-Dxrpld=ON \
|
||||
..
|
||||
-G 'Ninja' \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
|
||||
-Dtests=ON \
|
||||
-Dwerr=ON \
|
||||
-Dxrpld=ON \
|
||||
..
|
||||
|
||||
# clang-tidy needs headers generated from proto files
|
||||
- name: Build libxrpl.libpb
|
||||
@@ -133,7 +133,7 @@ jobs:
|
||||
- name: Write issue header
|
||||
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
|
||||
run: |
|
||||
cat >"${ISSUE_FILE}" <<EOF
|
||||
cat > "${ISSUE_FILE}" <<EOF
|
||||
## Clang-tidy Check Failed
|
||||
|
||||
### Clang-tidy Output:
|
||||
@@ -144,30 +144,30 @@ jobs:
|
||||
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
|
||||
run: |
|
||||
if [ -f "${OUTPUT_FILE}" ]; then
|
||||
# Extract lines containing 'error:', 'warning:', or 'note:'
|
||||
grep -E '(error:|warning:|note:)' "${OUTPUT_FILE}" >filtered-output.txt || true
|
||||
# Extract lines containing 'error:', 'warning:', or 'note:'
|
||||
grep -E '(error:|warning:|note:)' "${OUTPUT_FILE}" > filtered-output.txt || true
|
||||
|
||||
# If filtered output is empty, use original (might be a different error format)
|
||||
if [ ! -s filtered-output.txt ]; then
|
||||
cp "${OUTPUT_FILE}" filtered-output.txt
|
||||
fi
|
||||
# If filtered output is empty, use original (might be a different error format)
|
||||
if [ ! -s filtered-output.txt ]; then
|
||||
cp "${OUTPUT_FILE}" filtered-output.txt
|
||||
fi
|
||||
|
||||
# Truncate if too large
|
||||
head -c 60000 filtered-output.txt >>"${ISSUE_FILE}"
|
||||
if [ "$(wc -c <filtered-output.txt)" -gt 60000 ]; then
|
||||
echo "" >>"${ISSUE_FILE}"
|
||||
echo "... (output truncated, see artifacts for full output)" >>"${ISSUE_FILE}"
|
||||
fi
|
||||
# Truncate if too large
|
||||
head -c 60000 filtered-output.txt >> "${ISSUE_FILE}"
|
||||
if [ "$(wc -c < filtered-output.txt)" -gt 60000 ]; then
|
||||
echo "" >> "${ISSUE_FILE}"
|
||||
echo "... (output truncated, see artifacts for full output)" >> "${ISSUE_FILE}"
|
||||
fi
|
||||
|
||||
rm filtered-output.txt
|
||||
rm filtered-output.txt
|
||||
else
|
||||
echo "No output file found" >>"${ISSUE_FILE}"
|
||||
echo "No output file found" >> "${ISSUE_FILE}"
|
||||
fi
|
||||
|
||||
- name: Append issue footer
|
||||
if: ${{ steps.run_clang_tidy.outcome != 'success' }}
|
||||
run: |
|
||||
cat >>"${ISSUE_FILE}" <<EOF
|
||||
cat >> "${ISSUE_FILE}" <<EOF
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
@@ -176,7 +176,7 @@ jobs:
|
||||
|
||||
- name: Create issue
|
||||
if: ${{ steps.run_clang_tidy.outcome != 'success' && inputs.create_issue_on_failure }}
|
||||
uses: XRPLF/actions/create-issue@2b8bc36af85b88bca0dd7bfac2e2dc05f94ad712
|
||||
uses: XRPLF/actions/create-issue@36d450d12d301e8410c1b7936e5de70c291cbe36
|
||||
with:
|
||||
title: "Clang-tidy check failed"
|
||||
body_file: ${{ env.ISSUE_FILE }}
|
||||
|
||||
2
.github/workflows/reusable-package.yml
vendored
2
.github/workflows/reusable-package.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
id: generate
|
||||
working-directory: .github/scripts/strategy-matrix
|
||||
run: |
|
||||
./generate.py --packaging --config=linux.json >>"${GITHUB_OUTPUT}"
|
||||
./generate.py --packaging --config=linux.json >> "${GITHUB_OUTPUT}"
|
||||
|
||||
generate-version:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -42,4 +42,4 @@ jobs:
|
||||
env:
|
||||
GENERATE_CONFIG: ${{ inputs.os != '' && format('--config={0}.json', inputs.os) || '' }}
|
||||
GENERATE_OPTION: ${{ inputs.strategy_matrix == 'all' && '--all' || '' }}
|
||||
run: ./generate.py ${GENERATE_OPTION} ${GENERATE_CONFIG} >>"${GITHUB_OUTPUT}"
|
||||
run: ./generate.py ${GENERATE_OPTION} ${GENERATE_CONFIG} >> "${GITHUB_OUTPUT}"
|
||||
|
||||
@@ -37,50 +37,37 @@ repos:
|
||||
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
|
||||
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: dd18dad857d6133e90bbe478f4f2f22ec0030269 # frozen: v22.1.5
|
||||
rev: cd481d7b0bfb5c7b3090c21846317f9a8262e891 # frozen: v22.1.0
|
||||
hooks:
|
||||
- id: clang-format
|
||||
args: [--style=file]
|
||||
"types_or": [c++, c, proto]
|
||||
exclude: ^include/xrpl/protocol_autogen/(transactions|ledger_entries)/
|
||||
|
||||
- repo: https://github.com/BlankSpruce/gersemi-pre-commit
|
||||
rev: faadd6a9d852369ca94f4d15b2404c967ba8cb01 # frozen: 0.27.6
|
||||
- repo: https://github.com/BlankSpruce/gersemi
|
||||
rev: 0.26.0
|
||||
hooks:
|
||||
- id: gersemi
|
||||
|
||||
- repo: https://github.com/rbubley/mirrors-prettier
|
||||
rev: 515f543f5718ebfd6ce22e16708bb32c68ff96e1 # frozen: v3.8.3
|
||||
rev: c2bc67fe8f8f549cc489e00ba8b45aa18ee713b1 # frozen: v3.8.1
|
||||
hooks:
|
||||
- id: prettier
|
||||
args: [--end-of-line=auto]
|
||||
|
||||
- repo: https://github.com/psf/black-pre-commit-mirror
|
||||
rev: 4160603246a6b365d4a2af661c6d71b0a0f50478 # frozen: 26.5.1
|
||||
rev: ea488cebbfd88a5f50b8bd95d5c829d0bb76feb8 # frozen: 26.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
|
||||
- repo: https://github.com/scop/pre-commit-shfmt
|
||||
rev: 05c1426671b9237fb5e1444dd63aa5731bec0dfb # frozen: v3.13.1-1
|
||||
- repo: https://github.com/openstack/bashate
|
||||
rev: 5798d24d571676fc407e81df574c1ef57b520f23 # frozen: 2.1.1
|
||||
hooks:
|
||||
- id: shfmt
|
||||
args: [--write, --indent=4, --case-indent=true]
|
||||
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: format-inline-bash-workflows
|
||||
name: "format `run:` blocks in workflows/actions"
|
||||
entry: ./.github/scripts/format-inline-bash.py
|
||||
language: python
|
||||
files: ^\.github/(workflows|actions)/.*\.ya?ml$
|
||||
- id: format-inline-bash-markdown
|
||||
name: "format ```bash blocks in markdown"
|
||||
entry: ./.github/scripts/format-inline-bash.py
|
||||
language: python
|
||||
files: \.md$
|
||||
- id: bashate
|
||||
args: ["--ignore=E006"]
|
||||
|
||||
- repo: https://github.com/streetsidesoftware/cspell-cli
|
||||
rev: 4643f154907327ee0a2c7038f0296e0dd77d9776 # frozen: v10.0.0
|
||||
rev: a42085ade523f591dca134379a595e7859986445 # frozen: v9.7.0
|
||||
hooks:
|
||||
- id: cspell # Spell check changed files
|
||||
exclude: |
|
||||
|
||||
15
BUILD.md
15
BUILD.md
@@ -151,8 +151,8 @@ git init
|
||||
git remote add origin git@github.com:XRPLF/conan-center-index.git
|
||||
git sparse-checkout init
|
||||
for recipe in "${recipes[@]}"; do
|
||||
echo "Checking out recipe '${recipe}'..."
|
||||
git sparse-checkout add recipes/${recipe}
|
||||
echo "Checking out recipe '${recipe}'..."
|
||||
git sparse-checkout add recipes/${recipe}
|
||||
done
|
||||
git fetch origin master
|
||||
git checkout master
|
||||
@@ -180,7 +180,7 @@ the new recipe will be automatically pulled from the official Conan Center.
|
||||
|
||||
If you see an error similar to the following after running `conan profile show`:
|
||||
|
||||
```text
|
||||
```bash
|
||||
ERROR: Invalid setting '17' is not a valid 'settings.compiler.version' value.
|
||||
Possible values are ['5.0', '5.1', '6.0', '6.1', '7.0', '7.3', '8.0', '8.1',
|
||||
'9.0', '9.1', '10.0', '11.0', '12.0', '13', '13.0', '13.1', '14', '14.0', '15',
|
||||
@@ -427,19 +427,16 @@ install ccache --version 4.11.3 --allow-downgrade`.
|
||||
Single-config generators:
|
||||
|
||||
```
|
||||
cmake --build . --parallel N
|
||||
cmake --build .
|
||||
```
|
||||
|
||||
Multi-config generators:
|
||||
|
||||
```
|
||||
cmake --build . --config Release --parallel N
|
||||
cmake --build . --config Debug --parallel N
|
||||
cmake --build . --config Release
|
||||
cmake --build . --config Debug
|
||||
```
|
||||
|
||||
Replace the `--parallel` parameter N with the desired number of parallel jobs. A common starting point is half of the number of available CPU
|
||||
cores.
|
||||
|
||||
5. Test xrpld.
|
||||
|
||||
Single-config generators:
|
||||
|
||||
@@ -1,567 +0,0 @@
|
||||
# Distributed Tracing Fundamentals
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Next**: [Architecture Analysis](./01-architecture-analysis.md)
|
||||
|
||||
---
|
||||
|
||||
## What is Distributed Tracing?
|
||||
|
||||
Distributed tracing is a method for tracking data objects as they flow through distributed systems. In a network like XRP Ledger, a single transaction touches multiple independent nodes—each with no shared memory or logging. Distributed tracing connects these dots.
|
||||
|
||||
**Without tracing:** You see isolated logs on each node with no way to correlate them.
|
||||
|
||||
**With tracing:** You see the complete journey of a transaction or an event across all nodes it touched.
|
||||
|
||||
---
|
||||
|
||||
## Actors and Actions at a Glance
|
||||
|
||||
### Actors
|
||||
|
||||
| Who (Plain English) | Technical Term |
|
||||
| ---------------------------------------------- | --------------- |
|
||||
| A single unit of work being tracked | Span |
|
||||
| The complete journey of a request | Trace |
|
||||
| Data that links spans across services | Trace Context |
|
||||
| Code that creates spans and propagates context | Instrumentation |
|
||||
| Service that receives and processes traces | Collector |
|
||||
| Storage and visualization system | Backend (Tempo) |
|
||||
| Decision logic for which traces to keep | Sampler |
|
||||
|
||||
### Actions
|
||||
|
||||
| What Happens (Plain English) | Technical Term |
|
||||
| --------------------------------------- | ----------------------- |
|
||||
| Start tracking a new operation | Create a Span |
|
||||
| Connect a child operation to its parent | Set `parent_span_id` |
|
||||
| Group all related operations together | Share a `trace_id` |
|
||||
| Pass tracking data between services | Context Propagation |
|
||||
| Decide whether to record a trace | Sampling (Head or Tail) |
|
||||
| Send completed traces to storage | Export (OTLP) |
|
||||
|
||||
---
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### 1. Trace
|
||||
|
||||
A **trace** represents the entire journey of a request through the system. It has a unique `trace_id` that stays constant across all nodes.
|
||||
|
||||
```
|
||||
Trace ID: abc123
|
||||
├── Node A: received transaction
|
||||
├── Node B: relayed transaction
|
||||
├── Node C: included in consensus
|
||||
└── Node D: applied to ledger
|
||||
```
|
||||
|
||||
### 2. Span
|
||||
|
||||
A **span** represents a single unit of work within a trace. Each span has:
|
||||
|
||||
| Attribute | Description | Example |
|
||||
| ---------------- | -------------------------------- | -------------------------- |
|
||||
| `trace_id` | Identifies the trace | `event123` |
|
||||
| `span_id` | Unique identifier | `span456` |
|
||||
| `parent_span_id` | Parent span (if any) | `p_span123` |
|
||||
| `name` | Operation name | `rpc.submit` |
|
||||
| `start_time` | When work began (local time) | `2024-01-15T10:30:00Z` |
|
||||
| `end_time` | When work completed (local time) | `2024-01-15T10:30:00.050Z` |
|
||||
| `attributes` | Key-value metadata | `tx.hash=ABC...` |
|
||||
| `status` | OK, ERROR MSG | `OK` |
|
||||
|
||||
### 3. Trace Context
|
||||
|
||||
**Trace context** is the data that propagates between services to link spans together. It contains:
|
||||
|
||||
- `trace_id` - The trace this span belongs to
|
||||
- `span_id` - The current span (becomes parent for child spans)
|
||||
- `trace_flags` - Sampling decisions
|
||||
|
||||
---
|
||||
|
||||
## How Spans Form a Trace
|
||||
|
||||
Spans have parent-child relationships forming a tree structure:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph trace["Trace: abc123"]
|
||||
A["tx.submit<br/>span_id: 001<br/>50ms"] --> B["tx.validate<br/>span_id: 002<br/>5ms"]
|
||||
A --> C["tx.relay<br/>span_id: 003<br/>10ms"]
|
||||
A --> D["tx.apply<br/>span_id: 004<br/>30ms"]
|
||||
D --> E["ledger.update<br/>span_id: 005<br/>20ms"]
|
||||
end
|
||||
|
||||
style A fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style B fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style C fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style D fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style E fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **tx.submit (blue, root)**: The top-level span representing the entire transaction submission; all other spans are its descendants.
|
||||
- **tx.validate, tx.relay, tx.apply (green)**: Direct children of tx.submit, representing the three main stages -- validation, relay to peers, and application to the ledger.
|
||||
- **ledger.update (red)**: A grandchild span nested under tx.apply, representing the actual ledger state mutation triggered by applying the transaction.
|
||||
- **Arrows (parent to child)**: Each arrow indicates a parent-child span relationship where the parent's completion depends on the child finishing.
|
||||
|
||||
The same trace visualized as a **timeline (Gantt chart)**:
|
||||
|
||||
```
|
||||
Time → 0ms 10ms 20ms 30ms 40ms 50ms
|
||||
├───────────────────────────────────────────┤
|
||||
tx.submit│▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
|
||||
├─────┤
|
||||
tx.valid │▓▓▓▓▓│
|
||||
│ ├──────────┤
|
||||
tx.relay │ │▓▓▓▓▓▓▓▓▓▓│
|
||||
│ ├────────────────────────────┤
|
||||
tx.apply │ │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
|
||||
│ ├──────────────────┤
|
||||
ledger │ │▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓│
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Span Relationships
|
||||
|
||||
Spans don't always form simple parent-child trees. Distributed tracing defines several relationship types to capture different causal patterns:
|
||||
|
||||
### 1. Parent-Child (ChildOf)
|
||||
|
||||
The default relationship. The parent span **depends on** or **contains** the child span. The child runs within the scope of the parent.
|
||||
|
||||
```
|
||||
tx.submit (parent)
|
||||
├── tx.validate (child) ← parent waits for this
|
||||
├── tx.relay (child) ← parent waits for this
|
||||
└── tx.apply (child) ← parent waits for this
|
||||
```
|
||||
|
||||
**When to use:** Synchronous calls, nested operations, any case where the parent's completion depends on the child.
|
||||
|
||||
### 2. Follows-From
|
||||
|
||||
A causal relationship where the first span **triggers** the second, but does **not wait** for it. The originator fires and moves on.
|
||||
|
||||
```
|
||||
Time →
|
||||
|
||||
tx.receive [=======]
|
||||
↓ triggers (follows-from)
|
||||
tx.relay [===========] ← runs independently
|
||||
```
|
||||
|
||||
**When to use:** Asynchronous jobs, queued work, fire-and-forget patterns. For example, a node receives a transaction and queues it for relay — the relay span _follows from_ the receive span but the receiver doesn't wait for relaying to complete.
|
||||
|
||||
> **OpenTracing** defined `FollowsFrom` as a first-class reference type alongside `ChildOf`.
|
||||
> **OpenTelemetry** represents this using **Span Links** with descriptive attributes instead (see below).
|
||||
|
||||
### 3. Span Links (Cross-Trace and Non-Hierarchical)
|
||||
|
||||
Links connect spans that are **causally related but not in a parent-child hierarchy**. Unlike parent-child, links can cross trace boundaries.
|
||||
|
||||
```
|
||||
Trace A Trace B
|
||||
────── ──────
|
||||
batch.schedule batch.execute
|
||||
├─ item.enqueue (span X) ┌──► process.item
|
||||
├─ item.enqueue (span Y) ───┤ (links to X, Y, Z)
|
||||
├─ item.enqueue (span Z) └──►
|
||||
```
|
||||
|
||||
**Use cases:**
|
||||
|
||||
| Pattern | Description |
|
||||
| -------------------- | --------------------------------------------------------------------------- |
|
||||
| **Batch processing** | A batch span links back to all individual spans that contributed to it |
|
||||
| **Fan-in** | An aggregation span links to the multiple producer spans it merges |
|
||||
| **Fan-out** | Multiple downstream spans link back to the single span that triggered them |
|
||||
| **Async handoff** | A deferred job links back to the request that queued it (follows-from) |
|
||||
| **Cross-trace** | Correlating spans across independent traces (e.g., retries, related events) |
|
||||
|
||||
**Link structure:** Each link carries the target span's context plus optional attributes:
|
||||
|
||||
```
|
||||
Link {
|
||||
trace_id: <target trace>
|
||||
span_id: <target span>
|
||||
attributes: { "link.description": "triggered by batch scheduler" }
|
||||
}
|
||||
```
|
||||
|
||||
### Relationship Summary
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph parent_child["Parent-Child"]
|
||||
direction TB
|
||||
P["Parent"] --> C["Child"]
|
||||
end
|
||||
|
||||
subgraph follows_from["Follows-From"]
|
||||
direction TB
|
||||
A["Span A"] -.->|triggers| B["Span B"]
|
||||
end
|
||||
|
||||
subgraph links["Span Links"]
|
||||
direction TB
|
||||
X["Span X\n(Trace 1)"] -.-|link| Y["Span Y\n(Trace 2)"]
|
||||
end
|
||||
|
||||
parent_child ~~~ follows_from ~~~ links
|
||||
|
||||
style P fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style C fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style A fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style B fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
style X fill:#4a148c,stroke:#38006b,color:#ffffff
|
||||
style Y fill:#4a148c,stroke:#38006b,color:#ffffff
|
||||
```
|
||||
|
||||
| Relationship | Same Trace? | Dependency? | OTel Mechanism |
|
||||
| ---------------- | ----------- | -------------------------- | ----------------- |
|
||||
| **Parent-Child** | Yes | Parent depends on child | `parent_span_id` |
|
||||
| **Follows-From** | Usually | Causal but no dependency | Link + attributes |
|
||||
| **Span Link** | Either | Correlation, no dependency | Link + attributes |
|
||||
|
||||
---
|
||||
|
||||
## Trace ID Generation
|
||||
|
||||
A `trace_id` is a 128-bit (16-byte) identifier that groups all spans belonging to one logical operation. How it's generated determines how easily you can find and correlate traces later.
|
||||
|
||||
### General Approaches
|
||||
|
||||
#### 1. Random (W3C Default)
|
||||
|
||||
Generate a random 128-bit ID when a trace starts. Standard approach for most services.
|
||||
|
||||
```
|
||||
trace_id = random_128_bits()
|
||||
```
|
||||
|
||||
| Pros | Cons |
|
||||
| --------------------------- | --------------------------------------------- |
|
||||
| Simple, standard | No natural correlation to domain events |
|
||||
| Guaranteed unique per trace | If propagation is lost, trace is broken |
|
||||
| Works with all OTel tooling | "Find trace for TX abc" requires index lookup |
|
||||
|
||||
#### 2. Deterministic (Derived from Domain Data)
|
||||
|
||||
Compute the trace_id from a hash of a natural identifier. Every node independently derives the **same** trace_id for the same event.
|
||||
|
||||
```
|
||||
trace_id = SHA-256(domain_identifier)[0:16] // truncate to 128 bits
|
||||
```
|
||||
|
||||
| Pros | Cons |
|
||||
| --------------------------------------------------- | ---------------------------------------------------------- |
|
||||
| Propagation-resilient — same ID computed everywhere | Same event processed twice (retry) shares trace_id |
|
||||
| Natural search — domain ID maps directly to trace | Non-standard (tooling assumes random) |
|
||||
| No coordination needed between nodes | 256→128 bit truncation (collision risk negligible at ~2⁶⁴) |
|
||||
|
||||
#### 3. Hybrid (Deterministic Prefix + Random Suffix)
|
||||
|
||||
First 8 bytes derived from domain data, last 8 bytes random.
|
||||
|
||||
```
|
||||
trace_id = SHA-256(domain_identifier)[0:8] || random_64_bits()
|
||||
```
|
||||
|
||||
| Pros | Cons |
|
||||
| ------------------------------------------- | ---------------------------------------- |
|
||||
| Prefix search: "find all traces for TX abc" | Must propagate to maintain full trace_id |
|
||||
| Unique per processing instance | More complex generation logic |
|
||||
| Retries get distinct trace_ids | Partial correlation only (prefix match) |
|
||||
|
||||
### XRPL Workflow Analysis
|
||||
|
||||
XRPL has a unique advantage: its core workflows produce **globally unique 256-bit hashes** that are known on every node. This makes deterministic trace_id generation practical in ways most systems can't achieve.
|
||||
|
||||
#### Natural Identifiers by Workflow
|
||||
|
||||
| Workflow | Natural Identifier | Size | Known at Start? | Same on All Nodes? |
|
||||
| ------------------- | --------------------------------- | ---------- | ----------------------------- | -------------------------------- |
|
||||
| **Transaction** | Transaction hash (`tid_`) | 256-bit | Yes — computed before signing | Yes — hash of canonical tx data |
|
||||
| **Consensus round** | Previous ledger hash + ledger seq | 256+32 bit | Yes — known when round opens | Yes — all validators agree |
|
||||
| **Validation** | Ledger hash being validated | 256-bit | Yes — from consensus result | Yes — same closed ledger |
|
||||
| **Ledger catch-up** | Target ledger hash | 256-bit | Yes — we know what to fetch | Yes — identifies ledger globally |
|
||||
|
||||
#### Where These Identifiers Live in Code
|
||||
|
||||
```
|
||||
Transaction: STTx::getTransactionID() → uint256 tid_
|
||||
TMTransaction::rawTransaction → recompute hash from bytes
|
||||
|
||||
Consensus: ConsensusProposal::prevLedger_ → uint256 (previous ledger hash)
|
||||
ConsensusProposal::position_ → uint256 (TxSet hash)
|
||||
LedgerHeader::seq → uint32_t (ledger sequence)
|
||||
|
||||
Validation: STValidation::getLedgerHash() → uint256
|
||||
STValidation::getNodeID() → NodeID (160-bit)
|
||||
|
||||
Ledger fetch: InboundLedger constructor → uint256 hash, uint32_t seq
|
||||
TMGetLedger::ledgerHash → bytes (uint256)
|
||||
```
|
||||
|
||||
### Recommended Strategy: Workflow-Scoped Deterministic
|
||||
|
||||
Each workflow type derives its trace_id from its natural domain identifier:
|
||||
|
||||
```
|
||||
Transaction trace: trace_id = SHA-256("tx" || tx_hash)[0:16]
|
||||
Consensus trace: trace_id = SHA-256("cons" || prev_ledger_hash || ledger_seq)[0:16]
|
||||
Ledger catch-up: trace_id = SHA-256("fetch" || target_ledger_hash)[0:16]
|
||||
```
|
||||
|
||||
The string prefix (`"tx"`, `"cons"`, `"fetch"`) prevents collisions between workflows that might share underlying hashes.
|
||||
|
||||
**Why this works for XRPL:**
|
||||
|
||||
1. **Propagation-resilient** — Even if a P2P message drops trace context, every node independently computes the same trace_id from the same tx_hash or ledger_hash. Spans still correlate.
|
||||
|
||||
2. **Zero-cost search** — "Show me the trace for transaction ABC" becomes a direct lookup: compute `SHA-256("tx" || ABC)[0:16]` and query. No secondary index needed.
|
||||
|
||||
3. **Cross-workflow linking via Span Links** — A consensus trace links to individual transaction traces. A validation span links to the consensus trace. This connects the full picture without forcing everything into one giant trace.
|
||||
|
||||
### Cross-Workflow Correlation
|
||||
|
||||
Each workflow gets its own trace. Span Links tie them together:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph tx_trace["Transaction Trace"]
|
||||
direction LR
|
||||
Tn["trace_id = f(tx_hash)"]:::note --> T1["tx.receive"] --> T2["tx.validate"] --> T3["tx.relay"]
|
||||
end
|
||||
|
||||
subgraph cons_trace["Consensus Trace"]
|
||||
direction LR
|
||||
Cn["trace_id = f(prev_ledger, seq)"]:::note --> C1["cons.open"] --> C2["cons.propose"] --> C3["cons.accept"]
|
||||
end
|
||||
|
||||
subgraph val_trace["Validation"]
|
||||
direction LR
|
||||
Vn["spans within consensus trace"]:::note --> V1["val.create"] --> V2["val.broadcast"]
|
||||
end
|
||||
|
||||
subgraph fetch_trace["Catch-Up Trace"]
|
||||
direction LR
|
||||
Fn["trace_id = f(ledger_hash)"]:::note --> F1["fetch.request"] --> F2["fetch.receive"] --> F3["fetch.apply"]
|
||||
end
|
||||
|
||||
C1 -.-|"span link\n(tx traces)"| T3
|
||||
C3 --> V1
|
||||
F1 -.-|"span link\n(target ledger)"| C3
|
||||
|
||||
classDef note fill:none,stroke:#888,stroke-dasharray:5 5,color:#333,font-style:italic
|
||||
style T1 fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style T2 fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style T3 fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style C1 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style C2 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style C3 fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style V1 fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
style V2 fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
style F1 fill:#4a148c,stroke:#38006b,color:#ffffff
|
||||
style F2 fill:#4a148c,stroke:#38006b,color:#ffffff
|
||||
style F3 fill:#4a148c,stroke:#38006b,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Transaction Trace (blue)**: An independent trace whose `trace_id` is deterministically derived from the transaction hash. Contains receive, validate, and relay spans.
|
||||
- **Consensus Trace (green)**: An independent trace whose `trace_id` is derived from the previous ledger hash and sequence number. Covers the open, propose, and accept phases.
|
||||
- **Validation (red)**: Validation spans live within the consensus trace (not a separate trace). They are created after the accept phase completes.
|
||||
- **Catch-Up Trace (purple)**: An independent trace for ledger acquisition, derived from the target ledger hash. Used when a node is behind and fetching missing ledgers.
|
||||
- **Dotted arrows (span links)**: Cross-trace correlations. Consensus links to transaction traces it included; catch-up links to the consensus trace that produced the target ledger.
|
||||
- **Solid arrow (C3 to V1)**: A parent-child relationship -- validation spans are direct children of the consensus accept span within the same trace.
|
||||
|
||||
**How a query flows:**
|
||||
|
||||
```
|
||||
"Why was TX abc slow?"
|
||||
1. Compute trace_id = SHA-256("tx" || abc)[0:16]
|
||||
2. Find transaction trace → see it was included in consensus round N
|
||||
3. Follow span link → consensus trace for round N
|
||||
4. See which phase was slow (propose? accept?)
|
||||
5. If a node was catching up, follow link → catch-up trace
|
||||
```
|
||||
|
||||
### Trade-offs to Consider
|
||||
|
||||
| Concern | Mitigation |
|
||||
| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Retries get same trace_id** | Add `attempt` attribute to root span; spans have unique span_ids and timestamps |
|
||||
| **256→128 bit truncation** | Birthday-bound collision at ~2⁶⁴ operations — negligible for XRPL's throughput |
|
||||
| **Non-standard generation** | OTel spec allows any 16-byte non-zero value; tooling works on the hex string |
|
||||
| **Hash computation cost** | SHA-256 is ~0.3μs per call; XRPL already computes these hashes for other purposes |
|
||||
| **Late-binding identifiers** | Ledger hash isn't known until after consensus — validation spans use ledger_seq as fallback, then link to the consensus trace |
|
||||
|
||||
---
|
||||
|
||||
## Distributed Traces Across Nodes
|
||||
|
||||
In distributed systems like xrpld, traces span **multiple independent nodes**. The trace context must be propagated in network messages:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant NodeA as Node A
|
||||
participant NodeB as Node B
|
||||
participant NodeC as Node C
|
||||
|
||||
Client->>NodeA: Submit TX<br/>(no trace context)
|
||||
|
||||
Note over NodeA: Creates new trace<br/>trace_id: abc123<br/>span: tx.receive
|
||||
|
||||
NodeA->>NodeB: Relay TX<br/>(trace_id: abc123, parent: 001)
|
||||
|
||||
Note over NodeB: Creates child span<br/>span: tx.relay<br/>parent_span_id: 001
|
||||
|
||||
NodeA->>NodeC: Relay TX<br/>(trace_id: abc123, parent: 001)
|
||||
|
||||
Note over NodeC: Creates child span<br/>span: tx.relay<br/>parent_span_id: 001
|
||||
|
||||
Note over NodeA,NodeC: All spans share trace_id: abc123<br/>enabling correlation across nodes
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Client**: The external entity that submits a transaction. It does not carry trace context -- the trace originates at the first node.
|
||||
- **Node A**: The entry point that creates a new trace (trace_id: abc123) and the root span `tx.receive`. It relays the transaction to peers with trace context attached.
|
||||
- **Node B and Node C**: Peer nodes that receive the relayed transaction along with the propagated trace context. Each creates a child span under Node A's span, preserving the same `trace_id`.
|
||||
- **Arrows with trace context**: The relay messages carry `trace_id` and `parent_span_id`, allowing each downstream node to link its spans back to the originating span on Node A.
|
||||
|
||||
---
|
||||
|
||||
## Context Propagation
|
||||
|
||||
For traces to work across nodes, **trace context must be propagated** in messages.
|
||||
|
||||
### What's in the Context (~26 bytes)
|
||||
|
||||
| Field | Size | Description |
|
||||
| ------------- | -------- | ------------------------------------------------------- |
|
||||
| `trace_id` | 16 bytes | Identifies the entire trace (constant across all nodes) |
|
||||
| `span_id` | 8 bytes | The sender's current span (becomes parent on receiver) |
|
||||
| `trace_flags` | 1 byte | Sampling decision (bit 0 = sampled; bits 1-7 reserved) |
|
||||
| `trace_state` | variable | Optional vendor-specific data (typically omitted) |
|
||||
|
||||
### How span_id Changes at Each Hop
|
||||
|
||||
Only **one** `span_id` travels in the context - the sender's current span. Each node:
|
||||
|
||||
1. Extracts the received `span_id` and uses it as the `parent_span_id`
|
||||
2. Creates a **new** `span_id` for its own span
|
||||
3. Sends its own `span_id` as the parent when forwarding
|
||||
|
||||
```
|
||||
Node A Node B Node C
|
||||
────── ────── ──────
|
||||
|
||||
Span AAA Span BBB Span CCC
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
Context out: Context out: Context out:
|
||||
├─ trace_id: abc123 ├─ trace_id: abc123 ├─ trace_id: abc123
|
||||
├─ span_id: AAA ──────────► ├─ span_id: BBB ──────────► ├─ span_id: CCC ──────►
|
||||
└─ flags: 01 └─ flags: 01 └─ flags: 01
|
||||
│ │
|
||||
parent = AAA parent = BBB
|
||||
```
|
||||
|
||||
The `trace_id` stays constant, but `span_id` **changes at every hop** to maintain the parent-child chain.
|
||||
|
||||
### Propagation Formats
|
||||
|
||||
There are two patterns:
|
||||
|
||||
### HTTP/RPC Headers (W3C Trace Context)
|
||||
|
||||
```
|
||||
traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01
|
||||
│ │ │ │
|
||||
│ │ │ └── Flags (sampled)
|
||||
│ │ └── Parent span ID (16 hex)
|
||||
│ └── Trace ID (32 hex)
|
||||
└── Version
|
||||
```
|
||||
|
||||
### Protocol Buffers (xrpld P2P messages)
|
||||
|
||||
```protobuf
|
||||
message TMTransaction {
|
||||
bytes rawTransaction = 1;
|
||||
// ... existing fields ...
|
||||
|
||||
// Trace context extension
|
||||
bytes trace_parent = 100; // W3C traceparent
|
||||
bytes trace_state = 101; // W3C tracestate
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Sampling
|
||||
|
||||
Not every trace needs to be recorded. **Sampling** reduces overhead:
|
||||
|
||||
### Head Sampling (at trace start)
|
||||
|
||||
```
|
||||
Request arrives → Random 10% chance → Record or skip entire trace
|
||||
```
|
||||
|
||||
- ✅ Low overhead
|
||||
- ❌ May miss interesting traces
|
||||
|
||||
### Tail Sampling (after trace completes)
|
||||
|
||||
```
|
||||
Trace completes → Collector evaluates:
|
||||
- Error? → KEEP
|
||||
- Slow? → KEEP
|
||||
- Normal? → Sample 10%
|
||||
```
|
||||
|
||||
- ✅ Never loses important traces
|
||||
- ❌ Higher memory usage at collector
|
||||
|
||||
---
|
||||
|
||||
## Key Benefits for xrpld
|
||||
|
||||
| Challenge | How Tracing Helps |
|
||||
| ---------------------------------- | ---------------------------------------- |
|
||||
| "Where is my transaction?" | Follow trace across all nodes it touched |
|
||||
| "Why was consensus slow?" | See timing breakdown of each phase |
|
||||
| "Which node is the bottleneck?" | Compare span durations across nodes |
|
||||
| "What happened during the outage?" | Correlate errors across the network |
|
||||
|
||||
---
|
||||
|
||||
## Glossary
|
||||
|
||||
| Term | Definition |
|
||||
| -------------------- | ------------------------------------------------------------------- |
|
||||
| **Trace** | Complete journey of a request, identified by `trace_id` |
|
||||
| **Span** | Single operation within a trace |
|
||||
| **Parent-Child** | Span relationship where the parent depends on the child |
|
||||
| **Follows-From** | Causal relationship where originator doesn't wait for the result |
|
||||
| **Span Link** | Non-hierarchical connection between spans, possibly across traces |
|
||||
| **Deterministic ID** | Trace ID derived from domain data (e.g., tx_hash) instead of random |
|
||||
| **Context** | Data propagated between services (`trace_id`, `span_id`, flags) |
|
||||
| **Instrumentation** | Code that creates spans and propagates context |
|
||||
| **Collector** | Service that receives, processes, and exports traces |
|
||||
| **Backend** | Storage/visualization system (Tempo) |
|
||||
| **Head Sampling** | Sampling decision at trace start |
|
||||
| **Tail Sampling** | Sampling decision after trace completes |
|
||||
|
||||
---
|
||||
|
||||
_Next: [Architecture Analysis](./01-architecture-analysis.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,467 +0,0 @@
|
||||
# Architecture Analysis
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Design Decisions](./02-design-decisions.md) | [Implementation Strategy](./03-implementation-strategy.md)
|
||||
|
||||
---
|
||||
|
||||
## 1.1 Current xrpld Architecture Overview
|
||||
|
||||
> **WS** = WebSocket | **UNL** = Unique Node List | **TxQ** = Transaction Queue | **StatsD** = Statistics Daemon
|
||||
|
||||
The xrpld node software consists of several interconnected components that need instrumentation for distributed tracing:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph xrpld["xrpld Node"]
|
||||
subgraph services["Core Services"]
|
||||
RPC["RPC Server<br/>(HTTP/WS/gRPC)"]
|
||||
Overlay["Overlay<br/>(P2P Network)"]
|
||||
Consensus["Consensus<br/>(RCLConsensus)"]
|
||||
ValidatorList["ValidatorList<br/>(UNL Mgmt)"]
|
||||
end
|
||||
|
||||
JobQueue["JobQueue<br/>(Thread Pool)"]
|
||||
|
||||
subgraph processing["Processing Layer"]
|
||||
NetworkOPs["NetworkOPs<br/>(Tx Processing)"]
|
||||
LedgerMaster["LedgerMaster<br/>(Ledger Mgmt)"]
|
||||
NodeStore["NodeStore<br/>(Database)"]
|
||||
InboundLedgers["InboundLedgers<br/>(Ledger Sync)"]
|
||||
end
|
||||
|
||||
subgraph appservices["Application Services"]
|
||||
PathFind["PathFinding<br/>(Payment Paths)"]
|
||||
TxQ["TxQ<br/>(Fee Escalation)"]
|
||||
LoadMgr["LoadManager<br/>(Fee/Load)"]
|
||||
end
|
||||
|
||||
subgraph observability["Existing Observability"]
|
||||
PerfLog["PerfLog<br/>(JSON)"]
|
||||
Insight["Insight<br/>(StatsD)"]
|
||||
Logging["Logging<br/>(Journal)"]
|
||||
end
|
||||
|
||||
services --> JobQueue
|
||||
JobQueue --> processing
|
||||
JobQueue --> appservices
|
||||
end
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#ffffff
|
||||
style services fill:#1565c0,stroke:#0d47a1,color:#ffffff
|
||||
style processing fill:#2e7d32,stroke:#1b5e20,color:#ffffff
|
||||
style appservices fill:#6a1b9a,stroke:#4a148c,color:#ffffff
|
||||
style observability fill:#e65100,stroke:#bf360c,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Core Services (blue)**: The entry points into xrpld -- RPC Server handles client requests, Overlay manages peer-to-peer networking, Consensus drives agreement, and ValidatorList manages trusted validators.
|
||||
- **JobQueue (center)**: The asynchronous thread pool that decouples Core Services from the Processing and Application layers. All work flows through it.
|
||||
- **Processing Layer (green)**: Core business logic -- NetworkOPs processes transactions, LedgerMaster manages ledger state, NodeStore handles persistence, and InboundLedgers synchronizes missing data.
|
||||
- **Application Services (purple)**: Higher-level features -- PathFinding computes payment routes, TxQ manages fee-based queuing, and LoadManager tracks server load.
|
||||
- **Existing Observability (orange)**: The current monitoring stack (PerfLog, Insight, Journal logging) that OpenTelemetry will complement, not replace.
|
||||
- **Arrows (Services to JobQueue to layers)**: Work originates at Core Services, is enqueued onto the JobQueue, and dispatched to Processing or Application layers for execution.
|
||||
|
||||
---
|
||||
|
||||
## 1.1.1 Actors and Actions
|
||||
|
||||
### Actors
|
||||
|
||||
| Who (Plain English) | Technical Term |
|
||||
| ----------------------------------------- | -------------------------- |
|
||||
| Network node running XRPL software | xrpld node |
|
||||
| External client submitting requests | RPC Client |
|
||||
| Network neighbor sharing data | Peer (PeerImp) |
|
||||
| Request handler for client queries | RPC Server (ServerHandler) |
|
||||
| Command executor for specific RPC methods | RPCHandler |
|
||||
| Agreement process between nodes | Consensus (RCLConsensus) |
|
||||
| Transaction processing coordinator | NetworkOPs |
|
||||
| Background task scheduler | JobQueue |
|
||||
| Ledger state manager | LedgerMaster |
|
||||
| Payment route calculator | PathFinding (Pathfinder) |
|
||||
| Transaction waiting room | TxQ (Transaction Queue) |
|
||||
| Fee adjustment system | LoadManager |
|
||||
| Trusted validator list manager | ValidatorList |
|
||||
| Protocol upgrade tracker | AmendmentTable |
|
||||
| Ledger state hash tree | SHAMap |
|
||||
| Persistent key-value storage | NodeStore |
|
||||
|
||||
### Actions
|
||||
|
||||
| What Happens (Plain English) | Technical Term |
|
||||
| ---------------------------------------------- | ---------------------- |
|
||||
| Client sends a request to a node | `rpc.request` |
|
||||
| Node executes a specific RPC command | `rpc.command.*` |
|
||||
| Node receives a transaction from a peer | `tx.receive` |
|
||||
| Node checks if a transaction is valid | `tx.validate` |
|
||||
| Node forwards a transaction to neighbors | `tx.relay` |
|
||||
| Nodes agree on which transactions to include | `consensus.round` |
|
||||
| Consensus progresses through phases | `consensus.phase.*` |
|
||||
| Node builds a new confirmed ledger | `ledger.build` |
|
||||
| Node fetches missing ledger data from peers | `ledger.acquire` |
|
||||
| Node computes payment routes | `pathfind.compute` |
|
||||
| Node queues a transaction for later processing | `txq.enqueue` |
|
||||
| Node increases fees due to high load | `fee.escalate` |
|
||||
| Node fetches the latest trusted validator list | `validator.list.fetch` |
|
||||
| Node votes on a protocol amendment | `amendment.vote` |
|
||||
| Node synchronizes state tree data | `shamap.sync` |
|
||||
|
||||
---
|
||||
|
||||
## 1.2 Key Components for Instrumentation
|
||||
|
||||
> **TxQ** = Transaction Queue | **UNL** = Unique Node List
|
||||
|
||||
| Component | Location | Purpose | Trace Value |
|
||||
| ------------------ | ------------------------------------------ | ------------------------ | -------------------------------- |
|
||||
| **Overlay** | `src/xrpld/overlay/` | P2P communication | Message propagation timing |
|
||||
| **PeerImp** | `src/xrpld/overlay/detail/PeerImp.cpp` | Individual peer handling | Per-peer latency |
|
||||
| **RCLConsensus** | `src/xrpld/app/consensus/RCLConsensus.cpp` | Consensus algorithm | Round timing, phase analysis |
|
||||
| **NetworkOPs** | `src/xrpld/app/misc/NetworkOPs.cpp` | Transaction processing | Tx lifecycle tracking |
|
||||
| **ServerHandler** | `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point | Request latency |
|
||||
| **RPCHandler** | `src/xrpld/rpc/detail/RPCHandler.cpp` | Command execution | Per-command timing |
|
||||
| **JobQueue** | `src/xrpl/core/JobQueue.h` | Async task execution | Queue wait times |
|
||||
| **PathFinding** | `src/xrpld/app/paths/` | Payment path computation | Path latency, cache hits |
|
||||
| **TxQ** | `src/xrpld/app/misc/TxQ.cpp` | Transaction queue/fees | Queue depth, eviction rates |
|
||||
| **LoadManager** | `src/xrpld/app/main/LoadManager.cpp` | Fee escalation/load | Fee levels, load factors |
|
||||
| **InboundLedgers** | `src/xrpld/app/ledger/InboundLedgers.cpp` | Ledger acquisition | Sync time, peer reliability |
|
||||
| **ValidatorList** | `src/xrpld/app/misc/ValidatorList.cpp` | UNL management | List freshness, fetch failures |
|
||||
| **AmendmentTable** | `src/xrpld/app/misc/AmendmentTable.cpp` | Protocol amendments | Voting status, activation events |
|
||||
| **SHAMap** | `src/xrpld/shamap/` | State hash tree | Sync speed, missing nodes |
|
||||
|
||||
---
|
||||
|
||||
## 1.3 Transaction Flow Diagram
|
||||
|
||||
Transaction flow spans multiple nodes in the network. Each node creates linked spans to form a distributed trace:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant PeerA as Peer A (Receive)
|
||||
participant PeerB as Peer B (Relay)
|
||||
participant PeerC as Peer C (Validate)
|
||||
|
||||
Client->>PeerA: 1. Submit TX
|
||||
|
||||
rect rgb(230, 245, 255)
|
||||
Note over PeerA: tx.receive SPAN START
|
||||
PeerA->>PeerA: HashRouter Deduplication
|
||||
PeerA->>PeerA: tx.validate (child span)
|
||||
end
|
||||
|
||||
PeerA->>PeerB: 2. Relay TX (with trace ctx)
|
||||
|
||||
rect rgb(230, 245, 255)
|
||||
Note over PeerB: tx.receive (linked span)
|
||||
end
|
||||
|
||||
PeerB->>PeerC: 3. Relay TX
|
||||
|
||||
rect rgb(230, 245, 255)
|
||||
Note over PeerC: tx.receive (linked span)
|
||||
PeerC->>PeerC: tx.process
|
||||
end
|
||||
|
||||
Note over Client,PeerC: DISTRIBUTED TRACE (same trace_id: abc123)
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Client**: The external entity that submits a transaction to Peer A. It has no trace context -- the trace starts at the first node.
|
||||
- **Peer A (Receive)**: The entry node that creates the root span `tx.receive`, runs HashRouter deduplication to avoid processing duplicates, and creates a child `tx.validate` span.
|
||||
- **Peer A to Peer B arrow**: The relay message carries trace context (trace_id + parent span_id), enabling Peer B to create a linked span under the same trace.
|
||||
- **Peer B (Relay)**: Receives the transaction and trace context, creates a `tx.receive` span linked to Peer A's trace, then relays onward.
|
||||
- **Peer C (Validate)**: Final hop in this example. Creates a linked `tx.receive` span and runs `tx.process` to fully process the transaction.
|
||||
- **Blue rectangles**: Highlight the span boundaries on each node, showing where instrumentation creates and closes spans.
|
||||
|
||||
### Trace Structure
|
||||
|
||||
```
|
||||
trace_id: abc123
|
||||
├── span: tx.receive (Peer A)
|
||||
│ ├── span: tx.validate
|
||||
│ └── span: tx.relay
|
||||
├── span: tx.receive (Peer B) [parent: Peer A]
|
||||
│ └── span: tx.relay
|
||||
└── span: tx.receive (Peer C) [parent: Peer B]
|
||||
└── span: tx.process
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1.4 Consensus Round Flow
|
||||
|
||||
Consensus rounds are multi-phase operations that benefit significantly from tracing:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph round["consensus.round (root span)"]
|
||||
attrs["Attributes:<br/>xrpl.consensus.ledger.seq = 12345678<br/>xrpl.consensus.mode = proposing<br/>xrpl.consensus.proposers = 35"]
|
||||
|
||||
subgraph open["consensus.phase.open"]
|
||||
open_desc["Duration: ~3s<br/>Waiting for transactions"]
|
||||
end
|
||||
|
||||
subgraph establish["consensus.phase.establish"]
|
||||
est_attrs["proposals_received = 28<br/>disputes_resolved = 3"]
|
||||
est_children["├── consensus.proposal.receive (×28)<br/>├── consensus.proposal.send (×1)<br/>└── consensus.dispute.resolve (×3)"]
|
||||
end
|
||||
|
||||
subgraph accept["consensus.phase.accept"]
|
||||
acc_attrs["transactions_applied = 150<br/>ledger.hash = DEF456..."]
|
||||
acc_children["├── ledger.build<br/>└── ledger.validate"]
|
||||
end
|
||||
|
||||
attrs --> open
|
||||
open --> establish
|
||||
establish --> accept
|
||||
end
|
||||
|
||||
style round fill:#f57f17,stroke:#e65100,color:#ffffff
|
||||
style open fill:#1565c0,stroke:#0d47a1,color:#ffffff
|
||||
style establish fill:#2e7d32,stroke:#1b5e20,color:#ffffff
|
||||
style accept fill:#c2185b,stroke:#880e4f,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **consensus.round (orange, root span)**: The top-level span encompassing the entire consensus round, with attributes like ledger sequence, mode, and proposer count.
|
||||
- **consensus.phase.open (blue)**: The first phase where the node waits (~3s) to collect incoming transactions before proposing.
|
||||
- **consensus.phase.establish (green)**: The negotiation phase where validators exchange proposals, resolve disputes, and converge on a transaction set. Child spans track each proposal received/sent and each dispute resolved.
|
||||
- **consensus.phase.accept (pink)**: The final phase where the agreed transaction set is applied, a new ledger is built, and the ledger is validated. Child spans cover `ledger.build` and `ledger.validate`.
|
||||
- **Arrows (open to establish to accept)**: The sequential flow through the three consensus phases. Each phase must complete before the next begins.
|
||||
|
||||
---
|
||||
|
||||
## 1.5 RPC Request Flow
|
||||
|
||||
> **WS** = WebSocket
|
||||
|
||||
RPC requests support W3C Trace Context headers for distributed tracing across services:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph request["rpc.request (root span)"]
|
||||
http["HTTP Request — POST /<br/>traceparent:<br/>00-abc123...-def456...-01"]
|
||||
|
||||
attrs["Attributes:<br/>http.method = POST<br/>net.peer.ip = 192.168.1.100<br/>xrpl.rpc.command = submit"]
|
||||
|
||||
subgraph enqueue["jobqueue.enqueue"]
|
||||
job_attr["xrpl.job.type = jtCLIENT_RPC"]
|
||||
end
|
||||
|
||||
subgraph command["rpc.command.submit"]
|
||||
cmd_attrs["xrpl.rpc.version = 2<br/>xrpl.rpc.role = user"]
|
||||
cmd_children["├── tx.deserialize<br/>├── tx.validate_local<br/>└── tx.submit_to_network"]
|
||||
end
|
||||
|
||||
response["Response: 200 OK<br/>Duration: 45ms"]
|
||||
|
||||
http --> attrs
|
||||
attrs --> enqueue
|
||||
enqueue --> command
|
||||
command --> response
|
||||
end
|
||||
|
||||
style request fill:#2e7d32,stroke:#1b5e20,color:#ffffff
|
||||
style enqueue fill:#1565c0,stroke:#0d47a1,color:#ffffff
|
||||
style command fill:#e65100,stroke:#bf360c,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **rpc.request (green, root span)**: The outermost span representing the full RPC request lifecycle, from HTTP receipt to response. Carries the W3C `traceparent` header for distributed tracing.
|
||||
- **HTTP Request node**: Shows the incoming POST request with its `traceparent` header and extracted attributes (method, peer IP, command name).
|
||||
- **jobqueue.enqueue (blue)**: The span covering the asynchronous handoff from the RPC thread to the JobQueue worker thread. The trace context is preserved across this async boundary.
|
||||
- **rpc.command.submit (orange)**: The span for the actual command execution, with child spans for deserialization, local validation, and network submission.
|
||||
- **Response node**: The final output with HTTP status and total duration, marking the end of the root span.
|
||||
- **Arrows (top to bottom)**: The sequential processing pipeline -- receive request, extract attributes, enqueue job, execute command, return response.
|
||||
|
||||
---
|
||||
|
||||
## 1.6 Key Trace Points
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
The following table identifies priority instrumentation points across the codebase:
|
||||
|
||||
| Category | Span Name | File | Method | Priority |
|
||||
| --------------- | ---------------------- | ---------------------- | ----------------------- | -------- |
|
||||
| **Transaction** | `tx.receive` | `PeerImp.cpp` | `handleTransaction()` | High |
|
||||
| **Transaction** | `tx.validate` | `NetworkOPs.cpp` | `processTransaction()` | High |
|
||||
| **Transaction** | `tx.process` | `NetworkOPs.cpp` | `doTransactionSync()` | High |
|
||||
| **Transaction** | `tx.relay` | `OverlayImpl.cpp` | `relay()` | Medium |
|
||||
| **Consensus** | `consensus.round` | `RCLConsensus.cpp` | `startRound()` | High |
|
||||
| **Consensus** | `consensus.phase.*` | `Consensus.h` | `timerEntry()` | High |
|
||||
| **Consensus** | `consensus.proposal.*` | `RCLConsensus.cpp` | `peerProposal()` | Medium |
|
||||
| **RPC** | `rpc.request` | `ServerHandler.cpp` | `onRequest()` | High |
|
||||
| **RPC** | `rpc.command.*` | `RPCHandler.cpp` | `doCommand()` | High |
|
||||
| **Peer** | `peer.connect` | `OverlayImpl.cpp` | `onHandoff()` | Low |
|
||||
| **Peer** | `peer.message.*` | `PeerImp.cpp` | `onMessage()` | Low |
|
||||
| **Ledger** | `ledger.acquire` | `InboundLedgers.cpp` | `acquire()` | Medium |
|
||||
| **Ledger** | `ledger.build` | `RCLConsensus.cpp` | `buildLCL()` | High |
|
||||
| **PathFinding** | `pathfind.request` | `PathRequest.cpp` | `doUpdate()` | High |
|
||||
| **PathFinding** | `pathfind.compute` | `Pathfinder.cpp` | `findPaths()` | High |
|
||||
| **TxQ** | `txq.enqueue` | `TxQ.cpp` | `apply()` | High |
|
||||
| **TxQ** | `txq.apply` | `TxQ.cpp` | `processClosedLedger()` | High |
|
||||
| **Fee** | `fee.escalate` | `LoadManager.cpp` | `raiseLocalFee()` | Medium |
|
||||
| **Ledger** | `ledger.replay` | `LedgerReplayer.h` | `replay()` | Medium |
|
||||
| **Ledger** | `ledger.delta` | `LedgerDeltaAcquire.h` | `processData()` | Medium |
|
||||
| **Validator** | `validator.list.fetch` | `ValidatorList.cpp` | `verify()` | Medium |
|
||||
| **Validator** | `validator.manifest` | `Manifest.cpp` | `applyManifest()` | Low |
|
||||
| **Amendment** | `amendment.vote` | `AmendmentTable.cpp` | `doVoting()` | Low |
|
||||
| **SHAMap** | `shamap.sync` | `SHAMap.cpp` | `fetchRoot()` | Medium |
|
||||
|
||||
---
|
||||
|
||||
## 1.7 Instrumentation Priority
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
```mermaid
|
||||
quadrantChart
|
||||
title Instrumentation Priority Matrix
|
||||
x-axis Low Complexity --> High Complexity
|
||||
y-axis Low Value --> High Value
|
||||
quadrant-1 Implement First
|
||||
quadrant-2 Plan Carefully
|
||||
quadrant-3 Quick Wins
|
||||
quadrant-4 Consider Later
|
||||
|
||||
RPC Tracing: [0.2, 0.92]
|
||||
Transaction Tracing: [0.55, 0.88]
|
||||
Consensus Tracing: [0.78, 0.82]
|
||||
PathFinding: [0.38, 0.75]
|
||||
TxQ and Fees: [0.25, 0.65]
|
||||
Ledger Sync: [0.62, 0.58]
|
||||
Peer Message Tracing: [0.35, 0.25]
|
||||
JobQueue Tracing: [0.2, 0.48]
|
||||
Validator Mgmt: [0.48, 0.42]
|
||||
Amendment Tracking: [0.15, 0.32]
|
||||
SHAMap Operations: [0.72, 0.45]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1.8 Observable Outcomes
|
||||
|
||||
> **TxQ** = Transaction Queue | **UNL** = Unique Node List
|
||||
|
||||
After implementing OpenTelemetry, operators and developers will gain visibility into the following:
|
||||
|
||||
### 1.8.1 What You Will See: Traces
|
||||
|
||||
| Trace Type | Description | Example Query in Grafana/Tempo |
|
||||
| -------------------------- | ------------------------------------------------------------------------------------------- | ---------------------------------------------------- |
|
||||
| **Transaction Lifecycle** | Full journey from RPC submission through validation, relay, consensus, and ledger inclusion | `{service.name="xrpld" && xrpl.tx.hash="ABC123..."}` |
|
||||
| **Cross-Node Propagation** | Transaction path across multiple xrpld nodes with timing | `{xrpl.tx.relay_count > 0}` |
|
||||
| **Consensus Rounds** | Complete round with all phases (open, establish, accept) | `{span.name=~"consensus.round.*"}` |
|
||||
| **RPC Request Processing** | Individual command execution with timing breakdown | `{xrpl.rpc.command="account_info"}` |
|
||||
| **Ledger Acquisition** | Peer-to-peer ledger data requests and responses | `{span.name="ledger.acquire"}` |
|
||||
| **PathFinding Latency** | Path computation time and cache effectiveness for payment RPCs | `{span.name="pathfind.compute"}` |
|
||||
| **TxQ Behavior** | Queue depth, eviction patterns, fee escalation during congestion | `{span.name=~"txq.*"}` |
|
||||
| **Ledger Sync** | Full acquisition timeline including delta and transaction fetches | `{span.name=~"ledger.acquire.*"}` |
|
||||
| **Validator Health** | UNL fetch success, manifest updates, stale list detection | `{span.name=~"validator.*"}` |
|
||||
|
||||
### 1.8.2 What You Will See: Metrics (Derived from Traces)
|
||||
|
||||
| Metric | Description | Dashboard Panel |
|
||||
| ----------------------------- | --------------------------------------- | --------------------------- |
|
||||
| **RPC Latency (p50/p95/p99)** | Response time distribution per command | Heatmap by command |
|
||||
| **Transaction Throughput** | Transactions processed per second | Time series graph |
|
||||
| **Consensus Round Duration** | Time to complete consensus phases | Histogram |
|
||||
| **Cross-Node Latency** | Time for transaction to reach N nodes | Line chart with percentiles |
|
||||
| **Error Rate** | Failed transactions/RPC calls by type | Stacked bar chart |
|
||||
| **PathFinding Latency** | Path computation time per currency pair | Heatmap by currency |
|
||||
| **TxQ Depth** | Queued transactions over time | Time series with thresholds |
|
||||
| **Fee Escalation Level** | Current fee multiplier | Gauge with alert thresholds |
|
||||
| **Ledger Sync Duration** | Time to acquire missing ledgers | Histogram |
|
||||
|
||||
### 1.8.3 Concrete Dashboard Examples
|
||||
|
||||
**Transaction Trace View (Tempo):**
|
||||
|
||||
```
|
||||
┌────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ Trace: abc123... (Transaction Submission) Duration: 847ms │
|
||||
├────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ ├── rpc.request [ServerHandler] ████░░░░░░ 45ms │
|
||||
│ │ └── rpc.command.submit [RPCHandler] ████░░░░░░ 42ms │
|
||||
│ │ └── tx.receive [NetworkOPs] ███░░░░░░░ 35ms │
|
||||
│ │ ├── tx.validate [TxQ] █░░░░░░░░░ 8ms │
|
||||
│ │ └── tx.relay [Overlay] ██░░░░░░░░ 15ms │
|
||||
│ │ ├── tx.receive [Node-B] █████░░░░░ 52ms │
|
||||
│ │ │ └── tx.relay [Node-B] ██░░░░░░░░ 18ms │
|
||||
│ │ └── tx.receive [Node-C] ██████░░░░ 65ms │
|
||||
│ └── consensus.round [RCLConsensus] ████████░░ 720ms │
|
||||
│ ├── consensus.phase.open ██░░░░░░░░ 180ms │
|
||||
│ ├── consensus.phase.establish █████░░░░░ 480ms │
|
||||
│ └── consensus.phase.accept █░░░░░░░░░ 60ms │
|
||||
└────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**RPC Performance Dashboard Panel:**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ RPC Command Latency (Last 1 Hour) │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ Command │ p50 │ p95 │ p99 │ Errors │ Rate │
|
||||
│──────────────────┼────────┼────────┼────────┼────────┼──────│
|
||||
│ account_info │ 12ms │ 45ms │ 89ms │ 0.1% │ 150/s│
|
||||
│ submit │ 35ms │ 120ms │ 250ms │ 2.3% │ 45/s│
|
||||
│ ledger │ 8ms │ 25ms │ 55ms │ 0.0% │ 80/s│
|
||||
│ tx │ 15ms │ 50ms │ 100ms │ 0.5% │ 60/s│
|
||||
│ server_info │ 5ms │ 12ms │ 20ms │ 0.0% │ 200/s│
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Consensus Health Dashboard Panel:**
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
xyChart:
|
||||
width: 1200
|
||||
height: 400
|
||||
plotReservedSpacePercent: 50
|
||||
chartOrientation: vertical
|
||||
themeVariables:
|
||||
xyChart:
|
||||
plotColorPalette: "#3498db"
|
||||
---
|
||||
xychart-beta
|
||||
title "Consensus Round Duration (Last 24 Hours)"
|
||||
x-axis "Time of Day (Hours)" [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
|
||||
y-axis "Duration (seconds)" 1 --> 5
|
||||
line [2.1, 2.4, 2.8, 3.2, 3.8, 4.3, 4.5, 5.0, 4.7, 4.0, 3.2, 2.6, 2.0]
|
||||
```
|
||||
|
||||
### 1.8.4 Operator Actionable Insights
|
||||
|
||||
| Scenario | What You'll See | Action |
|
||||
| ------------------------- | ---------------------------------------------------------------------------- | ------------------------------------------------ |
|
||||
| **Slow RPC** | Span showing which phase is slow (parsing, execution, serialization) | Optimize specific code path |
|
||||
| **Transaction Stuck** | Trace stops at validation; error attribute shows reason | Fix transaction parameters |
|
||||
| **Consensus Delay** | Phase.establish taking too long; proposer attribute shows missing validators | Investigate network connectivity |
|
||||
| **Memory Spike** | Large batch of spans correlating with memory increase | Tune batch_size or sampling |
|
||||
| **Network Partition** | Traces missing cross-node links for specific peer | Check peer connectivity |
|
||||
| **Path Computation Slow** | pathfind.compute span shows high latency; cache miss rate in attributes | Warm the RippleLineCache, check order book depth |
|
||||
| **TxQ Full** | txq.enqueue spans show evictions; fee.escalate spans increasing | Monitor fee levels, alert operators |
|
||||
| **Ledger Sync Stalled** | ledger.acquire spans timing out; peer reliability attributes show issues | Check peer connectivity, add trusted peers |
|
||||
| **UNL Stale** | validator.list.fetch spans failing; last_update attribute aging | Verify validator site URLs, check DNS |
|
||||
|
||||
### 1.8.5 Developer Debugging Workflow
|
||||
|
||||
1. **Find Transaction**: Query by `xrpl.tx.hash` to get full trace
|
||||
2. **Identify Bottleneck**: Look at span durations to find slowest component
|
||||
3. **Check Attributes**: Review `xrpl.tx.validity`, `xrpl.rpc.status` for errors
|
||||
4. **Correlate Logs**: Use `trace_id` to find related PerfLog entries
|
||||
5. **Compare Nodes**: Filter by `service.instance.id` to compare behavior across nodes
|
||||
|
||||
---
|
||||
|
||||
_Next: [Design Decisions](./02-design-decisions.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,633 +0,0 @@
|
||||
# Design Decisions
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Architecture Analysis](./01-architecture-analysis.md) | [Code Samples](./04-code-samples.md)
|
||||
|
||||
---
|
||||
|
||||
## 2.1 OpenTelemetry Components
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### 2.1.1 SDK Selection
|
||||
|
||||
**Primary Choice**: OpenTelemetry C++ SDK (`opentelemetry-cpp`)
|
||||
|
||||
| Component | Purpose | Required |
|
||||
| --------------------------------------- | ---------------------- | ------------------------- |
|
||||
| `opentelemetry-cpp::api` | Tracing API headers | Yes |
|
||||
| `opentelemetry-cpp::sdk` | SDK implementation | Yes |
|
||||
| `opentelemetry-cpp::ext` | Extensions (exporters) | Yes |
|
||||
| `opentelemetry-cpp::otlp_http_exporter` | OTLP/HTTP export | Yes (shipped in Phase 1b) |
|
||||
| `opentelemetry-cpp::otlp_grpc_exporter` | OTLP/gRPC export | Future (not yet wired up) |
|
||||
|
||||
### 2.1.2 Instrumentation Strategy
|
||||
|
||||
**Manual Instrumentation** (recommended):
|
||||
|
||||
| Approach | Pros | Cons |
|
||||
| ---------- | --------------------------------------------------------------- | ------------------------------------------------------- |
|
||||
| **Manual** | Precise control, optimized placement, xrpld-specific attributes | More development effort |
|
||||
| **Auto** | Less code, automatic coverage | Less control, potential overhead, limited customization |
|
||||
|
||||
---
|
||||
|
||||
## 2.2 Exporter Configuration
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph nodes["xrpld Nodes"]
|
||||
node1["xrpld<br/>Node 1"]
|
||||
node2["xrpld<br/>Node 2"]
|
||||
node3["xrpld<br/>Node 3"]
|
||||
end
|
||||
|
||||
collector["OpenTelemetry<br/>Collector<br/>(sidecar or standalone)"]
|
||||
|
||||
subgraph backends["Observability Backends"]
|
||||
tempo["Tempo"]
|
||||
elastic["Elastic<br/>APM"]
|
||||
end
|
||||
|
||||
node1 -->|"OTLP/HTTP<br/>:4318"| collector
|
||||
node2 -->|"OTLP/HTTP<br/>:4318"| collector
|
||||
node3 -->|"OTLP/HTTP<br/>:4318"| collector
|
||||
|
||||
collector --> tempo
|
||||
collector --> elastic
|
||||
|
||||
style nodes fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style backends fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style collector fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **xrpld Nodes (blue)**: The source of telemetry data. Each xrpld node exports spans via OTLP/HTTP on port 4318 (the only exporter shipped in Phase 1b).
|
||||
- **OpenTelemetry Collector (red)**: The central aggregation point that receives spans from all nodes. Can run as a sidecar (per-node) or standalone (shared). Handles batching, filtering, and routing.
|
||||
- **Observability Backends (green)**: The storage and visualization destinations. Tempo is the recommended backend for both development and production, and Elastic APM is an alternative. The Collector routes to one or more backends.
|
||||
- **Arrows (nodes to collector to backends)**: The data pipeline -- spans flow from nodes to the Collector over HTTP, then the Collector fans out to the configured backends.
|
||||
|
||||
### 2.2.1 OTLP/HTTP (Shipped in Phase 1b)
|
||||
|
||||
```cpp
|
||||
// Configuration for OTLP over HTTP (the only exporter currently wired up).
|
||||
namespace otlp = opentelemetry::exporter::otlp;
|
||||
|
||||
otlp::OtlpHttpExporterOptions opts;
|
||||
opts.url = "http://localhost:4318/v1/traces";
|
||||
opts.content_type = otlp::HttpRequestContentType::kJson; // or kBinary
|
||||
```
|
||||
|
||||
### 2.2.2 OTLP/gRPC (Future Work — Planned Upgrade)
|
||||
|
||||
OTLP/gRPC is planned as a future upgrade from the HTTP exporter. The gRPC
|
||||
transport offers lower per-span overhead and tighter back-pressure semantics
|
||||
than HTTP/JSON, making it attractive for production deployments once the HTTP
|
||||
path is validated in earlier phases.
|
||||
|
||||
Required to land this upgrade:
|
||||
|
||||
1. Add `opentelemetry-cpp::otlp_grpc_exporter` to the Conan recipe (the
|
||||
dependency already exists but is not linked in Phase 1b builds).
|
||||
2. Extend `TelemetryConfig.cpp` to parse an `exporter` key (`otlp_http`
|
||||
default, `otlp_grpc` opt-in) and a gRPC endpoint override.
|
||||
3. In `Telemetry::start()` branch on the parsed exporter type and construct
|
||||
either `OtlpHttpExporterFactory::Create(httpOpts)` or
|
||||
`OtlpGrpcExporterFactory::Create(grpcOpts)` accordingly.
|
||||
4. Update the runbook and dashboards to document the alternate port and TLS
|
||||
settings.
|
||||
|
||||
Example Phase 1b+ gRPC configuration (when wired up):
|
||||
|
||||
```cpp
|
||||
// Configuration for OTLP over gRPC (future work).
|
||||
namespace otlp = opentelemetry::exporter::otlp;
|
||||
|
||||
otlp::OtlpGrpcExporterOptions opts;
|
||||
opts.endpoint = "<otel-collector-host>:4317";
|
||||
opts.use_ssl_credentials = true;
|
||||
opts.ssl_credentials_cacert_path = "/path/to/ca.crt";
|
||||
```
|
||||
|
||||
Until that work lands, `OtlpGrpcExporterOptions` is **not** used by any code
|
||||
path in Phase 1b through Phase 5.
|
||||
|
||||
---
|
||||
|
||||
## 2.3 Span Naming Conventions
|
||||
|
||||
> **TxQ** = Transaction Queue | **UNL** = Unique Node List | **WS** = WebSocket
|
||||
|
||||
### 2.3.1 Naming Schema
|
||||
|
||||
```
|
||||
<component>.<operation>[.<sub-operation>]
|
||||
```
|
||||
|
||||
**Examples**:
|
||||
|
||||
- `tx.receive` - Transaction received from peer
|
||||
- `consensus.phase.establish` - Consensus establish phase
|
||||
- `rpc.command.server_info` - server_info RPC command
|
||||
|
||||
### 2.3.2 Complete Span Catalog
|
||||
|
||||
```yaml
|
||||
# Transaction Spans
|
||||
tx:
|
||||
receive: "Transaction received from network"
|
||||
validate: "Transaction signature/format validation"
|
||||
process: "Full transaction processing"
|
||||
relay: "Transaction relay to peers"
|
||||
apply: "Apply transaction to ledger"
|
||||
|
||||
# Consensus Spans
|
||||
consensus:
|
||||
round: "Complete consensus round"
|
||||
phase:
|
||||
open: "Open phase - collecting transactions"
|
||||
establish: "Establish phase - reaching agreement"
|
||||
accept: "Accept phase - applying consensus"
|
||||
proposal:
|
||||
receive: "Receive peer proposal"
|
||||
send: "Send our proposal"
|
||||
validation:
|
||||
receive: "Receive peer validation"
|
||||
send: "Send our validation"
|
||||
|
||||
# RPC Spans
|
||||
rpc:
|
||||
request: "HTTP/WebSocket request handling"
|
||||
command:
|
||||
"*": "Specific RPC command (dynamic)"
|
||||
|
||||
# Peer Spans
|
||||
peer:
|
||||
connect: "Peer connection establishment"
|
||||
disconnect: "Peer disconnection"
|
||||
message:
|
||||
send: "Send protocol message"
|
||||
receive: "Receive protocol message"
|
||||
|
||||
# Ledger Spans
|
||||
ledger:
|
||||
acquire: "Ledger acquisition from network"
|
||||
build: "Build new ledger"
|
||||
validate: "Ledger validation"
|
||||
close: "Close ledger"
|
||||
replay: "Ledger replay executed"
|
||||
delta: "Delta-based ledger acquired"
|
||||
|
||||
# PathFinding Spans
|
||||
pathfind:
|
||||
request: "Path request initiated"
|
||||
compute: "Path computation executed"
|
||||
|
||||
# TxQ Spans
|
||||
txq:
|
||||
enqueue: "Transaction queued"
|
||||
apply: "Queued transaction applied"
|
||||
|
||||
# Fee/Load Spans
|
||||
fee:
|
||||
escalate: "Fee escalation triggered"
|
||||
|
||||
# Validator Spans
|
||||
validator:
|
||||
list:
|
||||
fetch: "UNL list fetched"
|
||||
manifest: "Manifest update processed"
|
||||
|
||||
# Amendment Spans
|
||||
amendment:
|
||||
vote: "Amendment voting executed"
|
||||
|
||||
# SHAMap Spans
|
||||
shamap:
|
||||
sync: "State tree synchronization"
|
||||
|
||||
# Job Spans
|
||||
job:
|
||||
enqueue: "Job added to queue"
|
||||
execute: "Job execution"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2.4 Attribute Schema
|
||||
|
||||
> **TxQ** = Transaction Queue | **UNL** = Unique Node List | **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### 2.4.1 Resource Attributes (Set Once at Startup)
|
||||
|
||||
```cpp
|
||||
// Standard OpenTelemetry semantic conventions
|
||||
resource::SemanticConventions::SERVICE_NAME = "xrpld"
|
||||
resource::SemanticConventions::SERVICE_VERSION = BuildInfo::getVersionString()
|
||||
resource::SemanticConventions::SERVICE_INSTANCE_ID = <node_public_key_base58>
|
||||
|
||||
// Custom xrpld attributes
|
||||
"xrpl.network.id" = <network_id> // e.g., 0 for mainnet
|
||||
"xrpl.network.type" = "mainnet" | "testnet" | "devnet" | "standalone"
|
||||
"xrpl.node.type" = "validator" | "stock" | "reporting"
|
||||
"xrpl.node.cluster" = <cluster_name> // If clustered
|
||||
```
|
||||
|
||||
### 2.4.2 Span Attributes by Category
|
||||
|
||||
#### Transaction Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.tx.hash" = string // Transaction hash (hex)
|
||||
"xrpl.tx.type" = string // "Payment", "OfferCreate", etc.
|
||||
"xrpl.tx.account" = string // Source account (redacted in prod)
|
||||
"xrpl.tx.sequence" = int64 // Account sequence number
|
||||
"xrpl.tx.fee" = int64 // Fee in drops
|
||||
"xrpl.tx.result" = string // "tesSUCCESS", "tecPATH_DRY", etc.
|
||||
"xrpl.tx.ledger_index" = int64 // Ledger containing transaction
|
||||
```
|
||||
|
||||
#### Consensus Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.consensus.round" = int64 // Round number
|
||||
"xrpl.consensus.phase" = string // "open", "establish", "accept"
|
||||
"xrpl.consensus.mode" = string // "proposing", "observing", etc.
|
||||
"xrpl.consensus.proposers" = int64 // Number of proposers
|
||||
"xrpl.consensus.ledger.prev" = string // Previous ledger hash
|
||||
"xrpl.consensus.ledger.seq" = int64 // Ledger sequence
|
||||
"xrpl.consensus.tx_count" = int64 // Transactions in consensus set
|
||||
"xrpl.consensus.duration_ms" = float64 // Round duration
|
||||
```
|
||||
|
||||
#### RPC Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.rpc.command" = string // Command name
|
||||
"xrpl.rpc.version" = int64 // API version
|
||||
"xrpl.rpc.role" = string // "admin" or "user"
|
||||
"xrpl.rpc.params" = string // Sanitized parameters (optional)
|
||||
```
|
||||
|
||||
#### Peer & Message Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.peer.id" = string // Peer public key (base58)
|
||||
"xrpl.peer.address" = string // IP:port
|
||||
"xrpl.peer.latency_ms" = float64 // Measured latency
|
||||
"xrpl.peer.cluster" = string // Cluster name if clustered
|
||||
"xrpl.message.type" = string // Protocol message type name
|
||||
"xrpl.message.size_bytes" = int64 // Message size
|
||||
"xrpl.message.compressed" = bool // Whether compressed
|
||||
```
|
||||
|
||||
#### Ledger & Job Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.ledger.hash" = string // Ledger hash
|
||||
"xrpl.ledger.index" = int64 // Ledger sequence/index
|
||||
"xrpl.ledger.close_time" = int64 // Close time (epoch)
|
||||
"xrpl.ledger.tx_count" = int64 // Transaction count
|
||||
"xrpl.job.type" = string // Job type name
|
||||
"xrpl.job.queue_ms" = float64 // Time spent in queue
|
||||
"xrpl.job.worker" = int64 // Worker thread ID
|
||||
```
|
||||
|
||||
#### PathFinding Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.pathfind.source_currency" = string // Source currency code
|
||||
"xrpl.pathfind.dest_currency" = string // Destination currency code
|
||||
"xrpl.pathfind.path_count" = int64 // Number of paths found
|
||||
"xrpl.pathfind.cache_hit" = bool // RippleLineCache hit
|
||||
```
|
||||
|
||||
#### TxQ Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.txq.queue_depth" = int64 // Current queue depth
|
||||
"xrpl.txq.fee_level" = int64 // Fee level of transaction
|
||||
"xrpl.txq.eviction_reason" = string // Why transaction was evicted
|
||||
```
|
||||
|
||||
#### Fee Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.fee.load_factor" = int64 // Current load factor
|
||||
"xrpl.fee.escalation_level" = int64 // Fee escalation multiplier
|
||||
```
|
||||
|
||||
#### Validator Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.validator.list_size" = int64 // UNL size
|
||||
"xrpl.validator.list_age_sec" = int64 // Seconds since last update
|
||||
```
|
||||
|
||||
#### Amendment Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.amendment.name" = string // Amendment name
|
||||
"xrpl.amendment.status" = string // "enabled", "vetoed", "supported"
|
||||
```
|
||||
|
||||
#### SHAMap Attributes
|
||||
|
||||
```cpp
|
||||
"xrpl.shamap.type" = string // "transaction", "state", "account_state"
|
||||
"xrpl.shamap.missing_nodes" = int64 // Number of missing nodes during sync
|
||||
"xrpl.shamap.duration_ms" = float64 // Sync duration
|
||||
```
|
||||
|
||||
### 2.4.3 Data Collection Summary
|
||||
|
||||
The following table summarizes what data is collected by category:
|
||||
|
||||
| Category | Attributes Collected | Purpose |
|
||||
| --------------- | ---------------------------------------------------------------------- | ---------------------------- |
|
||||
| **Transaction** | `tx.hash`, `tx.type`, `tx.result`, `tx.fee`, `ledger_index` | Trace transaction lifecycle |
|
||||
| **Consensus** | `round`, `phase`, `mode`, `proposers` (public keys), `duration_ms` | Analyze consensus timing |
|
||||
| **RPC** | `command`, `version`, `status`, `duration_ms` | Monitor RPC performance |
|
||||
| **Peer** | `peer.id` (public key), `latency_ms`, `message.type`, `message.size` | Network topology analysis |
|
||||
| **Ledger** | `ledger.hash`, `ledger.index`, `close_time`, `tx_count` | Ledger progression tracking |
|
||||
| **Job** | `job.type`, `queue_ms`, `worker` | JobQueue performance |
|
||||
| **PathFinding** | `pathfind.source_currency`, `dest_currency`, `path_count`, `cache_hit` | Payment path analysis |
|
||||
| **TxQ** | `txq.queue_depth`, `fee_level`, `eviction_reason` | Queue depth and fee tracking |
|
||||
| **Fee** | `fee.load_factor`, `escalation_level` | Fee escalation monitoring |
|
||||
| **Validator** | `validator.list_size`, `list_age_sec` | UNL health monitoring |
|
||||
| **Amendment** | `amendment.name`, `status` | Protocol upgrade tracking |
|
||||
| **SHAMap** | `shamap.type`, `missing_nodes`, `duration_ms` | State tree sync performance |
|
||||
|
||||
### 2.4.4 Privacy & Sensitive Data Policy
|
||||
|
||||
> **PII** = Personally Identifiable Information
|
||||
|
||||
OpenTelemetry instrumentation is designed to collect **operational metadata only**, never sensitive content.
|
||||
|
||||
#### Data NOT Collected
|
||||
|
||||
The following data is explicitly **excluded** from telemetry collection:
|
||||
|
||||
| Excluded Data | Reason |
|
||||
| ----------------------- | ----------------------------------------- |
|
||||
| **Private Keys** | Never exposed; not relevant to tracing |
|
||||
| **Account Balances** | Financial data; privacy sensitive |
|
||||
| **Transaction Amounts** | Financial data; privacy sensitive |
|
||||
| **Raw TX Payloads** | May contain sensitive memo/data fields |
|
||||
| **Personal Data** | No PII collected |
|
||||
| **IP Addresses** | Configurable; excluded by default in prod |
|
||||
|
||||
#### Privacy Protection Mechanisms
|
||||
|
||||
| Mechanism | Description |
|
||||
| ----------------------------- | ------------------------------------------------------------------------- |
|
||||
| **Account Hashing** | `xrpl.tx.account` is hashed at collector level before storage |
|
||||
| **Configurable Redaction** | Sensitive fields can be excluded via `[telemetry]` config section |
|
||||
| **Sampling** | Only 10% of traces recorded by default, reducing data exposure |
|
||||
| **Local Control** | Node operators have full control over what gets exported |
|
||||
| **No Raw Payloads** | Transaction content is never recorded, only metadata (hash, type, result) |
|
||||
| **Collector-Level Filtering** | Additional redaction/hashing can be configured at OTel Collector |
|
||||
|
||||
#### Collector-Level Data Protection
|
||||
|
||||
The OpenTelemetry Collector can be configured to hash or redact sensitive attributes before export:
|
||||
|
||||
```yaml
|
||||
processors:
|
||||
attributes:
|
||||
actions:
|
||||
# Hash account addresses before storage
|
||||
- key: xrpl.tx.account
|
||||
action: hash
|
||||
# Remove IP addresses entirely
|
||||
- key: xrpl.peer.address
|
||||
action: delete
|
||||
# Redact specific fields
|
||||
- key: xrpl.rpc.params
|
||||
action: delete
|
||||
```
|
||||
|
||||
#### Configuration Options for Privacy
|
||||
|
||||
In `xrpld.cfg`, operators can control data collection granularity:
|
||||
|
||||
```ini
|
||||
[telemetry]
|
||||
enabled=1
|
||||
|
||||
# Disable collection of specific components
|
||||
trace_transactions=1
|
||||
trace_consensus=1
|
||||
trace_rpc=1
|
||||
trace_peer=0 # Disable peer tracing (high volume, includes addresses)
|
||||
|
||||
# Redact specific attributes
|
||||
redact_account=1 # Hash account addresses before export
|
||||
redact_peer_address=1 # Remove peer IP addresses
|
||||
```
|
||||
|
||||
> **Note**: The `redact_account` configuration in `xrpld.cfg` controls SDK-level redaction before export, while collector-level filtering (see [Collector-Level Data Protection](#collector-level-data-protection) above) provides an additional defense-in-depth layer. Both can operate independently.
|
||||
|
||||
> **Key Principle**: Telemetry collects **operational metadata** (timing, counts, hashes) — never **sensitive content** (keys, balances, amounts, raw payloads).
|
||||
|
||||
---
|
||||
|
||||
## 2.5 Context Propagation Design
|
||||
|
||||
> **WS** = WebSocket
|
||||
|
||||
### 2.5.1 Propagation Boundaries
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph http["HTTP/WebSocket (RPC)"]
|
||||
w3c["W3C Trace Context Headers:<br/>traceparent:<br/>00-trace_id-span_id-flags<br/>tracestate: xrpld=..."]
|
||||
end
|
||||
|
||||
subgraph protobuf["Protocol Buffers (P2P)"]
|
||||
proto["message TraceContext {<br/> bytes trace_id = 1; // 16 bytes<br/> bytes span_id = 2; // 8 bytes<br/> uint32 trace_flags = 3;<br/> string trace_state = 4;<br/>}"]
|
||||
end
|
||||
|
||||
subgraph jobqueue["JobQueue (Internal Async)"]
|
||||
job["Context captured at job creation,<br/>restored at execution<br/><br/>class Job {<br/> otel::context::Context<br/> traceContext_;<br/>};"]
|
||||
end
|
||||
|
||||
style http fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style protobuf fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style jobqueue fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **HTTP/WebSocket - RPC (blue)**: For client-facing RPC requests, trace context is propagated using the W3C `traceparent` header. This is the standard approach and works with any OTel-compatible client.
|
||||
- **Protocol Buffers - P2P (green)**: For peer-to-peer messages between xrpld nodes, trace context is embedded as a protobuf `TraceContext` message carrying trace_id, span_id, flags, and optional trace_state.
|
||||
- **JobQueue - Internal Async (red)**: For asynchronous work within a single node, the OTel context is captured when a job is created and restored when the job executes on a worker thread. This bridges the async gap so spans remain linked.
|
||||
|
||||
---
|
||||
|
||||
## 2.6 Integration with Existing Observability
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
|
||||
|
||||
### 2.6.1 Existing Frameworks Comparison
|
||||
|
||||
xrpld already has two observability mechanisms. OpenTelemetry complements (not replaces) them:
|
||||
|
||||
| Aspect | PerfLog | Beast Insight (StatsD) | OpenTelemetry |
|
||||
| --------------------- | ----------------------------- | ---------------------------- | ------------------------- |
|
||||
| **Type** | Logging | Metrics | Distributed Tracing |
|
||||
| **Data** | JSON log entries | Counters, gauges, histograms | Spans with context |
|
||||
| **Scope** | Single node | Single node | **Cross-node** |
|
||||
| **Output** | `perf.log` file | StatsD server | OTLP Collector |
|
||||
| **Question answered** | "What happened on this node?" | "How many? How fast?" | "What was the journey?" |
|
||||
| **Correlation** | By timestamp | By metric name | By `trace_id` |
|
||||
| **Overhead** | Low (file I/O) | Low (UDP packets) | Low-Medium (configurable) |
|
||||
|
||||
### 2.6.2 What Each Framework Does Best
|
||||
|
||||
#### PerfLog
|
||||
|
||||
- **Purpose**: Detailed local event logging for RPC and job execution
|
||||
- **Strengths**:
|
||||
- Rich JSON output with timing data
|
||||
- Already integrated in RPC handlers
|
||||
- File-based, no external dependencies
|
||||
- **Limitations**:
|
||||
- Single-node only (no cross-node correlation)
|
||||
- No parent-child relationships between events
|
||||
- Manual log parsing required
|
||||
|
||||
```json
|
||||
// Example PerfLog entry
|
||||
{
|
||||
"time": "2024-01-15T10:30:00.123Z",
|
||||
"method": "submit",
|
||||
"duration_us": 1523,
|
||||
"result": "tesSUCCESS"
|
||||
}
|
||||
```
|
||||
|
||||
#### Beast Insight (StatsD)
|
||||
|
||||
- **Purpose**: Real-time metrics for monitoring dashboards
|
||||
- **Strengths**:
|
||||
- Aggregated metrics (counters, gauges, histograms)
|
||||
- Low overhead (UDP, fire-and-forget)
|
||||
- Good for alerting thresholds
|
||||
- **Limitations**:
|
||||
- No request-level detail
|
||||
- No causal relationships
|
||||
- Single-node perspective
|
||||
|
||||
```cpp
|
||||
// Example StatsD usage in xrpld
|
||||
insight.increment("rpc.submit.count");
|
||||
insight.gauge("ledger.age", age);
|
||||
insight.timing("consensus.round", duration);
|
||||
```
|
||||
|
||||
#### OpenTelemetry (NEW)
|
||||
|
||||
- **Purpose**: Distributed request tracing across nodes
|
||||
- **Strengths**:
|
||||
- **Cross-node correlation** via `trace_id`
|
||||
- Parent-child span relationships
|
||||
- Rich attributes per span
|
||||
- Industry standard (CNCF)
|
||||
- **Limitations**:
|
||||
- Requires collector infrastructure
|
||||
- Higher complexity than logging
|
||||
|
||||
```cpp
|
||||
// Example OpenTelemetry span
|
||||
auto span = telemetry.startSpan("tx.relay");
|
||||
span->SetAttribute("tx.hash", hash);
|
||||
span->SetAttribute("peer.id", peerId);
|
||||
// Span automatically linked to parent via context
|
||||
```
|
||||
|
||||
### 2.6.3 When to Use Each
|
||||
|
||||
| Scenario | PerfLog | StatsD | OpenTelemetry |
|
||||
| --------------------------------------- | ---------- | ------ | ------------- |
|
||||
| "How many TXs per second?" | ❌ | ✅ | ✅ |
|
||||
| "What's the p99 RPC latency?" | ❌ | ✅ | ✅ |
|
||||
| "Why was this specific TX slow?" | ⚠️ partial | ❌ | ✅ |
|
||||
| "Which node delayed consensus?" | ❌ | ❌ | ✅ |
|
||||
| "What happened on node X at time T?" | ✅ | ❌ | ✅ |
|
||||
| "Show me the TX journey across 5 nodes" | ❌ | ❌ | ✅ |
|
||||
|
||||
### 2.6.4 Coexistence Strategy
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph xrpld["xrpld Process"]
|
||||
perflog["PerfLog<br/>(JSON to file)"]
|
||||
insight["Beast Insight<br/>(StatsD)"]
|
||||
otel["OpenTelemetry<br/>(Tracing)"]
|
||||
end
|
||||
|
||||
perflog --> perffile["perf.log"]
|
||||
insight --> statsd["StatsD Server"]
|
||||
otel --> collector["OTLP Collector"]
|
||||
|
||||
perffile --> grafana["Grafana<br/>(Unified UI)"]
|
||||
statsd --> grafana
|
||||
collector --> grafana
|
||||
|
||||
style xrpld fill:#212121,stroke:#0a0a0a,color:#ffffff
|
||||
style grafana fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **xrpld Process (dark gray)**: The single xrpld node running all three observability frameworks side by side. Each framework operates independently with no interference.
|
||||
- **PerfLog to perf.log**: PerfLog writes JSON-formatted event logs to a local file. Grafana can ingest these via Loki or a file-based datasource.
|
||||
- **Beast Insight to StatsD Server**: Insight sends aggregated metrics (counters, gauges) over UDP to a StatsD server. Grafana reads from StatsD-compatible backends like Graphite or Prometheus (via StatsD exporter).
|
||||
- **OpenTelemetry to OTLP Collector**: OTel exports spans over OTLP/gRPC to a Collector, which then forwards to a trace backend (Tempo).
|
||||
- **Grafana (red, unified UI)**: All three data streams converge in Grafana, enabling operators to correlate logs, metrics, and traces in a single dashboard.
|
||||
|
||||
### 2.6.5 Correlation with PerfLog
|
||||
|
||||
Trace IDs can be correlated with existing PerfLog entries for comprehensive debugging:
|
||||
|
||||
```cpp
|
||||
// In RPCHandler.cpp - correlate trace with PerfLog
|
||||
Status doCommand(RPC::JsonContext& context, Json::Value& result)
|
||||
{
|
||||
// Start OpenTelemetry span
|
||||
auto span = context.app.getTelemetry().startSpan(
|
||||
"rpc.command." + context.method);
|
||||
|
||||
// Get trace ID for correlation
|
||||
auto traceId = span->GetContext().trace_id().IsValid()
|
||||
? toHex(span->GetContext().trace_id())
|
||||
: "";
|
||||
|
||||
// Use existing PerfLog with trace correlation
|
||||
auto const curId = context.app.getPerfLog().currentId();
|
||||
context.app.getPerfLog().rpcStart(context.method, curId);
|
||||
|
||||
// Future: Add trace ID to PerfLog entry
|
||||
// context.app.getPerfLog().setTraceId(curId, traceId);
|
||||
|
||||
try {
|
||||
auto ret = handler(context, result);
|
||||
context.app.getPerfLog().rpcFinish(context.method, curId);
|
||||
span->SetStatus(opentelemetry::trace::StatusCode::kOk);
|
||||
return ret;
|
||||
} catch (std::exception const& e) {
|
||||
context.app.getPerfLog().rpcError(context.method, curId);
|
||||
span->RecordException(e);
|
||||
span->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Architecture Analysis](./01-architecture-analysis.md)_ | _Next: [Implementation Strategy](./03-implementation-strategy.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,528 +0,0 @@
|
||||
# Implementation Strategy
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Code Samples](./04-code-samples.md) | [Configuration Reference](./05-configuration-reference.md)
|
||||
|
||||
---
|
||||
|
||||
## 3.1 Directory Structure
|
||||
|
||||
The telemetry implementation follows xrpld's existing code organization pattern:
|
||||
|
||||
```
|
||||
include/xrpl/
|
||||
├── telemetry/
|
||||
│ ├── Telemetry.h # Main telemetry interface
|
||||
│ ├── TelemetryConfig.h # Configuration structures
|
||||
│ ├── TraceContext.h # Context propagation utilities
|
||||
│ ├── SpanGuard.h # RAII span management
|
||||
│ └── SpanAttributes.h # Attribute helper functions
|
||||
|
||||
src/libxrpl/
|
||||
├── telemetry/
|
||||
│ ├── Telemetry.cpp # Implementation
|
||||
│ ├── TelemetryConfig.cpp # Config parsing
|
||||
│ ├── TraceContext.cpp # Context serialization
|
||||
│ └── NullTelemetry.cpp # No-op implementation
|
||||
|
||||
src/xrpld/
|
||||
├── telemetry/
|
||||
│ ├── TracingInstrumentation.h # Instrumentation macros
|
||||
│ └── TracingInstrumentation.cpp
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3.2 Implementation Approach
|
||||
|
||||
<div align="center">
|
||||
|
||||
```mermaid
|
||||
%%{init: {'flowchart': {'nodeSpacing': 20, 'rankSpacing': 30}}}%%
|
||||
flowchart TB
|
||||
subgraph phase1["Phase 1: Core"]
|
||||
direction LR
|
||||
sdk["SDK Integration"] ~~~ interface["Telemetry Interface"] ~~~ config["Configuration"]
|
||||
end
|
||||
|
||||
subgraph phase2["Phase 2: RPC"]
|
||||
direction LR
|
||||
http["HTTP Context"] ~~~ rpc["RPC Handlers"]
|
||||
end
|
||||
|
||||
subgraph phase3["Phase 3: P2P"]
|
||||
direction LR
|
||||
proto["Protobuf Context"] ~~~ tx["Transaction Relay"]
|
||||
end
|
||||
|
||||
subgraph phase4["Phase 4: Consensus"]
|
||||
direction LR
|
||||
consensus["Consensus Rounds"] ~~~ proposals["Proposals"]
|
||||
end
|
||||
|
||||
phase1 --> phase2 --> phase3 --> phase4
|
||||
|
||||
style phase1 fill:#1565c0,stroke:#0d47a1,color:#ffffff
|
||||
style phase2 fill:#2e7d32,stroke:#1b5e20,color:#ffffff
|
||||
style phase3 fill:#e65100,stroke:#bf360c,color:#ffffff
|
||||
style phase4 fill:#c2185b,stroke:#880e4f,color:#ffffff
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### Key Principles
|
||||
|
||||
1. **Minimal Intrusion**: Instrumentation should not alter existing control flow
|
||||
2. **Zero-Cost When Disabled**: Use compile-time flags and no-op implementations
|
||||
3. **Backward Compatibility**: Protocol Buffer extensions use high field numbers
|
||||
4. **Graceful Degradation**: Tracing failures must not affect node operation
|
||||
|
||||
---
|
||||
|
||||
## 3.3 Performance Overhead Summary
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
| Metric | Overhead | Notes |
|
||||
| ------------- | ---------- | ------------------------------------------------ |
|
||||
| CPU | 1-3% | Of per-transaction CPU cost (~200μs baseline) |
|
||||
| Memory | ~10 MB | SDK statics + batch buffer + worker thread stack |
|
||||
| Network | 10-50 KB/s | Compressed OTLP export to collector |
|
||||
| Latency (p99) | <2% | With proper sampling configuration |
|
||||
|
||||
---
|
||||
|
||||
## 3.4 Detailed CPU Overhead Analysis
|
||||
|
||||
### 3.4.1 Per-Operation Costs
|
||||
|
||||
> **Note on hardware assumptions**: The costs below are based on the official OTel C++ SDK CI benchmarks
|
||||
> (969 runs on GitHub Actions 2-core shared runners). On production server hardware (3+ GHz Xeon),
|
||||
> expect costs at the **lower end** of each range (~30-50% improvement over CI hardware).
|
||||
|
||||
| Operation | Time (ns) | Frequency | Impact |
|
||||
| --------------------- | --------- | ---------------------- | ---------- |
|
||||
| Span creation | 500-1000 | Every traced operation | Low |
|
||||
| Span end | 100-200 | Every traced operation | Low |
|
||||
| SetAttribute (string) | 80-120 | 3-5 per span | Low |
|
||||
| SetAttribute (int) | 40-60 | 2-3 per span | Negligible |
|
||||
| AddEvent | 100-200 | 0-2 per span | Low |
|
||||
| Context injection | 150-250 | Per outgoing message | Low |
|
||||
| Context extraction | 100-180 | Per incoming message | Low |
|
||||
| GetCurrent context | 10-20 | Thread-local access | Negligible |
|
||||
|
||||
**Source**: Span creation based on OTel C++ SDK `BM_SpanCreation` benchmark (AlwaysOnSampler +
|
||||
SimpleSpanProcessor + InMemoryExporter), median ~1,000 ns on CI hardware. AddEvent includes
|
||||
timestamp read + string copy + vector push + mutex acquisition. Context injection/extraction
|
||||
confirmed by `BM_SpanCreationWithScope` benchmark delta (~160 ns).
|
||||
|
||||
### 3.4.2 Transaction Processing Overhead
|
||||
|
||||
<div align="center">
|
||||
|
||||
```mermaid
|
||||
%%{init: {'pie': {'textPosition': 0.75}}}%%
|
||||
pie showData
|
||||
"tx.receive (1400ns)" : 1400
|
||||
"tx.validate (1200ns)" : 1200
|
||||
"tx.relay (1200ns)" : 1200
|
||||
"Context inject (200ns)" : 200
|
||||
```
|
||||
|
||||
**Transaction Tracing Overhead (~4.0μs total)**
|
||||
|
||||
</div>
|
||||
|
||||
**Overhead percentage**: 4.0 μs / 200 μs (avg tx processing) = **~2.0%**
|
||||
|
||||
> **Breakdown**: Each span (tx.receive, tx.validate, tx.relay) costs ~1,000 ns for creation plus
|
||||
> ~200-400 ns for 3-5 attribute sets. Context injection is ~200 ns (confirmed by benchmarks).
|
||||
> On production hardware, expect ~2.6 μs total (~1.3% overhead) due to faster span creation (~500-600 ns).
|
||||
|
||||
### 3.4.3 Consensus Round Overhead
|
||||
|
||||
| Operation | Count | Cost (ns) | Total |
|
||||
| ---------------------- | ----- | --------- | ---------- |
|
||||
| consensus.round span | 1 | ~1200 | ~1.2 μs |
|
||||
| consensus.phase spans | 3 | ~1100 | ~3.3 μs |
|
||||
| proposal.receive spans | ~20 | ~1100 | ~22 μs |
|
||||
| proposal.send spans | ~3 | ~1100 | ~3.3 μs |
|
||||
| Context operations | ~30 | ~200 | ~6 μs |
|
||||
| **TOTAL** | | | **~36 μs** |
|
||||
|
||||
> **Why higher**: Each span costs ~1,000 ns creation + ~100-200 ns for 1-2 attributes, totaling ~1,100-1,200 ns.
|
||||
> Context operations remain ~200 ns (confirmed by benchmarks). On production hardware, expect ~24 μs total.
|
||||
|
||||
**Overhead percentage**: 36 μs / 3s (typical round) = **~0.001%** (negligible)
|
||||
|
||||
### 3.4.4 RPC Request Overhead
|
||||
|
||||
| Operation | Cost (ns) |
|
||||
| ---------------- | ------------ |
|
||||
| rpc.request span | ~1200 |
|
||||
| rpc.command span | ~1100 |
|
||||
| Context extract | ~250 |
|
||||
| Context inject | ~200 |
|
||||
| **TOTAL** | **~2.75 μs** |
|
||||
|
||||
> **Why higher**: Each span costs ~1,000 ns creation + ~100-200 ns for attributes (command name,
|
||||
> version, role). Context extract/inject costs are confirmed by OTel C++ benchmarks.
|
||||
|
||||
- Fast RPC (1ms): 2.75 μs / 1ms = **~0.275%**
|
||||
- Slow RPC (100ms): 2.75 μs / 100ms = **~0.003%**
|
||||
|
||||
---
|
||||
|
||||
## 3.5 Memory Overhead Analysis
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### 3.5.1 Static Memory
|
||||
|
||||
| Component | Size | Allocated |
|
||||
| ------------------------------------ | ----------- | ---------- |
|
||||
| TracerProvider singleton | ~64 KB | At startup |
|
||||
| BatchSpanProcessor (circular buffer) | ~16 KB | At startup |
|
||||
| BatchSpanProcessor (worker thread) | ~8 MB | At startup |
|
||||
| OTLP exporter (gRPC channel init) | ~256 KB | At startup |
|
||||
| Propagator registry | ~8 KB | At startup |
|
||||
| **Total static** | **~8.3 MB** | |
|
||||
|
||||
> **Why higher than earlier estimate**: The BatchSpanProcessor's circular buffer itself is only ~16 KB
|
||||
> (2049 x 8-byte `AtomicUniquePtr` entries), but it spawns a dedicated worker thread whose default
|
||||
> stack size on Linux is ~8 MB. The OTLP gRPC exporter allocates memory for channel stubs and TLS
|
||||
> initialization. The worker thread stack dominates the static footprint.
|
||||
|
||||
### 3.5.2 Dynamic Memory
|
||||
|
||||
| Component | Size per unit | Max units | Peak |
|
||||
| -------------------- | -------------- | ---------- | --------------- |
|
||||
| Active span | ~500-800 bytes | 1000 | ~500-800 KB |
|
||||
| Queued span (export) | ~500 bytes | 2048 | ~1 MB |
|
||||
| Attribute storage | ~80 bytes | 5 per span | Included |
|
||||
| Context storage | ~64 bytes | Per thread | ~6.4 KB |
|
||||
| **Total dynamic** | | | **~1.5-1.8 MB** |
|
||||
|
||||
> **Why active spans are larger**: An active `Span` object includes the wrapper (~88 bytes: shared_ptr,
|
||||
> mutex, unique_ptr to Recordable) plus `SpanData` (~250 bytes: SpanContext, timestamps, name, status,
|
||||
> empty containers) plus attribute storage (~200-500 bytes for 3-5 string attributes in a `std::map`).
|
||||
> Source: `sdk/src/trace/span.h` and `sdk/include/opentelemetry/sdk/trace/span_data.h`.
|
||||
> Queued spans release the wrapper, keeping only `SpanData` + attributes (~500 bytes).
|
||||
|
||||
### 3.5.3 Memory Growth Characteristics
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
xyChart:
|
||||
width: 700
|
||||
height: 400
|
||||
---
|
||||
xychart-beta
|
||||
title "Memory Usage vs Span Rate (bounded by queue limit)"
|
||||
x-axis "Spans/second" [0, 200, 400, 600, 800, 1000]
|
||||
y-axis "Memory (MB)" 0 --> 12
|
||||
line [8.5, 9.2, 9.6, 9.9, 10.0, 10.0]
|
||||
```
|
||||
|
||||
**Notes**:
|
||||
|
||||
- Memory increases with span rate but **plateaus at queue capacity** (default 2048 spans)
|
||||
- Batch export prevents unbounded growth
|
||||
- At queue limit, oldest spans are dropped (not blocked)
|
||||
- Maximum memory is bounded: ~8.3 MB static (dominated by worker thread stack) + 2048 queued spans x ~500 bytes (~1 MB) + active spans (~0.8 MB) ≈ **~10 MB ceiling**
|
||||
- The worker thread stack (~8 MB) is virtual memory; actual RSS depends on stack usage (typically much less)
|
||||
|
||||
### 3.5.4 Performance Data Sources
|
||||
|
||||
The overhead estimates in Sections 3.3-3.5 are derived from the following sources:
|
||||
|
||||
| Source | What it covers | URL |
|
||||
| ------------------------------------------------ | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| OTel C++ SDK CI benchmarks (969 runs) | Span creation, context activation, sampler overhead | [Benchmark Dashboard](https://open-telemetry.github.io/opentelemetry-cpp/benchmarks/) |
|
||||
| `api/test/trace/span_benchmark.cc` | API-level span creation (~22 ns no-op) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/api/test/trace/span_benchmark.cc) |
|
||||
| `sdk/test/trace/sampler_benchmark.cc` | SDK span creation with samplers (~1,000 ns AlwaysOn) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/test/trace/sampler_benchmark.cc) |
|
||||
| `sdk/include/.../span_data.h` | SpanData memory layout (~250 bytes base) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/trace/span_data.h) |
|
||||
| `sdk/src/trace/span.h` | Span wrapper memory layout (~88 bytes) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/trace/span.h) |
|
||||
| `sdk/include/.../batch_span_processor_options.h` | Default queue size (2048), batch size (512) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/trace/batch_span_processor_options.h) |
|
||||
| `sdk/include/.../circular_buffer.h` | CircularBuffer implementation (AtomicUniquePtr array) | [Source](https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/include/opentelemetry/sdk/common/circular_buffer.h) |
|
||||
| OTLP proto definition | Serialized span size estimation | [Proto](https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto) |
|
||||
|
||||
---
|
||||
|
||||
## 3.6 Network Overhead Analysis
|
||||
|
||||
### 3.6.1 Export Bandwidth
|
||||
|
||||
> **Bytes per span**: Estimates use ~500 bytes/span (conservative upper bound). OTLP protobuf analysis
|
||||
> shows a typical span with 3-5 string attributes serializes to ~200-300 bytes raw; with gzip
|
||||
> compression (~60-70% of raw) and batching (amortized headers), ~350 bytes/span is more realistic.
|
||||
> The table uses the conservative estimate for capacity planning.
|
||||
|
||||
| Sampling Rate | Spans/sec | Bandwidth | Notes |
|
||||
| ------------- | --------- | --------- | ---------------- |
|
||||
| 100% | ~500 | ~250 KB/s | Development only |
|
||||
| 10% | ~50 | ~25 KB/s | Staging |
|
||||
| 1% | ~5 | ~2.5 KB/s | Production |
|
||||
| Error-only | ~1 | ~0.5 KB/s | Minimal overhead |
|
||||
|
||||
### 3.6.2 Trace Context Propagation
|
||||
|
||||
| Message Type | Context Size | Messages/sec | Overhead |
|
||||
| ---------------------- | ------------ | ------------ | ----------- |
|
||||
| TMTransaction | 25 bytes | ~100 | ~2.5 KB/s |
|
||||
| TMProposeSet | 25 bytes | ~10 | ~250 B/s |
|
||||
| TMValidation | 25 bytes | ~50 | ~1.25 KB/s |
|
||||
| **Total P2P overhead** | | | **~4 KB/s** |
|
||||
|
||||
---
|
||||
|
||||
## 3.7 Optimization Strategies
|
||||
|
||||
### 3.7.1 Sampling Strategies
|
||||
|
||||
#### Tail Sampling
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
trace["New Trace"]
|
||||
|
||||
trace --> errors{"Is Error?"}
|
||||
errors -->|Yes| sample["SAMPLE"]
|
||||
errors -->|No| consensus{"Is Consensus?"}
|
||||
|
||||
consensus -->|Yes| sample
|
||||
consensus -->|No| slow{"Is Slow?"}
|
||||
|
||||
slow -->|Yes| sample
|
||||
slow -->|No| prob{"Random < 10%?"}
|
||||
|
||||
prob -->|Yes| sample
|
||||
prob -->|No| drop["DROP"]
|
||||
|
||||
style sample fill:#4caf50,stroke:#388e3c,color:#fff
|
||||
style drop fill:#f44336,stroke:#c62828,color:#fff
|
||||
```
|
||||
|
||||
### 3.7.2 Batch Tuning Recommendations
|
||||
|
||||
| Environment | Batch Size | Batch Delay | Max Queue |
|
||||
| ------------------ | ---------- | ----------- | --------- |
|
||||
| Low-latency | 128 | 1000ms | 512 |
|
||||
| High-throughput | 1024 | 10000ms | 8192 |
|
||||
| Memory-constrained | 256 | 2000ms | 512 |
|
||||
|
||||
### 3.7.3 Conditional Instrumentation
|
||||
|
||||
```cpp
|
||||
// Compile-time feature flag
|
||||
#ifndef XRPL_ENABLE_TELEMETRY
|
||||
// Zero-cost when disabled
|
||||
#define XRPL_TRACE_SPAN(t, n) ((void)0)
|
||||
#endif
|
||||
|
||||
// Runtime component filtering
|
||||
if (telemetry.shouldTracePeer())
|
||||
{
|
||||
XRPL_TRACE_SPAN(telemetry, "peer.message.receive");
|
||||
// ... instrumentation
|
||||
}
|
||||
// No overhead when component tracing disabled
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3.8 Links to Detailed Documentation
|
||||
|
||||
- **[Code Samples](./04-code-samples.md)**: Complete implementation code for all components
|
||||
- **[Configuration Reference](./05-configuration-reference.md)**: Configuration options and collector setup
|
||||
- **[Implementation Phases](./06-implementation-phases.md)**: Detailed timeline and milestones
|
||||
|
||||
---
|
||||
|
||||
## 3.9 Code Intrusiveness Assessment
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
This section provides a detailed assessment of how intrusive the OpenTelemetry integration is to the existing xrpld codebase.
|
||||
|
||||
### 3.9.1 Files Modified Summary
|
||||
|
||||
| Component | Files Modified | Lines Added | Lines Changed | Architectural Impact |
|
||||
| --------------------- | -------------- | ----------- | ------------- | -------------------- |
|
||||
| **Core Telemetry** | 5 new files | ~800 | 0 | None (new module) |
|
||||
| **Application Init** | 2 files | ~30 | ~5 | Minimal |
|
||||
| **RPC Layer** | 3 files | ~80 | ~20 | Minimal |
|
||||
| **Transaction Relay** | 4 files | ~120 | ~40 | Low |
|
||||
| **Consensus** | 3 files | ~100 | ~30 | Low-Medium |
|
||||
| **Protocol Buffers** | 1 file | ~25 | 0 | Low |
|
||||
| **CMake/Build** | 3 files | ~50 | ~10 | Minimal |
|
||||
| **PathFinding** | 2 | ~80 | ~5 | Minimal |
|
||||
| **TxQ/Fee** | 2 | ~60 | ~5 | Minimal |
|
||||
| **Validator/Amend** | 3 | ~40 | ~5 | Minimal |
|
||||
| **Total** | **~28 files** | **~1,490** | **~120** | **Low** |
|
||||
|
||||
### 3.9.2 Detailed File Impact
|
||||
|
||||
```mermaid
|
||||
pie title Code Changes by Component
|
||||
"New Telemetry Module" : 800
|
||||
"Transaction Relay" : 160
|
||||
"Consensus" : 130
|
||||
"RPC Layer" : 100
|
||||
"PathFinding" : 80
|
||||
"TxQ/Fee" : 60
|
||||
"Validator/Amendment" : 40
|
||||
"Application Init" : 35
|
||||
"Protocol Buffers" : 25
|
||||
"Build System" : 60
|
||||
```
|
||||
|
||||
#### New Files (No Impact on Existing Code)
|
||||
|
||||
| File | Lines | Purpose |
|
||||
| ---------------------------------------------- | ----- | -------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | ~160 | Main interface |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | ~120 | RAII wrapper |
|
||||
| `include/xrpl/telemetry/TraceContext.h` | ~80 | Context propagation |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | ~60 | Macros |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | ~200 | Implementation |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | ~60 | Config parsing |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | ~40 | No-op implementation |
|
||||
|
||||
#### Modified Files (Existing Xrpld Code)
|
||||
|
||||
| File | Lines Added | Lines Changed | Risk Level |
|
||||
| ------------------------------------------------- | ----------- | ------------- | ---------- |
|
||||
| `src/xrpld/app/main/Application.cpp` | ~15 | ~3 | Low |
|
||||
| `include/xrpl/core/ServiceRegistry.h` | ~5 | ~2 | Low |
|
||||
| `src/xrpld/rpc/detail/ServerHandler.cpp` | ~40 | ~10 | Low |
|
||||
| `src/xrpld/rpc/handlers/*.cpp` | ~30 | ~8 | Low |
|
||||
| `src/xrpld/overlay/detail/PeerImp.cpp` | ~60 | ~15 | Medium |
|
||||
| `src/xrpld/overlay/detail/OverlayImpl.cpp` | ~30 | ~10 | Medium |
|
||||
| `src/xrpld/app/consensus/RCLConsensus.cpp` | ~50 | ~15 | Medium |
|
||||
| `src/xrpld/app/consensus/RCLConsensusAdaptor.cpp` | ~40 | ~12 | Medium |
|
||||
| `src/xrpld/core/JobQueue.cpp` | ~20 | ~5 | Low |
|
||||
| `src/xrpld/app/paths/PathRequest.cpp` | ~40 | ~3 | Low |
|
||||
| `src/xrpld/app/paths/Pathfinder.cpp` | ~40 | ~2 | Low |
|
||||
| `src/xrpld/app/misc/TxQ.cpp` | ~40 | ~3 | Low |
|
||||
| `src/xrpld/app/main/LoadManager.cpp` | ~20 | ~2 | Low |
|
||||
| `src/xrpld/app/misc/ValidatorList.cpp` | ~20 | ~2 | Low |
|
||||
| `src/xrpld/app/misc/AmendmentTable.cpp` | ~10 | ~2 | Low |
|
||||
| `src/xrpld/app/misc/Manifest.cpp` | ~10 | ~1 | Low |
|
||||
| `src/xrpld/shamap/SHAMap.cpp` | ~20 | ~3 | Low |
|
||||
| `src/xrpld/overlay/detail/ripple.proto` | ~25 | 0 | Low |
|
||||
| `CMakeLists.txt` | ~40 | ~8 | Low |
|
||||
| `cmake/FindOpenTelemetry.cmake` | ~50 | 0 | None (new) |
|
||||
|
||||
### 3.9.3 Risk Assessment by Component
|
||||
|
||||
<div align="center">
|
||||
|
||||
**Do First** ↖ ↗ **Plan Carefully**
|
||||
|
||||
```mermaid
|
||||
quadrantChart
|
||||
title Code Intrusiveness Risk Matrix
|
||||
x-axis Low Risk --> High Risk
|
||||
y-axis Low Value --> High Value
|
||||
|
||||
RPC Tracing: [0.2, 0.55]
|
||||
Transaction Relay: [0.55, 0.85]
|
||||
Consensus Tracing: [0.75, 0.92]
|
||||
Peer Message Tracing: [0.85, 0.35]
|
||||
JobQueue Context: [0.3, 0.42]
|
||||
Ledger Acquisition: [0.48, 0.65]
|
||||
PathFinding: [0.38, 0.72]
|
||||
TxQ and Fees: [0.25, 0.62]
|
||||
Validator Mgmt: [0.15, 0.35]
|
||||
```
|
||||
|
||||
**Optional** ↙ ↘ **Avoid**
|
||||
|
||||
</div>
|
||||
|
||||
#### Risk Level Definitions
|
||||
|
||||
| Risk Level | Definition | Mitigation |
|
||||
| ---------- | ---------------------------------------------------------------- | ---------------------------------- |
|
||||
| **Low** | Additive changes only; no modification to existing logic | Standard code review |
|
||||
| **Medium** | Minor modifications to existing functions; clear boundaries | Comprehensive unit tests |
|
||||
| **High** | Changes to core logic or data structures; potential side effects | Integration tests + staged rollout |
|
||||
|
||||
### 3.9.4 Architectural Impact Assessment
|
||||
|
||||
| Aspect | Impact | Justification |
|
||||
| -------------------- | ------- | -------------------------------------------------------------------------------- |
|
||||
| **Data Flow** | Minimal | Read-only instrumentation; no modification to consensus or transaction data flow |
|
||||
| **Threading Model** | Minimal | Context propagation uses thread-local storage (standard OTel pattern) |
|
||||
| **Memory Model** | Low | Bounded queues prevent unbounded growth; RAII ensures cleanup |
|
||||
| **Network Protocol** | Low | Optional fields in protobuf (high field numbers); backward compatible |
|
||||
| **Configuration** | None | New config section; existing configs unaffected |
|
||||
| **Build System** | Low | Optional CMake flag; builds work without OpenTelemetry |
|
||||
| **Dependencies** | Low | OpenTelemetry SDK is optional; null implementation when disabled |
|
||||
|
||||
### 3.9.5 Backward Compatibility
|
||||
|
||||
| Compatibility | Status | Notes |
|
||||
| --------------- | ------- | ----------------------------------------------------- |
|
||||
| **Config File** | ✅ Full | New `[telemetry]` section is optional |
|
||||
| **Protocol** | ✅ Full | Optional protobuf fields with high field numbers |
|
||||
| **Build** | ✅ Full | `XRPL_ENABLE_TELEMETRY=OFF` produces identical binary |
|
||||
| **Runtime** | ✅ Full | `enabled=0` produces zero overhead |
|
||||
| **API** | ✅ Full | No changes to public RPC or P2P APIs |
|
||||
|
||||
### 3.9.6 Rollback Strategy
|
||||
|
||||
If issues are discovered after deployment:
|
||||
|
||||
1. **Immediate**: Set `enabled=0` in config and restart (zero code change)
|
||||
2. **Quick**: Rebuild with `XRPL_ENABLE_TELEMETRY=OFF`
|
||||
3. **Complete**: Revert telemetry commits (clean separation makes this easy)
|
||||
|
||||
### 3.9.7 Code Change Examples
|
||||
|
||||
**Minimal RPC Instrumentation (Low Intrusiveness):**
|
||||
|
||||
```cpp
|
||||
// Before
|
||||
void ServerHandler::onRequest(...) {
|
||||
auto result = processRequest(req);
|
||||
send(result);
|
||||
}
|
||||
|
||||
// After (only ~10 lines added)
|
||||
void ServerHandler::onRequest(...) {
|
||||
XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request"); // +1 line
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command); // +1 line
|
||||
|
||||
auto result = processRequest(req);
|
||||
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.status", status); // +1 line
|
||||
send(result);
|
||||
}
|
||||
```
|
||||
|
||||
**Consensus Instrumentation (Medium Intrusiveness):**
|
||||
|
||||
```cpp
|
||||
// Before
|
||||
void RCLConsensusAdaptor::startRound(...) {
|
||||
// ... existing logic
|
||||
}
|
||||
|
||||
// After (context storage required)
|
||||
void RCLConsensusAdaptor::startRound(...) {
|
||||
XRPL_TRACE_CONSENSUS(app_.getTelemetry(), "consensus.round");
|
||||
XRPL_TRACE_SET_ATTR("xrpl.consensus.ledger.seq", seq);
|
||||
|
||||
// Store context for child spans in phase transitions
|
||||
currentRoundContext_ = _xrpl_guard_->context(); // New member variable
|
||||
|
||||
// ... existing logic unchanged
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Design Decisions](./02-design-decisions.md)_ | _Next: [Code Samples](./04-code-samples.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,972 +0,0 @@
|
||||
# Configuration Reference
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Code Samples](./04-code-samples.md) | [Implementation Phases](./06-implementation-phases.md)
|
||||
|
||||
---
|
||||
|
||||
## 5.1 xrpld Configuration
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **TxQ** = Transaction Queue
|
||||
|
||||
### 5.1.1 Configuration File Section
|
||||
|
||||
Add to `cfg/xrpld-example.cfg`:
|
||||
|
||||
```ini
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# TELEMETRY (OpenTelemetry Distributed Tracing)
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
#
|
||||
# Enables distributed tracing for transaction flow, consensus, and RPC calls.
|
||||
# Traces are exported to an OpenTelemetry Collector using OTLP protocol.
|
||||
#
|
||||
# [telemetry]
|
||||
#
|
||||
# # Enable/disable telemetry (default: 0 = disabled)
|
||||
# enabled=1
|
||||
#
|
||||
# # OTLP endpoint (default: http://localhost:4318/v1/traces - OTLP/HTTP)
|
||||
# # Note: only OTLP/HTTP is shipped in Phase 1b. OTLP/gRPC support is
|
||||
# # planned as future work and is not yet parsed by TelemetryConfig.cpp.
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
#
|
||||
# # Use TLS for exporter connection (default: 0)
|
||||
# use_tls=0
|
||||
#
|
||||
# # Path to CA certificate for TLS (optional)
|
||||
# # tls_ca_cert=/path/to/ca.crt
|
||||
#
|
||||
# # Sampling ratio: 0.0-1.0 (default: 1.0 = 100% sampling)
|
||||
# # Use lower values in production to reduce overhead
|
||||
# # Default: 1.0 (all traces). For production deployments with high
|
||||
# # throughput, 0.1 (10%) is recommended to reduce overhead.
|
||||
# # See Section 7.4.2 for sampling strategy details.
|
||||
# sampling_ratio=0.1
|
||||
#
|
||||
# # Batch processor settings
|
||||
# batch_size=512 # Spans per batch (default: 512)
|
||||
# batch_delay_ms=5000 # Max delay before sending batch (default: 5000)
|
||||
# max_queue_size=2048 # Max queued spans (default: 2048)
|
||||
#
|
||||
# # Component-specific tracing (default: all enabled except peer)
|
||||
# trace_transactions=1 # Transaction relay and processing
|
||||
# trace_consensus=1 # Consensus rounds and proposals
|
||||
# trace_rpc=1 # RPC request handling
|
||||
# trace_peer=0 # Peer messages (high volume, disabled by default)
|
||||
# trace_ledger=1 # Ledger acquisition and building
|
||||
#
|
||||
# # Planned (not yet parsed by TelemetryConfig.cpp):
|
||||
# # trace_pathfind=1 # Path computation (Phase 2)
|
||||
# # trace_txq=1 # Transaction queue (Phase 3)
|
||||
# # trace_validator=0 # Validator list / manifest (future)
|
||||
# # trace_amendment=0 # Amendment voting (future)
|
||||
#
|
||||
# # Service identification (automatically detected if not specified)
|
||||
# # service_name=xrpld
|
||||
# # service_instance_id=<node_public_key>
|
||||
|
||||
[telemetry]
|
||||
enabled=0
|
||||
```
|
||||
|
||||
### 5.1.2 Configuration Options Summary
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
| --------------------- | ------ | --------------------------------- | ----------------------------------------- |
|
||||
| `enabled` | bool | `false` | Enable/disable telemetry |
|
||||
| `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint |
|
||||
| `use_tls` | bool | `false` | Enable TLS for exporter connection |
|
||||
| `tls_ca_cert` | string | `""` | Path to CA certificate file |
|
||||
| `sampling_ratio` | float | `1.0` | Sampling ratio (0.0-1.0) |
|
||||
| `batch_size` | uint | `512` | Spans per export batch |
|
||||
| `batch_delay_ms` | uint | `5000` | Max delay before sending batch (ms) |
|
||||
| `max_queue_size` | uint | `2048` | Maximum queued spans |
|
||||
| `trace_transactions` | bool | `true` | Enable transaction tracing |
|
||||
| `trace_consensus` | bool | `true` | Enable consensus tracing |
|
||||
| `trace_rpc` | bool | `true` | Enable RPC tracing |
|
||||
| `trace_peer` | bool | `false` | Enable peer message tracing (high volume) |
|
||||
| `trace_ledger` | bool | `true` | Enable ledger tracing |
|
||||
| `service_name` | string | `"xrpld"` | Service name for traces |
|
||||
| `service_instance_id` | string | `<node_pubkey>` | Instance identifier |
|
||||
|
||||
**Planned (not yet implemented)**: the following options appear in the design
|
||||
documents but are not parsed by `TelemetryConfig.cpp` in Phase 1b and later
|
||||
phases. They will be added as the corresponding subsystems are instrumented:
|
||||
|
||||
| Option | Planned Phase | Purpose |
|
||||
| ----------------- | ------------- | ---------------------------------------- |
|
||||
| `exporter` | Future | Select between OTLP/HTTP and OTLP/gRPC |
|
||||
| `trace_pathfind` | Phase 2 | Path computation tracing toggle |
|
||||
| `trace_txq` | Phase 3 | Transaction queue tracing toggle |
|
||||
| `trace_validator` | Future | Validator list / manifest update tracing |
|
||||
| `trace_amendment` | Future | Amendment voting tracing |
|
||||
|
||||
---
|
||||
|
||||
## 5.2 Configuration Parser
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
```cpp
|
||||
// src/libxrpl/telemetry/TelemetryConfig.cpp
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
#include <xrpl/basics/Log.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version)
|
||||
{
|
||||
Telemetry::Setup setup;
|
||||
|
||||
// Basic settings
|
||||
setup.enabled = section.value_or("enabled", false);
|
||||
setup.serviceName = section.value_or("service_name", "xrpld");
|
||||
setup.serviceVersion = version;
|
||||
setup.serviceInstanceId = section.value_or(
|
||||
"service_instance_id", nodePublicKey);
|
||||
|
||||
// Exporter settings
|
||||
setup.exporterType = section.value_or("exporter", "otlp_grpc");
|
||||
|
||||
if (setup.exporterType == "otlp_grpc")
|
||||
setup.exporterEndpoint = section.value_or("endpoint", "localhost:4317");
|
||||
else if (setup.exporterType == "otlp_http")
|
||||
setup.exporterEndpoint = section.value_or("endpoint", "localhost:4318");
|
||||
|
||||
setup.useTls = section.value_or("use_tls", false);
|
||||
setup.tlsCertPath = section.value_or("tls_ca_cert", "");
|
||||
|
||||
// Sampling
|
||||
setup.samplingRatio = section.value_or("sampling_ratio", 1.0);
|
||||
if (setup.samplingRatio < 0.0 || setup.samplingRatio > 1.0)
|
||||
{
|
||||
Throw<std::runtime_error>(
|
||||
"telemetry.sampling_ratio must be between 0.0 and 1.0");
|
||||
}
|
||||
|
||||
// Batch processor
|
||||
setup.batchSize = section.value_or("batch_size", 512u);
|
||||
setup.batchDelay = std::chrono::milliseconds{
|
||||
section.value_or("batch_delay_ms", 5000u)};
|
||||
setup.maxQueueSize = section.value_or("max_queue_size", 2048u);
|
||||
|
||||
// Component filtering
|
||||
setup.traceTransactions = section.value_or("trace_transactions", true);
|
||||
setup.traceConsensus = section.value_or("trace_consensus", true);
|
||||
setup.traceRpc = section.value_or("trace_rpc", true);
|
||||
setup.tracePeer = section.value_or("trace_peer", false);
|
||||
setup.traceLedger = section.value_or("trace_ledger", true);
|
||||
setup.tracePathfind = section.value_or("trace_pathfind", true);
|
||||
setup.traceTxQ = section.value_or("trace_txq", true);
|
||||
setup.traceValidator = section.value_or("trace_validator", false);
|
||||
setup.traceAmendment = section.value_or("trace_amendment", false);
|
||||
|
||||
return setup;
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5.3 Application Integration
|
||||
|
||||
### 5.3.1 ApplicationImp Changes
|
||||
|
||||
> **Deferred identity**: The node public key (`nodeIdentity_`) is not
|
||||
> available during `ApplicationImp`'s member initializer list — it is
|
||||
> resolved later in `setup()`. The `Telemetry` object is therefore
|
||||
> constructed with an empty `serviceInstanceId` and patched via
|
||||
> `setServiceInstanceId()` once `setup()` has called `getNodeIdentity()`.
|
||||
|
||||
```cpp
|
||||
// src/xrpld/app/main/Application.cpp (modified)
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
class ApplicationImp : public Application, public BasicApp
|
||||
{
|
||||
// ... existing members (perfLog_, etc.) ...
|
||||
|
||||
// Telemetry — constructed in the member initializer list with
|
||||
// an empty serviceInstanceId, patched in setup().
|
||||
std::unique_ptr<telemetry::Telemetry> telemetry_;
|
||||
|
||||
// Member initializer list (excerpt):
|
||||
// ...
|
||||
// , telemetry_(
|
||||
// telemetry::make_Telemetry(
|
||||
// telemetry::setup_Telemetry(
|
||||
// config_->section("telemetry"),
|
||||
// "", // Updated later via setServiceInstanceId()
|
||||
// BuildInfo::getVersionString()),
|
||||
// logs_->journal("Telemetry")))
|
||||
// ...
|
||||
|
||||
bool setup(...) override
|
||||
{
|
||||
// ... existing setup code ...
|
||||
|
||||
nodeIdentity_ = getNodeIdentity(*this, cmdline);
|
||||
|
||||
// Inject node identity into telemetry resource attributes,
|
||||
// unless the user already set a custom service_instance_id.
|
||||
if (!config_->section("telemetry").exists("service_instance_id"))
|
||||
telemetry_->setServiceInstanceId(
|
||||
toBase58(TokenType::NodePublic, nodeIdentity_->first));
|
||||
|
||||
// ... rest of setup ...
|
||||
}
|
||||
|
||||
void start(bool withTimers) override
|
||||
{
|
||||
// ... existing start code ...
|
||||
telemetry_->start();
|
||||
}
|
||||
|
||||
void run() override
|
||||
{
|
||||
// ... existing run/shutdown code ...
|
||||
telemetry_->stop();
|
||||
}
|
||||
|
||||
telemetry::Telemetry&
|
||||
getTelemetry() override
|
||||
{
|
||||
return *telemetry_;
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### 5.3.2 ServiceRegistry Interface Addition
|
||||
|
||||
```cpp
|
||||
// include/xrpl/core/ServiceRegistry.h (modified)
|
||||
|
||||
namespace telemetry {
|
||||
class Telemetry;
|
||||
} // namespace telemetry
|
||||
|
||||
class ServiceRegistry
|
||||
{
|
||||
public:
|
||||
// ... existing virtual methods ...
|
||||
|
||||
/** Get the telemetry system for distributed tracing. */
|
||||
virtual telemetry::Telemetry&
|
||||
getTelemetry() = 0;
|
||||
};
|
||||
```
|
||||
|
||||
> **Note:** `Application` extends `ServiceRegistry`, so `getTelemetry()` is
|
||||
> available on both. Components that hold a `ServiceRegistry&` (e.g.
|
||||
> `NetworkOPsImp`) call `registry_.get().getTelemetry()`. Components that
|
||||
> still hold an `Application&` (e.g. `ServerHandler`, `PeerImp`,
|
||||
> `RCLConsensusAdaptor`) call `app_.getTelemetry()` directly.
|
||||
|
||||
---
|
||||
|
||||
## 5.4 CMake Integration
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### 5.4.1 Find OpenTelemetry Module
|
||||
|
||||
```cmake
|
||||
# cmake/FindOpenTelemetry.cmake
|
||||
|
||||
# Find OpenTelemetry C++ SDK
|
||||
#
|
||||
# This module defines:
|
||||
# OpenTelemetry_FOUND - System has OpenTelemetry
|
||||
# OpenTelemetry::api - API library target
|
||||
# OpenTelemetry::sdk - SDK library target
|
||||
# OpenTelemetry::otlp_grpc_exporter - OTLP gRPC exporter target
|
||||
# OpenTelemetry::otlp_http_exporter - OTLP HTTP exporter target
|
||||
|
||||
find_package(opentelemetry-cpp CONFIG QUIET)
|
||||
|
||||
if(opentelemetry-cpp_FOUND)
|
||||
set(OpenTelemetry_FOUND TRUE)
|
||||
|
||||
# Create imported targets if not already created by config
|
||||
if(NOT TARGET OpenTelemetry::api)
|
||||
add_library(OpenTelemetry::api ALIAS opentelemetry-cpp::api)
|
||||
endif()
|
||||
if(NOT TARGET OpenTelemetry::sdk)
|
||||
add_library(OpenTelemetry::sdk ALIAS opentelemetry-cpp::sdk)
|
||||
endif()
|
||||
if(NOT TARGET OpenTelemetry::otlp_grpc_exporter)
|
||||
add_library(OpenTelemetry::otlp_grpc_exporter ALIAS
|
||||
opentelemetry-cpp::otlp_grpc_exporter)
|
||||
endif()
|
||||
else()
|
||||
# Try pkg-config fallback
|
||||
find_package(PkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(OTEL opentelemetry-cpp QUIET)
|
||||
if(OTEL_FOUND)
|
||||
set(OpenTelemetry_FOUND TRUE)
|
||||
# Create imported targets from pkg-config
|
||||
add_library(OpenTelemetry::api INTERFACE IMPORTED)
|
||||
target_include_directories(OpenTelemetry::api INTERFACE
|
||||
${OTEL_INCLUDE_DIRS})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(OpenTelemetry
|
||||
REQUIRED_VARS OpenTelemetry_FOUND)
|
||||
```
|
||||
|
||||
### 5.4.2 CMakeLists.txt Changes
|
||||
|
||||
```cmake
|
||||
# CMakeLists.txt (additions)
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# TELEMETRY OPTIONS
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
option(XRPL_ENABLE_TELEMETRY
|
||||
"Enable OpenTelemetry distributed tracing support" OFF)
|
||||
|
||||
if(XRPL_ENABLE_TELEMETRY)
|
||||
find_package(OpenTelemetry REQUIRED)
|
||||
|
||||
# Define compile-time flag
|
||||
add_compile_definitions(XRPL_ENABLE_TELEMETRY)
|
||||
|
||||
message(STATUS "OpenTelemetry tracing: ENABLED")
|
||||
else()
|
||||
message(STATUS "OpenTelemetry tracing: DISABLED")
|
||||
endif()
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
# TELEMETRY LIBRARY
|
||||
# ═══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
if(XRPL_ENABLE_TELEMETRY)
|
||||
add_library(xrpl_telemetry
|
||||
src/libxrpl/telemetry/Telemetry.cpp
|
||||
src/libxrpl/telemetry/TelemetryConfig.cpp
|
||||
src/libxrpl/telemetry/TraceContext.cpp
|
||||
)
|
||||
|
||||
target_include_directories(xrpl_telemetry
|
||||
PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
)
|
||||
|
||||
target_link_libraries(xrpl_telemetry
|
||||
PUBLIC
|
||||
OpenTelemetry::api
|
||||
OpenTelemetry::sdk
|
||||
OpenTelemetry::otlp_grpc_exporter
|
||||
PRIVATE
|
||||
xrpl_basics
|
||||
)
|
||||
|
||||
# Add to main library dependencies
|
||||
target_link_libraries(xrpld PRIVATE xrpl_telemetry)
|
||||
else()
|
||||
# Create null implementation library
|
||||
add_library(xrpl_telemetry
|
||||
src/libxrpl/telemetry/NullTelemetry.cpp
|
||||
)
|
||||
target_include_directories(xrpl_telemetry
|
||||
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
)
|
||||
endif()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5.5 OpenTelemetry Collector Configuration
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **APM** = Application Performance Monitoring
|
||||
|
||||
### 5.5.1 Development Configuration
|
||||
|
||||
```yaml
|
||||
# otel-collector-dev.yaml
|
||||
# Minimal configuration for local development
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 100
|
||||
|
||||
exporters:
|
||||
# Console output for debugging
|
||||
logging:
|
||||
verbosity: detailed
|
||||
sampling_initial: 5
|
||||
sampling_thereafter: 200
|
||||
|
||||
# Tempo for trace visualization
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [logging, otlp/tempo]
|
||||
```
|
||||
|
||||
### 5.5.2 Production Configuration
|
||||
|
||||
```yaml
|
||||
# otel-collector-prod.yaml
|
||||
# Production configuration with filtering, sampling, and multiple backends
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
tls:
|
||||
cert_file: /etc/otel/server.crt
|
||||
key_file: /etc/otel/server.key
|
||||
ca_file: /etc/otel/ca.crt
|
||||
|
||||
processors:
|
||||
# Memory limiter to prevent OOM
|
||||
memory_limiter:
|
||||
check_interval: 1s
|
||||
limit_mib: 1000
|
||||
spike_limit_mib: 200
|
||||
|
||||
# Batch processing for efficiency
|
||||
batch:
|
||||
timeout: 5s
|
||||
send_batch_size: 512
|
||||
send_batch_max_size: 1024
|
||||
|
||||
# Tail-based sampling (keep errors and slow traces)
|
||||
tail_sampling:
|
||||
decision_wait: 10s
|
||||
num_traces: 100000
|
||||
expected_new_traces_per_sec: 1000
|
||||
policies:
|
||||
# Always keep error traces
|
||||
- name: errors
|
||||
type: status_code
|
||||
status_code:
|
||||
status_codes: [ERROR]
|
||||
# Keep slow consensus rounds (>5s)
|
||||
- name: slow-consensus
|
||||
type: latency
|
||||
latency:
|
||||
threshold_ms: 5000
|
||||
# Keep slow RPC requests (>1s)
|
||||
- name: slow-rpc
|
||||
type: and
|
||||
and:
|
||||
and_sub_policy:
|
||||
- name: rpc-spans
|
||||
type: string_attribute
|
||||
string_attribute:
|
||||
key: xrpl.rpc.command
|
||||
values: [".*"]
|
||||
enabled_regex_matching: true
|
||||
- name: latency
|
||||
type: latency
|
||||
latency:
|
||||
threshold_ms: 1000
|
||||
# Probabilistic sampling for the rest
|
||||
- name: probabilistic
|
||||
type: probabilistic
|
||||
probabilistic:
|
||||
sampling_percentage: 10
|
||||
|
||||
# Attribute processing
|
||||
attributes:
|
||||
actions:
|
||||
# Hash sensitive data
|
||||
- key: xrpl.tx.account
|
||||
action: hash
|
||||
# Add deployment info
|
||||
- key: deployment.environment
|
||||
value: production
|
||||
action: upsert
|
||||
|
||||
exporters:
|
||||
# Grafana Tempo for long-term storage
|
||||
otlp/tempo:
|
||||
endpoint: tempo.monitoring:4317
|
||||
tls:
|
||||
insecure: false
|
||||
ca_file: /etc/otel/tempo-ca.crt
|
||||
|
||||
# Elastic APM for correlation with logs
|
||||
otlp/elastic:
|
||||
endpoint: apm.elastic:8200
|
||||
headers:
|
||||
Authorization: "Bearer ${ELASTIC_APM_TOKEN}"
|
||||
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
zpages:
|
||||
endpoint: 0.0.0.0:55679
|
||||
|
||||
service:
|
||||
extensions: [health_check, zpages]
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, tail_sampling, attributes, batch]
|
||||
exporters: [otlp/tempo, otlp/elastic]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5.6 Docker Compose Development Environment
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
```yaml
|
||||
# docker-compose-telemetry.yaml
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
# OpenTelemetry Collector
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.92.0
|
||||
container_name: otel-collector
|
||||
command: ["--config=/etc/otel-collector-config.yaml"]
|
||||
volumes:
|
||||
- ./otel-collector-dev.yaml:/etc/otel-collector-config.yaml:ro
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "4318:4318" # OTLP HTTP
|
||||
- "13133:13133" # Health check
|
||||
depends_on:
|
||||
- tempo
|
||||
|
||||
# Tempo for trace visualization
|
||||
tempo:
|
||||
image: grafana/tempo:2.6.1
|
||||
container_name: tempo
|
||||
ports:
|
||||
- "3200:3200" # Tempo HTTP API
|
||||
- "4317" # OTLP gRPC (internal)
|
||||
|
||||
# Grafana for dashboards
|
||||
grafana:
|
||||
image: grafana/grafana:10.2.3
|
||||
container_name: grafana
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
ports:
|
||||
- "3000:3000"
|
||||
depends_on:
|
||||
- tempo
|
||||
|
||||
# Prometheus for metrics (optional, for correlation)
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.48.1
|
||||
container_name: prometheus
|
||||
volumes:
|
||||
- ./prometheus.yaml:/etc/prometheus/prometheus.yml:ro
|
||||
ports:
|
||||
- "9090:9090"
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: xrpld-telemetry
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5.7 Configuration Architecture
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph config["Configuration Sources"]
|
||||
cfgFile["xrpld.cfg<br/>[telemetry] section"]
|
||||
cmake["CMake<br/>XRPL_ENABLE_TELEMETRY"]
|
||||
end
|
||||
|
||||
subgraph init["Initialization"]
|
||||
parse["setup_Telemetry()"]
|
||||
factory["make_Telemetry()"]
|
||||
end
|
||||
|
||||
subgraph runtime["Runtime Components"]
|
||||
tracer["TracerProvider"]
|
||||
exporter["OTLP Exporter"]
|
||||
processor["BatchProcessor"]
|
||||
end
|
||||
|
||||
subgraph collector["Collector Pipeline"]
|
||||
recv["Receivers"]
|
||||
proc["Processors"]
|
||||
exp["Exporters"]
|
||||
end
|
||||
|
||||
cfgFile --> parse
|
||||
cmake -->|"compile flag"| parse
|
||||
parse --> factory
|
||||
factory --> tracer
|
||||
tracer --> processor
|
||||
processor --> exporter
|
||||
exporter -->|"OTLP"| recv
|
||||
recv --> proc
|
||||
proc --> exp
|
||||
|
||||
style config fill:#e3f2fd,stroke:#1976d2
|
||||
style runtime fill:#e8f5e9,stroke:#388e3c
|
||||
style collector fill:#fff3e0,stroke:#ff9800
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Configuration Sources**: `xrpld.cfg` provides runtime settings (endpoint, sampling) while the CMake flag controls whether telemetry is compiled in at all.
|
||||
- **Initialization**: `setup_Telemetry()` parses config values, then `make_Telemetry()` constructs the provider, processor, and exporter objects.
|
||||
- **Runtime Components**: The `TracerProvider` creates spans, the `BatchProcessor` buffers them, and the `OTLP Exporter` serializes and sends them over the wire.
|
||||
- **OTLP arrow to Collector**: Trace data leaves the xrpld process via OTLP (gRPC or HTTP) and enters the external Collector pipeline.
|
||||
- **Collector Pipeline**: `Receivers` ingest OTLP data, `Processors` apply sampling/filtering/enrichment, and `Exporters` forward traces to storage backends (Tempo, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 5.8 Grafana Integration
|
||||
|
||||
> **APM** = Application Performance Monitoring
|
||||
|
||||
Step-by-step instructions for integrating xrpld traces with Grafana.
|
||||
|
||||
### 5.8.1 Data Source Configuration
|
||||
|
||||
#### Tempo (Recommended)
|
||||
|
||||
```yaml
|
||||
# grafana/provisioning/datasources/tempo.yaml
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
tracesToLogs:
|
||||
datasourceUid: loki
|
||||
tags: ["service.name", "xrpl.tx.hash"]
|
||||
mappedTags: [{ key: "trace_id", value: "traceID" }]
|
||||
mapTagNamesEnabled: true
|
||||
filterByTraceID: true
|
||||
serviceMap:
|
||||
datasourceUid: prometheus
|
||||
nodeGraph:
|
||||
enabled: true
|
||||
search:
|
||||
hide: false
|
||||
lokiSearch:
|
||||
datasourceUid: loki
|
||||
```
|
||||
|
||||
#### Elastic APM
|
||||
|
||||
```yaml
|
||||
# grafana/provisioning/datasources/elastic-apm.yaml
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Elasticsearch-APM
|
||||
type: elasticsearch
|
||||
access: proxy
|
||||
url: http://elasticsearch:9200
|
||||
database: "apm-*"
|
||||
jsonData:
|
||||
esVersion: "8.0.0"
|
||||
timeField: "@timestamp"
|
||||
logMessageField: message
|
||||
logLevelField: log.level
|
||||
```
|
||||
|
||||
### 5.8.2 Dashboard Provisioning
|
||||
|
||||
```yaml
|
||||
# grafana/provisioning/dashboards/dashboards.yaml
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: "xrpld-dashboards"
|
||||
orgId: 1
|
||||
folder: "xrpld"
|
||||
folderUid: "xrpld"
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 30
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/rippled
|
||||
```
|
||||
|
||||
### 5.8.3 Example Dashboard: RPC Performance
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "xrpld RPC Performance",
|
||||
"uid": "xrpld-rpc-performance",
|
||||
"panels": [
|
||||
{
|
||||
"title": "RPC Latency by Command",
|
||||
"type": "heatmap",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && span.xrpl.rpc.command != \"\"} | histogram_over_time(duration) by (span.xrpl.rpc.command)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "RPC Error Rate",
|
||||
"type": "timeseries",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && status.code=error} | rate() by (span.xrpl.rpc.command)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Top 10 Slowest RPC Commands",
|
||||
"type": "table",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && span.xrpl.rpc.command != \"\"} | avg(duration) by (span.xrpl.rpc.command) | topk(10)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }
|
||||
},
|
||||
{
|
||||
"title": "Recent Traces",
|
||||
"type": "table",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\"}"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 5.8.4 Example Dashboard: Transaction Tracing
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "xrpld Transaction Tracing",
|
||||
"uid": "xrpld-tx-tracing",
|
||||
"panels": [
|
||||
{
|
||||
"title": "Transaction Throughput",
|
||||
"type": "stat",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"tx.receive\"} | rate()"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Cross-Node Relay Count",
|
||||
"type": "timeseries",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"tx.relay\"} | avg(span.xrpl.tx.relay_count)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }
|
||||
},
|
||||
{
|
||||
"title": "Transaction Validation Errors",
|
||||
"type": "table",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"tx.validate\" && status.code=error}"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 5.8.5 TraceQL Query Examples
|
||||
|
||||
Common queries for xrpld traces:
|
||||
|
||||
```
|
||||
# Find all traces for a specific transaction hash
|
||||
{resource.service.name="xrpld" && span.xrpl.tx.hash="ABC123..."}
|
||||
|
||||
# Find slow RPC commands (>100ms)
|
||||
{resource.service.name="xrpld" && name=~"rpc.command.*"} | duration > 100ms
|
||||
|
||||
# Find consensus rounds taking >5 seconds
|
||||
{resource.service.name="xrpld" && name="consensus.round"} | duration > 5s
|
||||
|
||||
# Find failed transactions with error details
|
||||
{resource.service.name="xrpld" && name="tx.validate" && status.code=error}
|
||||
|
||||
# Find transactions relayed to many peers
|
||||
{resource.service.name="xrpld" && name="tx.relay"} | span.xrpl.tx.relay_count > 10
|
||||
|
||||
# Compare latency across nodes
|
||||
{resource.service.name="xrpld" && name="rpc.command.account_info"} | avg(duration) by (resource.service.instance.id)
|
||||
```
|
||||
|
||||
### 5.8.6 Correlation with PerfLog
|
||||
|
||||
To correlate OpenTelemetry traces with existing PerfLog data:
|
||||
|
||||
**Step 1: Configure Loki to ingest PerfLog**
|
||||
|
||||
```yaml
|
||||
# promtail-config.yaml
|
||||
scrape_configs:
|
||||
- job_name: xrpld-perflog
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost
|
||||
labels:
|
||||
job: xrpld
|
||||
__path__: /var/log/rippled/perf*.log
|
||||
pipeline_stages:
|
||||
- json:
|
||||
expressions:
|
||||
trace_id: trace_id
|
||||
ledger_seq: ledger_seq
|
||||
tx_hash: tx_hash
|
||||
- labels:
|
||||
trace_id:
|
||||
ledger_seq:
|
||||
tx_hash:
|
||||
```
|
||||
|
||||
**Step 2: Add trace_id to PerfLog entries**
|
||||
|
||||
Modify PerfLog to include trace_id when available:
|
||||
|
||||
```cpp
|
||||
// In PerfLog output, add trace_id from current span context
|
||||
void logPerf(Json::Value& entry) {
|
||||
auto span = opentelemetry::trace::GetSpan(
|
||||
opentelemetry::context::RuntimeContext::GetCurrent());
|
||||
if (span && span->GetContext().IsValid()) {
|
||||
char traceIdHex[33];
|
||||
span->GetContext().trace_id().ToLowerBase16(traceIdHex);
|
||||
entry["trace_id"] = std::string(traceIdHex, 32);
|
||||
}
|
||||
// ... existing logging
|
||||
}
|
||||
```
|
||||
|
||||
**Step 3: Configure Grafana trace-to-logs link**
|
||||
|
||||
In Tempo data source configuration, set up the derived field:
|
||||
|
||||
```yaml
|
||||
jsonData:
|
||||
tracesToLogs:
|
||||
datasourceUid: loki
|
||||
tags: ["trace_id", "xrpl.tx.hash"]
|
||||
filterByTraceID: true
|
||||
filterBySpanID: false
|
||||
```
|
||||
|
||||
### 5.8.7 Correlation with Insight/StatsD Metrics
|
||||
|
||||
To correlate traces with existing Beast Insight metrics:
|
||||
|
||||
**Step 1: Export Insight metrics to Prometheus**
|
||||
|
||||
```yaml
|
||||
# prometheus.yaml
|
||||
scrape_configs:
|
||||
- job_name: "xrpld-statsd"
|
||||
static_configs:
|
||||
- targets: ["statsd-exporter:9102"]
|
||||
```
|
||||
|
||||
**Step 2: Add exemplars to metrics**
|
||||
|
||||
OpenTelemetry SDK automatically adds exemplars (trace IDs) to metrics when using the Prometheus exporter. This links metrics spikes to specific traces.
|
||||
|
||||
**Step 3: Configure Grafana metric-to-trace link**
|
||||
|
||||
```yaml
|
||||
# In Prometheus data source
|
||||
jsonData:
|
||||
exemplarTraceIdDestinations:
|
||||
- name: trace_id
|
||||
datasourceUid: tempo
|
||||
```
|
||||
|
||||
**Step 4: Dashboard panel with exemplars**
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "RPC Latency with Trace Links",
|
||||
"type": "timeseries",
|
||||
"datasource": "Prometheus",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, rate(xrpld_rpc_duration_seconds_bucket[5m]))",
|
||||
"exemplar": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This allows clicking on metric data points to jump directly to the related trace.
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Code Samples](./04-code-samples.md)_ | _Next: [Implementation Phases](./06-implementation-phases.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,575 +0,0 @@
|
||||
# Implementation Phases
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Configuration Reference](./05-configuration-reference.md) | [Observability Backends](./07-observability-backends.md)
|
||||
|
||||
---
|
||||
|
||||
## 6.1 Phase Overview
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
```mermaid
|
||||
gantt
|
||||
title OpenTelemetry Implementation Timeline
|
||||
dateFormat YYYY-MM-DD
|
||||
axisFormat Week %W
|
||||
|
||||
section Phase 1
|
||||
Core Infrastructure :p1, 2024-01-01, 2w
|
||||
SDK Integration :p1a, 2024-01-01, 4d
|
||||
Telemetry Interface :p1b, after p1a, 3d
|
||||
Configuration & CMake :p1c, after p1b, 3d
|
||||
Unit Tests :p1d, after p1c, 2d
|
||||
Buffer & Integration :p1e, after p1d, 2d
|
||||
|
||||
section Phase 2
|
||||
RPC Tracing :p2, after p1, 2w
|
||||
HTTP Context Extraction :p2a, after p1, 2d
|
||||
RPC Handler Instrumentation :p2b, after p2a, 4d
|
||||
PathFinding Instrumentation :p2f, after p2b, 2d
|
||||
TxQ Instrumentation :p2g, after p2f, 2d
|
||||
WebSocket Support :p2c, after p2g, 2d
|
||||
Integration Tests :p2d, after p2c, 2d
|
||||
Buffer & Review :p2e, after p2d, 4d
|
||||
|
||||
section Phase 3
|
||||
Transaction Tracing :p3, after p2, 2w
|
||||
Protocol Buffer Extension :p3a, after p2, 2d
|
||||
PeerImp Instrumentation :p3b, after p3a, 3d
|
||||
Fee Escalation Instrumentation :p3f, after p3b, 2d
|
||||
Relay Context Propagation :p3c, after p3f, 3d
|
||||
Multi-node Tests :p3d, after p3c, 2d
|
||||
Buffer & Review :p3e, after p3d, 4d
|
||||
|
||||
section Phase 4
|
||||
Consensus Tracing :p4, after p3, 2w
|
||||
Consensus Round Spans :p4a, after p3, 3d
|
||||
Proposal Handling :p4b, after p4a, 3d
|
||||
Validator List & Manifest Tracing :p4f, after p4b, 2d
|
||||
Amendment Voting Tracing :p4g, after p4f, 2d
|
||||
SHAMap Sync Tracing :p4h, after p4g, 2d
|
||||
Validation Tests :p4c, after p4h, 4d
|
||||
Buffer & Review :p4e, after p4c, 4d
|
||||
|
||||
section Phase 5
|
||||
Documentation & Deploy :p5, after p4, 1w
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6.2 Phase 1: Core Infrastructure (Weeks 1-2)
|
||||
|
||||
**Objective**: Establish foundational telemetry infrastructure
|
||||
|
||||
### Tasks
|
||||
|
||||
| Task | Description |
|
||||
| ---- | ----------------------------------------------------- |
|
||||
| 1.1 | Add OpenTelemetry C++ SDK to Conan/CMake |
|
||||
| 1.2 | Implement `Telemetry` interface and factory |
|
||||
| 1.3 | Implement `SpanGuard` RAII wrapper |
|
||||
| 1.4 | Implement configuration parser |
|
||||
| 1.5 | Integrate into `ApplicationImp` |
|
||||
| 1.6 | Add conditional compilation (`XRPL_ENABLE_TELEMETRY`) |
|
||||
| 1.7 | Create `NullTelemetry` no-op implementation |
|
||||
| 1.8 | Unit tests for core infrastructure |
|
||||
|
||||
### Exit Criteria
|
||||
|
||||
- [ ] OpenTelemetry SDK compiles and links
|
||||
- [ ] Telemetry can be enabled/disabled via config
|
||||
- [ ] Basic span creation works
|
||||
- [ ] No performance regression when disabled
|
||||
- [ ] Unit tests passing
|
||||
|
||||
---
|
||||
|
||||
## 6.3 Phase 2: RPC Tracing (Weeks 3-4)
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
**Objective**: Complete tracing for all RPC operations
|
||||
|
||||
### Tasks
|
||||
|
||||
| Task | Description |
|
||||
| ---- | -------------------------------------------------------------------------- |
|
||||
| 2.1 | Implement W3C Trace Context HTTP header extraction |
|
||||
| 2.2 | Instrument `ServerHandler::onRequest()` |
|
||||
| 2.3 | Instrument `RPCHandler::doCommand()` |
|
||||
| 2.4 | Add RPC-specific attributes |
|
||||
| 2.5 | Instrument WebSocket handler |
|
||||
| 2.6 | PathFinding instrumentation (`pathfind.request`, `pathfind.compute` spans) |
|
||||
| 2.7 | TxQ instrumentation (`txq.enqueue`, `txq.apply` spans) |
|
||||
| 2.8 | Integration tests for RPC tracing |
|
||||
| 2.9 | Performance benchmarks |
|
||||
| 2.10 | Documentation |
|
||||
|
||||
### Exit Criteria
|
||||
|
||||
- [ ] All RPC commands traced
|
||||
- [ ] Trace context propagates from HTTP headers
|
||||
- [ ] WebSocket and HTTP both instrumented
|
||||
- [ ] <1ms overhead per RPC call
|
||||
- [ ] Integration tests passing
|
||||
|
||||
---
|
||||
|
||||
## 6.4 Phase 3: Transaction Tracing (Weeks 5-6)
|
||||
|
||||
**Objective**: Trace transaction lifecycle across network
|
||||
|
||||
### Tasks
|
||||
|
||||
| Task | Description |
|
||||
| ---- | ---------------------------------------------------- |
|
||||
| 3.1 | Define `TraceContext` Protocol Buffer message |
|
||||
| 3.2 | Implement protobuf context serialization |
|
||||
| 3.3 | Instrument `PeerImp::handleTransaction()` |
|
||||
| 3.4 | Instrument `NetworkOPs::submitTransaction()` |
|
||||
| 3.5 | Instrument HashRouter integration |
|
||||
| 3.6 | Fee escalation instrumentation (`fee.escalate` span) |
|
||||
| 3.7 | Implement relay context propagation |
|
||||
| 3.8 | Integration tests (multi-node) |
|
||||
| 3.9 | Performance benchmarks |
|
||||
|
||||
### Exit Criteria
|
||||
|
||||
- [ ] Transaction traces span across nodes
|
||||
- [ ] Trace context in Protocol Buffer messages
|
||||
- [ ] HashRouter deduplication visible in traces
|
||||
- [ ] Multi-node integration tests passing
|
||||
- [ ] <5% overhead on transaction throughput
|
||||
|
||||
---
|
||||
|
||||
## 6.5 Phase 4: Consensus Tracing (Weeks 7-8)
|
||||
|
||||
**Objective**: Full observability into consensus rounds
|
||||
|
||||
### Tasks
|
||||
|
||||
| Task | Description |
|
||||
| ---- | ---------------------------------------------- |
|
||||
| 4.1 | Instrument `RCLConsensusAdaptor::startRound()` |
|
||||
| 4.2 | Instrument phase transitions |
|
||||
| 4.3 | Instrument proposal handling |
|
||||
| 4.4 | Instrument validation handling |
|
||||
| 4.5 | Add consensus-specific attributes |
|
||||
| 4.6 | Correlate with transaction traces |
|
||||
| 4.7 | Validator list and manifest tracing |
|
||||
| 4.8 | Amendment voting tracing |
|
||||
| 4.9 | SHAMap sync tracing |
|
||||
| 4.10 | Multi-validator integration tests |
|
||||
| 4.11 | Performance validation |
|
||||
|
||||
### Exit Criteria
|
||||
|
||||
- [ ] Complete consensus round traces
|
||||
- [ ] Phase transitions visible
|
||||
- [ ] Proposals and validations traced
|
||||
- [ ] No impact on consensus timing
|
||||
- [ ] Multi-validator test network validated
|
||||
|
||||
### Implementation Status — Phase 4a Plan
|
||||
|
||||
Phase 4a (establish-phase gap fill & cross-node correlation) will add:
|
||||
|
||||
- **Deterministic trace ID** derived from `previousLedger.id()` so all validators
|
||||
in the same round share the same `trace_id` (switchable via
|
||||
`consensus_trace_strategy` config: `"deterministic"` or `"attribute"`).
|
||||
See [Configuration Reference](./05-configuration-reference.md) for full
|
||||
configuration options.
|
||||
- **Round lifecycle spans**: `consensus.round` with round-to-round span links.
|
||||
- **Establish phase**: `consensus.establish`, `consensus.update_positions` (with
|
||||
`dispute.resolve` events), `consensus.check` (with threshold tracking).
|
||||
- **Mode changes**: `consensus.mode_change` spans.
|
||||
- **Validation**: `consensus.validation.send` with span link to round span
|
||||
(thread-safe cross-thread access via `roundSpanContext_` snapshot).
|
||||
- **Separation of concerns**: telemetry extracted to private helpers
|
||||
(`startRoundTracing`, `createValidationSpan`, `startEstablishTracing`,
|
||||
`updateEstablishTracing`, `endEstablishTracing`).
|
||||
|
||||
The `Phase4_taskList.md` spec document is introduced in the Phase 2 PR (#6424)
|
||||
and will contain the full task breakdown and implementation notes.
|
||||
|
||||
---
|
||||
|
||||
## 6.6 Phase 5: Documentation & Deployment (Week 9)
|
||||
|
||||
**Objective**: Production readiness
|
||||
|
||||
### Tasks
|
||||
|
||||
| Task | Description |
|
||||
| ---- | ----------------------------- |
|
||||
| 5.1 | Operator runbook |
|
||||
| 5.2 | Grafana dashboards |
|
||||
| 5.3 | Alert definitions |
|
||||
| 5.4 | Collector deployment examples |
|
||||
| 5.5 | Developer documentation |
|
||||
| 5.6 | Training materials |
|
||||
| 5.7 | Final integration testing |
|
||||
|
||||
---
|
||||
|
||||
## 6.7 Risk Assessment
|
||||
|
||||
```mermaid
|
||||
quadrantChart
|
||||
title Risk Assessment Matrix
|
||||
x-axis Low Impact --> High Impact
|
||||
y-axis Low Likelihood --> High Likelihood
|
||||
quadrant-1 Mitigate Immediately
|
||||
quadrant-2 Plan Mitigation
|
||||
quadrant-3 Accept Risk
|
||||
quadrant-4 Monitor Closely
|
||||
|
||||
SDK Compat: [0.2, 0.18]
|
||||
Protocol Chg: [0.75, 0.72]
|
||||
Perf Overhead: [0.58, 0.42]
|
||||
Context Prop: [0.4, 0.55]
|
||||
Memory Leaks: [0.85, 0.25]
|
||||
```
|
||||
|
||||
### Risk Details
|
||||
|
||||
| Risk | Likelihood | Impact | Mitigation |
|
||||
| ------------------------------------ | ---------- | ------ | --------------------------------------- |
|
||||
| Protocol changes break compatibility | Medium | High | Use high field numbers, optional fields |
|
||||
| Performance overhead unacceptable | Medium | Medium | Sampling, conditional compilation |
|
||||
| Context propagation complexity | Medium | Medium | Phased rollout, extensive testing |
|
||||
| SDK compatibility issues | Low | Medium | Pin SDK version, fallback to no-op |
|
||||
| Memory leaks in long-running nodes | Low | High | Memory profiling, bounded queues |
|
||||
|
||||
---
|
||||
|
||||
## 6.8 Success Metrics
|
||||
|
||||
| Metric | Target | Measurement |
|
||||
| ------------------------ | -------------------------------------------------------------- | --------------------- |
|
||||
| Trace coverage | >95% of transaction code paths (independent of sampling ratio) | Sampling verification |
|
||||
| CPU overhead | <3% | Benchmark tests |
|
||||
| Memory overhead | <10 MB | Memory profiling |
|
||||
| Latency impact (p99) | <2% | Performance tests |
|
||||
| Trace completeness | >99% spans with required attrs | Validation script |
|
||||
| Cross-node trace linkage | >90% of multi-hop transactions | Integration tests |
|
||||
|
||||
---
|
||||
|
||||
## 6.9 Quick Wins and Crawl-Walk-Run Strategy
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
This section outlines a prioritized approach to maximize ROI with minimal initial investment.
|
||||
|
||||
### 6.9.1 Crawl-Walk-Run Overview
|
||||
|
||||
<div align="center">
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph crawl["🐢 CRAWL (Week 1-2)"]
|
||||
direction LR
|
||||
c1[Core SDK Setup] ~~~ c2[RPC Tracing Only] ~~~ c3[PathFinding + TxQ Tracing] ~~~ c4[Single Node]
|
||||
end
|
||||
|
||||
subgraph walk["🚶 WALK (Week 3-5)"]
|
||||
direction LR
|
||||
w1[Transaction Tracing] ~~~ w2[Fee Escalation Tracing] ~~~ w3[Cross-Node Context] ~~~ w4[Basic Dashboards]
|
||||
end
|
||||
|
||||
subgraph run["🏃 RUN (Week 6-9)"]
|
||||
direction LR
|
||||
r1[Consensus Tracing] ~~~ r2[Validator, Amendment,<br/>SHAMap Tracing] ~~~ r3[Full Correlation] ~~~ r4[Production Deploy]
|
||||
end
|
||||
|
||||
crawl --> walk --> run
|
||||
|
||||
style crawl fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style walk fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style run fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style c1 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style c2 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style c3 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style c4 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style w1 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style w2 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style w3 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style w4 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style r1 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style r2 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style r3 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style r4 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **CRAWL (Weeks 1-2)**: Minimal investment -- set up the SDK, instrument RPC and PathFinding/TxQ handlers, and verify on a single node. Delivers immediate latency visibility.
|
||||
- **WALK (Weeks 3-5)**: Expand to transaction lifecycle tracing, fee escalation, cross-node context propagation, and basic Grafana dashboards. This is where distributed tracing starts working.
|
||||
- **RUN (Weeks 6-9)**: Full consensus instrumentation, validator/amendment/SHAMap tracing, end-to-end correlation, and production deployment with sampling and alerting.
|
||||
- **Arrows (crawl → walk → run)**: Each phase builds on the prior one; you cannot skip ahead because later phases depend on infrastructure established earlier.
|
||||
|
||||
### 6.9.2 Quick Wins (Immediate Value)
|
||||
|
||||
| Quick Win | Value | When to Deploy |
|
||||
| ------------------------------ | ------ | -------------- |
|
||||
| **RPC Command Tracing** | High | Week 2 |
|
||||
| **RPC Latency Histograms** | High | Week 2 |
|
||||
| **Error Rate Dashboard** | Medium | Week 2 |
|
||||
| **Transaction Submit Tracing** | High | Week 3 |
|
||||
| **Consensus Round Duration** | Medium | Week 6 |
|
||||
|
||||
### 6.9.3 CRAWL Phase (Weeks 1-2)
|
||||
|
||||
**Goal**: Get basic tracing working with minimal code changes.
|
||||
|
||||
**What You Get**:
|
||||
|
||||
- RPC request/response traces for all commands
|
||||
- Latency breakdown per RPC command
|
||||
- PathFinding and TxQ tracing (directly impacts RPC latency)
|
||||
- Error visibility with stack traces
|
||||
- Basic Grafana dashboard
|
||||
|
||||
**Code Changes**: ~15 lines in `ServerHandler.cpp`, ~40 lines in new telemetry module
|
||||
|
||||
**Why Start Here**:
|
||||
|
||||
- RPC is the lowest-risk, highest-visibility component
|
||||
- PathFinding and TxQ are RPC-adjacent and directly affect latency
|
||||
- Immediate value for debugging client issues
|
||||
- No cross-node complexity
|
||||
- Single file modification to existing code
|
||||
|
||||
### 6.9.4 WALK Phase (Weeks 3-5)
|
||||
|
||||
**Goal**: Add transaction lifecycle tracing across nodes.
|
||||
|
||||
**What You Get**:
|
||||
|
||||
- End-to-end transaction traces from submit to relay
|
||||
- Fee escalation tracing within the transaction pipeline
|
||||
- Cross-node correlation (see transaction path)
|
||||
- HashRouter deduplication visibility
|
||||
- Relay latency metrics
|
||||
|
||||
**Code Changes**: ~120 lines across 4 files, plus protobuf extension
|
||||
|
||||
**Why Do This Second**:
|
||||
|
||||
- Builds on RPC tracing (transactions submitted via RPC)
|
||||
- Fee escalation is integral to the transaction processing pipeline
|
||||
- Moderate complexity (requires context propagation)
|
||||
- High value for debugging transaction issues
|
||||
|
||||
### 6.9.5 RUN Phase (Weeks 6-9)
|
||||
|
||||
**Goal**: Full observability including consensus.
|
||||
|
||||
**What You Get**:
|
||||
|
||||
- Complete consensus round visibility
|
||||
- Phase transition timing
|
||||
- Validator proposal tracking
|
||||
- Validator list and manifest tracing
|
||||
- Amendment voting tracing
|
||||
- SHAMap sync tracing
|
||||
- Full end-to-end traces (client → RPC → TX → consensus → ledger)
|
||||
|
||||
**Code Changes**: ~100 lines across 3 consensus files, plus validator/amendment/SHAMap modules
|
||||
|
||||
**Why Do This Last**:
|
||||
|
||||
- Highest complexity (consensus is critical path)
|
||||
- Validator, amendment, and SHAMap components are lower priority
|
||||
- Requires thorough testing
|
||||
- Lower relative value (consensus issues are rarer)
|
||||
|
||||
### 6.9.6 ROI Prioritization Matrix
|
||||
|
||||
```mermaid
|
||||
quadrantChart
|
||||
title Implementation ROI Matrix
|
||||
x-axis Low Effort --> High Effort
|
||||
y-axis Low Value --> High Value
|
||||
quadrant-1 Quick Wins - Do First
|
||||
quadrant-2 Major Projects - Plan Carefully
|
||||
quadrant-3 Nice to Have - Optional
|
||||
quadrant-4 Time Sinks - Avoid
|
||||
|
||||
RPC Tracing: [0.15, 0.92]
|
||||
TX Submit Trace: [0.3, 0.78]
|
||||
TX Relay Trace: [0.5, 0.88]
|
||||
Consensus Trace: [0.72, 0.72]
|
||||
Peer Msg Trace: [0.85, 0.3]
|
||||
Ledger Acquire: [0.55, 0.52]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6.10 Definition of Done
|
||||
|
||||
> **TxQ** = Transaction Queue | **HA** = High Availability
|
||||
|
||||
Clear, measurable criteria for each phase.
|
||||
|
||||
### 6.10.1 Phase 1: Core Infrastructure
|
||||
|
||||
| Criterion | Measurement | Target |
|
||||
| --------------- | ---------------------------------------------------------- | ---------------------------- |
|
||||
| SDK Integration | `cmake --build` succeeds with `-DXRPL_ENABLE_TELEMETRY=ON` | ✅ Compiles |
|
||||
| Runtime Toggle | `enabled=0` produces zero overhead | <0.1% CPU difference |
|
||||
| Span Creation | Unit test creates and exports span | Span appears in Tempo |
|
||||
| Configuration | All config options parsed correctly | Config validation tests pass |
|
||||
| Documentation | Developer guide exists | PR approved |
|
||||
|
||||
**Definition of Done**: All criteria met, PR merged, no regressions in CI.
|
||||
|
||||
### 6.10.2 Phase 2: RPC Tracing
|
||||
|
||||
| Criterion | Measurement | Target |
|
||||
| ------------------ | ---------------------------------- | -------------------------- |
|
||||
| Coverage | All RPC commands instrumented | 100% of commands |
|
||||
| Context Extraction | traceparent header propagates | Integration test passes |
|
||||
| Attributes | Command, status, duration recorded | Validation script confirms |
|
||||
| Performance | RPC latency overhead | <1ms p99 |
|
||||
| Dashboard | Grafana dashboard deployed | Screenshot in docs |
|
||||
|
||||
**Definition of Done**: RPC traces visible in Tempo for all commands, dashboard shows latency distribution.
|
||||
|
||||
### 6.10.3 Phase 3: Transaction Tracing
|
||||
|
||||
| Criterion | Measurement | Target |
|
||||
| ---------------- | ------------------------------- | ---------------------------------- |
|
||||
| Local Trace | Submit → validate → TxQ traced | Single-node test passes |
|
||||
| Cross-Node | Context propagates via protobuf | Multi-node test passes |
|
||||
| Relay Visibility | relay_count attribute correct | Spot check 100 txs |
|
||||
| HashRouter | Deduplication visible in trace | Duplicate txs show suppressed=true |
|
||||
| Performance | TX throughput overhead | <5% degradation |
|
||||
|
||||
**Definition of Done**: Transaction traces span 3+ nodes in test network, performance within bounds.
|
||||
|
||||
### 6.10.4 Phase 4: Consensus Tracing
|
||||
|
||||
| Criterion | Measurement | Target |
|
||||
| -------------------- | ----------------------------- | ------------------------- |
|
||||
| Round Tracing | startRound creates root span | Unit test passes |
|
||||
| Phase Visibility | All phases have child spans | Integration test confirms |
|
||||
| Proposer Attribution | Proposer ID in attributes | Spot check 50 rounds |
|
||||
| Timing Accuracy | Phase durations match PerfLog | <5% variance |
|
||||
| No Consensus Impact | Round timing unchanged | Performance test passes |
|
||||
|
||||
**Definition of Done**: Consensus rounds fully traceable, no impact on consensus timing.
|
||||
|
||||
### 6.10.5 Phase 5: Production Deployment
|
||||
|
||||
| Criterion | Measurement | Target |
|
||||
| ------------ | ---------------------------- | -------------------------- |
|
||||
| Collector HA | Multiple collectors deployed | No single point of failure |
|
||||
| Sampling | Tail sampling configured | 10% base + errors + slow |
|
||||
| Retention | Data retained per policy | 7 days hot, 30 days warm |
|
||||
| Alerting | Alerts configured | Error spike, high latency |
|
||||
| Runbook | Operator documentation | Approved by ops team |
|
||||
| Training | Team trained | Session completed |
|
||||
|
||||
**Definition of Done**: Telemetry running in production, operators trained, alerts active.
|
||||
|
||||
### 6.10.6 Success Metrics Summary
|
||||
|
||||
| Phase | Primary Metric | Secondary Metric | Deadline |
|
||||
| ------- | ---------------------- | --------------------------- | ------------- |
|
||||
| Phase 1 | SDK compiles and runs | Zero overhead when disabled | End of Week 2 |
|
||||
| Phase 2 | 100% RPC coverage | <1ms latency overhead | End of Week 4 |
|
||||
| Phase 3 | Cross-node traces work | <5% throughput impact | End of Week 6 |
|
||||
| Phase 4 | Consensus fully traced | No consensus timing impact | End of Week 8 |
|
||||
| Phase 5 | Production deployment | Operators trained | End of Week 9 |
|
||||
|
||||
---
|
||||
|
||||
## 6.11 Recommended Implementation Order
|
||||
|
||||
Based on ROI analysis, implement in this exact order:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph week1["Week 1"]
|
||||
t1[1. OpenTelemetry SDK<br/>Conan/CMake integration]
|
||||
t2[2. Telemetry interface<br/>SpanGuard, config]
|
||||
end
|
||||
|
||||
subgraph week2["Week 2"]
|
||||
t3[3. RPC ServerHandler<br/>instrumentation]
|
||||
t4[4. Basic Tempo setup<br/>for testing]
|
||||
end
|
||||
|
||||
subgraph week3["Week 3"]
|
||||
t5[5. Transaction submit<br/>tracing]
|
||||
t6[6. Grafana dashboard<br/>v1]
|
||||
end
|
||||
|
||||
subgraph week4["Week 4"]
|
||||
t7[7. Protobuf context<br/>extension]
|
||||
t8[8. PeerImp tx.relay<br/>instrumentation]
|
||||
end
|
||||
|
||||
subgraph week5["Week 5"]
|
||||
t9[9. Multi-node<br/>integration tests]
|
||||
t10[10. Performance<br/>benchmarks]
|
||||
end
|
||||
|
||||
subgraph week6_8["Weeks 6-8"]
|
||||
t11[11. Consensus<br/>instrumentation]
|
||||
t12[12. Full integration<br/>testing]
|
||||
end
|
||||
|
||||
subgraph week9["Week 9"]
|
||||
t13[13. Production<br/>deployment]
|
||||
t14[14. Documentation<br/>& training]
|
||||
end
|
||||
|
||||
t1 --> t2 --> t3 --> t4
|
||||
t4 --> t5 --> t6
|
||||
t6 --> t7 --> t8
|
||||
t8 --> t9 --> t10
|
||||
t10 --> t11 --> t12
|
||||
t12 --> t13 --> t14
|
||||
|
||||
style week1 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style week2 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style week3 fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style week4 fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style week5 fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style week6_8 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style week9 fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style t1 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style t2 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style t3 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style t4 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style t5 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t6 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t7 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t8 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t9 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t10 fill:#ffe0b2,stroke:#ffcc80,color:#1e293b
|
||||
style t11 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style t12 fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style t13 fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style t14 fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Week 1 (tasks 1-2)**: Foundation work -- integrate the OpenTelemetry SDK via Conan/CMake and build the `Telemetry` interface with `SpanGuard` and config parsing.
|
||||
- **Week 2 (tasks 3-4)**: First observable output -- instrument `ServerHandler` for RPC tracing and stand up Tempo so developers can see traces immediately.
|
||||
- **Weeks 3-5 (tasks 5-10)**: Transaction lifecycle -- add submit tracing, build the first Grafana dashboard, extend protobuf for cross-node context, instrument `PeerImp` relay, then validate with multi-node integration tests and performance benchmarks.
|
||||
- **Weeks 6-8 (tasks 11-12)**: Consensus deep-dive -- instrument consensus rounds and phases, then run full integration testing across all instrumented paths.
|
||||
- **Week 9 (tasks 13-14)**: Go-live -- deploy to production with sampling/alerting configured, and deliver documentation and operator training.
|
||||
- **Arrow chain (t1 → ... → t14)**: Strict sequential dependency; each task's output is a prerequisite for the next.
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Configuration Reference](./05-configuration-reference.md)_ | _Next: [Observability Backends](./07-observability-backends.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,641 +0,0 @@
|
||||
# Observability Backend Recommendations
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Implementation Phases](./06-implementation-phases.md) | [Appendix](./08-appendix.md)
|
||||
|
||||
---
|
||||
|
||||
## 7.1 Development/Testing Backends
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
| Backend | Pros | Cons | Use Case |
|
||||
| ---------- | ----------------------------------- | ---------------------- | ------------------- |
|
||||
| **Tempo** | Cost-effective, Grafana integration | Requires Grafana stack | Local dev, CI, Prod |
|
||||
| **Zipkin** | Simple, lightweight | Basic features | Quick prototyping |
|
||||
|
||||
### Quick Start with Tempo
|
||||
|
||||
```bash
|
||||
# Start Tempo with OTLP support
|
||||
docker run -d --name tempo \
|
||||
-p 3200:3200 \
|
||||
-p 4317:4317 \
|
||||
-p 4318:4318 \
|
||||
grafana/tempo:2.6.1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7.2 Production Backends
|
||||
|
||||
> **APM** = Application Performance Monitoring
|
||||
|
||||
| Backend | Pros | Cons | Use Case |
|
||||
| ----------------- | ----------------------------------------- | ---------------------- | --------------------------- |
|
||||
| **Grafana Tempo** | Cost-effective, Grafana integration | Requires Grafana stack | Most production deployments |
|
||||
| **Elastic APM** | Full observability stack, log correlation | Resource intensive | Existing Elastic users |
|
||||
| **Honeycomb** | Excellent query, high cardinality | SaaS cost | Deep debugging needs |
|
||||
| **Datadog APM** | Full platform, easy setup | SaaS cost | Enterprise with budget |
|
||||
|
||||
### Backend Selection Flowchart
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
start[Select Backend] --> budget{Budget<br/>Constraints?}
|
||||
|
||||
budget -->|Yes| oss[Open Source]
|
||||
budget -->|No| saas{Prefer<br/>SaaS?}
|
||||
|
||||
oss --> existing{Existing<br/>Stack?}
|
||||
existing -->|Grafana| tempo[Grafana Tempo]
|
||||
existing -->|Elastic| elastic[Elastic APM]
|
||||
existing -->|None| tempo
|
||||
|
||||
saas -->|Yes| enterprise{Enterprise<br/>Support?}
|
||||
saas -->|No| oss
|
||||
|
||||
enterprise -->|Yes| datadog[Datadog APM]
|
||||
enterprise -->|No| honeycomb[Honeycomb]
|
||||
|
||||
tempo --> final[Configure Collector]
|
||||
elastic --> final
|
||||
honeycomb --> final
|
||||
datadog --> final
|
||||
|
||||
style start fill:#0f172a,stroke:#020617,color:#fff
|
||||
style budget fill:#334155,stroke:#1e293b,color:#fff
|
||||
style oss fill:#1e293b,stroke:#0f172a,color:#fff
|
||||
style existing fill:#334155,stroke:#1e293b,color:#fff
|
||||
style saas fill:#334155,stroke:#1e293b,color:#fff
|
||||
style enterprise fill:#334155,stroke:#1e293b,color:#fff
|
||||
style final fill:#0f172a,stroke:#020617,color:#fff
|
||||
style tempo fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style elastic fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style honeycomb fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style datadog fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Budget Constraints? (Yes)**: Leads to open-source options. If you already run Grafana or Elastic, pick the matching backend; otherwise default to Grafana Tempo.
|
||||
- **Budget Constraints? (No) → Prefer SaaS?**: If you want a managed service, choose between Datadog (enterprise support) and Honeycomb (developer-focused). If not, fall back to open-source.
|
||||
- **Terminal nodes (Tempo / Elastic / Honeycomb / Datadog)**: Each represents a concrete backend choice, all of which feed into the same final step.
|
||||
- **Configure Collector**: Regardless of backend, you always finish by configuring the OTel Collector to export to your chosen destination.
|
||||
|
||||
---
|
||||
|
||||
## 7.3 Recommended Production Architecture
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **APM** = Application Performance Monitoring | **HA** = High Availability
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph validators["Validator Nodes"]
|
||||
v1[xrpld<br/>Validator 1]
|
||||
v2[xrpld<br/>Validator 2]
|
||||
end
|
||||
|
||||
subgraph stock["Stock Nodes"]
|
||||
s1[xrpld<br/>Stock 1]
|
||||
s2[xrpld<br/>Stock 2]
|
||||
end
|
||||
|
||||
subgraph collector["OTel Collector Cluster"]
|
||||
c1[Collector<br/>DC1]
|
||||
c2[Collector<br/>DC2]
|
||||
end
|
||||
|
||||
subgraph backends["Storage Backends"]
|
||||
tempo[(Grafana<br/>Tempo)]
|
||||
elastic[(Elastic<br/>APM)]
|
||||
archive[(S3/GCS<br/>Archive)]
|
||||
end
|
||||
|
||||
subgraph ui["Visualization"]
|
||||
grafana[Grafana<br/>Dashboards]
|
||||
end
|
||||
|
||||
v1 -->|OTLP| c1
|
||||
v2 -->|OTLP| c1
|
||||
s1 -->|OTLP| c2
|
||||
s2 -->|OTLP| c2
|
||||
|
||||
c1 --> tempo
|
||||
c1 --> elastic
|
||||
c2 --> tempo
|
||||
c2 --> archive
|
||||
|
||||
tempo --> grafana
|
||||
elastic --> grafana
|
||||
|
||||
%% Note: simplified single-collector-per-DC topology shown for clarity
|
||||
|
||||
style validators fill:#b71c1c,stroke:#7f1d1d,color:#ffffff
|
||||
style stock fill:#0d47a1,stroke:#082f6a,color:#ffffff
|
||||
style collector fill:#bf360c,stroke:#8c2809,color:#ffffff
|
||||
style backends fill:#1b5e20,stroke:#0d3d14,color:#ffffff
|
||||
style ui fill:#4a148c,stroke:#2e0d57,color:#ffffff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Validator / Stock Nodes**: All xrpld nodes emit trace data via OTLP. Validators and stock nodes are grouped separately because they may reside in different network zones.
|
||||
- **Collector Cluster (DC1, DC2)**: Regional collectors receive OTLP from nodes in their datacenter, apply processing (sampling, enrichment), and fan out to multiple backends.
|
||||
- **Storage Backends**: Tempo and Elastic provide queryable trace storage; S3/GCS Archive provides long-term cold storage for compliance or post-incident analysis.
|
||||
- **Grafana Dashboards**: The single visualization layer that queries both Tempo and Elastic, giving operators a unified view of all traces.
|
||||
- **Data flow direction**: Nodes → Collectors → Storage → Grafana. Each arrow represents a network hop; minimizing collector-to-backend hops reduces latency.
|
||||
|
||||
> **Note**: Production deployments should use multiple collector instances behind a load balancer for high availability. The diagram shows a simplified single-collector topology for clarity.
|
||||
|
||||
---
|
||||
|
||||
## 7.4 Architecture Considerations
|
||||
|
||||
### 7.4.1 Collector Placement
|
||||
|
||||
| Strategy | Description | Pros | Cons |
|
||||
| ------------- | -------------------- | ------------------------ | ----------------------- |
|
||||
| **Sidecar** | Collector per node | Isolation, simple config | Resource overhead |
|
||||
| **DaemonSet** | Collector per host | Shared resources | Complexity |
|
||||
| **Gateway** | Central collector(s) | Centralized processing | Single point of failure |
|
||||
|
||||
**Recommendation**: Use **Gateway** pattern with regional collectors for xrpld networks:
|
||||
|
||||
- One collector cluster per datacenter/region
|
||||
- Tail-based sampling at collector level
|
||||
- Multiple export destinations for redundancy
|
||||
|
||||
### 7.4.2 Sampling Strategy
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph head["Head Sampling (Node)"]
|
||||
hs[Node-level head sampling<br/>configurable, default: 100%<br/>recommended production: 10%]
|
||||
end
|
||||
|
||||
subgraph tail["Tail Sampling (Collector)"]
|
||||
ts1[Keep all errors]
|
||||
ts2[Keep slow >5s]
|
||||
ts3[Keep 10% rest]
|
||||
end
|
||||
|
||||
head --> tail
|
||||
|
||||
ts1 --> final[Final Traces]
|
||||
ts2 --> final
|
||||
ts3 --> final
|
||||
|
||||
style head fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style tail fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style hs fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style ts1 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style ts2 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style ts3 fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style final fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Head Sampling (Node)**: The first filter -- each xrpld node decides whether to sample a trace at creation time (default 100%, recommended 10% in production). This controls the volume leaving the node.
|
||||
- **Tail Sampling (Collector)**: The second filter -- the collector inspects completed traces and applies rules: keep all errors, keep anything slower than 5 seconds, and keep 10% of the remainder.
|
||||
- **Arrow head → tail**: All head-sampled traces flow to the collector, where tail sampling further reduces volume while preserving the most valuable data.
|
||||
- **Final Traces**: The output after both sampling stages; this is what gets stored and queried. The two-stage approach balances cost with debuggability.
|
||||
|
||||
### 7.4.3 Data Retention
|
||||
|
||||
| Environment | Hot Storage | Warm Storage | Cold Archive |
|
||||
| ----------- | ----------- | ------------ | ------------ |
|
||||
| Development | 24 hours | N/A | N/A |
|
||||
| Staging | 7 days | N/A | N/A |
|
||||
| Production | 7 days | 30 days | many years |
|
||||
|
||||
---
|
||||
|
||||
## 7.5 Integration Checklist
|
||||
|
||||
- [ ] Choose primary backend (Tempo recommended for cost/features)
|
||||
- [ ] Deploy collector cluster with high availability
|
||||
- [ ] Configure tail-based sampling for error/latency traces
|
||||
- [ ] Set up Grafana dashboards for trace visualization
|
||||
- [ ] Configure alerts for trace anomalies
|
||||
- [ ] Establish data retention policies
|
||||
- [ ] Test trace correlation with logs and metrics
|
||||
|
||||
---
|
||||
|
||||
## 7.6 Grafana Dashboard Examples
|
||||
|
||||
Pre-built dashboards for xrpld observability.
|
||||
|
||||
### 7.6.1 Consensus Health Dashboard
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "xrpld Consensus Health",
|
||||
"uid": "xrpld-consensus-health",
|
||||
"tags": ["xrpld", "consensus", "tracing"],
|
||||
"panels": [
|
||||
{
|
||||
"title": "Consensus Round Duration",
|
||||
"type": "timeseries",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"consensus.round\"} | avg(duration) by (resource.service.instance.id)"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 4000 },
|
||||
{ "color": "red", "value": 5000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Phase Duration Breakdown",
|
||||
"type": "barchart",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=~\"consensus.phase.*\"} | avg(duration) by (name)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Proposers per Round",
|
||||
"type": "stat",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"consensus.round\"} | avg(span.xrpl.consensus.proposers)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 8 }
|
||||
},
|
||||
{
|
||||
"title": "Recent Slow Rounds (>5s)",
|
||||
"type": "table",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"consensus.round\"} | duration > 5s"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 12 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 7.6.2 Node Overview Dashboard
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "xrpld Node Overview",
|
||||
"uid": "xrpld-node-overview",
|
||||
"panels": [
|
||||
{
|
||||
"title": "Active Nodes",
|
||||
"type": "stat",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\"} | count_over_time() by (resource.service.instance.id) | count()"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Total Transactions (1h)",
|
||||
"type": "stat",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"tx.receive\"} | count()"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Error Rate",
|
||||
"type": "gauge",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && status.code=error} | rate() / {resource.service.name=\"xrpld\"} | rate() * 100"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"max": 10,
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Service Map",
|
||||
"type": "nodeGraph",
|
||||
"datasource": "Tempo",
|
||||
"gridPos": { "h": 12, "w": 12, "x": 12, "y": 0 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 7.6.3 Alert Rules
|
||||
|
||||
```yaml
|
||||
# grafana/provisioning/alerting/rippled-alerts.yaml
|
||||
apiVersion: 1
|
||||
|
||||
groups:
|
||||
- name: xrpld-tracing-alerts
|
||||
folder: xrpld
|
||||
interval: 1m
|
||||
rules:
|
||||
- uid: consensus-slow
|
||||
title: Consensus Round Slow
|
||||
condition: A
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: tempo
|
||||
model:
|
||||
queryType: traceql
|
||||
query: '{resource.service.name="xrpld" && name="consensus.round"} | avg(duration) > 5s'
|
||||
# Note: Verify TraceQL aggregate queries are supported by your
|
||||
# Tempo version. Aggregate alerting (e.g., avg(duration)) requires
|
||||
# Tempo 2.3+ with TraceQL metrics enabled.
|
||||
for: 5m
|
||||
annotations:
|
||||
summary: Consensus rounds taking >5 seconds
|
||||
description: "Consensus duration: {{ $value }}ms"
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
- uid: rpc-error-spike
|
||||
title: RPC Error Rate Spike
|
||||
condition: B
|
||||
data:
|
||||
- refId: B
|
||||
datasourceUid: tempo
|
||||
model:
|
||||
queryType: traceql
|
||||
query: '{resource.service.name="xrpld" && name=~"rpc.command.*" && status.code=error} | rate() > 0.05'
|
||||
# Note: Verify TraceQL aggregate queries are supported by your
|
||||
# Tempo version. Aggregate alerting (e.g., rate()) requires
|
||||
# Tempo 2.3+ with TraceQL metrics enabled.
|
||||
for: 2m
|
||||
annotations:
|
||||
summary: RPC error rate >5%
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
- uid: tx-throughput-drop
|
||||
title: Transaction Throughput Drop
|
||||
condition: C
|
||||
data:
|
||||
- refId: C
|
||||
datasourceUid: tempo
|
||||
model:
|
||||
queryType: traceql
|
||||
query: '{resource.service.name="xrpld" && name="tx.receive"} | rate() < 10'
|
||||
for: 10m
|
||||
annotations:
|
||||
summary: Transaction throughput below threshold
|
||||
labels:
|
||||
severity: warning
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7.7 PerfLog and Insight Correlation
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
How to correlate OpenTelemetry traces with existing xrpld observability.
|
||||
|
||||
### 7.7.1 Correlation Architecture
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph xrpld["xrpld Node"]
|
||||
otel[OpenTelemetry<br/>Spans]
|
||||
perflog[PerfLog<br/>JSON Logs]
|
||||
insight[Beast Insight<br/>StatsD Metrics]
|
||||
end
|
||||
|
||||
subgraph collectors["Data Collection"]
|
||||
otelc[OTel Collector]
|
||||
promtail[Promtail/Fluentd]
|
||||
statsd[StatsD Exporter]
|
||||
end
|
||||
|
||||
subgraph storage["Storage"]
|
||||
tempo[(Tempo)]
|
||||
loki[(Loki)]
|
||||
prom[(Prometheus)]
|
||||
end
|
||||
|
||||
subgraph grafana["Grafana"]
|
||||
traces[Trace View]
|
||||
logs[Log View]
|
||||
metrics[Metrics View]
|
||||
corr[Correlation<br/>Panel]
|
||||
end
|
||||
|
||||
otel -->|OTLP| otelc --> tempo
|
||||
perflog -->|JSON| promtail --> loki
|
||||
insight -->|StatsD| statsd --> prom
|
||||
|
||||
tempo --> traces
|
||||
loki --> logs
|
||||
prom --> metrics
|
||||
|
||||
traces --> corr
|
||||
logs --> corr
|
||||
metrics --> corr
|
||||
|
||||
style xrpld fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style collectors fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style storage fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style grafana fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style otel fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style perflog fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style insight fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style otelc fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style promtail fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style statsd fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style tempo fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style loki fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style prom fill:#1b5e20,stroke:#0d3d14,color:#fff
|
||||
style traces fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style logs fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style metrics fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style corr fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **xrpld Node (three sources)**: A single node emits three independent data streams -- OpenTelemetry spans, PerfLog JSON logs, and Beast Insight StatsD metrics.
|
||||
- **Data Collection layer**: Each stream has its own collector -- OTel Collector for spans, Promtail/Fluentd for logs, and a StatsD exporter for metrics. They operate independently.
|
||||
- **Storage layer (Tempo, Loki, Prometheus)**: Each data type lands in a purpose-built store optimized for its query patterns (trace search, log grep, metric aggregation).
|
||||
- **Grafana Correlation Panel**: The key integration point -- Grafana queries all three stores and links them via shared fields (`trace_id`, `xrpl.tx.hash`, `ledger_seq`), enabling a single-pane debugging experience.
|
||||
|
||||
### 7.7.2 Correlation Fields
|
||||
|
||||
| Source | Field | Link To | Purpose |
|
||||
| ----------- | --------------------------- | ------------- | -------------------------- |
|
||||
| **Trace** | `trace_id` | Logs | Find log entries for trace |
|
||||
| **Trace** | `xrpl.tx.hash` | Logs, Metrics | Find TX-related data |
|
||||
| **Trace** | `xrpl.consensus.ledger.seq` | Logs | Find ledger-related logs |
|
||||
| **PerfLog** | `trace_id` (new) | Traces | Jump to trace from log |
|
||||
| **PerfLog** | `ledger_seq` | Traces | Find consensus trace |
|
||||
| **Insight** | `exemplar.trace_id` | Traces | Jump from metric spike |
|
||||
|
||||
### 7.7.3 Example: Debugging a Slow Transaction
|
||||
|
||||
**Step 1: Find the trace**
|
||||
|
||||
```
|
||||
# In Grafana Explore with Tempo
|
||||
{resource.service.name="xrpld" && span.xrpl.tx.hash="ABC123..."}
|
||||
```
|
||||
|
||||
**Step 2: Get the trace_id from the trace view**
|
||||
|
||||
```
|
||||
Trace ID: 4bf92f3577b34da6a3ce929d0e0e4736
|
||||
```
|
||||
|
||||
**Step 3: Find related PerfLog entries**
|
||||
|
||||
```
|
||||
# In Grafana Explore with Loki
|
||||
{job="xrpld"} |= "4bf92f3577b34da6a3ce929d0e0e4736"
|
||||
```
|
||||
|
||||
**Step 4: Check Insight metrics for the time window**
|
||||
|
||||
```
|
||||
# In Grafana with Prometheus
|
||||
rate(xrpld_tx_applied_total[1m])
|
||||
@ timestamp_from_trace
|
||||
```
|
||||
|
||||
### 7.7.4 Unified Dashboard Example
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "xrpld Unified Observability",
|
||||
"uid": "xrpld-unified",
|
||||
"panels": [
|
||||
{
|
||||
"title": "Transaction Latency (Traces)",
|
||||
"type": "timeseries",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\" && name=\"tx.receive\"} | histogram_over_time(duration)"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Transaction Rate (Metrics)",
|
||||
"type": "timeseries",
|
||||
"datasource": "Prometheus",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(xrpld_tx_received_total[5m])",
|
||||
"legendFormat": "{{ instance }}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"links": [
|
||||
{
|
||||
"title": "View traces",
|
||||
"url": "/explore?left={\"datasource\":\"Tempo\",\"query\":\"{resource.service.name=\\\"xrpld\\\" && name=\\\"tx.receive\\\"}\"}"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Recent Logs",
|
||||
"type": "logs",
|
||||
"datasource": "Loki",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{job=\"xrpld\"} | json"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 0 }
|
||||
},
|
||||
{
|
||||
"title": "Trace Search",
|
||||
"type": "table",
|
||||
"datasource": "Tempo",
|
||||
"targets": [
|
||||
{
|
||||
"queryType": "traceql",
|
||||
"query": "{resource.service.name=\"xrpld\"}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "traceID" },
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "View trace",
|
||||
"url": "/explore?left={\"datasource\":\"Tempo\",\"query\":\"${__value.raw}\"}"
|
||||
},
|
||||
{
|
||||
"title": "View logs",
|
||||
"url": "/explore?left={\"datasource\":\"Loki\",\"query\":\"{job=\\\"xrpld\\\"} |= \\\"${__value.raw}\\\"\"}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 6 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Implementation Phases](./06-implementation-phases.md)_ | _Next: [Appendix](./08-appendix.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,195 +0,0 @@
|
||||
# Appendix
|
||||
|
||||
> **Parent Document**: [OpenTelemetryPlan.md](./OpenTelemetryPlan.md)
|
||||
> **Related**: [Observability Backends](./07-observability-backends.md)
|
||||
|
||||
---
|
||||
|
||||
## 8.1 Glossary
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **TxQ** = Transaction Queue
|
||||
|
||||
| Term | Definition |
|
||||
| --------------------- | ---------------------------------------------------------- |
|
||||
| **Span** | A unit of work with start/end time, name, and attributes |
|
||||
| **Trace** | A collection of spans representing a complete request flow |
|
||||
| **Trace ID** | 128-bit unique identifier for a trace |
|
||||
| **Span ID** | 64-bit unique identifier for a span within a trace |
|
||||
| **Context** | Carrier for trace/span IDs across boundaries |
|
||||
| **Propagator** | Component that injects/extracts context |
|
||||
| **Sampler** | Decides which traces to record |
|
||||
| **Exporter** | Sends spans to backend |
|
||||
| **Collector** | Receives, processes, and forwards telemetry |
|
||||
| **OTLP** | OpenTelemetry Protocol (wire format) |
|
||||
| **W3C Trace Context** | Standard HTTP headers for trace propagation |
|
||||
| **Baggage** | Key-value pairs propagated across service boundaries |
|
||||
| **Resource** | Entity producing telemetry (service, host, etc.) |
|
||||
| **Instrumentation** | Code that creates telemetry data |
|
||||
|
||||
### xrpld-Specific Terms
|
||||
|
||||
| Term | Definition |
|
||||
| ----------------- | ------------------------------------------------------------- |
|
||||
| **Overlay** | P2P network layer managing peer connections |
|
||||
| **Consensus** | XRP Ledger consensus algorithm (RCL) |
|
||||
| **Proposal** | Validator's suggested transaction set for a ledger |
|
||||
| **Validation** | Validator's signature on a closed ledger |
|
||||
| **HashRouter** | Component for transaction deduplication |
|
||||
| **JobQueue** | Thread pool for asynchronous task execution |
|
||||
| **PerfLog** | Existing performance logging system in xrpld |
|
||||
| **Beast Insight** | Existing metrics framework in xrpld |
|
||||
| **PathFinding** | Payment path computation engine for cross-currency payments |
|
||||
| **TxQ** | Transaction queue managing fee-based prioritization |
|
||||
| **LoadManager** | Dynamic fee escalation based on network load |
|
||||
| **SHAMap** | SHA-256 hash-based map (Merkle trie variant) for ledger state |
|
||||
|
||||
---
|
||||
|
||||
## 8.2 Span Hierarchy Visualization
|
||||
|
||||
> **TxQ** = Transaction Queue
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph trace["Trace: Transaction Lifecycle"]
|
||||
rpc["rpc.request<br/>(entry point)"]
|
||||
validate["tx.validate"]
|
||||
relay["tx.relay<br/>(parent span)"]
|
||||
|
||||
subgraph peers["Peer Spans"]
|
||||
p1["peer.send<br/>Peer A"]
|
||||
p2["peer.send<br/>Peer B"]
|
||||
p3["peer.send<br/>Peer C"]
|
||||
end
|
||||
|
||||
subgraph pathfinding["PathFinding Spans"]
|
||||
pathfind["pathfind.request"]
|
||||
pathcomp["pathfind.compute"]
|
||||
end
|
||||
|
||||
consensus["consensus.round"]
|
||||
apply["tx.apply"]
|
||||
|
||||
subgraph txqueue["TxQ Spans"]
|
||||
txq["txq.enqueue"]
|
||||
txqApply["txq.apply"]
|
||||
end
|
||||
|
||||
feeCalc["fee.escalate"]
|
||||
end
|
||||
|
||||
subgraph validators["Validator Spans"]
|
||||
valFetch["validator.list.fetch"]
|
||||
valManifest["validator.manifest"]
|
||||
end
|
||||
|
||||
rpc --> validate
|
||||
rpc --> pathfind
|
||||
pathfind --> pathcomp
|
||||
validate --> relay
|
||||
relay --> p1
|
||||
relay --> p2
|
||||
relay --> p3
|
||||
p1 -.->|"context propagation"| consensus
|
||||
consensus --> apply
|
||||
apply --> txq
|
||||
txq --> txqApply
|
||||
txq --> feeCalc
|
||||
|
||||
style trace fill:#0f172a,stroke:#020617,color:#fff
|
||||
style peers fill:#1e3a8a,stroke:#172554,color:#fff
|
||||
style pathfinding fill:#134e4a,stroke:#0f766e,color:#fff
|
||||
style txqueue fill:#064e3b,stroke:#047857,color:#fff
|
||||
style validators fill:#4c1d95,stroke:#6d28d9,color:#fff
|
||||
style rpc fill:#1d4ed8,stroke:#1e40af,color:#fff
|
||||
style validate fill:#047857,stroke:#064e3b,color:#fff
|
||||
style relay fill:#047857,stroke:#064e3b,color:#fff
|
||||
style p1 fill:#0e7490,stroke:#155e75,color:#fff
|
||||
style p2 fill:#0e7490,stroke:#155e75,color:#fff
|
||||
style p3 fill:#0e7490,stroke:#155e75,color:#fff
|
||||
style consensus fill:#fef3c7,stroke:#fde68a,color:#1e293b
|
||||
style apply fill:#047857,stroke:#064e3b,color:#fff
|
||||
style pathfind fill:#0e7490,stroke:#155e75,color:#fff
|
||||
style pathcomp fill:#0e7490,stroke:#155e75,color:#fff
|
||||
style txq fill:#047857,stroke:#064e3b,color:#fff
|
||||
style txqApply fill:#047857,stroke:#064e3b,color:#fff
|
||||
style feeCalc fill:#047857,stroke:#064e3b,color:#fff
|
||||
style valFetch fill:#6d28d9,stroke:#4c1d95,color:#fff
|
||||
style valManifest fill:#6d28d9,stroke:#4c1d95,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **rpc.request (blue, top)**: The entry point — every traced transaction starts as an RPC call; this root span is the parent of all downstream work.
|
||||
- **tx.validate and pathfind.request (green/teal, first fork)**: The RPC request fans out into transaction validation and, for cross-currency payments, a PathFinding branch (`pathfind.request` -> `pathfind.compute`).
|
||||
- **tx.relay -> Peer Spans (teal, middle)**: After validation, the transaction is relayed to peers A, B, and C in parallel; each `peer.send` is a sibling child span showing fan-out across the network.
|
||||
- **context propagation (dashed arrow)**: The dotted line from `peer.send Peer A` to `consensus.round` represents the trace context crossing a node boundary — the receiving validator picks up the same `trace_id` and continues the trace.
|
||||
- **consensus.round -> tx.apply -> TxQ Spans (green, lower)**: Once consensus accepts the transaction, it is applied to the ledger; the TxQ spans (`txq.enqueue`, `txq.apply`, `fee.escalate`) capture queue depth and fee escalation behavior.
|
||||
- **Validator Spans (purple, detached)**: `validator.list.fetch` and `validator.manifest` are independent workflows for UNL management — they run on their own traces and are linked to consensus via Span Links, not parent-child relationships.
|
||||
|
||||
---
|
||||
|
||||
## 8.3 References
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### OpenTelemetry Resources
|
||||
|
||||
1. [OpenTelemetry C++ SDK](https://github.com/open-telemetry/opentelemetry-cpp)
|
||||
2. [OpenTelemetry Specification](https://opentelemetry.io/docs/specs/otel/)
|
||||
3. [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/)
|
||||
4. [OTLP Protocol Specification](https://opentelemetry.io/docs/specs/otlp/)
|
||||
|
||||
### Standards
|
||||
|
||||
5. [W3C Trace Context](https://www.w3.org/TR/trace-context/)
|
||||
6. [W3C Baggage](https://www.w3.org/TR/baggage/)
|
||||
7. [Protocol Buffers](https://protobuf.dev/)
|
||||
|
||||
### xrpld Resources
|
||||
|
||||
8. [xrpld Source Code](https://github.com/XRPLF/rippled)
|
||||
9. [XRP Ledger Documentation](https://xrpl.org/docs/)
|
||||
10. [xrpld Overlay README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/overlay/README.md)
|
||||
11. [xrpld RPC README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/rpc/README.md)
|
||||
12. [xrpld Consensus README](https://github.com/XRPLF/rippled/blob/develop/src/xrpld/app/consensus/README.md)
|
||||
|
||||
---
|
||||
|
||||
## 8.4 Version History
|
||||
|
||||
| Version | Date | Author | Changes |
|
||||
| ------- | ---------- | ------ | -------------------------------------------------------------- |
|
||||
| 1.0 | 2026-02-12 | - | Initial implementation plan |
|
||||
| 1.1 | 2026-02-13 | - | Refactored into modular documents |
|
||||
| 1.2 | 2026-03-24 | - | Review fixes: accuracy corrections, cross-document consistency |
|
||||
|
||||
---
|
||||
|
||||
## 8.5 Document Index
|
||||
|
||||
### Plan Documents
|
||||
|
||||
| Document | Description |
|
||||
| ---------------------------------------------------------------- | -------------------------------------------- |
|
||||
| [OpenTelemetryPlan.md](./OpenTelemetryPlan.md) | Master overview and executive summary |
|
||||
| [00-tracing-fundamentals.md](./00-tracing-fundamentals.md) | Distributed tracing concepts and OTel primer |
|
||||
| [01-architecture-analysis.md](./01-architecture-analysis.md) | xrpld architecture and trace points |
|
||||
| [02-design-decisions.md](./02-design-decisions.md) | SDK selection, exporters, span conventions |
|
||||
| [03-implementation-strategy.md](./03-implementation-strategy.md) | Directory structure, performance analysis |
|
||||
| [04-code-samples.md](./04-code-samples.md) | C++ code examples for all components |
|
||||
| [05-configuration-reference.md](./05-configuration-reference.md) | xrpld config, CMake, Collector configs |
|
||||
| [06-implementation-phases.md](./06-implementation-phases.md) | Timeline, tasks, risks, success metrics |
|
||||
| [07-observability-backends.md](./07-observability-backends.md) | Backend selection and architecture |
|
||||
| [08-appendix.md](./08-appendix.md) | Glossary, references, version history |
|
||||
|
||||
### Task Lists
|
||||
|
||||
| Document | Description |
|
||||
| ------------------------------------ | --------------------------------------------------- |
|
||||
| [POC_taskList.md](./POC_taskList.md) | Proof-of-concept telemetry integration |
|
||||
| [presentation.md](./presentation.md) | Presentation slides for OpenTelemetry plan overview |
|
||||
|
||||
---
|
||||
|
||||
_Previous: [Observability Backends](./07-observability-backends.md)_ | _Back to: [Overview](./OpenTelemetryPlan.md)_
|
||||
@@ -1,230 +0,0 @@
|
||||
# [OpenTelemetry](00-tracing-fundamentals.md) Distributed Tracing Implementation Plan for xrpld (xrpld)
|
||||
|
||||
## Executive Summary
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
This document provides a comprehensive implementation plan for integrating OpenTelemetry distributed tracing into the xrpld XRP Ledger node software. The plan addresses the unique challenges of a decentralized peer-to-peer system where trace context must propagate across network boundaries between independent nodes.
|
||||
|
||||
### Key Benefits
|
||||
|
||||
- **End-to-end transaction visibility**: Track transactions from submission through consensus to ledger inclusion
|
||||
- **Consensus round analysis**: Understand timing and behavior of consensus phases across validators
|
||||
- **RPC performance insights**: Identify slow handlers and optimize response times
|
||||
- **Network topology understanding**: Visualize message propagation patterns between peers
|
||||
- **Incident debugging**: Correlate events across distributed nodes during issues
|
||||
|
||||
### Estimated Performance Overhead
|
||||
|
||||
| Metric | Overhead | Notes |
|
||||
| ------------- | ---------- | ----------------------------------- |
|
||||
| CPU | 1-3% | Span creation and attribute setting |
|
||||
| Memory | 2-5 MB | Batch buffer for pending spans |
|
||||
| Network | 10-50 KB/s | Compressed OTLP export to collector |
|
||||
| Latency (p99) | <2% | With proper sampling configuration |
|
||||
|
||||
---
|
||||
|
||||
## Document Structure
|
||||
|
||||
This implementation plan is organized into modular documents for easier navigation:
|
||||
|
||||
<div align="center">
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
overview["📋 OpenTelemetryPlan.md<br/>(This Document)"]
|
||||
|
||||
subgraph fundamentals["Fundamentals"]
|
||||
fund["00-tracing-fundamentals.md"]
|
||||
end
|
||||
|
||||
subgraph analysis["Analysis & Design"]
|
||||
arch["01-architecture-analysis.md"]
|
||||
design["02-design-decisions.md"]
|
||||
end
|
||||
|
||||
subgraph impl["Implementation"]
|
||||
strategy["03-implementation-strategy.md"]
|
||||
code["04-code-samples.md"]
|
||||
config["05-configuration-reference.md"]
|
||||
end
|
||||
|
||||
subgraph deploy["Deployment & Planning"]
|
||||
phases["06-implementation-phases.md"]
|
||||
backends["07-observability-backends.md"]
|
||||
appendix["08-appendix.md"]
|
||||
poc["POC_taskList.md"]
|
||||
end
|
||||
|
||||
overview --> fundamentals
|
||||
overview --> analysis
|
||||
overview --> impl
|
||||
overview --> deploy
|
||||
|
||||
fund --> arch
|
||||
arch --> design
|
||||
design --> strategy
|
||||
strategy --> code
|
||||
code --> config
|
||||
config --> phases
|
||||
phases --> backends
|
||||
backends --> appendix
|
||||
phases --> poc
|
||||
|
||||
style overview fill:#1b5e20,stroke:#0d3d14,color:#fff,stroke-width:2px
|
||||
style fundamentals fill:#00695c,stroke:#004d40,color:#fff
|
||||
style fund fill:#00695c,stroke:#004d40,color:#fff
|
||||
style analysis fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style impl fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style deploy fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style arch fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style design fill:#0d47a1,stroke:#082f6a,color:#fff
|
||||
style strategy fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style code fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style config fill:#bf360c,stroke:#8c2809,color:#fff
|
||||
style phases fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style backends fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style appendix fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
style poc fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
| Section | Document | Description |
|
||||
| ------- | ---------------------------------------------------------- | ---------------------------------------------------------------------- |
|
||||
| **0** | [Tracing Fundamentals](./00-tracing-fundamentals.md) | Distributed tracing concepts, span relationships, context propagation |
|
||||
| **1** | [Architecture Analysis](./01-architecture-analysis.md) | xrpld component analysis, trace points, instrumentation priorities |
|
||||
| **2** | [Design Decisions](./02-design-decisions.md) | SDK selection, exporters, span naming, attributes, context propagation |
|
||||
| **3** | [Implementation Strategy](./03-implementation-strategy.md) | Directory structure, key principles, performance optimization |
|
||||
| **4** | [Code Samples](./04-code-samples.md) | C++ implementation examples for core infrastructure and key modules |
|
||||
| **5** | [Configuration Reference](./05-configuration-reference.md) | xrpld config, CMake integration, Collector configurations |
|
||||
| **6** | [Implementation Phases](./06-implementation-phases.md) | 5-phase timeline, tasks, risks, success metrics |
|
||||
| **7** | [Observability Backends](./07-observability-backends.md) | Backend selection guide and production architecture |
|
||||
| **8** | [Appendix](./08-appendix.md) | Glossary, references, version history |
|
||||
| **POC** | [POC Task List](./POC_taskList.md) | Proof of concept tasks for RPC tracing end-to-end demo |
|
||||
|
||||
---
|
||||
|
||||
## 0. Tracing Fundamentals
|
||||
|
||||
This document introduces distributed tracing concepts for readers unfamiliar with the domain. It covers what traces and spans are, how parent-child and follows-from relationships model causality, how context propagates across service boundaries, and how sampling controls data volume. It also maps these concepts to xrpld-specific scenarios like transaction relay and consensus.
|
||||
|
||||
➡️ **[Read Tracing Fundamentals](./00-tracing-fundamentals.md)**
|
||||
|
||||
---
|
||||
|
||||
## 1. Architecture Analysis
|
||||
|
||||
> **WS** = WebSocket | **TxQ** = Transaction Queue
|
||||
|
||||
The xrpld node consists of several key components that require instrumentation for comprehensive distributed tracing. The main areas include the RPC server (HTTP/WebSocket), Overlay P2P network, Consensus mechanism (RCLConsensus), JobQueue for async task execution, PathFinding, Transaction Queue (TxQ), fee escalation (LoadManager), ledger acquisition, validator management, and existing observability infrastructure (PerfLog, Insight/StatsD, Journal logging).
|
||||
|
||||
Key trace points span across transaction submission via RPC, peer-to-peer message propagation, consensus round execution, ledger building, path computation, transaction queue behavior, fee escalation, and validator health. The implementation prioritizes high-value, low-risk components first: RPC handlers provide immediate value with minimal risk, while consensus tracing requires careful implementation to avoid timing impacts.
|
||||
|
||||
➡️ **[Read full Architecture Analysis](./01-architecture-analysis.md)**
|
||||
|
||||
---
|
||||
|
||||
## 2. Design Decisions
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **CNCF** = Cloud Native Computing Foundation
|
||||
|
||||
The OpenTelemetry C++ SDK is selected for its CNCF backing, active development, and native performance characteristics. Traces are exported via OTLP/gRPC (primary) or OTLP/HTTP (fallback) to an OpenTelemetry Collector, which provides flexible routing and sampling.
|
||||
|
||||
Span naming follows a hierarchical `<component>.<operation>` convention (e.g., `rpc.submit`, `tx.relay`, `consensus.round`). Context propagation uses W3C Trace Context headers for HTTP and embedded Protocol Buffer fields for P2P messages. The implementation coexists with existing PerfLog and Insight observability systems through correlation IDs.
|
||||
|
||||
**Data Collection & Privacy**: Telemetry collects only operational metadata (timing, counts, hashes) — never sensitive content (private keys, balances, amounts, raw payloads). Privacy protection includes account hashing, configurable redaction, sampling, and collector-level filtering. Node operators retain full control over telemetry configuration.
|
||||
|
||||
➡️ **[Read full Design Decisions](./02-design-decisions.md)**
|
||||
|
||||
---
|
||||
|
||||
## 3. Implementation Strategy
|
||||
|
||||
The telemetry code is organized under `include/xrpl/telemetry/` for headers and `src/libxrpl/telemetry/` for implementation. Key principles include RAII-based span management via `SpanGuard`, conditional compilation with `XRPL_ENABLE_TELEMETRY`, and minimal runtime overhead through batch processing and efficient sampling.
|
||||
|
||||
Performance optimization strategies include probabilistic head sampling (10% default), tail-based sampling at the collector for errors and slow traces, batch export to reduce network overhead, and conditional instrumentation that compiles to no-ops when disabled.
|
||||
|
||||
➡️ **[Read full Implementation Strategy](./03-implementation-strategy.md)**
|
||||
|
||||
---
|
||||
|
||||
## 4. Code Samples
|
||||
|
||||
C++ implementation examples are provided for the core telemetry infrastructure and key modules:
|
||||
|
||||
- `Telemetry.h` - Core interface for tracer access and span creation
|
||||
- `SpanGuard.h` - RAII wrapper for automatic span lifecycle management
|
||||
- `TracingInstrumentation.h` - Macros for conditional instrumentation
|
||||
- Protocol Buffer extensions for trace context propagation
|
||||
- Module-specific instrumentation (RPC, Consensus, P2P, JobQueue)
|
||||
- Remaining modules (PathFinding, TxQ, Validator, etc.) follow the same patterns
|
||||
|
||||
➡️ **[View all Code Samples](./04-code-samples.md)**
|
||||
|
||||
---
|
||||
|
||||
## 5. Configuration Reference
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **APM** = Application Performance Monitoring
|
||||
|
||||
Configuration is handled through the `[telemetry]` section in `xrpld.cfg` with options for enabling/disabling, exporter selection, endpoint configuration, sampling ratios, and component-level filtering. CMake integration includes a `XRPL_ENABLE_TELEMETRY` option for compile-time control.
|
||||
|
||||
OpenTelemetry Collector configurations are provided for development and production (with tail-based sampling, Tempo, and Elastic APM). Docker Compose examples enable quick local development environment setup.
|
||||
|
||||
➡️ **[View full Configuration Reference](./05-configuration-reference.md)**
|
||||
|
||||
---
|
||||
|
||||
## 6. Implementation Phases
|
||||
|
||||
The implementation spans 9 weeks across 5 phases:
|
||||
|
||||
| Phase | Duration | Focus | Key Deliverables |
|
||||
| ----- | --------- | ------------------- | --------------------------------------------------- |
|
||||
| 1 | Weeks 1-2 | Core Infrastructure | SDK integration, Telemetry interface, Configuration |
|
||||
| 2 | Weeks 3-4 | RPC Tracing | HTTP context extraction, Handler instrumentation |
|
||||
| 3 | Weeks 5-6 | Transaction Tracing | Protocol Buffer context, Relay propagation |
|
||||
| 4 | Weeks 7-8 | Consensus Tracing | Round spans, Proposal/validation tracing |
|
||||
| 5 | Week 9 | Documentation | Runbook, Dashboards, Training |
|
||||
|
||||
**Total Effort**: 47 person-days (2 developers working in parallel)
|
||||
|
||||
➡️ **[View full Implementation Phases](./06-implementation-phases.md)**
|
||||
|
||||
---
|
||||
|
||||
## 7. Observability Backends
|
||||
|
||||
> **APM** = Application Performance Monitoring | **GCS** = Google Cloud Storage
|
||||
|
||||
Grafana Tempo is recommended for all environments due to its cost-effectiveness and Grafana integration, while Elastic APM is ideal for organizations with existing Elastic infrastructure.
|
||||
|
||||
The recommended production architecture uses a gateway collector pattern with regional collectors performing tail-based sampling, routing traces to multiple backends (Tempo for primary storage, Elastic for log correlation, S3/GCS for long-term archive).
|
||||
|
||||
➡️ **[View Observability Backend Recommendations](./07-observability-backends.md)**
|
||||
|
||||
---
|
||||
|
||||
## 8. Appendix
|
||||
|
||||
The appendix contains a glossary of OpenTelemetry and xrpld-specific terms, references to external documentation and specifications, version history for this implementation plan, and a complete document index.
|
||||
|
||||
➡️ **[View Appendix](./08-appendix.md)**
|
||||
|
||||
---
|
||||
|
||||
## POC Task List
|
||||
|
||||
A step-by-step task list for building a minimal end-to-end proof of concept that demonstrates distributed tracing in xrpld. The POC scope is limited to RPC tracing — showing request traces flowing from xrpld through an OpenTelemetry Collector into Tempo, viewable in Grafana.
|
||||
|
||||
➡️ **[View POC Task List](./POC_taskList.md)**
|
||||
|
||||
---
|
||||
|
||||
_This document provides a comprehensive implementation plan for integrating OpenTelemetry distributed tracing into the xrpld XRP Ledger node software. For detailed information on any section, follow the links to the corresponding sub-documents._
|
||||
@@ -1,636 +0,0 @@
|
||||
# OpenTelemetry POC Task List
|
||||
|
||||
> **Goal**: Build a minimal end-to-end proof of concept that demonstrates distributed tracing in xrpld. A successful POC will show RPC request traces flowing from xrpld through an OTel Collector into Tempo, viewable in Grafana.
|
||||
>
|
||||
> **Scope**: RPC tracing only (highest value, lowest risk per the [CRAWL phase](./06-implementation-phases.md#6102-quick-wins-immediate-value) in the implementation phases). No cross-node P2P context propagation or consensus tracing in the POC.
|
||||
|
||||
### Related Plan Documents
|
||||
|
||||
| Document | Relevance to POC |
|
||||
| ---------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| [00-tracing-fundamentals.md](./00-tracing-fundamentals.md) | Core concepts: traces, spans, context propagation, sampling |
|
||||
| [01-architecture-analysis.md](./01-architecture-analysis.md) | RPC request flow (§1.5), key trace points (§1.6), instrumentation priority (§1.7) |
|
||||
| [02-design-decisions.md](./02-design-decisions.md) | SDK selection (§2.1), exporter config (§2.2), span naming (§2.3), attribute schema (§2.4), coexistence with PerfLog/Insight (§2.6) |
|
||||
| [03-implementation-strategy.md](./03-implementation-strategy.md) | Directory structure (§3.1), key principles (§3.2), performance overhead (§3.3-3.6), conditional compilation (§3.7.3), code intrusiveness (§3.9) |
|
||||
| [04-code-samples.md](./04-code-samples.md) | Telemetry interface (§4.1), SpanGuard (§4.2), macros (§4.3), RPC instrumentation (§4.5.3) |
|
||||
| [05-configuration-reference.md](./05-configuration-reference.md) | xrpld config (§5.1), config parser (§5.2), Application integration (§5.3), CMake (§5.4), Collector config (§5.5), Docker Compose (§5.6), Grafana (§5.8) |
|
||||
| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 1 core tasks (§6.2), Phase 2 RPC tasks (§6.3), quick wins (§6.10), definition of done (§6.11) |
|
||||
| [07-observability-backends.md](./07-observability-backends.md) | Tempo dev setup (§7.1), Grafana dashboards (§7.6), alert rules (§7.6.3) |
|
||||
|
||||
---
|
||||
|
||||
## Task 0: Docker Observability Stack Setup
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
**Objective**: Stand up the backend infrastructure to receive, store, and display traces.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Create `docker/telemetry/docker-compose.yml` in the repo with three services:
|
||||
1. **OpenTelemetry Collector** (`otel/opentelemetry-collector-contrib:0.92.0`)
|
||||
- Expose ports `4317` (OTLP gRPC) and `4318` (OTLP HTTP)
|
||||
- Expose port `13133` (health check)
|
||||
- Mount a config file `docker/telemetry/otel-collector-config.yaml`
|
||||
2. **Tempo** (`grafana/tempo:2.6.1`)
|
||||
- Expose port `3200` (HTTP API) and `4317` (OTLP gRPC, internal)
|
||||
3. **Grafana** (`grafana/grafana:latest`) — optional but useful
|
||||
- Expose port `3000`
|
||||
- Enable anonymous admin access for local dev (`GF_AUTH_ANONYMOUS_ENABLED=true`, `GF_AUTH_ANONYMOUS_ORG_ROLE=Admin`)
|
||||
- Provision Tempo as a data source via `docker/telemetry/grafana/provisioning/datasources/tempo.yaml`
|
||||
|
||||
- Create `docker/telemetry/otel-collector-config.yaml`:
|
||||
|
||||
```yaml
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 100
|
||||
|
||||
exporters:
|
||||
logging:
|
||||
verbosity: detailed
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [logging, otlp/tempo]
|
||||
```
|
||||
|
||||
- Create Grafana Tempo datasource provisioning file at `docker/telemetry/grafana/provisioning/datasources/tempo.yaml`:
|
||||
```yaml
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
```
|
||||
|
||||
**Verification**: Run `docker compose -f docker/telemetry/docker-compose.yml up -d`, then:
|
||||
|
||||
- `curl http://localhost:13133` returns healthy (Collector)
|
||||
- `http://localhost:3000` opens Grafana (Tempo datasource available, no traces yet)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [05-configuration-reference.md §5.5](./05-configuration-reference.md) — Collector config (dev YAML with Tempo exporter)
|
||||
- [05-configuration-reference.md §5.6](./05-configuration-reference.md) — Docker Compose development environment
|
||||
- [07-observability-backends.md §7.1](./07-observability-backends.md) — Tempo quick start and backend selection
|
||||
- [05-configuration-reference.md §5.8](./05-configuration-reference.md) — Grafana datasource provisioning and dashboards
|
||||
|
||||
---
|
||||
|
||||
## Task 1: Add OpenTelemetry C++ SDK Dependency
|
||||
|
||||
**Objective**: Make `opentelemetry-cpp` available to the build system.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Edit `conanfile.py` to add `opentelemetry-cpp` as an **optional** dependency. The gRPC otel plugin flag (`"grpc/*:otel_plugin": False`) in the existing conanfile may need to remain false — we pull the OTel SDK separately.
|
||||
- Add a Conan option: `with_telemetry = [True, False]` defaulting to `False`
|
||||
- When `with_telemetry` is `True`, add `opentelemetry-cpp` to `self.requires()`
|
||||
- Required OTel Conan components: `opentelemetry-cpp` (which bundles api, sdk, and exporters). If the package isn't in Conan Center, consider using `FetchContent` in CMake or building from source as a fallback.
|
||||
- Edit `CMakeLists.txt`:
|
||||
- Add option: `option(XRPL_ENABLE_TELEMETRY "Enable OpenTelemetry tracing" OFF)`
|
||||
- When ON, `find_package(opentelemetry-cpp CONFIG REQUIRED)` and add compile definition `XRPL_ENABLE_TELEMETRY`
|
||||
- When OFF, do nothing (zero build impact)
|
||||
- Verify the build succeeds with `-DXRPL_ENABLE_TELEMETRY=OFF` (no regressions) and with `-DXRPL_ENABLE_TELEMETRY=ON` (SDK links successfully).
|
||||
|
||||
**Key files**:
|
||||
|
||||
- `conanfile.py`
|
||||
- `CMakeLists.txt`
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [05-configuration-reference.md §5.4](./05-configuration-reference.md) — CMake integration, `FindOpenTelemetry.cmake`, `XRPL_ENABLE_TELEMETRY` option
|
||||
- [03-implementation-strategy.md §3.2](./03-implementation-strategy.md) — Key principle: zero-cost when disabled via compile-time flags
|
||||
- [02-design-decisions.md §2.1](./02-design-decisions.md) — SDK selection rationale and required OTel components
|
||||
|
||||
---
|
||||
|
||||
## Task 2: Create Core Telemetry Interface and NullTelemetry
|
||||
|
||||
**Objective**: Define the `Telemetry` abstract interface and a no-op implementation so the rest of the codebase can reference telemetry without hard-depending on the OTel SDK.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Create `include/xrpl/telemetry/Telemetry.h`:
|
||||
- Define `namespace xrpl::telemetry`
|
||||
- Define `struct Telemetry::Setup` holding: `enabled`, `exporterEndpoint`, `samplingRatio`, `serviceName`, `serviceVersion`, `serviceInstanceId`, `traceRpc`, `traceTransactions`, `traceConsensus`, `tracePeer`
|
||||
- Define abstract `class Telemetry` with:
|
||||
- `virtual void start() = 0;`
|
||||
- `virtual void stop() = 0;`
|
||||
- `virtual bool isEnabled() const = 0;`
|
||||
- `virtual nostd::shared_ptr<Tracer> getTracer(string_view name = "xrpld") = 0;`
|
||||
- `virtual nostd::shared_ptr<Span> startSpan(string_view name, SpanKind kind = kInternal) = 0;`
|
||||
- `virtual nostd::shared_ptr<Span> startSpan(string_view name, Context const& parentContext, SpanKind kind = kInternal) = 0;`
|
||||
- `virtual bool shouldTraceRpc() const = 0;`
|
||||
- `virtual bool shouldTraceTransactions() const = 0;`
|
||||
- `virtual bool shouldTraceConsensus() const = 0;`
|
||||
- Factory: `std::unique_ptr<Telemetry> make_Telemetry(Setup const&, beast::Journal);`
|
||||
- Config parser: `Telemetry::Setup setup_Telemetry(Section const&, std::string const& nodePublicKey, std::string const& version);`
|
||||
|
||||
- Create `include/xrpl/telemetry/SpanGuard.h`:
|
||||
- RAII guard that takes an `nostd::shared_ptr<Span>`, creates a `Scope`, and calls `span->End()` in destructor.
|
||||
- Convenience: `setAttribute()`, `setOk()`, `setStatus()`, `addEvent()`, `recordException()`, `context()`
|
||||
- See [04-code-samples.md](./04-code-samples.md) §4.2 for the full implementation.
|
||||
|
||||
- Create `src/libxrpl/telemetry/NullTelemetry.cpp`:
|
||||
- Implements `Telemetry` with all no-ops.
|
||||
- `isEnabled()` returns `false`, `startSpan()` returns a noop span.
|
||||
- This is used when `XRPL_ENABLE_TELEMETRY` is OFF or `enabled=0` in config.
|
||||
|
||||
- Guard all OTel SDK headers behind `#ifdef XRPL_ENABLE_TELEMETRY`. The `NullTelemetry` implementation should compile without the OTel SDK present.
|
||||
|
||||
**Key new files**:
|
||||
|
||||
- `include/xrpl/telemetry/Telemetry.h`
|
||||
- `include/xrpl/telemetry/SpanGuard.h`
|
||||
- `src/libxrpl/telemetry/NullTelemetry.cpp`
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.1](./04-code-samples.md) — Full `Telemetry` interface with `Setup` struct, lifecycle, tracer access, span creation, and component filtering methods
|
||||
- [04-code-samples.md §4.2](./04-code-samples.md) — Full `SpanGuard` RAII implementation and `NullSpanGuard` no-op class
|
||||
- [03-implementation-strategy.md §3.1](./03-implementation-strategy.md) — Directory structure: `include/xrpl/telemetry/` for headers, `src/libxrpl/telemetry/` for implementation
|
||||
- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation and zero-cost compile-time disabled pattern
|
||||
|
||||
---
|
||||
|
||||
## Task 3: Implement OTel-Backed Telemetry
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
**Objective**: Implement the real `Telemetry` class that initializes the OTel SDK, configures the OTLP exporter and batch processor, and creates tracers/spans.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Create `src/libxrpl/telemetry/Telemetry.cpp` (compiled only when `XRPL_ENABLE_TELEMETRY=ON`):
|
||||
- `class TelemetryImpl : public Telemetry` that:
|
||||
- In `start()`: creates a `TracerProvider` with:
|
||||
- Resource attributes: `service.name`, `service.version`, `service.instance.id`
|
||||
- An `OtlpHttpExporter` pointed at `setup.exporterEndpoint` (default `localhost:4318`)
|
||||
- A `BatchSpanProcessor` with configurable batch size and delay
|
||||
- A `TraceIdRatioBasedSampler` using `setup.samplingRatio`
|
||||
- Sets the global `TracerProvider`
|
||||
- In `stop()`: calls `ForceFlush()` then shuts down the provider
|
||||
- In `startSpan()`: delegates to `getTracer()->StartSpan(name, ...)`
|
||||
- `shouldTraceRpc()` etc. read from `Setup` fields
|
||||
|
||||
- Create `src/libxrpl/telemetry/TelemetryConfig.cpp`:
|
||||
- `setup_Telemetry()` parses the `[telemetry]` config section from `xrpld.cfg`
|
||||
- Maps config keys: `enabled`, `exporter`, `endpoint`, `sampling_ratio`, `trace_rpc`, `trace_transactions`, `trace_consensus`, `trace_peer`
|
||||
|
||||
- Wire `make_Telemetry()` factory:
|
||||
- If `setup.enabled` is true AND `XRPL_ENABLE_TELEMETRY` is defined: return `TelemetryImpl`
|
||||
- Otherwise: return `NullTelemetry`
|
||||
|
||||
- Add telemetry source files to CMake. When `XRPL_ENABLE_TELEMETRY=ON`, compile `Telemetry.cpp` and `TelemetryConfig.cpp` and link against `opentelemetry-cpp::api`, `opentelemetry-cpp::sdk`, `opentelemetry-cpp::otlp_grpc_exporter`. When OFF, compile only `NullTelemetry.cpp`.
|
||||
|
||||
**Key new files**:
|
||||
|
||||
- `src/libxrpl/telemetry/Telemetry.cpp`
|
||||
- `src/libxrpl/telemetry/TelemetryConfig.cpp`
|
||||
|
||||
**Key modified files**:
|
||||
|
||||
- `CMakeLists.txt` (add telemetry library target)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.1](./04-code-samples.md) — `Telemetry` interface that `TelemetryImpl` must implement
|
||||
- [05-configuration-reference.md §5.2](./05-configuration-reference.md) — `setup_Telemetry()` config parser implementation
|
||||
- [02-design-decisions.md §2.2](./02-design-decisions.md) — OTLP/gRPC exporter config (endpoint, TLS options)
|
||||
- [02-design-decisions.md §2.4.1](./02-design-decisions.md) — Resource attributes: `service.name`, `service.version`, `service.instance.id`, `xrpl.network.id`
|
||||
- [03-implementation-strategy.md §3.4](./03-implementation-strategy.md) — Per-operation CPU costs and overhead budget for span creation
|
||||
- [03-implementation-strategy.md §3.5](./03-implementation-strategy.md) — Memory overhead: static (~456 KB) and dynamic (~1.2 MB) budgets
|
||||
|
||||
---
|
||||
|
||||
## Task 4: Integrate Telemetry into Application Lifecycle
|
||||
|
||||
**Objective**: Wire the `Telemetry` object into the `ServiceRegistry` / `Application` so all components can access it.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Edit `include/xrpl/core/ServiceRegistry.h`:
|
||||
- Forward-declare `namespace telemetry { class Telemetry; }` inside `namespace xrpl`
|
||||
- Add pure virtual method: `virtual telemetry::Telemetry& getTelemetry() = 0;`
|
||||
- (`Application` extends `ServiceRegistry`, so this is automatically available on `Application` too)
|
||||
|
||||
- Edit `src/xrpld/app/main/Application.cpp` (the `ApplicationImp` class):
|
||||
- Add member: `std::unique_ptr<telemetry::Telemetry> telemetry_;`
|
||||
- In the member initializer list, construct telemetry with an empty
|
||||
`serviceInstanceId` (node identity is not yet known):
|
||||
```cpp
|
||||
, telemetry_(
|
||||
telemetry::make_Telemetry(
|
||||
telemetry::setup_Telemetry(
|
||||
config_->section("telemetry"),
|
||||
"", // Updated later via setServiceInstanceId()
|
||||
BuildInfo::getVersionString()),
|
||||
logs_->journal("Telemetry")))
|
||||
```
|
||||
- In `setup()`, after `nodeIdentity_` is resolved, inject the node
|
||||
public key as the service instance ID:
|
||||
```cpp
|
||||
if (!config_->section("telemetry").exists("service_instance_id"))
|
||||
telemetry_->setServiceInstanceId(
|
||||
toBase58(TokenType::NodePublic, nodeIdentity_->first));
|
||||
```
|
||||
- In `start()`: call `telemetry_->start()`
|
||||
- In `run()` (shutdown path): call `telemetry_->stop()` (to flush pending spans)
|
||||
- Implement `getTelemetry()` override: return `*telemetry_`
|
||||
|
||||
- Add `[telemetry]` section to the example config `cfg/xrpld-example.cfg`:
|
||||
```ini
|
||||
# [telemetry]
|
||||
# enabled=1
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
# sampling_ratio=1.0
|
||||
# trace_rpc=1
|
||||
```
|
||||
|
||||
> **Access patterns**: Components holding `ServiceRegistry&` (e.g.
|
||||
> `NetworkOPsImp`) call `registry_.get().getTelemetry()`. Components
|
||||
> holding `Application&` (e.g. `ServerHandler`, `PeerImp`,
|
||||
> `RCLConsensusAdaptor`) call `app_.getTelemetry()` directly. Both
|
||||
> resolve to the same `Telemetry` instance.
|
||||
|
||||
**Key modified files**:
|
||||
|
||||
- `include/xrpl/core/ServiceRegistry.h`
|
||||
- `src/xrpld/app/main/Application.cpp`
|
||||
- `cfg/xrpld-example.cfg` (example config)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [05-configuration-reference.md §5.3](./05-configuration-reference.md) — `ApplicationImp` changes: member declaration, constructor init, `start()`/`stop()` wiring, `getTelemetry()` override
|
||||
- [05-configuration-reference.md §5.1](./05-configuration-reference.md) — `[telemetry]` config section format and all option defaults
|
||||
- [03-implementation-strategy.md §3.9.2](./03-implementation-strategy.md) — File impact assessment: `Application.cpp` ~15 lines added, ~3 changed (Low risk)
|
||||
|
||||
---
|
||||
|
||||
## Task 5: Create Instrumentation Macros
|
||||
|
||||
**Objective**: Define convenience macros that make instrumenting code one-liners, and that compile to zero-cost no-ops when telemetry is disabled.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Create `src/xrpld/telemetry/TracingInstrumentation.h`:
|
||||
- When `XRPL_ENABLE_TELEMETRY` is defined:
|
||||
|
||||
```cpp
|
||||
#define XRPL_TRACE_SPAN(telemetry, name) \
|
||||
auto _xrpl_span_ = (telemetry).startSpan(name); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
|
||||
#define XRPL_TRACE_RPC(telemetry, name) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((telemetry).shouldTraceRpc()) { \
|
||||
_xrpl_guard_.emplace((telemetry).startSpan(name)); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) \
|
||||
if (_xrpl_guard_.has_value()) { \
|
||||
_xrpl_guard_->setAttribute(key, value); \
|
||||
}
|
||||
|
||||
#define XRPL_TRACE_EXCEPTION(e) \
|
||||
if (_xrpl_guard_.has_value()) { \
|
||||
_xrpl_guard_->recordException(e); \
|
||||
}
|
||||
```
|
||||
|
||||
- When `XRPL_ENABLE_TELEMETRY` is NOT defined, all macros expand to `((void)0)`
|
||||
|
||||
**Key new file**:
|
||||
|
||||
- `src/xrpld/telemetry/TracingInstrumentation.h`
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.3](./04-code-samples.md) — Full macro definitions for `XRPL_TRACE_SPAN`, `XRPL_TRACE_RPC`, `XRPL_TRACE_CONSENSUS`, `XRPL_TRACE_SET_ATTR`, `XRPL_TRACE_EXCEPTION` with both enabled and disabled branches
|
||||
- [03-implementation-strategy.md §3.7.3](./03-implementation-strategy.md) — Conditional instrumentation pattern: compile-time `#ifndef` and runtime `shouldTrace*()` checks
|
||||
- [03-implementation-strategy.md §3.9.7](./03-implementation-strategy.md) — Before/after code examples showing minimal intrusiveness (~1-3 lines per instrumentation point)
|
||||
|
||||
---
|
||||
|
||||
## Task 6: Instrument RPC ServerHandler
|
||||
|
||||
> **WS** = WebSocket
|
||||
|
||||
**Objective**: Add tracing to the HTTP RPC entry point so every incoming RPC request creates a span.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Edit `src/xrpld/rpc/detail/ServerHandler.cpp`:
|
||||
- `#include` the `TracingInstrumentation.h` header
|
||||
- In `ServerHandler::onRequest(Session& session)`:
|
||||
- At the top of the method, add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");`
|
||||
- After the RPC command name is extracted, set attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", command);`
|
||||
- After the response status is known, set: `XRPL_TRACE_SET_ATTR("http.status_code", static_cast<int64_t>(statusCode));`
|
||||
- Wrap error paths with: `XRPL_TRACE_EXCEPTION(e);`
|
||||
- In `ServerHandler::processRequest(...)`:
|
||||
- Add a child span: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");`
|
||||
- Set method attribute: `XRPL_TRACE_SET_ATTR("xrpl.rpc.method", request_method);`
|
||||
- In `ServerHandler::onWSMessage(...)` (WebSocket path):
|
||||
- Add: `XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws.message");`
|
||||
|
||||
- The goal is to see spans like:
|
||||
```
|
||||
rpc.request
|
||||
└── rpc.process
|
||||
```
|
||||
in Tempo/Grafana for every HTTP RPC call.
|
||||
|
||||
**Key modified file**:
|
||||
|
||||
- `src/xrpld/rpc/detail/ServerHandler.cpp` (~15-25 lines added)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.5.3](./04-code-samples.md) — Complete `ServerHandler::onRequest()` instrumented code sample with W3C header extraction, span creation, attribute setting, and error handling
|
||||
- [01-architecture-analysis.md §1.5](./01-architecture-analysis.md) — RPC request flow diagram: HTTP request -> attributes -> jobqueue.enqueue -> rpc.command -> response
|
||||
- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — Key trace points table: `rpc.request` in `ServerHandler.cpp::onRequest()` (Priority: High)
|
||||
- [02-design-decisions.md §2.3](./02-design-decisions.md) — Span naming convention: `rpc.request`, `rpc.command.*`
|
||||
- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC span attributes: `xrpl.rpc.command`, `xrpl.rpc.version`, `xrpl.rpc.role`, `xrpl.rpc.params`
|
||||
- [03-implementation-strategy.md §3.9.2](./03-implementation-strategy.md) — File impact: `ServerHandler.cpp` ~40 lines added, ~10 changed (Low risk)
|
||||
|
||||
---
|
||||
|
||||
## Task 7: Instrument RPC Command Execution
|
||||
|
||||
**Objective**: Add per-command tracing inside the RPC handler so each command (e.g., `submit`, `account_info`, `server_info`) gets its own child span.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Edit `src/xrpld/rpc/detail/RPCHandler.cpp`:
|
||||
- `#include` the `TracingInstrumentation.h` header
|
||||
- In `doCommand(RPC::JsonContext& context, Json::Value& result)`:
|
||||
- At the top: `XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + context.method);`
|
||||
- Set attributes:
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.command", context.method);`
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));`
|
||||
- `XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN) ? "admin" : "user");`
|
||||
- On success: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");`
|
||||
- On error: `XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");` and set the error message
|
||||
|
||||
- After this, traces in Tempo/Grafana should look like:
|
||||
```
|
||||
rpc.request (xrpl.rpc.command=account_info)
|
||||
└── rpc.process
|
||||
└── rpc.command.account_info (xrpl.rpc.version=2, xrpl.rpc.role=user, xrpl.rpc.status=success)
|
||||
```
|
||||
|
||||
**Key modified file**:
|
||||
|
||||
- `src/xrpld/rpc/detail/RPCHandler.cpp` (~15-20 lines added)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [04-code-samples.md §4.5.3](./04-code-samples.md) — `ServerHandler::onRequest()` code sample (includes child span pattern for `rpc.command.*`)
|
||||
- [02-design-decisions.md §2.3](./02-design-decisions.md) — Span naming: `rpc.command.*` pattern with dynamic command name (e.g., `rpc.command.server_info`)
|
||||
- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC attribute schema: `xrpl.rpc.command`, `xrpl.rpc.version`, `xrpl.rpc.role`, `xrpl.rpc.status`
|
||||
- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — Key trace points table: `rpc.command.*` in `RPCHandler.cpp::doCommand()` (Priority: High)
|
||||
- [02-design-decisions.md §2.6.5](./02-design-decisions.md) — Correlation with PerfLog: how `doCommand()` can link trace_id with existing PerfLog entries
|
||||
- [03-implementation-strategy.md §3.4.4](./03-implementation-strategy.md) — RPC request overhead budget: ~1.75 μs total per request
|
||||
|
||||
---
|
||||
|
||||
## Task 8: Build, Run, and Verify End-to-End
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
**Objective**: Prove the full pipeline works: xrpld emits traces -> OTel Collector receives them -> Tempo stores them for Grafana visualization.
|
||||
|
||||
**What to do**:
|
||||
|
||||
1. **Start the Docker stack**:
|
||||
|
||||
```bash
|
||||
docker compose -f docker/telemetry/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
Verify Collector health: `curl http://localhost:13133`
|
||||
|
||||
2. **Build xrpld with telemetry**:
|
||||
|
||||
```bash
|
||||
# Adjust for your actual build workflow
|
||||
conan install . --build=missing -o with_telemetry=True
|
||||
cmake --preset default -DXRPL_ENABLE_TELEMETRY=ON
|
||||
cmake --build --preset default
|
||||
```
|
||||
|
||||
3. **Configure xrpld**:
|
||||
Add to `xrpld.cfg` (or your local test config):
|
||||
|
||||
```ini
|
||||
[telemetry]
|
||||
enabled=1
|
||||
endpoint=localhost:4317
|
||||
sampling_ratio=1.0
|
||||
trace_rpc=1
|
||||
```
|
||||
|
||||
4. **Start xrpld** in standalone mode:
|
||||
|
||||
```bash
|
||||
./rippled --conf xrpld.cfg -a --start
|
||||
```
|
||||
|
||||
5. **Generate RPC traffic**:
|
||||
|
||||
```bash
|
||||
# server_info
|
||||
curl -s -X POST http://localhost:5005 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"method":"server_info","params":[{}]}'
|
||||
|
||||
# ledger
|
||||
curl -s -X POST http://localhost:5005 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"method":"ledger","params":[{"ledger_index":"current"}]}'
|
||||
|
||||
# account_info (will error in standalone, that's fine — we trace errors too)
|
||||
curl -s -X POST http://localhost:5005 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"method":"account_info","params":[{"account":"rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"}]}'
|
||||
```
|
||||
|
||||
6. **Verify in Grafana (Tempo)**:
|
||||
- Open `http://localhost:3000`
|
||||
- Navigate to Explore → select Tempo datasource
|
||||
- Search for service `xrpld`
|
||||
- Confirm you see traces with spans: `rpc.request` -> `rpc.process` -> `rpc.command.server_info`
|
||||
- Click into a trace and verify attributes: `xrpl.rpc.command`, `xrpl.rpc.status`, `xrpl.rpc.version`
|
||||
|
||||
7. **Verify zero-overhead when disabled**:
|
||||
- Rebuild with `XRPL_ENABLE_TELEMETRY=OFF`, or set `enabled=0` in config
|
||||
- Run the same RPC calls
|
||||
- Confirm no new traces appear and no errors in xrpld logs
|
||||
|
||||
**Verification Checklist**:
|
||||
|
||||
- [ ] Docker stack starts without errors
|
||||
- [ ] xrpld builds with `-DXRPL_ENABLE_TELEMETRY=ON`
|
||||
- [ ] xrpld starts and connects to OTel Collector (check xrpld logs for telemetry messages)
|
||||
- [ ] Traces appear in Grafana/Tempo under service "xrpld"
|
||||
- [ ] Span hierarchy is correct (parent-child relationships)
|
||||
- [ ] Span attributes are populated (`xrpl.rpc.command`, `xrpl.rpc.status`, etc.)
|
||||
- [ ] Error spans show error status and message
|
||||
- [ ] Building with `XRPL_ENABLE_TELEMETRY=OFF` produces no regressions
|
||||
- [ ] Setting `enabled=0` at runtime produces no traces and no errors
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [06-implementation-phases.md §6.11.1](./06-implementation-phases.md) — Phase 1 definition of done: SDK compiles, runtime toggle works, span creation verified in Tempo, config validation passes
|
||||
- [06-implementation-phases.md §6.11.2](./06-implementation-phases.md#6112-phase-2-rpc-tracing) — Phase 2 definition of done: 100% RPC coverage, traceparent propagation, <1ms p99 overhead, dashboard deployed
|
||||
- [06-implementation-phases.md §6.8](./06-implementation-phases.md) — Success metrics: trace coverage >95%, CPU overhead <3%, memory <5 MB, latency impact <2%
|
||||
- [03-implementation-strategy.md §3.9.5](./03-implementation-strategy.md) — Backward compatibility: config optional, protocol unchanged, `XRPL_ENABLE_TELEMETRY=OFF` produces identical binary
|
||||
- [01-architecture-analysis.md §1.8](./01-architecture-analysis.md) — Observable outcomes: what traces, metrics, and dashboards to expect
|
||||
|
||||
---
|
||||
|
||||
## Task 9: Document POC Results and Next Steps
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
|
||||
|
||||
**Objective**: Capture findings, screenshots, and remaining work for the team.
|
||||
|
||||
**What to do**:
|
||||
|
||||
- Take screenshots of Grafana/Tempo showing:
|
||||
- The service list with "xrpld"
|
||||
- A trace with the full span tree
|
||||
- Span detail view showing attributes
|
||||
- Document any issues encountered (build issues, SDK quirks, missing attributes)
|
||||
- Note performance observations (build time impact, any noticeable runtime overhead)
|
||||
- Write a short summary of what the POC proves and what it doesn't cover yet:
|
||||
- **Proves**: OTel SDK integrates with xrpld, OTLP export works, RPC traces visible
|
||||
- **Doesn't cover**: Cross-node P2P context propagation, consensus tracing, protobuf trace context, W3C traceparent header extraction, tail-based sampling, production deployment
|
||||
- Outline next steps (mapping to the full plan phases):
|
||||
- [Phase 2](./06-implementation-phases.md) completion: [W3C header extraction](./02-design-decisions.md) (§2.5), WebSocket tracing, all [RPC handlers](./01-architecture-analysis.md) (§1.6)
|
||||
- [Phase 3](./06-implementation-phases.md): [Protobuf `TraceContext` message](./04-code-samples.md) (§4.4), [transaction relay tracing](./04-code-samples.md) (§4.5.1) across nodes
|
||||
- [Phase 4](./06-implementation-phases.md): [Consensus round and phase tracing](./04-code-samples.md) (§4.5.2)
|
||||
- [Phase 5](./06-implementation-phases.md): [Production collector config](./05-configuration-reference.md) (§5.5.2), [Grafana dashboards](./07-observability-backends.md) (§7.6), [alerting](./07-observability-backends.md) (§7.6.3)
|
||||
|
||||
**Reference**:
|
||||
|
||||
- [06-implementation-phases.md §6.1](./06-implementation-phases.md) — Full 5-phase timeline overview and Gantt chart
|
||||
- [06-implementation-phases.md §6.10](./06-implementation-phases.md) — Crawl-Walk-Run strategy: POC is the CRAWL phase, next steps are WALK and RUN
|
||||
- [06-implementation-phases.md §6.12](./06-implementation-phases.md) — Recommended implementation order (14 steps across 9 weeks)
|
||||
- [03-implementation-strategy.md §3.9](./03-implementation-strategy.md) — Code intrusiveness assessment and risk matrix for each remaining component
|
||||
- [07-observability-backends.md §7.2](./07-observability-backends.md) — Production backend selection (Tempo, Elastic APM, Honeycomb, Datadog)
|
||||
- [02-design-decisions.md §2.5](./02-design-decisions.md) — Context propagation design: W3C HTTP headers, protobuf P2P, JobQueue internal
|
||||
- [00-tracing-fundamentals.md](./00-tracing-fundamentals.md) — Reference for team onboarding on distributed tracing concepts
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Task | Description | New Files | Modified Files | Depends On |
|
||||
| ---- | ------------------------------------ | --------- | -------------- | ---------- |
|
||||
| 0 | Docker observability stack | 4 | 0 | — |
|
||||
| 1 | OTel C++ SDK dependency | 0 | 2 | — |
|
||||
| 2 | Core Telemetry interface + NullImpl | 3 | 0 | 1 |
|
||||
| 3 | OTel-backed Telemetry implementation | 2 | 1 | 1, 2 |
|
||||
| 4 | Application lifecycle integration | 0 | 3 | 2, 3 |
|
||||
| 5 | Instrumentation macros | 1 | 0 | 2 |
|
||||
| 6 | Instrument RPC ServerHandler | 0 | 1 | 4, 5 |
|
||||
| 7 | Instrument RPC command execution | 0 | 1 | 4, 5 |
|
||||
| 8 | End-to-end verification | 0 | 0 | 0-7 |
|
||||
| 9 | Document results and next steps | 1 | 0 | 8 |
|
||||
|
||||
**Parallel work**: Tasks 0 and 1 can run in parallel. Tasks 2 and 5 have no dependency on each other. Tasks 6 and 7 can be done in parallel once Tasks 4 and 5 are complete.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps (Post-POC)
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
|
||||
|
||||
### Metrics Pipeline for Grafana Dashboards
|
||||
|
||||
The current POC exports **traces only**. Grafana's Explore view can query Tempo for individual traces, but time-series charts (latency histograms, request throughput, error rates) require a **metrics pipeline**. To enable this:
|
||||
|
||||
1. **Add a `spanmetrics` connector** to the OTel Collector config that derives RED metrics (Rate, Errors, Duration) from trace spans automatically:
|
||||
|
||||
```yaml
|
||||
connectors:
|
||||
spanmetrics:
|
||||
histogram:
|
||||
explicit:
|
||||
buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 5s]
|
||||
dimensions:
|
||||
- name: xrpl.rpc.command
|
||||
- name: xrpl.rpc.status
|
||||
|
||||
exporters:
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:8889
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [debug, otlp/tempo, spanmetrics]
|
||||
metrics:
|
||||
receivers: [spanmetrics]
|
||||
exporters: [prometheus]
|
||||
```
|
||||
|
||||
2. **Add Prometheus** to the Docker Compose stack to scrape the collector's metrics endpoint.
|
||||
|
||||
3. **Add Prometheus as a Grafana datasource** and build dashboards for:
|
||||
- RPC request latency (p50/p95/p99) by command
|
||||
- RPC throughput (requests/sec) by command
|
||||
- Error rate by command
|
||||
- Span duration distribution
|
||||
|
||||
### Additional Instrumentation
|
||||
|
||||
- **W3C `traceparent` header extraction** in `ServerHandler` to support cross-service context propagation from external callers
|
||||
- **WebSocket RPC tracing** in `ServerHandler::onWSMessage()`
|
||||
- **Transaction relay tracing** across nodes using protobuf `TraceContext` messages
|
||||
- **Consensus round and phase tracing** for validator coordination visibility
|
||||
- **Ledger close tracing** to measure close-to-validated latency
|
||||
|
||||
### Production Hardening
|
||||
|
||||
- **Tail-based sampling** in the OTel Collector to reduce volume while retaining error/slow traces
|
||||
- **TLS configuration** for the OTLP exporter in production deployments
|
||||
- **Resource limits** on the batch processor queue to prevent unbounded memory growth
|
||||
- **Health monitoring** for the telemetry pipeline itself (collector lag, export failures)
|
||||
|
||||
### POC Lessons Learned
|
||||
|
||||
Issues encountered during POC implementation that inform future work:
|
||||
|
||||
| Issue | Resolution | Impact on Future Work |
|
||||
| -------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ---------------------------------------------------------------- |
|
||||
| Conan lockfile rejected `opentelemetry-cpp/1.18.0` | Used `--lockfile=""` to bypass | Lockfile must be regenerated when adding new dependencies |
|
||||
| Conan package only builds OTLP HTTP exporter, not gRPC | Switched from gRPC to HTTP exporter (`localhost:4318/v1/traces`) | HTTP exporter is the default; gRPC requires custom Conan profile |
|
||||
| CMake target `opentelemetry-cpp::api` etc. don't exist in Conan package | Use umbrella target `opentelemetry-cpp::opentelemetry-cpp` | Conan targets differ from upstream CMake targets |
|
||||
| OTel Collector `logging` exporter deprecated | Renamed to `debug` exporter | Use `debug` in all collector configs going forward |
|
||||
| Macro parameter `telemetry` collided with `::xrpl::telemetry::` namespace | Renamed macro params to `_tel_obj_`, `_span_name_` | Avoid common words as macro parameter names |
|
||||
| `opentelemetry::trace::Scope` creates new context on move | Store scope as member, create once in constructor | SpanGuard move semantics need care with Scope lifecycle |
|
||||
| `TracerProviderFactory::Create` returns `unique_ptr<sdk::TracerProvider>`, not `nostd::shared_ptr` | Use `std::shared_ptr` member, wrap in `nostd::shared_ptr` for global provider | OTel SDK factory return types don't match API provider types |
|
||||
@@ -1,673 +0,0 @@
|
||||
# OpenTelemetry Distributed Tracing for xrpld
|
||||
|
||||
---
|
||||
|
||||
## Slide 1: Introduction
|
||||
|
||||
> **CNCF** = Cloud Native Computing Foundation
|
||||
|
||||
### What is OpenTelemetry?
|
||||
|
||||
OpenTelemetry is an open-source, CNCF-backed observability framework for distributed tracing, metrics, and logs.
|
||||
|
||||
### Why OpenTelemetry for xrpld?
|
||||
|
||||
- **End-to-End Transaction Visibility**: Track transactions from submission → consensus → ledger inclusion
|
||||
- **Cross-Node Correlation**: Follow requests across multiple independent nodes using a unique `trace_id`
|
||||
- **Consensus Round Analysis**: Understand timing and behavior across validators
|
||||
- **Incident Debugging**: Correlate events across distributed nodes during issues
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Node A<br/>tx.receive<br/>trace_id: abc123"] --> B["Node B<br/>tx.relay<br/>trace_id: abc123"] --> C["Node C<br/>tx.validate<br/>trace_id: abc123"] --> D["Node D<br/>ledger.apply<br/>trace_id: abc123"]
|
||||
|
||||
style A fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style B fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style C fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style D fill:#e65100,stroke:#bf360c,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Node A (blue, leftmost)**: The originating node that first receives the transaction and assigns a new `trace_id: abc123`; this ID becomes the correlation key for the entire distributed trace.
|
||||
- **Node B and Node C (green, middle)**: Relay and validation nodes — each creates its own span but carries the same `trace_id`, so their work is linked to the original submission without any central coordinator.
|
||||
- **Node D (orange, rightmost)**: The final node that applies the transaction to the ledger; the trace now spans the full lifecycle from submission to ledger inclusion.
|
||||
- **Left-to-right flow**: The horizontal progression shows the real-world message path — a transaction hops from node to node, and the shared `trace_id` stitches all hops into a single queryable trace.
|
||||
|
||||
> **Trace ID: abc123** — All nodes share the same trace, enabling cross-node correlation.
|
||||
|
||||
---
|
||||
|
||||
## Slide 2: OpenTelemetry vs Open Source Alternatives
|
||||
|
||||
> **CNCF** = Cloud Native Computing Foundation
|
||||
|
||||
| Feature | OpenTelemetry | Jaeger | Zipkin | SkyWalking | Pinpoint | Prometheus |
|
||||
| ------------------- | ---------------- | ---------------- | ------------------ | ---------- | ---------- | ---------- |
|
||||
| **Tracing** | YES | YES | YES | YES | YES | NO |
|
||||
| **Metrics** | YES | NO | NO | YES | YES | YES |
|
||||
| **Logs** | YES | NO | NO | YES | NO | NO |
|
||||
| **C++ SDK** | YES Official | YES (Deprecated) | YES (Unmaintained) | NO | NO | YES |
|
||||
| **Vendor Neutral** | YES Primary goal | NO | NO | NO | NO | NO |
|
||||
| **Instrumentation** | Manual + Auto | Manual | Manual | Auto-first | Auto-first | Manual |
|
||||
| **Backend** | Any (exporters) | Self | Self | Self | Self | Self |
|
||||
| **CNCF Status** | Incubating | Graduated | NO | Incubating | NO | Graduated |
|
||||
|
||||
> **Why OpenTelemetry?** It's the only actively maintained, full-featured C++ option with vendor neutrality — allowing export to Tempo, Prometheus, Grafana, or any commercial backend without changing instrumentation.
|
||||
|
||||
---
|
||||
|
||||
## Slide 3: Adoption Scope — Traces Only (Current Plan)
|
||||
|
||||
OpenTelemetry supports three signal types: **Traces**, **Metrics**, and **Logs**. xrpld already captures metrics (StatsD via Beast Insight) and logs (Journal/PerfLog). The question is: how much of OTel do we adopt?
|
||||
|
||||
> **Scenario A**: Add distributed tracing. Keep StatsD for metrics and Journal for logs.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph xrpld["xrpld Process"]
|
||||
direction TB
|
||||
OTel["OTel SDK<br/>(Traces)"]
|
||||
Insight["Beast Insight<br/>(StatsD Metrics)"]
|
||||
Journal["Journal + PerfLog<br/>(Logging)"]
|
||||
end
|
||||
|
||||
OTel -->|"OTLP"| Collector["OTel Collector"]
|
||||
Insight -->|"UDP"| StatsD["StatsD Server"]
|
||||
Journal -->|"File I/O"| LogFile["perf.log / debug.log"]
|
||||
|
||||
Collector --> Tempo["Tempo"]
|
||||
StatsD --> Graphite["Graphite / Grafana"]
|
||||
LogFile --> Loki["Loki (optional)"]
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#fff
|
||||
style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style Insight fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style Journal fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
```
|
||||
|
||||
| Aspect | Details |
|
||||
| ------------------------------ | --------------------------------------------------------------------------------------------------------------- |
|
||||
| **What changes for operators** | Deploy OTel Collector + trace backend. Existing StatsD and log pipelines stay as-is. |
|
||||
| **Codebase impact** | New `Telemetry` module (~1500 LOC). Beast Insight and Journal untouched. |
|
||||
| **New capabilities** | Cross-node trace correlation, span-based debugging, request lifecycle visibility. |
|
||||
| **What we still can't do** | Correlate metrics with specific traces natively. StatsD metrics remain fire-and-forget with no trace exemplars. |
|
||||
| **Maintenance burden** | Three separate observability systems to maintain (OTel + StatsD + Journal). |
|
||||
| **Risk** | Lowest — additive change, no existing systems disturbed. |
|
||||
|
||||
---
|
||||
|
||||
## Slide 4: Future Adoption — Metrics & Logs via OTel
|
||||
|
||||
### Scenario B: + OTel Metrics (Replace StatsD)
|
||||
|
||||
> Migrate StatsD to OTel Metrics API, exposing Prometheus-compatible metrics. Remove Beast Insight.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph xrpld["xrpld Process"]
|
||||
direction TB
|
||||
OTel["OTel SDK<br/>(Traces + Metrics)"]
|
||||
Journal["Journal + PerfLog<br/>(Logging)"]
|
||||
end
|
||||
|
||||
OTel -->|"OTLP"| Collector["OTel Collector"]
|
||||
Journal -->|"File I/O"| LogFile["perf.log / debug.log"]
|
||||
|
||||
Collector --> Tempo["Tempo<br/>(Traces)"]
|
||||
Collector --> Prom["Prometheus<br/>(Metrics)"]
|
||||
LogFile --> Loki["Loki (optional)"]
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#fff
|
||||
style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style Journal fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
```
|
||||
|
||||
- **Better metrics?** Yes — Prometheus gives native histograms (p50/p95/p99), multi-dimensional labels, and exemplars linking metric spikes to traces.
|
||||
- **Codebase**: Remove `Beast::Insight` + `StatsDCollector` (~2000 LOC). Single SDK for traces and metrics.
|
||||
- **Operator effort**: Rewrite dashboards from StatsD/Graphite queries to PromQL. Run both in parallel during transition.
|
||||
- **Risk**: Medium — operators must migrate monitoring infrastructure.
|
||||
|
||||
### Scenario C: + OTel Logs (Full Stack)
|
||||
|
||||
> Also replace Journal logging with OTel Logs API. Single SDK for everything.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph xrpld["xrpld Process"]
|
||||
OTel["OTel SDK<br/>(Traces + Metrics + Logs)"]
|
||||
end
|
||||
|
||||
OTel -->|"OTLP"| Collector["OTel Collector"]
|
||||
|
||||
Collector --> Tempo["Tempo<br/>(Traces)"]
|
||||
Collector --> Prom["Prometheus<br/>(Metrics)"]
|
||||
Collector --> Loki["Loki / Elastic<br/>(Logs)"]
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#fff
|
||||
style OTel fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style Collector fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
```
|
||||
|
||||
- **Structured logging**: OTel Logs API outputs structured records with `trace_id`, `span_id`, severity, and attributes by design.
|
||||
- **Full correlation**: Every log line carries `trace_id`. Click trace → see logs. Click metric spike → see trace → see logs.
|
||||
- **Codebase**: Remove Beast Insight (~2000 LOC) + simplify Journal/PerfLog (~3000 LOC). One dependency instead of three.
|
||||
- **Risk**: Highest — `beast::Journal` is deeply embedded in every component. Large refactor. OTel C++ Logs API is newer (stable since v1.11, less battle-tested).
|
||||
|
||||
### Recommendation
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Phase 1<br/><b>Traces Only</b><br/>(Current Plan)"] --> B["Phase 2<br/><b>+ Metrics</b><br/>(Replace StatsD)"] --> C["Phase 3<br/><b>+ Logs</b><br/>(Full OTel)"]
|
||||
|
||||
style A fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style B fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style C fill:#e65100,stroke:#bf360c,color:#fff
|
||||
```
|
||||
|
||||
| Phase | Signal | Strategy | Risk |
|
||||
| -------------------- | --------- | -------------------------------------------------------------- | ------ |
|
||||
| **Phase 1** (now) | Traces | Add OTel traces. Keep StatsD and Journal. Prove value. | Low |
|
||||
| **Phase 2** (future) | + Metrics | Migrate StatsD → Prometheus via OTel. Remove Beast Insight. | Medium |
|
||||
| **Phase 3** (future) | + Logs | Adopt OTel Logs API. Align with structured logging initiative. | High |
|
||||
|
||||
> **Key Takeaway**: Start with traces (unique value, lowest risk), then incrementally adopt metrics and logs as the OTel infrastructure proves itself.
|
||||
|
||||
---
|
||||
|
||||
## Slide 5: Comparison with xrpld's Existing Solutions
|
||||
|
||||
### Current Observability Stack
|
||||
|
||||
| Aspect | PerfLog (JSON) | StatsD (Metrics) | OpenTelemetry (NEW) |
|
||||
| --------------------- | --------------------- | --------------------- | --------------------------- |
|
||||
| **Type** | Logging | Metrics | Distributed Tracing |
|
||||
| **Scope** | Single node | Single node | **Cross-node** |
|
||||
| **Data** | JSON log entries | Counters, gauges | Spans with context |
|
||||
| **Correlation** | By timestamp | By metric name | By `trace_id` |
|
||||
| **Overhead** | Low (file I/O) | Low (UDP) | Low-Medium (configurable) |
|
||||
| **Question Answered** | "What happened here?" | "How many? How fast?" | **"What was the journey?"** |
|
||||
|
||||
### Use Case Matrix
|
||||
|
||||
| Scenario | PerfLog | StatsD | OpenTelemetry |
|
||||
| -------------------------------- | ------- | ------ | ------------- |
|
||||
| "How many TXs per second?" | ❌ | ✅ | ❌ |
|
||||
| "Why was this specific TX slow?" | ⚠️ | ❌ | ✅ |
|
||||
| "Which node delayed consensus?" | ❌ | ❌ | ✅ |
|
||||
| "Show TX journey across 5 nodes" | ❌ | ❌ | ✅ |
|
||||
|
||||
> **Key Insight**: In the **traces-only** approach (Phase 1), OpenTelemetry **complements** existing systems. In future phases, OTel metrics and logs could **replace** StatsD and Journal respectively — see Slides 3-4 for the full adoption roadmap.
|
||||
|
||||
---
|
||||
|
||||
## Slide 6: Architecture
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol | **WS** = WebSocket
|
||||
|
||||
### High-Level Integration Architecture
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph xrpld["xrpld Node"]
|
||||
subgraph services["Core Services"]
|
||||
direction LR
|
||||
RPC["RPC Server<br/>(HTTP/WS)"] ~~~ Overlay["Overlay<br/>(P2P Network)"] ~~~ Consensus["Consensus<br/>(RCLConsensus)"]
|
||||
end
|
||||
|
||||
Telemetry["Telemetry Module<br/>(OpenTelemetry SDK)"]
|
||||
|
||||
services --> Telemetry
|
||||
end
|
||||
|
||||
Telemetry -->|OTLP/gRPC| Collector["OTel Collector"]
|
||||
|
||||
Collector --> Tempo["Grafana Tempo"]
|
||||
Collector --> Elastic["Elastic APM"]
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#fff
|
||||
style services fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style Telemetry fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style Collector fill:#e65100,stroke:#bf360c,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Core Services (blue, top)**: RPC Server, Overlay, and Consensus are the three primary components that generate trace data — they represent the entry points for client requests, peer messages, and consensus rounds respectively.
|
||||
- **Telemetry Module (green, middle)**: The OpenTelemetry SDK sits below the core services and receives span data from all three; it acts as a single collection point within the xrpld process.
|
||||
- **OTel Collector (orange, center)**: An external process that receives spans over OTLP/gRPC from the Telemetry Module; it decouples xrpld from backend choices and handles batching, sampling, and routing.
|
||||
- **Backends (bottom row)**: Tempo and Elastic APM are interchangeable — the Collector fans out to any combination, so operators can switch backends without modifying xrpld code.
|
||||
- **Top-to-bottom flow**: Data flows from instrumented code down through the SDK, out over the network to the Collector, and finally into storage/visualization backends.
|
||||
|
||||
### Context Propagation
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Client
|
||||
participant NodeA as Node A
|
||||
participant NodeB as Node B
|
||||
|
||||
Client->>NodeA: Submit TX (no context)
|
||||
Note over NodeA: Creates trace_id: abc123<br/>span: tx.receive
|
||||
NodeA->>NodeB: Relay TX<br/>(traceparent: abc123)
|
||||
Note over NodeB: Links to trace_id: abc123<br/>span: tx.relay
|
||||
```
|
||||
|
||||
- **HTTP/RPC**: W3C Trace Context headers (`traceparent`)
|
||||
- **P2P Messages**: Protocol Buffer extension fields
|
||||
|
||||
---
|
||||
|
||||
## Slide 7: Implementation Plan
|
||||
|
||||
### 5-Phase Rollout (9 Weeks)
|
||||
|
||||
> **Note**: Dates shown are relative to project start, not calendar dates.
|
||||
|
||||
```mermaid
|
||||
gantt
|
||||
title Implementation Timeline
|
||||
dateFormat YYYY-MM-DD
|
||||
axisFormat Week %W
|
||||
|
||||
section Phase 1
|
||||
Core Infrastructure :p1, 2024-01-01, 2w
|
||||
|
||||
section Phase 2
|
||||
RPC Tracing :p2, after p1, 2w
|
||||
|
||||
section Phase 3
|
||||
Transaction Tracing :p3, after p2, 2w
|
||||
|
||||
section Phase 4
|
||||
Consensus Tracing :p4, after p3, 2w
|
||||
|
||||
section Phase 5
|
||||
Documentation :p5, after p4, 1w
|
||||
```
|
||||
|
||||
### Phase Details
|
||||
|
||||
| Phase | Focus | Key Deliverables | Effort |
|
||||
| ----- | ------------------- | -------------------------------------------- | ------- |
|
||||
| 1 | Core Infrastructure | SDK integration, Telemetry interface, Config | 10 days |
|
||||
| 2 | RPC Tracing | HTTP context extraction, Handler spans | 10 days |
|
||||
| 3 | Transaction Tracing | Protobuf context, P2P relay propagation | 10 days |
|
||||
| 4 | Consensus Tracing | Round spans, Proposal/validation tracing | 10 days |
|
||||
| 5 | Documentation | Runbook, Dashboards, Training | 7 days |
|
||||
|
||||
**Total Effort**: ~47 developer-days (2 developers)
|
||||
|
||||
> **Future Phases** (not in current scope): After traces are stable, OTel metrics can replace StatsD (~3 weeks), and OTel logs can replace Journal (~4 weeks, aligned with structured logging initiative). See Slides 3-4 for the full adoption roadmap.
|
||||
|
||||
---
|
||||
|
||||
## Slide 8: Performance Overhead
|
||||
|
||||
> **OTLP** = OpenTelemetry Protocol
|
||||
|
||||
### Estimated System Impact
|
||||
|
||||
| Metric | Overhead | Notes |
|
||||
| ----------------- | ---------- | ------------------------------------------------ |
|
||||
| **CPU** | 1-3% | Span creation and attribute setting |
|
||||
| **Memory** | ~10 MB | SDK statics + batch buffer + worker thread stack |
|
||||
| **Network** | 10-50 KB/s | Compressed OTLP export to collector |
|
||||
| **Latency (p99)** | <2% | With proper sampling configuration |
|
||||
|
||||
#### How We Arrived at These Numbers
|
||||
|
||||
**Assumptions (XRPL mainnet baseline)**:
|
||||
|
||||
| Parameter | Value | Source |
|
||||
| ------------------------- | ---------------------- | --------------------------------------------------------------------------------------------------- |
|
||||
| Transaction throughput | ~25 TPS (peaks to ~50) | Mainnet average |
|
||||
| Default peers per node | 21 | `peerfinder/detail/Tuning.h` (`defaultMaxPeers`) |
|
||||
| Consensus round frequency | ~1 round / 3-4 seconds | `ConsensusParms.h` (`ledgerMIN_CONSENSUS=1950ms`) |
|
||||
| Proposers per round | ~20-35 | Mainnet UNL size |
|
||||
| P2P message rate | ~160 msgs/sec | See message breakdown below |
|
||||
| Avg TX processing time | ~200 μs | Profiled baseline |
|
||||
| Single span creation cost | 500-1000 ns | OTel C++ SDK benchmarks (see [3.5.4](./03-implementation-strategy.md#354-performance-data-sources)) |
|
||||
|
||||
**P2P message breakdown** (per node, mainnet):
|
||||
|
||||
| Message Type | Rate | Derivation |
|
||||
| ------------- | ------------ | --------------------------------------------------------------------- |
|
||||
| TMTransaction | ~100/sec | ~25 TPS × ~4 relay hops per TX, deduplicated by HashRouter |
|
||||
| TMValidation | ~50/sec | ~35 validators × ~1 validation/3s round ≈ ~12/sec, plus relay fan-out |
|
||||
| TMProposeSet | ~10/sec | ~35 proposers / 3s round ≈ ~12/round, clustered in establish phase |
|
||||
| **Total** | **~160/sec** | **Only traced message types counted** |
|
||||
|
||||
**CPU (1-3%) — Calculation**:
|
||||
|
||||
Per-transaction tracing cost breakdown:
|
||||
|
||||
| Operation | Cost | Notes |
|
||||
| ----------------------------------------------- | ----------- | ------------------------------------------ |
|
||||
| `tx.receive` span (create + end + 4 attributes) | ~1400 ns | ~1000ns create + ~200ns end + 4×50ns attrs |
|
||||
| `tx.validate` span | ~1200 ns | ~1000ns create + ~200ns for 2 attributes |
|
||||
| `tx.relay` span | ~1200 ns | ~1000ns create + ~200ns for 2 attributes |
|
||||
| Context injection into P2P message | ~200 ns | Serialize trace_id + span_id into protobuf |
|
||||
| **Total per TX** | **~4.0 μs** | |
|
||||
|
||||
> **CPU overhead**: 4.0 μs / 200 μs baseline = **~2.0% per transaction**. Under high load with consensus + RPC spans overlapping, reaches ~3%. Consensus itself adds only ~36 μs per 3-second round (~0.001%), so the TX path dominates. On production server hardware (3+ GHz Xeon), span creation drops to ~500-600 ns, bringing per-TX cost to ~2.6 μs (~1.3%). See [Section 3.5.4](./03-implementation-strategy.md#354-performance-data-sources) for benchmark sources.
|
||||
|
||||
**Memory (~10 MB) — Calculation**:
|
||||
|
||||
| Component | Size | Notes |
|
||||
| --------------------------------------------- | ------------------ | ------------------------------------- |
|
||||
| TracerProvider + Exporter (gRPC channel init) | ~320 KB | Allocated once at startup |
|
||||
| BatchSpanProcessor (circular buffer) | ~16 KB | 2049 × 8-byte AtomicUniquePtr entries |
|
||||
| BatchSpanProcessor (worker thread stack) | ~8 MB | Default Linux thread stack size |
|
||||
| Active spans (in-flight, max ~1000) | ~500-800 KB | ~500-800 bytes/span × 1000 concurrent |
|
||||
| Export queue (batch buffer, max 2048 spans) | ~1 MB | ~500 bytes/span × 2048 queue depth |
|
||||
| Thread-local context storage (~100 threads) | ~6.4 KB | ~64 bytes/thread |
|
||||
| **Total** | **~10 MB ceiling** | |
|
||||
|
||||
> Memory plateaus once the export queue fills — the `max_queue_size=2048` config bounds growth.
|
||||
> The worker thread stack (~8 MB) dominates the static footprint but is virtual memory; actual RSS
|
||||
> depends on stack usage (typically much less). Active spans are larger than originally estimated
|
||||
> (~500-800 bytes) because the OTel SDK `Span` object includes a mutex (~40 bytes), `SpanData`
|
||||
> recordable (~250 bytes base), and `std::map`-based attribute storage (~200-500 bytes for 3-5
|
||||
> string attributes). See [Section 3.5.4](./03-implementation-strategy.md#354-performance-data-sources) for source references.
|
||||
|
||||
**Network (10-50 KB/s) — Calculation**:
|
||||
|
||||
Two sources of network overhead:
|
||||
|
||||
**(A) OTLP span export to Collector:**
|
||||
|
||||
| Sampling Rate | Effective Spans/sec | Avg Span Size (compressed) | Bandwidth |
|
||||
| -------------------------- | ------------------- | -------------------------- | ------------ |
|
||||
| 100% (dev only) | ~500 | ~500 bytes | ~250 KB/s |
|
||||
| **10% (recommended prod)** | **~50** | **~500 bytes** | **~25 KB/s** |
|
||||
| 1% (minimal) | ~5 | ~500 bytes | ~2.5 KB/s |
|
||||
|
||||
> The ~500 spans/sec at 100% comes from: ~100 TX spans + ~160 P2P context spans + ~23 consensus spans/round + ~50 RPC spans = ~500/sec. OTLP protobuf with gzip compression yields ~500 bytes/span average.
|
||||
|
||||
**(B) P2P trace context overhead** (added to existing messages, always-on regardless of sampling):
|
||||
|
||||
| Message Type | Rate | Context Size | Bandwidth |
|
||||
| ------------- | -------- | ------------ | ------------- |
|
||||
| TMTransaction | ~100/sec | 29 bytes | ~2.9 KB/s |
|
||||
| TMValidation | ~50/sec | 29 bytes | ~1.5 KB/s |
|
||||
| TMProposeSet | ~10/sec | 29 bytes | ~0.3 KB/s |
|
||||
| **Total P2P** | | | **~4.7 KB/s** |
|
||||
|
||||
> **Combined**: 25 KB/s (OTLP export at 10%) + 5 KB/s (P2P context) ≈ **~30 KB/s typical**. The 10-50 KB/s range covers 10-20% sampling under normal to peak mainnet load.
|
||||
|
||||
**Latency (<2%) — Calculation**:
|
||||
|
||||
| Path | Tracing Cost | Baseline | Overhead |
|
||||
| ------------------------------ | ------------ | -------- | -------- |
|
||||
| Fast RPC (e.g., `server_info`) | 2.75 μs | ~1 ms | 0.275% |
|
||||
| Slow RPC (e.g., `path_find`) | 2.75 μs | ~100 ms | 0.003% |
|
||||
| Transaction processing | 4.0 μs | ~200 μs | 2.0% |
|
||||
| Consensus round | 36 μs | ~3 sec | 0.001% |
|
||||
|
||||
> At p99, even the worst case (TX processing at 2.0%) is within the 1-3% range. RPC and consensus overhead are negligible. On production hardware, TX overhead drops to ~1.3%.
|
||||
|
||||
### Per-Message Overhead (Context Propagation)
|
||||
|
||||
Each P2P message carries trace context with the following overhead:
|
||||
|
||||
| Field | Size | Description |
|
||||
| ------------- | ------------- | ----------------------------------------- |
|
||||
| `trace_id` | 16 bytes | Unique identifier for the entire trace |
|
||||
| `span_id` | 8 bytes | Current span (becomes parent on receiver) |
|
||||
| `trace_flags` | 1 byte | Sampling decision flags |
|
||||
| `trace_state` | 0-4 bytes | Optional vendor-specific data |
|
||||
| **Total** | **~29 bytes** | **Added per traced P2P message** |
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph msg["P2P Message with Trace Context"]
|
||||
A["Original Message<br/>(variable size)"] --> B["+ TraceContext<br/>(~29 bytes)"]
|
||||
end
|
||||
|
||||
subgraph breakdown["Context Breakdown"]
|
||||
C["trace_id<br/>16 bytes"]
|
||||
D["span_id<br/>8 bytes"]
|
||||
E["flags<br/>1 byte"]
|
||||
F["state<br/>0-4 bytes"]
|
||||
end
|
||||
|
||||
B --> breakdown
|
||||
|
||||
style A fill:#424242,stroke:#212121,color:#fff
|
||||
style B fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style C fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style D fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style E fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style F fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **Original Message (gray, left)**: The existing P2P message payload of variable size — this is unchanged; trace context is appended, never modifying the original data.
|
||||
- **+ TraceContext (green, right of message)**: The additional 29-byte context block attached to each traced message; the arrow from the original message shows it is a pure addition.
|
||||
- **Context Breakdown (right subgraph)**: The four fields — `trace_id` (16 bytes), `span_id` (8 bytes), `flags` (1 byte), and `state` (0-4 bytes) — show exactly what is added and their individual sizes.
|
||||
- **Color coding**: Blue fields (`trace_id`, `span_id`) are the core identifiers required for trace correlation; orange (`flags`) controls sampling decisions; purple (`state`) is optional vendor data typically omitted.
|
||||
|
||||
> **Note**: 29 bytes represents ~1-6% overhead depending on message size (500B simple TX to 5KB proposal), which is acceptable for the observability benefits provided.
|
||||
|
||||
### Mitigation Strategies
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["Head Sampling<br/>10% default"] --> B["Tail Sampling<br/>Keep errors/slow"] --> C["Batch Export<br/>Reduce I/O"] --> D["Conditional Compile<br/>XRPL_ENABLE_TELEMETRY"]
|
||||
|
||||
style A fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style B fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style C fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style D fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
> For a detailed explanation of head vs. tail sampling, see Slide 9.
|
||||
|
||||
### Kill Switches (Rollback Options)
|
||||
|
||||
1. **Config Disable**: Set `enabled=0` in config → instant disable, no restart needed for sampling
|
||||
2. **Rebuild**: Compile with `XRPL_ENABLE_TELEMETRY=OFF` → zero overhead (no-op)
|
||||
3. **Full Revert**: Clean separation allows easy commit reversion
|
||||
|
||||
---
|
||||
|
||||
## Slide 9: Sampling Strategies — Head vs. Tail
|
||||
|
||||
> Sampling controls **which traces are recorded and exported**. Without sampling, every operation generates a trace — at 500+ spans/sec, this overwhelms storage and network. Sampling lets you keep the signal, discard the noise.
|
||||
|
||||
### Head Sampling (Decision at Start)
|
||||
|
||||
The sampling decision is made **when a trace begins**, before any work is done. A random number is generated; if it falls within the configured ratio, the entire trace is recorded. Otherwise, the trace is silently dropped.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["New Request<br/>Arrives"] --> B{"Random < 10%?"}
|
||||
B -->|"Yes (1 in 10)"| C["Record Entire Trace<br/>(all spans)"]
|
||||
B -->|"No (9 in 10)"| D["Drop Entire Trace<br/>(zero overhead)"]
|
||||
|
||||
style C fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style D fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style B fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
```
|
||||
|
||||
| Aspect | Details |
|
||||
| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Where it runs** | Inside xrpld (SDK-level). Configured via `sampling_ratio` in `xrpld.cfg`. |
|
||||
| **When the decision happens** | At trace creation time — before the first span is even populated. |
|
||||
| **How it works** | `sampling_ratio=0.1` means each trace has a 10% probability of being recorded. Dropped traces incur near-zero overhead (no spans created, no attributes set, no export). |
|
||||
| **Propagation** | Once a trace is sampled, the `trace_flags` field (1 byte in the context header) tells downstream nodes to also sample it. Unsampled traces propagate `trace_flags=0`, so downstream nodes skip them too. |
|
||||
| **Pros** | Lowest overhead. Simple to configure. Predictable resource usage. |
|
||||
| **Cons** | **Blind** — it doesn't know if the trace will be interesting. A rare error or slow consensus round has only a 10% chance of being captured. |
|
||||
| **Best for** | High-volume, steady-state traffic where most traces look similar (e.g., routine RPC requests). |
|
||||
|
||||
**xrpld configuration**:
|
||||
|
||||
```ini
|
||||
[telemetry]
|
||||
# Record 10% of traces (recommended for production)
|
||||
sampling_ratio=0.1
|
||||
```
|
||||
|
||||
### Tail Sampling (Decision at End)
|
||||
|
||||
The sampling decision is made **after the trace completes**, based on its actual content — was it slow? Did it error? Was it a consensus round? This requires buffering complete traces before deciding.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
A["All Traces<br/>Buffered (100%)"] --> B["OTel Collector<br/>Evaluates Rules"]
|
||||
|
||||
B --> C{"Error?"}
|
||||
C -->|Yes| K["KEEP"]
|
||||
|
||||
C -->|No| D{"Slow?<br/>(>5s consensus,<br/>>1s RPC)"}
|
||||
D -->|Yes| K
|
||||
|
||||
D -->|No| E{"Random < 10%?"}
|
||||
E -->|Yes| K
|
||||
E -->|No| F["DROP"]
|
||||
|
||||
style K fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
style F fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style B fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style C fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style D fill:#e65100,stroke:#bf360c,color:#fff
|
||||
style E fill:#4a148c,stroke:#2e0d57,color:#fff
|
||||
```
|
||||
|
||||
| Aspect | Details |
|
||||
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Where it runs** | In the **OTel Collector** (external process), not inside xrpld. xrpld exports 100% of traces; the Collector decides what to keep. |
|
||||
| **When the decision happens** | After the Collector has received all spans for a trace (waits `decision_wait=10s` for stragglers). |
|
||||
| **How it works** | Policy rules evaluate the completed trace: keep all errors, keep slow operations above a threshold, keep all consensus rounds, then probabilistically sample the rest at 10%. |
|
||||
| **Pros** | **Never misses important traces**. Errors, slow requests, and consensus anomalies are always captured regardless of probability. |
|
||||
| **Cons** | Higher resource usage — xrpld must export 100% of spans to the Collector, which buffers them in memory before deciding. The Collector needs more RAM (configured via `num_traces` and `decision_wait`). |
|
||||
| **Best for** | Production troubleshooting where you can't afford to miss errors or anomalies. |
|
||||
|
||||
**Collector configuration** (tail sampling rules for xrpld):
|
||||
|
||||
```yaml
|
||||
processors:
|
||||
tail_sampling:
|
||||
decision_wait: 10s # Wait for all spans in a trace
|
||||
num_traces: 100000 # Buffer up to 100K concurrent traces
|
||||
policies:
|
||||
- name: errors # Always keep error traces
|
||||
type: status_code
|
||||
status_code: { status_codes: [ERROR] }
|
||||
|
||||
- name: slow-consensus # Keep consensus rounds >5s
|
||||
type: latency
|
||||
latency: { threshold_ms: 5000 }
|
||||
|
||||
- name: slow-rpc # Keep slow RPC requests >1s
|
||||
type: latency
|
||||
latency: { threshold_ms: 1000 }
|
||||
|
||||
- name: probabilistic # Sample 10% of everything else
|
||||
type: probabilistic
|
||||
probabilistic: { sampling_percentage: 10 }
|
||||
```
|
||||
|
||||
### Head vs. Tail — Side-by-Side
|
||||
|
||||
| | Head Sampling | Tail Sampling |
|
||||
| ----------------------------- | ---------------------------------------- | ------------------------------------------------ |
|
||||
| **Decision point** | Trace start (inside xrpld) | Trace end (in OTel Collector) |
|
||||
| **Knows trace content?** | No (random coin flip) | Yes (evaluates completed trace) |
|
||||
| **Overhead on xrpld** | Lowest (dropped traces = no-op) | Higher (must export 100% to Collector) |
|
||||
| **Collector resource usage** | Low (receives only sampled traces) | Higher (buffers all traces before deciding) |
|
||||
| **Captures all errors?** | No (only if trace was randomly selected) | **Yes** (error policy catches them) |
|
||||
| **Captures slow operations?** | No (random) | **Yes** (latency policy catches them) |
|
||||
| **Configuration** | `xrpld.cfg`: `sampling_ratio=0.1` | `otel-collector.yaml`: `tail_sampling` processor |
|
||||
| **Best for** | High-throughput steady-state | Troubleshooting & anomaly detection |
|
||||
|
||||
### Recommended Strategy for xrpld
|
||||
|
||||
Use **both** in a layered approach:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph xrpld["xrpld (Head Sampling)"]
|
||||
HS["sampling_ratio=1.0<br/>(export everything)"]
|
||||
end
|
||||
|
||||
subgraph collector["OTel Collector (Tail Sampling)"]
|
||||
TS["Keep: errors + slow + 10% random<br/>Drop: routine traces"]
|
||||
end
|
||||
|
||||
subgraph storage["Backend Storage"]
|
||||
ST["Only interesting traces<br/>stored long-term"]
|
||||
end
|
||||
|
||||
xrpld -->|"100% of spans"| collector -->|"~15-20% kept"| storage
|
||||
|
||||
style xrpld fill:#424242,stroke:#212121,color:#fff
|
||||
style collector fill:#1565c0,stroke:#0d47a1,color:#fff
|
||||
style storage fill:#2e7d32,stroke:#1b5e20,color:#fff
|
||||
```
|
||||
|
||||
> **Why this works**: xrpld exports everything (no blind drops), the Collector applies intelligent filtering (keep errors/slow/anomalies, sample the rest), and only ~15-20% of traces reach storage. If Collector resource usage becomes a concern, add head sampling at `sampling_ratio=0.5` to halve the export volume while still giving the Collector enough data for good tail-sampling decisions.
|
||||
|
||||
---
|
||||
|
||||
## Slide 10: Data Collection & Privacy
|
||||
|
||||
### What Data is Collected
|
||||
|
||||
| Category | Attributes Collected | Purpose |
|
||||
| --------------- | ------------------------------------------------------------------------------------ | --------------------------- |
|
||||
| **Transaction** | `tx.hash`, `tx.type`, `tx.result`, `tx.fee`, `ledger_index` | Trace transaction lifecycle |
|
||||
| **Consensus** | `round`, `phase`, `mode`, `proposers` (count of proposing validators), `duration_ms` | Analyze consensus timing |
|
||||
| **RPC** | `command`, `version`, `status`, `duration_ms` | Monitor RPC performance |
|
||||
| **Peer** | `peer.id`(public key), `latency_ms`, `message.type`, `message.size` | Network topology analysis |
|
||||
| **Ledger** | `ledger.hash`, `ledger.index`, `close_time`, `tx_count` | Ledger progression tracking |
|
||||
| **Job** | `job.type`, `queue_ms`, `worker` | JobQueue performance |
|
||||
|
||||
### What is NOT Collected (Privacy Guarantees)
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph notCollected["❌ NOT Collected"]
|
||||
direction LR
|
||||
A["Private Keys"] ~~~ B["Account Balances"] ~~~ C["Transaction Amounts"]
|
||||
end
|
||||
|
||||
subgraph alsoNot["❌ Also Excluded"]
|
||||
direction LR
|
||||
D["IP Addresses<br/>(configurable)"] ~~~ E["Personal Data"] ~~~ F["Raw TX Payloads"]
|
||||
end
|
||||
|
||||
style A fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style B fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style C fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style D fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style E fill:#c62828,stroke:#8c2809,color:#fff
|
||||
style F fill:#c62828,stroke:#8c2809,color:#fff
|
||||
```
|
||||
|
||||
**Reading the diagram:**
|
||||
|
||||
- **NOT Collected (top row, red)**: Private Keys, Account Balances, and Transaction Amounts are explicitly excluded — these are financial/security-sensitive fields that telemetry never touches.
|
||||
- **Also Excluded (bottom row, red)**: IP Addresses (configurable per deployment), Personal Data, and Raw TX Payloads are also excluded — these protect operator and user privacy.
|
||||
- **All-red styling**: Every box is styled in red to visually reinforce that these are hard exclusions, not optional — the telemetry system has no code path to collect any of these fields.
|
||||
- **Two-row layout**: The split between "NOT Collected" and "Also Excluded" distinguishes between financial data (top) and operational/personal data (bottom), making the privacy boundaries clear to auditors.
|
||||
|
||||
### Privacy Protection Mechanisms
|
||||
|
||||
| Mechanism | Description |
|
||||
| -------------------------- | ------------------------------------------------------------- |
|
||||
| **Account Hashing** | `xrpl.tx.account` is hashed at collector level before storage |
|
||||
| **Configurable Redaction** | Sensitive fields can be excluded via config |
|
||||
| **Sampling** | Only 10% of traces recorded by default (reduces exposure) |
|
||||
| **Local Control** | Node operators control what gets exported |
|
||||
| **No Raw Payloads** | Transaction content is never recorded, only metadata |
|
||||
|
||||
> **Key Principle**: Telemetry collects **operational metadata** (timing, counts, hashes) — never **sensitive content** (keys, balances, amounts).
|
||||
|
||||
---
|
||||
|
||||
_End of Presentation_
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [[ $# -ne 1 || "$1" == "--help" || "$1" == "-h" ]]; then
|
||||
name=$(basename $0)
|
||||
cat <<-USAGE
|
||||
name=$( basename $0 )
|
||||
cat <<- USAGE
|
||||
Usage: $name <username>
|
||||
|
||||
Where <username> is the Github username of the upstream repo. e.g. XRPLF
|
||||
@@ -14,7 +14,7 @@ fi
|
||||
shift
|
||||
user="$1"
|
||||
# Get the origin URL. Expect it be an SSH-style URL
|
||||
origin=$(git remote get-url origin)
|
||||
origin=$( git remote get-url origin )
|
||||
if [[ "${origin}" == "" ]]; then
|
||||
echo Invalid origin remote >&2
|
||||
exit 1
|
||||
@@ -22,11 +22,11 @@ fi
|
||||
# echo "Origin: ${origin}"
|
||||
# Parse the origin
|
||||
ifs_orig="${IFS}"
|
||||
IFS=':' read remote originpath <<<"${origin}"
|
||||
IFS=':' read remote originpath <<< "${origin}"
|
||||
# echo "Remote: ${remote}, Originpath: ${originpath}"
|
||||
IFS='@' read sshuser server <<<"${remote}"
|
||||
IFS='@' read sshuser server <<< "${remote}"
|
||||
# echo "SSHUser: ${sshuser}, Server: ${server}"
|
||||
IFS='/' read originuser repo <<<"${originpath}"
|
||||
IFS='/' read originuser repo <<< "${originpath}"
|
||||
# echo "Originuser: ${originuser}, Repo: ${repo}"
|
||||
if [[ "${sshuser}" == "" || "${server}" == "" || "${originuser}" == "" || "${repo}" == "" ]]; then
|
||||
echo "Can't parse origin URL: ${origin}" >&2
|
||||
@@ -35,9 +35,9 @@ fi
|
||||
upstream="https://${server}/${user}/${repo}"
|
||||
upstreampush="${remote}:${user}/${repo}"
|
||||
upstreamgroup="upstream upstream-push"
|
||||
current=$(git remote get-url upstream 2>/dev/null)
|
||||
currentpush=$(git remote get-url upstream-push 2>/dev/null)
|
||||
currentgroup=$(git config remotes.upstreams)
|
||||
current=$( git remote get-url upstream 2>/dev/null )
|
||||
currentpush=$( git remote get-url upstream-push 2>/dev/null )
|
||||
currentgroup=$( git config remotes.upstreams )
|
||||
if [[ "${current}" == "${upstream}" ]]; then
|
||||
echo "Upstream already set up correctly. Skip"
|
||||
elif [[ -n "${current}" && "${current}" != "${upstream}" && "${current}" != "${upstreampush}" ]]; then
|
||||
@@ -45,9 +45,9 @@ elif [[ -n "${current}" && "${current}" != "${upstream}" && "${current}" != "${u
|
||||
else
|
||||
if [[ "${current}" == "${upstreampush}" ]]; then
|
||||
echo "Upstream set to dangerous push URL. Update."
|
||||
_run git remote rename upstream upstream-push ||
|
||||
_run git remote remove upstream
|
||||
currentpush=$(git remote get-url upstream-push 2>/dev/null)
|
||||
_run git remote rename upstream upstream-push || \
|
||||
_run git remote remove upstream
|
||||
currentpush=$( git remote get-url upstream-push 2>/dev/null )
|
||||
fi
|
||||
_run git remote add upstream "${upstream}"
|
||||
fi
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [[ $# -lt 3 || "$1" == "--help" || "$1" = "-h" ]]; then
|
||||
name=$(basename $0)
|
||||
cat <<-USAGE
|
||||
name=$( basename $0 )
|
||||
cat <<- USAGE
|
||||
Usage: $name workbranch base/branch user/branch [user/branch [...]]
|
||||
|
||||
* workbranch will be created locally from base/branch
|
||||
@@ -16,7 +16,7 @@ fi
|
||||
work="$1"
|
||||
shift
|
||||
|
||||
branches=($(echo "${@}" | sed "s/:/\//"))
|
||||
branches=( $( echo "${@}" | sed "s/:/\//" ) )
|
||||
base="${branches[0]}"
|
||||
unset branches[0]
|
||||
|
||||
@@ -24,10 +24,10 @@ set -e
|
||||
|
||||
users=()
|
||||
for b in "${branches[@]}"; do
|
||||
users+=($(echo $b | cut -d/ -f1))
|
||||
users+=( $( echo $b | cut -d/ -f1 ) )
|
||||
done
|
||||
|
||||
users=($(printf '%s\n' "${users[@]}" | sort -u))
|
||||
users=( $( printf '%s\n' "${users[@]}" | sort -u ) )
|
||||
|
||||
git fetch --multiple upstreams "${users[@]}"
|
||||
git checkout -B "$work" --no-track "$base"
|
||||
@@ -40,7 +40,7 @@ done
|
||||
# Make sure the commits look right
|
||||
git log --show-signature "$base..HEAD"
|
||||
|
||||
parts=($(echo $base | sed "s/\// /"))
|
||||
parts=( $( echo $base | sed "s/\// /" ) )
|
||||
repo="${parts[0]}"
|
||||
b="${parts[1]}"
|
||||
push=$repo
|
||||
@@ -50,7 +50,7 @@ fi
|
||||
if [[ "$repo" == "upstream" ]]; then
|
||||
repo="upstreams"
|
||||
fi
|
||||
cat <<PUSH
|
||||
cat << PUSH
|
||||
|
||||
-------------------------------------------------------------------
|
||||
This script will not push. Verify everything is correct, then push
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [[ $# -ne 3 || "$1" == "--help" || "$1" = "-h" ]]; then
|
||||
name=$(basename $0)
|
||||
cat <<-USAGE
|
||||
name=$( basename $0 )
|
||||
cat <<- USAGE
|
||||
Usage: $name workbranch base/branch version
|
||||
|
||||
* workbranch will be created locally from base/branch. If it exists,
|
||||
@@ -16,7 +16,7 @@ fi
|
||||
work="$1"
|
||||
shift
|
||||
|
||||
base=$(echo "$1" | sed "s/:/\//")
|
||||
base=$( echo "$1" | sed "s/:/\//" )
|
||||
shift
|
||||
|
||||
version=$1
|
||||
@@ -28,16 +28,16 @@ git fetch upstreams
|
||||
|
||||
git checkout -B "${work}" --no-track "${base}"
|
||||
|
||||
push=$(git rev-parse --abbrev-ref --symbolic-full-name '@{push}' \
|
||||
2>/dev/null) || true
|
||||
push=$( git rev-parse --abbrev-ref --symbolic-full-name '@{push}' \
|
||||
2>/dev/null ) || true
|
||||
if [[ "${push}" != "" ]]; then
|
||||
echo "Warning: ${push} may already exist."
|
||||
fi
|
||||
|
||||
build=$(find -name BuildInfo.cpp)
|
||||
sed 's/\(^.*versionString =\).*$/\1 "'${version}'"/' ${build} >version.cpp &&
|
||||
diff "${build}" version.cpp && exit 1 ||
|
||||
mv -vi version.cpp ${build}
|
||||
build=$( find -name BuildInfo.cpp )
|
||||
sed 's/\(^.*versionString =\).*$/\1 "'${version}'"/' ${build} > version.cpp && \
|
||||
diff "${build}" version.cpp && exit 1 || \
|
||||
mv -vi version.cpp ${build}
|
||||
|
||||
git diff
|
||||
|
||||
@@ -47,7 +47,7 @@ git commit -S -m "Set version to ${version}"
|
||||
|
||||
git log --oneline --first-parent ${base}^..
|
||||
|
||||
cat <<PUSH
|
||||
cat << PUSH
|
||||
|
||||
-------------------------------------------------------------------
|
||||
This script will not push. Verify everything is correct, then push
|
||||
|
||||
@@ -168,13 +168,7 @@ def main():
|
||||
if not os.environ.get("TIDY"):
|
||||
return 0
|
||||
|
||||
repo_root = Path(
|
||||
subprocess.check_output(
|
||||
["git", "rev-parse", "--show-toplevel"],
|
||||
cwd=Path(__file__).parent,
|
||||
text=True,
|
||||
).strip()
|
||||
)
|
||||
repo_root = Path(__file__).parent.parent
|
||||
files = staged_files(repo_root)
|
||||
if not files:
|
||||
return 0
|
||||
|
||||
@@ -953,21 +953,6 @@
|
||||
#
|
||||
# Optional keys for NuDB and RocksDB:
|
||||
#
|
||||
# cache_size Size of cache for database records. Default is 16384.
|
||||
# Setting this value to 0 will use the default value.
|
||||
#
|
||||
# cache_age Length of time in minutes to keep database records
|
||||
# cached. Default is 5 minutes. Setting this value to
|
||||
# 0 will use the default value.
|
||||
#
|
||||
# Note: if cache_size or cache_age is not specified,
|
||||
# default values will be used for the unspecified
|
||||
# parameter.
|
||||
#
|
||||
# Note: the cache will not be created if online_delete
|
||||
# is specified, because the rotating NodeStore does
|
||||
# not use this cache).
|
||||
#
|
||||
# fast_load Boolean. If set, load the last persisted ledger
|
||||
# from disk upon process start before syncing to
|
||||
# the network. This is likely to improve performance
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
# Python dependencies for XRP Ledger code generation scripts
|
||||
#
|
||||
# These packages are required to run the code generation scripts that
|
||||
# parse macro files and generate C++ wrapper classes.
|
||||
|
||||
# C preprocessor for Python - used to preprocess macro files
|
||||
pcpp>=1.30
|
||||
|
||||
# Parser combinator library - used to parse the macro DSL
|
||||
pyparsing>=3.0.0
|
||||
|
||||
# Template engine - used to generate C++ code from templates
|
||||
Mako>=1.2.2
|
||||
@@ -1,105 +1,13 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile requirements.in --generate-hashes --output-file requirements.txt
|
||||
mako==1.3.12 \
|
||||
--hash=sha256:8f61569480282dbf557145ce441e4ba888be453c30989f879f0d652e39f53ea9 \
|
||||
--hash=sha256:9f778e93289bd410bb35daadeb4fc66d95a746f0b75777b942088b7fd7af550a
|
||||
# via -r requirements.in
|
||||
markupsafe==3.0.3 \
|
||||
--hash=sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f \
|
||||
--hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \
|
||||
--hash=sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf \
|
||||
--hash=sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19 \
|
||||
--hash=sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf \
|
||||
--hash=sha256:0f4b68347f8c5eab4a13419215bdfd7f8c9b19f2b25520968adfad23eb0ce60c \
|
||||
--hash=sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175 \
|
||||
--hash=sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219 \
|
||||
--hash=sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb \
|
||||
--hash=sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6 \
|
||||
--hash=sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab \
|
||||
--hash=sha256:15d939a21d546304880945ca1ecb8a039db6b4dc49b2c5a400387cdae6a62e26 \
|
||||
--hash=sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1 \
|
||||
--hash=sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce \
|
||||
--hash=sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218 \
|
||||
--hash=sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634 \
|
||||
--hash=sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695 \
|
||||
--hash=sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad \
|
||||
--hash=sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73 \
|
||||
--hash=sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c \
|
||||
--hash=sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe \
|
||||
--hash=sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa \
|
||||
--hash=sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559 \
|
||||
--hash=sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa \
|
||||
--hash=sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37 \
|
||||
--hash=sha256:3537e01efc9d4dccdf77221fb1cb3b8e1a38d5428920e0657ce299b20324d758 \
|
||||
--hash=sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f \
|
||||
--hash=sha256:38664109c14ffc9e7437e86b4dceb442b0096dfe3541d7864d9cbe1da4cf36c8 \
|
||||
--hash=sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d \
|
||||
--hash=sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c \
|
||||
--hash=sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97 \
|
||||
--hash=sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a \
|
||||
--hash=sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19 \
|
||||
--hash=sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9 \
|
||||
--hash=sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9 \
|
||||
--hash=sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc \
|
||||
--hash=sha256:591ae9f2a647529ca990bc681daebdd52c8791ff06c2bfa05b65163e28102ef2 \
|
||||
--hash=sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4 \
|
||||
--hash=sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354 \
|
||||
--hash=sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50 \
|
||||
--hash=sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698 \
|
||||
--hash=sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9 \
|
||||
--hash=sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b \
|
||||
--hash=sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc \
|
||||
--hash=sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115 \
|
||||
--hash=sha256:7c3fb7d25180895632e5d3148dbdc29ea38ccb7fd210aa27acbd1201a1902c6e \
|
||||
--hash=sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485 \
|
||||
--hash=sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f \
|
||||
--hash=sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12 \
|
||||
--hash=sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025 \
|
||||
--hash=sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009 \
|
||||
--hash=sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d \
|
||||
--hash=sha256:949b8d66bc381ee8b007cd945914c721d9aba8e27f71959d750a46f7c282b20b \
|
||||
--hash=sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a \
|
||||
--hash=sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5 \
|
||||
--hash=sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f \
|
||||
--hash=sha256:a320721ab5a1aba0a233739394eb907f8c8da5c98c9181d1161e77a0c8e36f2d \
|
||||
--hash=sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1 \
|
||||
--hash=sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287 \
|
||||
--hash=sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6 \
|
||||
--hash=sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f \
|
||||
--hash=sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581 \
|
||||
--hash=sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed \
|
||||
--hash=sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b \
|
||||
--hash=sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c \
|
||||
--hash=sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026 \
|
||||
--hash=sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8 \
|
||||
--hash=sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676 \
|
||||
--hash=sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6 \
|
||||
--hash=sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e \
|
||||
--hash=sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d \
|
||||
--hash=sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d \
|
||||
--hash=sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01 \
|
||||
--hash=sha256:df2449253ef108a379b8b5d6b43f4b1a8e81a061d6537becd5582fba5f9196d7 \
|
||||
--hash=sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419 \
|
||||
--hash=sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795 \
|
||||
--hash=sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1 \
|
||||
--hash=sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5 \
|
||||
--hash=sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d \
|
||||
--hash=sha256:e8fc20152abba6b83724d7ff268c249fa196d8259ff481f3b1476383f8f24e42 \
|
||||
--hash=sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe \
|
||||
--hash=sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda \
|
||||
--hash=sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e \
|
||||
--hash=sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737 \
|
||||
--hash=sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523 \
|
||||
--hash=sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591 \
|
||||
--hash=sha256:f71a396b3bf33ecaa1626c255855702aca4d3d9fea5e051b41ac59a9c1c41edc \
|
||||
--hash=sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a \
|
||||
--hash=sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50
|
||||
# via mako
|
||||
pcpp==1.30 \
|
||||
--hash=sha256:05fe08292b6da57f385001c891a87f40d6aa7f46787b03e8ba326d20a3297c6e \
|
||||
--hash=sha256:5af9fbce55f136d7931ae915fae03c34030a3b36c496e72d9636cedc8e2543a1
|
||||
# via -r requirements.in
|
||||
pyparsing==3.3.2 \
|
||||
--hash=sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d \
|
||||
--hash=sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc
|
||||
# via -r requirements.in
|
||||
# Python dependencies for XRP Ledger code generation scripts
|
||||
#
|
||||
# These packages are required to run the code generation scripts that
|
||||
# parse macro files and generate C++ wrapper classes.
|
||||
|
||||
# C preprocessor for Python - used to preprocess macro files
|
||||
pcpp>=1.30
|
||||
|
||||
# Parser combinator library - used to parse the macro DSL
|
||||
pyparsing>=3.0.0
|
||||
|
||||
# Template engine - used to generate C++ code from templates
|
||||
Mako>=1.2.2
|
||||
|
||||
@@ -65,7 +65,6 @@ words:
|
||||
- Btrfs
|
||||
- Buildx
|
||||
- canonicality
|
||||
- CGNAT
|
||||
- changespq
|
||||
- checkme
|
||||
- choco
|
||||
@@ -94,7 +93,6 @@ words:
|
||||
- daria
|
||||
- dcmake
|
||||
- dearmor
|
||||
- dedented
|
||||
- deleteme
|
||||
- demultiplexer
|
||||
- deserializaton
|
||||
@@ -117,8 +115,6 @@ words:
|
||||
- fmtdur
|
||||
- fsanitize
|
||||
- funclets
|
||||
- gantt
|
||||
- Gantt
|
||||
- gcov
|
||||
- gcovr
|
||||
- ghead
|
||||
@@ -164,11 +160,12 @@ words:
|
||||
- mathbunnyru
|
||||
- mcmodel
|
||||
- MEMORYSTATUSEX
|
||||
- MPTAMM
|
||||
- MPTDEX
|
||||
- Merkle
|
||||
- Metafuncton
|
||||
- misprediction
|
||||
- missingok
|
||||
- MPTAMM
|
||||
- mptbalance
|
||||
- MPTDEX
|
||||
- mptflags
|
||||
@@ -202,16 +199,12 @@ words:
|
||||
- nonxrp
|
||||
- noreplace
|
||||
- noripple
|
||||
- nostd
|
||||
- nostdinc
|
||||
- notifempty
|
||||
- nudb
|
||||
- nullptr
|
||||
- nunl
|
||||
- Nyffenegger
|
||||
- onlatest
|
||||
- ostr
|
||||
- otelc
|
||||
- pargs
|
||||
- partitioner
|
||||
- paychan
|
||||
@@ -261,7 +254,6 @@ words:
|
||||
- sfields
|
||||
- shamap
|
||||
- shamapitem
|
||||
- shfmt
|
||||
- shlibs
|
||||
- sidechain
|
||||
- SIGGOOD
|
||||
@@ -289,7 +281,6 @@ words:
|
||||
- takerpays
|
||||
- ters
|
||||
- TMEndpointv2
|
||||
- traceql
|
||||
- trixie
|
||||
- tx
|
||||
- txid
|
||||
@@ -297,7 +288,6 @@ words:
|
||||
- txjson
|
||||
- txn
|
||||
- txns
|
||||
- txqueue
|
||||
- txs
|
||||
- ubsan
|
||||
- UBSAN
|
||||
@@ -308,7 +298,6 @@ words:
|
||||
- unauthorizing
|
||||
- unergonomic
|
||||
- unfetched
|
||||
- unfindable
|
||||
- unflatten
|
||||
- unfund
|
||||
- unimpair
|
||||
@@ -345,5 +334,4 @@ words:
|
||||
- xrplf
|
||||
- xxhash
|
||||
- xxhasher
|
||||
- xychart
|
||||
- zpages
|
||||
- CGNAT
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Sanity-check that the sanitizer runtimes shipped with g++/clang++ work
|
||||
# end-to-end against the system loader: compile each example with both
|
||||
# compilers, run it, and confirm the expected diagnostic is emitted.
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
cpp_files_dir="${1:?usage: $0 <cpp_files_dir>}"
|
||||
|
||||
case "$(uname -m)" in
|
||||
x86_64) loader=/lib64/ld-linux-x86-64.so.2 ;;
|
||||
aarch64) loader=/lib/ld-linux-aarch64.so.1 ;;
|
||||
*)
|
||||
echo "Unsupported arch: $(uname -m)" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
declare -A sanitize=(
|
||||
[asan]="-fsanitize=address"
|
||||
[tsan]="-fsanitize=thread"
|
||||
[ubsan]="-fsanitize=undefined"
|
||||
)
|
||||
declare -A expect=(
|
||||
[asan]="heap-use-after-free"
|
||||
[tsan]="data race"
|
||||
[ubsan]="signed integer overflow"
|
||||
)
|
||||
|
||||
for compiler in g++ clang++; do
|
||||
for name in asan tsan ubsan; do
|
||||
bin="/tmp/${name}-${compiler}"
|
||||
echo "=== Build ${name} with ${compiler} ==="
|
||||
"$compiler" -std=c++20 -O1 -g ${sanitize[$name]} \
|
||||
-Wl,--dynamic-linker=$loader \
|
||||
"${cpp_files_dir}/${name}.cpp" -o "$bin"
|
||||
echo "=== Run ${name}-${compiler} ==="
|
||||
output=$("$bin" 2>&1) || true
|
||||
echo "$output"
|
||||
echo "$output" | grep -q "${expect[$name]}" ||
|
||||
{
|
||||
echo "expected '${expect[$name]}' from $bin"
|
||||
exit 1
|
||||
}
|
||||
rm -f "$bin"
|
||||
done
|
||||
done
|
||||
@@ -1,28 +0,0 @@
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
__attribute__((noinline))
|
||||
#elif defined(_MSC_VER)
|
||||
__declspec(noinline)
|
||||
#endif
|
||||
int
|
||||
read_after_free(volatile int* array, std::size_t index)
|
||||
{
|
||||
std::atomic_signal_fence(std::memory_order_seq_cst);
|
||||
int value = array[index];
|
||||
std::atomic_signal_fence(std::memory_order_seq_cst);
|
||||
return value;
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
int* array = new int[5]{10, 20, 30, 40, 50};
|
||||
delete[] array;
|
||||
|
||||
std::cout << "Value at index 2: " << read_after_free(array, 2) << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
static int kCounter = 0;
|
||||
|
||||
void
|
||||
increment()
|
||||
{
|
||||
for (int i = 0; i < 100'000; ++i)
|
||||
{
|
||||
++kCounter;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
std::thread t1(increment);
|
||||
std::thread t2(increment);
|
||||
|
||||
t1.join();
|
||||
t2.join();
|
||||
|
||||
std::cout << "Final counter value: " << kCounter << std::endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
int maxInt = std::numeric_limits<int>::max();
|
||||
int volatile one = 1;
|
||||
std::cout << "Current max: " << maxInt << std::endl;
|
||||
int overflowed = maxInt + one;
|
||||
std::cout << "Overflowed result: " << overflowed << std::endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -45,30 +45,8 @@ COPY --from=builder /tmp/build/result /nix/ci-env
|
||||
|
||||
ENV PATH="/nix/ci-env/bin:$PATH"
|
||||
|
||||
# Externally-built dynamically-linked ELF binaries hard-code the loader path
|
||||
# (e.g. /lib64/ld-linux-x86-64.so.2) in their PT_INTERP header. Copy the
|
||||
# loader from the Nix store to that path when the base image doesn't already
|
||||
# provide one (i.e. on nixos/nix).
|
||||
RUN <<EOF
|
||||
case "$(uname -m)" in
|
||||
x86_64) target=/lib64/ld-linux-x86-64.so.2 ;;
|
||||
aarch64) target=/lib/ld-linux-aarch64.so.1 ;;
|
||||
*) echo "Unsupported arch: $(uname -m)" >&2; exit 1 ;;
|
||||
esac
|
||||
if [ ! -e "$target" ]; then
|
||||
# Use the loader from the same glibc that gcc links libc against, so
|
||||
# ld-linux and libc/libpthread share GLIBC_PRIVATE symbols at runtime.
|
||||
src="$(dirname "$(gcc -print-file-name=libc.so.6)")/$(basename "$target")"
|
||||
[ -e "$src" ] || { echo "ld-linux not found at $src" >&2; exit 1; }
|
||||
mkdir -p "$(dirname "$target")"
|
||||
cp "$src" "$target"
|
||||
fi
|
||||
EOF
|
||||
|
||||
RUN <<EOF
|
||||
ccache --version
|
||||
clang --version
|
||||
clang++ --version
|
||||
clang-format --version
|
||||
cmake --version
|
||||
conan --version
|
||||
@@ -86,10 +64,3 @@ python3 --version
|
||||
run-clang-tidy --help
|
||||
vim --version
|
||||
EOF
|
||||
|
||||
# Sanity-check that the sanitizer runtimes shipped with g++/clang++ work
|
||||
# end-to-end against the system loader.
|
||||
COPY docker/cpp_files/ /tmp/cpp_files/
|
||||
COPY docker/check-sanitizers.sh /tmp/check-sanitizers.sh
|
||||
|
||||
RUN grep -qi ubuntu /etc/os-release 2>/dev/null && /tmp/check-sanitizers.sh /tmp/cpp_files || true
|
||||
|
||||
4
flake.lock
generated
4
flake.lock
generated
@@ -15,7 +15,7 @@
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
"nixpkgs-custom-glibc": {
|
||||
"nixpkgs-glibc231": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1593520194,
|
||||
@@ -35,7 +35,7 @@
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs",
|
||||
"nixpkgs-custom-glibc": "nixpkgs-custom-glibc"
|
||||
"nixpkgs-glibc231": "nixpkgs-glibc231"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -6,16 +6,16 @@
|
||||
# version — matches the system libc on Ubuntu 20.04 LTS. Imported
|
||||
# manually (flake = false) because this revision predates nixpkgs'
|
||||
# own flake.nix.
|
||||
nixpkgs-custom-glibc = {
|
||||
nixpkgs-glibc231 = {
|
||||
url = "github:NixOS/nixpkgs/9cd98386a38891d1074fc18036b842dc4416f562";
|
||||
flake = false;
|
||||
};
|
||||
};
|
||||
|
||||
outputs =
|
||||
{ nixpkgs, nixpkgs-custom-glibc, ... }:
|
||||
{ nixpkgs, nixpkgs-glibc231, ... }:
|
||||
let
|
||||
forEachSystem = import ./nix/utils.nix { inherit nixpkgs nixpkgs-custom-glibc; };
|
||||
forEachSystem = import ./nix/utils.nix { inherit nixpkgs nixpkgs-glibc231; };
|
||||
in
|
||||
{
|
||||
devShells = forEachSystem (import ./nix/devshell.nix);
|
||||
|
||||
@@ -2,16 +2,12 @@
|
||||
|
||||
#include <xrpl/beast/utility/instrumentation.h>
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace xrpl {
|
||||
|
||||
@@ -42,58 +38,17 @@ isPowerOfTen(T value)
|
||||
return logTen(value).has_value();
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
|
||||
/** Builds a table of the powers of 10
|
||||
*
|
||||
* This function is marked consteval, so it can only be run in
|
||||
* a constexpr context. This assures that it is and can only be run at
|
||||
* compile time. Doing it at runtime would be pretty wasteful and
|
||||
* inefficient.
|
||||
*/
|
||||
constexpr std::size_t kInt64Digits = 20;
|
||||
consteval std::array<std::uint64_t, kInt64Digits>
|
||||
buildPowersOfTen()
|
||||
{
|
||||
std::array<std::uint64_t, kInt64Digits> result{};
|
||||
|
||||
std::uint64_t power = 1;
|
||||
std::size_t exponent = 0;
|
||||
// end the loop early so it doesn't overflow;
|
||||
for (; exponent < result.size() - 1; ++exponent, power *= 10)
|
||||
{
|
||||
result[exponent] = power;
|
||||
if (power > std::numeric_limits<std::uint64_t>::max() / 10)
|
||||
throw std::logic_error("Power of 10 table is too big");
|
||||
}
|
||||
result[exponent] = power;
|
||||
if (power < std::numeric_limits<std::uint64_t>::max() / 10)
|
||||
throw std::logic_error("Power of 10 table is not big enough for the uint64_t type");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
constexpr std::array<std::uint64_t, detail::kInt64Digits> kPowerOfTen = detail::buildPowersOfTen();
|
||||
|
||||
static_assert(kPowerOfTen[0] == 1);
|
||||
static_assert(kPowerOfTen[1] == 10);
|
||||
static_assert(kPowerOfTen[10] == 10'000'000'000);
|
||||
static_assert(
|
||||
isPowerOfTen(kPowerOfTen.back()) && *logTen(kPowerOfTen.back()) == detail::kInt64Digits - 1);
|
||||
|
||||
/** MantissaRange defines a range for the mantissa of a normalized Number.
|
||||
*
|
||||
* The mantissa is in the range [min, max], where
|
||||
* * min is a power of 10, and
|
||||
* * max = min * 10 - 1.
|
||||
*
|
||||
* The MantissaScale enum indicates properties of the range: size, and some behavioral
|
||||
* options. This intentionally restricts the number of unique MantissaRanges that can
|
||||
* be instantiated: one for each scale.
|
||||
* The mantissa_scale enum indicates whether the range is "small" or "large".
|
||||
* This intentionally restricts the number of MantissaRanges that can be
|
||||
* instantiated to two: one for each scale.
|
||||
*
|
||||
* The "Small" scale is based on the behavior of STAmount for IOUs. It has a min
|
||||
* The "small" scale is based on the behavior of STAmount for IOUs. It has a min
|
||||
* value of 10^15, and a max value of 10^16-1. This was sufficient for
|
||||
* uses before Lending Protocol was implemented, mostly related to AMM.
|
||||
*
|
||||
@@ -104,100 +59,46 @@ static_assert(
|
||||
* STNumber field type, and for internal calculations. That necessitated the
|
||||
* "large" scale.
|
||||
*
|
||||
* The "Large" scales are intended to represent all values that can be represented
|
||||
* The "large" scale is intended to represent all values that can be represented
|
||||
* by an STAmount - IOUs, XRP, and MPTs. It has a min value of 10^18, and a max
|
||||
* value of 10^19-1. "LargeLegacy" is like "Large", but preserves
|
||||
* a rounding error when a computation results in a mantissa of
|
||||
* Number::kMaxRep that needs to be rounded up, but rounds down
|
||||
* instead. It will maintain consistent behavior until the fixCleanup3_2_0
|
||||
* amendment is enabled.
|
||||
* value of 10^19-1.
|
||||
*
|
||||
* Note that if the mentioned amendments are eventually retired, this class
|
||||
* should be left in place, but the "Small" scale option should be removed. This
|
||||
* should be left in place, but the "small" scale option should be removed. This
|
||||
* will allow for future expansion beyond 64-bits if it is ever needed.
|
||||
*/
|
||||
struct MantissaRange final
|
||||
struct MantissaRange
|
||||
{
|
||||
using rep = std::uint64_t;
|
||||
enum class MantissaScale { Small, Large };
|
||||
|
||||
enum class MantissaScale {
|
||||
Small,
|
||||
// LargeLegacy can be removed when fixCleanup3_2_0 is retired
|
||||
LargeLegacy,
|
||||
Large,
|
||||
};
|
||||
|
||||
// This entire enum can be removed when fixCleanup3_2_0 is retired
|
||||
enum class CuspRoundingFix : bool {
|
||||
Disabled = false,
|
||||
Enabled = true,
|
||||
};
|
||||
|
||||
explicit constexpr MantissaRange(MantissaScale sc) : scale(sc)
|
||||
explicit constexpr MantissaRange(MantissaScale scale)
|
||||
: min(getMin(scale)), log(logTen(min).value_or(-1)), scale(scale)
|
||||
{
|
||||
}
|
||||
|
||||
MantissaScale const scale;
|
||||
int const log{getExponent(scale)};
|
||||
rep const min{getMin(scale, log)};
|
||||
rep const max{(min * 10) - 1};
|
||||
CuspRoundingFix const cuspRoundingFixEnabled{isCuspFixEnabled(scale)};
|
||||
|
||||
static MantissaRange const&
|
||||
getMantissaRange(MantissaScale scale);
|
||||
|
||||
static std::set<MantissaScale> const&
|
||||
getAllScales();
|
||||
rep min;
|
||||
rep max{(min * 10) - 1};
|
||||
int log;
|
||||
MantissaScale scale;
|
||||
|
||||
private:
|
||||
static constexpr int
|
||||
getExponent(MantissaScale scale)
|
||||
static constexpr rep
|
||||
getMin(MantissaScale scale)
|
||||
{
|
||||
switch (scale)
|
||||
{
|
||||
case MantissaScale::Small:
|
||||
return 15;
|
||||
case MantissaScale::LargeLegacy:
|
||||
return 1'000'000'000'000'000ULL;
|
||||
case MantissaScale::Large:
|
||||
return 18;
|
||||
// LCOV_EXCL_START
|
||||
return 1'000'000'000'000'000'000ULL;
|
||||
default:
|
||||
// If called in a constexpr context, this throw assures that the build fails if an
|
||||
// Since this can never be called outside a non-constexpr
|
||||
// context, this throw assures that the build fails if an
|
||||
// invalid scale is used.
|
||||
throw std::runtime_error("Unknown mantissa scale");
|
||||
// LCOV_EXCL_STOP
|
||||
}
|
||||
}
|
||||
|
||||
// Keep this function for future use with different ways to compute
|
||||
// the ranges.
|
||||
static constexpr rep
|
||||
getMin(MantissaScale scale, int exponent)
|
||||
{
|
||||
if (exponent < 0 || exponent >= kPowerOfTen.size())
|
||||
throw std::runtime_error("Invalid exponent"); // LCOV_EXCL_LINE
|
||||
return kPowerOfTen[exponent];
|
||||
}
|
||||
|
||||
static constexpr CuspRoundingFix
|
||||
isCuspFixEnabled(MantissaScale scale)
|
||||
{
|
||||
switch (scale)
|
||||
{
|
||||
case MantissaScale::Small:
|
||||
case MantissaScale::LargeLegacy:
|
||||
return CuspRoundingFix::Disabled;
|
||||
case MantissaScale::Large:
|
||||
return CuspRoundingFix::Enabled;
|
||||
default:
|
||||
// If called in a constexpr context, this throw assures that the build fails if an
|
||||
// invalid scale is used.
|
||||
throw std::runtime_error("Unknown mantissa scale"); // LCOV_EXCL_LINE
|
||||
}
|
||||
}
|
||||
|
||||
static std::unordered_map<MantissaScale, MantissaRange> const&
|
||||
getRanges();
|
||||
};
|
||||
|
||||
// Like std::integral, but only 64-bit integral types.
|
||||
@@ -302,7 +203,7 @@ concept Integral64 = std::is_same_v<T, std::int64_t> || std::is_same_v<T, std::u
|
||||
* amendments are enabled to determine which result to expect.
|
||||
*
|
||||
*/
|
||||
class Number final
|
||||
class Number
|
||||
{
|
||||
using rep = std::int64_t;
|
||||
using internalrep = MantissaRange::rep;
|
||||
@@ -523,28 +424,49 @@ public:
|
||||
return kRange.get().log;
|
||||
}
|
||||
|
||||
/// oneSmall is needed because the ranges are private
|
||||
static constexpr Number
|
||||
oneSmall();
|
||||
/// oneLarge is needed because the ranges are private
|
||||
static constexpr Number
|
||||
oneLarge();
|
||||
|
||||
// And one is needed because it needs to choose between oneSmall and
|
||||
// oneLarge based on the current range
|
||||
static Number
|
||||
one();
|
||||
|
||||
template <
|
||||
auto MinMantissa,
|
||||
auto MaxMantissa,
|
||||
Integral64 T = std::decay_t<decltype(MinMantissa)>>
|
||||
template <Integral64 T>
|
||||
[[nodiscard]]
|
||||
std::pair<T, int>
|
||||
normalizeToRange() const;
|
||||
normalizeToRange(T minMantissa, T maxMantissa) const;
|
||||
|
||||
private:
|
||||
static thread_local RoundingMode mode;
|
||||
// The available ranges for mantissa
|
||||
|
||||
static constexpr MantissaRange kSmallRange{MantissaRange::MantissaScale::Small};
|
||||
static_assert(isPowerOfTen(kSmallRange.min));
|
||||
static_assert(kSmallRange.min == 1'000'000'000'000'000LL);
|
||||
static_assert(kSmallRange.max == 9'999'999'999'999'999LL);
|
||||
static_assert(kSmallRange.log == 15);
|
||||
static_assert(kSmallRange.min < kMaxRep);
|
||||
static_assert(kSmallRange.max < kMaxRep);
|
||||
static constexpr MantissaRange kLargeRange{MantissaRange::MantissaScale::Large};
|
||||
static_assert(isPowerOfTen(kLargeRange.min));
|
||||
static_assert(kLargeRange.min == 1'000'000'000'000'000'000ULL);
|
||||
static_assert(kLargeRange.max == internalrep(9'999'999'999'999'999'999ULL));
|
||||
static_assert(kLargeRange.log == 18);
|
||||
static_assert(kLargeRange.min < kMaxRep);
|
||||
static_assert(kLargeRange.max > kMaxRep);
|
||||
|
||||
// The range for the mantissa when normalized.
|
||||
// Use reference_wrapper to avoid making copies, and prevent accidentally
|
||||
// changing the values inside the range.
|
||||
static thread_local std::reference_wrapper<MantissaRange const> kRange;
|
||||
|
||||
void
|
||||
normalize(MantissaRange const& range);
|
||||
normalize();
|
||||
|
||||
/** Normalize Number components to an arbitrary range.
|
||||
*
|
||||
@@ -559,8 +481,7 @@ private:
|
||||
T& mantissa,
|
||||
int& exponent,
|
||||
internalrep const& minMantissa,
|
||||
internalrep const& maxMantissa,
|
||||
MantissaRange::CuspRoundingFix cuspRoundingFixEnabled);
|
||||
internalrep const& maxMantissa);
|
||||
|
||||
template <class T>
|
||||
friend void
|
||||
@@ -569,9 +490,7 @@ private:
|
||||
T& mantissa,
|
||||
int& exponent,
|
||||
MantissaRange::rep const& minMantissa,
|
||||
MantissaRange::rep const& maxMantissa,
|
||||
MantissaRange::CuspRoundingFix cuspRoundingFixEnabled,
|
||||
bool dropped);
|
||||
MantissaRange::rep const& maxMantissa);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isnormal() const noexcept;
|
||||
@@ -607,7 +526,7 @@ static constexpr Number kNumZero{};
|
||||
inline Number::Number(bool negative, internalrep mantissa, int exponent, Normalized)
|
||||
: Number(negative, mantissa, exponent, Unchecked{})
|
||||
{
|
||||
normalize(kRange);
|
||||
normalize();
|
||||
}
|
||||
|
||||
inline Number::Number(internalrep mantissa, int exponent, Normalized)
|
||||
@@ -777,21 +696,10 @@ Number::isnormal() const noexcept
|
||||
kMinExponent <= exponent_ && exponent_ <= kMaxExponent);
|
||||
}
|
||||
|
||||
template <auto MinMantissa, auto MaxMantissa, Integral64 T>
|
||||
template <Integral64 T>
|
||||
std::pair<T, int>
|
||||
Number::normalizeToRange() const
|
||||
Number::normalizeToRange(T minMantissa, T maxMantissa) const
|
||||
{
|
||||
static_assert(std::is_same_v<T, std::uint64_t> || std::is_same_v<T, std::int64_t>);
|
||||
static_assert(std::is_same_v<T, std::decay_t<decltype(MinMantissa)>>);
|
||||
static_assert(std::is_same_v<T, std::decay_t<decltype(MaxMantissa)>>);
|
||||
auto constexpr kMIN = static_cast<T>(MinMantissa);
|
||||
auto constexpr kMAX = static_cast<T>(MaxMantissa);
|
||||
static_assert(kMIN > 0);
|
||||
static_assert(kMIN % 10 == 0);
|
||||
static_assert(isPowerOfTen(kMIN));
|
||||
static_assert(kMAX % 10 == 9);
|
||||
static_assert((kMAX + 1) / 10 == kMIN);
|
||||
|
||||
bool negative = negative_;
|
||||
internalrep mantissa = mantissa_;
|
||||
int exponent = exponent_;
|
||||
@@ -803,10 +711,7 @@ Number::normalizeToRange() const
|
||||
"xrpl::Number::normalizeToRange",
|
||||
"Number is non-negative for unsigned range.");
|
||||
}
|
||||
// Don't need to worry about the cuspRounding fix because rounding up will never take the
|
||||
// mantissa over maxMantissa with a ones digit value other than 0. 0 can safely be truncated.
|
||||
Number::normalize(
|
||||
negative, mantissa, exponent, kMIN, kMAX, MantissaRange::CuspRoundingFix::Disabled);
|
||||
Number::normalize(negative, mantissa, exponent, minMantissa, maxMantissa);
|
||||
|
||||
auto const sign = negative ? -1 : 1;
|
||||
return std::make_pair(static_cast<T>(sign * mantissa), exponent);
|
||||
@@ -858,8 +763,6 @@ to_string(MantissaRange::MantissaScale const& scale)
|
||||
{
|
||||
case MantissaRange::MantissaScale::Small:
|
||||
return "small";
|
||||
case MantissaRange::MantissaScale::LargeLegacy:
|
||||
return "largeLegacy";
|
||||
case MantissaRange::MantissaScale::Large:
|
||||
return "large";
|
||||
default:
|
||||
|
||||
@@ -181,14 +181,14 @@ private:
|
||||
beast::insight::Collector::ptr const& collector)
|
||||
: hook(collector->makeHook(handler))
|
||||
, size(collector->makeGauge(prefix, "size"))
|
||||
, hitRate(collector->makeGauge(prefix, "hit_rate"))
|
||||
, hit_rate(collector->makeGauge(prefix, "hit_rate"))
|
||||
|
||||
{
|
||||
}
|
||||
|
||||
beast::insight::Hook hook;
|
||||
beast::insight::Gauge size;
|
||||
beast::insight::Gauge hitRate;
|
||||
beast::insight::Gauge hit_rate;
|
||||
|
||||
std::size_t hits{0};
|
||||
std::size_t misses{0};
|
||||
@@ -197,16 +197,16 @@ private:
|
||||
class KeyOnlyEntry
|
||||
{
|
||||
public:
|
||||
clock_type::time_point lastAccess;
|
||||
clock_type::time_point last_access;
|
||||
|
||||
explicit KeyOnlyEntry(clock_type::time_point const& lastAccess) : lastAccess(lastAccess)
|
||||
explicit KeyOnlyEntry(clock_type::time_point const& lastAccess) : last_access(lastAccess)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
touch(clock_type::time_point const& now)
|
||||
{
|
||||
lastAccess = now;
|
||||
last_access = now;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -214,10 +214,10 @@ private:
|
||||
{
|
||||
public:
|
||||
shared_weak_combo_pointer_type ptr;
|
||||
clock_type::time_point lastAccess;
|
||||
clock_type::time_point last_access;
|
||||
|
||||
ValueEntry(clock_type::time_point const& lastAccess, shared_pointer_type const& ptr)
|
||||
: ptr(ptr), lastAccess(lastAccess)
|
||||
: ptr(ptr), last_access(lastAccess)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -246,7 +246,7 @@ private:
|
||||
void
|
||||
touch(clock_type::time_point const& now)
|
||||
{
|
||||
lastAccess = now;
|
||||
last_access = now;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -286,13 +286,13 @@ private:
|
||||
std::string name_;
|
||||
|
||||
// Desired number of cache entries (0 = ignore)
|
||||
int const targetSize_;
|
||||
int const target_size_;
|
||||
|
||||
// Desired maximum cache age
|
||||
clock_type::duration const targetAge_;
|
||||
clock_type::duration const target_age_;
|
||||
|
||||
// Number of items cached
|
||||
int cacheCount_{0};
|
||||
int cache_count_{0};
|
||||
cache_type cache_; // Hold strong reference to recent objects
|
||||
std::uint64_t hits_{0};
|
||||
std::uint64_t misses_{0};
|
||||
|
||||
@@ -34,8 +34,8 @@ inline TaggedCache<
|
||||
, clock_(clock)
|
||||
, stats_(name, std::bind(&TaggedCache::collectMetrics, this), collector)
|
||||
, name_(name)
|
||||
, targetSize_(size)
|
||||
, targetAge_(expiration)
|
||||
, target_size_(size)
|
||||
, target_age_(expiration)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
getCacheSize() const
|
||||
{
|
||||
std::scoped_lock const lock(mutex_);
|
||||
return cacheCount_;
|
||||
return cache_count_;
|
||||
}
|
||||
|
||||
template <
|
||||
@@ -139,7 +139,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
{
|
||||
std::scoped_lock const lock(mutex_);
|
||||
cache_.clear();
|
||||
cacheCount_ = 0;
|
||||
cache_count_ = 0;
|
||||
}
|
||||
|
||||
template <
|
||||
@@ -157,7 +157,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
{
|
||||
std::scoped_lock const lock(mutex_);
|
||||
cache_.clear();
|
||||
cacheCount_ = 0;
|
||||
cache_count_ = 0;
|
||||
hits_ = 0;
|
||||
misses_ = 0;
|
||||
}
|
||||
@@ -213,21 +213,21 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
{
|
||||
std::scoped_lock const lock(mutex_);
|
||||
|
||||
if (targetSize_ == 0 || (static_cast<int>(cache_.size()) <= targetSize_))
|
||||
if (target_size_ == 0 || (static_cast<int>(cache_.size()) <= target_size_))
|
||||
{
|
||||
whenExpire = now - targetAge_;
|
||||
whenExpire = now - target_age_;
|
||||
}
|
||||
else
|
||||
{
|
||||
whenExpire = now - (targetAge_ * targetSize_ / cache_.size());
|
||||
whenExpire = now - (target_age_ * target_size_ / cache_.size());
|
||||
|
||||
clock_type::duration const minimumAge(std::chrono::seconds(1));
|
||||
if (whenExpire > (now - minimumAge))
|
||||
whenExpire = now - minimumAge;
|
||||
|
||||
JLOG(journal_.trace())
|
||||
<< name_ << " is growing fast " << cache_.size() << " of " << targetSize_
|
||||
<< " aging at " << (now - whenExpire).count() << " of " << targetAge_.count();
|
||||
<< name_ << " is growing fast " << cache_.size() << " of " << target_size_
|
||||
<< " aging at " << (now - whenExpire).count() << " of " << target_age_.count();
|
||||
}
|
||||
|
||||
std::vector<std::thread> workers;
|
||||
@@ -242,7 +242,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
for (std::thread& worker : workers)
|
||||
worker.join();
|
||||
|
||||
cacheCount_ -= allRemovals;
|
||||
cache_count_ -= allRemovals;
|
||||
}
|
||||
// At this point allStuffToSweep will go out of scope outside the lock
|
||||
// and decrement the reference count on each strong pointer.
|
||||
@@ -280,7 +280,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
|
||||
if (entry.isCached())
|
||||
{
|
||||
--cacheCount_;
|
||||
--cache_count_;
|
||||
entry.ptr.convertToWeak();
|
||||
ret = true;
|
||||
}
|
||||
@@ -317,7 +317,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
std::piecewise_construct,
|
||||
std::forward_as_tuple(key),
|
||||
std::forward_as_tuple(clock_.now(), data));
|
||||
++cacheCount_;
|
||||
++cache_count_;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -366,12 +366,12 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
data = cachedData;
|
||||
}
|
||||
|
||||
++cacheCount_;
|
||||
++cache_count_;
|
||||
return true;
|
||||
}
|
||||
|
||||
entry.ptr = data;
|
||||
++cacheCount_;
|
||||
++cache_count_;
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -477,7 +477,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
auto [it, inserted] = cache_.emplace(
|
||||
std::piecewise_construct, std::forward_as_tuple(key), std::forward_as_tuple(now));
|
||||
if (!inserted)
|
||||
it->second.lastAccess = now;
|
||||
it->second.last_access = now;
|
||||
return inserted;
|
||||
}
|
||||
|
||||
@@ -626,7 +626,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
if (entry.isCached())
|
||||
{
|
||||
// independent of cache size, so not counted as a hit
|
||||
++cacheCount_;
|
||||
++cache_count_;
|
||||
entry.touch(clock_.now());
|
||||
return entry.ptr.getStrong();
|
||||
}
|
||||
@@ -658,7 +658,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
if (total != 0)
|
||||
hitRate = (hits_ * 100) / total;
|
||||
}
|
||||
stats_.hitRate.set(hitRate);
|
||||
stats_.hit_rate.set(hitRate);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,7 +706,7 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
++cit;
|
||||
}
|
||||
}
|
||||
else if (cit->second.lastAccess <= whenExpire)
|
||||
else if (cit->second.last_access <= whenExpire)
|
||||
{
|
||||
// strong, expired
|
||||
++cacheRemovals;
|
||||
@@ -773,12 +773,12 @@ TaggedCache<Key, T, IsKeyCache, SharedWeakUnionPointer, SharedPointerType, Hash,
|
||||
auto cit = partition.begin();
|
||||
while (cit != partition.end())
|
||||
{
|
||||
if (cit->second.lastAccess > now)
|
||||
if (cit->second.last_access > now)
|
||||
{
|
||||
cit->second.lastAccess = now;
|
||||
cit->second.last_access = now;
|
||||
++cit;
|
||||
}
|
||||
else if (cit->second.lastAccess <= whenExpire)
|
||||
else if (cit->second.last_access <= whenExpire)
|
||||
{
|
||||
cit = partition.erase(cit);
|
||||
}
|
||||
|
||||
@@ -24,20 +24,20 @@ namespace xrpl {
|
||||
template <class EF>
|
||||
class ScopeExit
|
||||
{
|
||||
EF exitFunction_;
|
||||
bool executeOnDestruction_{true};
|
||||
EF exit_function_;
|
||||
bool execute_on_destruction_{true};
|
||||
|
||||
public:
|
||||
~ScopeExit()
|
||||
{
|
||||
if (executeOnDestruction_)
|
||||
exitFunction_();
|
||||
if (execute_on_destruction_)
|
||||
exit_function_();
|
||||
}
|
||||
|
||||
ScopeExit(ScopeExit&& rhs) noexcept(
|
||||
std::is_nothrow_move_constructible_v<EF> || std::is_nothrow_copy_constructible_v<EF>)
|
||||
: exitFunction_{std::forward<EF>(rhs.exitFunction_)}
|
||||
, executeOnDestruction_{rhs.executeOnDestruction_}
|
||||
: exit_function_{std::forward<EF>(rhs.exit_function_)}
|
||||
, execute_on_destruction_{rhs.execute_on_destruction_}
|
||||
{
|
||||
rhs.release();
|
||||
}
|
||||
@@ -51,7 +51,7 @@ public:
|
||||
std::enable_if_t<
|
||||
!std::is_same_v<std::remove_cv_t<EFP>, ScopeExit> &&
|
||||
std::is_constructible_v<EF, EFP>>* = 0) noexcept
|
||||
: exitFunction_{std::forward<EFP>(f)}
|
||||
: exit_function_{std::forward<EFP>(f)}
|
||||
{
|
||||
static_assert(std::is_nothrow_constructible_v<EF, decltype(std::forward<EFP>(f))>);
|
||||
}
|
||||
@@ -59,7 +59,7 @@ public:
|
||||
void
|
||||
release() noexcept
|
||||
{
|
||||
executeOnDestruction_ = false;
|
||||
execute_on_destruction_ = false;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -69,22 +69,22 @@ ScopeExit(EF) -> ScopeExit<EF>;
|
||||
template <class EF>
|
||||
class ScopeFail
|
||||
{
|
||||
EF exitFunction_;
|
||||
bool executeOnDestruction_{true};
|
||||
int uncaughtOnCreation_{std::uncaught_exceptions()};
|
||||
EF exit_function_;
|
||||
bool execute_on_destruction_{true};
|
||||
int uncaught_on_creation_{std::uncaught_exceptions()};
|
||||
|
||||
public:
|
||||
~ScopeFail()
|
||||
{
|
||||
if (executeOnDestruction_ && std::uncaught_exceptions() > uncaughtOnCreation_)
|
||||
exitFunction_();
|
||||
if (execute_on_destruction_ && std::uncaught_exceptions() > uncaught_on_creation_)
|
||||
exit_function_();
|
||||
}
|
||||
|
||||
ScopeFail(ScopeFail&& rhs) noexcept(
|
||||
std::is_nothrow_move_constructible_v<EF> || std::is_nothrow_copy_constructible_v<EF>)
|
||||
: exitFunction_{std::forward<EF>(rhs.exitFunction_)}
|
||||
, executeOnDestruction_{rhs.executeOnDestruction_}
|
||||
, uncaughtOnCreation_{rhs.uncaughtOnCreation_}
|
||||
: exit_function_{std::forward<EF>(rhs.exit_function_)}
|
||||
, execute_on_destruction_{rhs.execute_on_destruction_}
|
||||
, uncaught_on_creation_{rhs.uncaught_on_creation_}
|
||||
{
|
||||
rhs.release();
|
||||
}
|
||||
@@ -98,7 +98,7 @@ public:
|
||||
std::enable_if_t<
|
||||
!std::is_same_v<std::remove_cv_t<EFP>, ScopeFail> &&
|
||||
std::is_constructible_v<EF, EFP>>* = 0) noexcept
|
||||
: exitFunction_{std::forward<EFP>(f)}
|
||||
: exit_function_{std::forward<EFP>(f)}
|
||||
{
|
||||
static_assert(std::is_nothrow_constructible_v<EF, decltype(std::forward<EFP>(f))>);
|
||||
}
|
||||
@@ -106,7 +106,7 @@ public:
|
||||
void
|
||||
release() noexcept
|
||||
{
|
||||
executeOnDestruction_ = false;
|
||||
execute_on_destruction_ = false;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -116,22 +116,22 @@ ScopeFail(EF) -> ScopeFail<EF>;
|
||||
template <class EF>
|
||||
class ScopeSuccess
|
||||
{
|
||||
EF exitFunction_;
|
||||
bool executeOnDestruction_{true};
|
||||
int uncaughtOnCreation_{std::uncaught_exceptions()};
|
||||
EF exit_function_;
|
||||
bool execute_on_destruction_{true};
|
||||
int uncaught_on_creation_{std::uncaught_exceptions()};
|
||||
|
||||
public:
|
||||
~ScopeSuccess() noexcept(noexcept(exitFunction_()))
|
||||
~ScopeSuccess() noexcept(noexcept(exit_function_()))
|
||||
{
|
||||
if (executeOnDestruction_ && std::uncaught_exceptions() <= uncaughtOnCreation_)
|
||||
exitFunction_();
|
||||
if (execute_on_destruction_ && std::uncaught_exceptions() <= uncaught_on_creation_)
|
||||
exit_function_();
|
||||
}
|
||||
|
||||
ScopeSuccess(ScopeSuccess&& rhs) noexcept(
|
||||
std::is_nothrow_move_constructible_v<EF> || std::is_nothrow_copy_constructible_v<EF>)
|
||||
: exitFunction_{std::forward<EF>(rhs.exitFunction_)}
|
||||
, executeOnDestruction_{rhs.executeOnDestruction_}
|
||||
, uncaughtOnCreation_{rhs.uncaughtOnCreation_}
|
||||
: exit_function_{std::forward<EF>(rhs.exit_function_)}
|
||||
, execute_on_destruction_{rhs.execute_on_destruction_}
|
||||
, uncaught_on_creation_{rhs.uncaught_on_creation_}
|
||||
{
|
||||
rhs.release();
|
||||
}
|
||||
@@ -146,14 +146,14 @@ public:
|
||||
!std::is_same_v<std::remove_cv_t<EFP>, ScopeSuccess> &&
|
||||
std::is_constructible_v<EF, EFP>>* =
|
||||
0) noexcept(std::is_nothrow_constructible_v<EF, EFP> || std::is_nothrow_constructible_v<EF, EFP&>)
|
||||
: exitFunction_{std::forward<EFP>(f)}
|
||||
: exit_function_{std::forward<EFP>(f)}
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
release() noexcept
|
||||
{
|
||||
executeOnDestruction_ = false;
|
||||
execute_on_destruction_ = false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -77,8 +77,8 @@ private:
|
||||
|
||||
std::ostream& os_;
|
||||
Results results_;
|
||||
SuiteResults suiteResults_;
|
||||
CaseResults caseResults_;
|
||||
SuiteResults suite_results_;
|
||||
CaseResults case_results_;
|
||||
|
||||
public:
|
||||
Reporter(Reporter const&) = delete;
|
||||
@@ -196,22 +196,22 @@ template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onSuiteBegin(SuiteInfo const& info)
|
||||
{
|
||||
suiteResults_ = SuiteResults{info.fullName()};
|
||||
suite_results_ = SuiteResults{info.fullName()};
|
||||
}
|
||||
|
||||
template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onSuiteEnd()
|
||||
{
|
||||
results_.add(suiteResults_);
|
||||
results_.add(suite_results_);
|
||||
}
|
||||
|
||||
template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onCaseBegin(std::string const& name)
|
||||
{
|
||||
caseResults_ = CaseResults(name);
|
||||
os_ << suiteResults_.name << (caseResults_.name.empty() ? "" : (" " + caseResults_.name))
|
||||
case_results_ = CaseResults(name);
|
||||
os_ << suite_results_.name << (case_results_.name.empty() ? "" : (" " + case_results_.name))
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
@@ -219,23 +219,23 @@ template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onCaseEnd()
|
||||
{
|
||||
suiteResults_.add(caseResults_);
|
||||
suite_results_.add(case_results_);
|
||||
}
|
||||
|
||||
template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onPass()
|
||||
{
|
||||
++caseResults_.total;
|
||||
++case_results_.total;
|
||||
}
|
||||
|
||||
template <class Unused>
|
||||
void
|
||||
Reporter<Unused>::onFail(std::string const& reason)
|
||||
{
|
||||
++caseResults_.failed;
|
||||
++caseResults_.total;
|
||||
os_ << "#" << caseResults_.total << " failed" << (reason.empty() ? "" : ": ") << reason
|
||||
++case_results_.failed;
|
||||
++case_results_.total;
|
||||
os_ << "#" << case_results_.total << " failed" << (reason.empty() ? "" : ": ") << reason
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ inline bool
|
||||
JobQueue::Coro::post()
|
||||
{
|
||||
{
|
||||
std::scoped_lock const lk(mutexRun_);
|
||||
std::scoped_lock const lk(mutex_run_);
|
||||
running_ = true;
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ JobQueue::Coro::post()
|
||||
}
|
||||
|
||||
// The coroutine will not run. Clean up running_.
|
||||
std::scoped_lock const lk(mutexRun_);
|
||||
std::scoped_lock const lk(mutex_run_);
|
||||
running_ = false;
|
||||
cv_.notify_all();
|
||||
return false;
|
||||
@@ -68,7 +68,7 @@ inline void
|
||||
JobQueue::Coro::resume()
|
||||
{
|
||||
{
|
||||
std::scoped_lock const lk(mutexRun_);
|
||||
std::scoped_lock const lk(mutex_run_);
|
||||
running_ = true;
|
||||
}
|
||||
{
|
||||
@@ -92,7 +92,7 @@ JobQueue::Coro::resume()
|
||||
}
|
||||
detail::getLocalValues().release();
|
||||
detail::getLocalValues().reset(saved);
|
||||
std::scoped_lock const lk(mutexRun_);
|
||||
std::scoped_lock const lk(mutex_run_);
|
||||
running_ = false;
|
||||
cv_.notify_all();
|
||||
}
|
||||
@@ -127,7 +127,7 @@ JobQueue::Coro::expectEarlyExit()
|
||||
inline void
|
||||
JobQueue::Coro::join()
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutexRun_);
|
||||
std::unique_lock<std::mutex> lk(mutex_run_);
|
||||
cv_.wait(lk, [this]() { return !running_; });
|
||||
}
|
||||
|
||||
|
||||
@@ -127,7 +127,7 @@ private:
|
||||
std::function<void()> job_;
|
||||
std::shared_ptr<LoadEvent> loadEvent_;
|
||||
std::string name_;
|
||||
clock_type::time_point queueTime_;
|
||||
clock_type::time_point queue_time_;
|
||||
};
|
||||
|
||||
using JobCounter = ClosureCounter<void>;
|
||||
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
std::string name_;
|
||||
bool running_{false};
|
||||
std::mutex mutex_;
|
||||
std::mutex mutexRun_;
|
||||
std::mutex mutex_run_;
|
||||
std::condition_variable cv_;
|
||||
boost::coroutines2::coroutine<void>::push_type* yield_{};
|
||||
boost::coroutines2::coroutine<void>::pull_type coro_;
|
||||
@@ -246,7 +246,7 @@ private:
|
||||
// Statistics tracking
|
||||
perf::PerfLog& perfLog_;
|
||||
beast::insight::Collector::ptr collector_;
|
||||
beast::insight::Gauge jobCount_;
|
||||
beast::insight::Gauge job_count_;
|
||||
beast::insight::Hook hook_;
|
||||
|
||||
std::condition_variable cv_;
|
||||
|
||||
@@ -161,7 +161,7 @@ public:
|
||||
* While the JSON spec doesn't explicitly disallow this, you should avoid
|
||||
* calling this method twice with the same tag for the same object.
|
||||
*
|
||||
* If CHECK_JSON_WRITER is defined, this function throws an exception if
|
||||
* If CHECK_JSON_WRITER is defined, this function throws an exception if if
|
||||
* the tag you use has already been used in this object.
|
||||
*/
|
||||
template <typename Type>
|
||||
|
||||
@@ -9,7 +9,7 @@ class BookDirs
|
||||
private:
|
||||
ReadView const* view_ = nullptr;
|
||||
uint256 const root_;
|
||||
uint256 const nextQuality_;
|
||||
uint256 const next_quality_;
|
||||
uint256 const key_;
|
||||
std::shared_ptr<SLE const> sle_ = nullptr;
|
||||
unsigned int entry_ = 0;
|
||||
@@ -67,15 +67,15 @@ private:
|
||||
friend class BookDirs;
|
||||
|
||||
const_iterator(ReadView const& view, uint256 const& root, uint256 const& dirKey)
|
||||
: view_(&view), root_(root), key_(dirKey), curKey_(dirKey)
|
||||
: view_(&view), root_(root), key_(dirKey), cur_key_(dirKey)
|
||||
{
|
||||
}
|
||||
|
||||
ReadView const* view_ = nullptr;
|
||||
uint256 root_;
|
||||
uint256 nextQuality_;
|
||||
uint256 next_quality_;
|
||||
uint256 key_;
|
||||
uint256 curKey_;
|
||||
uint256 cur_key_;
|
||||
std::shared_ptr<SLE const> sle_;
|
||||
unsigned int entry_ = 0;
|
||||
uint256 index_;
|
||||
|
||||
@@ -76,7 +76,7 @@ private:
|
||||
|
||||
// monotonic_resource_ must outlive `items_`. Make a pointer so it may be
|
||||
// easily moved.
|
||||
std::unique_ptr<boost::container::pmr::monotonic_buffer_resource> monotonicResource_;
|
||||
std::unique_ptr<boost::container::pmr::monotonic_buffer_resource> monotonic_resource_;
|
||||
txs_map txs_;
|
||||
Rules rules_;
|
||||
LedgerHeader header_;
|
||||
|
||||
@@ -57,7 +57,7 @@ isVaultPseudoAccountFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
MPTIssue const& mptShare,
|
||||
std::uint8_t depth);
|
||||
int depth);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isLPTokenFrozen(
|
||||
|
||||
@@ -22,14 +22,14 @@ public:
|
||||
static constexpr size_t kInitialBufferSize = kilobytes(256);
|
||||
|
||||
RawStateTable()
|
||||
: monotonicResource_{std::make_unique<boost::container::pmr::monotonic_buffer_resource>(
|
||||
: monotonic_resource_{std::make_unique<boost::container::pmr::monotonic_buffer_resource>(
|
||||
kInitialBufferSize)}
|
||||
, items_{monotonicResource_.get()} {};
|
||||
, items_{monotonic_resource_.get()} {};
|
||||
|
||||
RawStateTable(RawStateTable const& rhs)
|
||||
: monotonicResource_{std::make_unique<boost::container::pmr::monotonic_buffer_resource>(
|
||||
: monotonic_resource_{std::make_unique<boost::container::pmr::monotonic_buffer_resource>(
|
||||
kInitialBufferSize)}
|
||||
, items_{rhs.items_, monotonicResource_.get()}
|
||||
, items_{rhs.items_, monotonic_resource_.get()}
|
||||
, dropsDestroyed_{rhs.dropsDestroyed_} {};
|
||||
|
||||
RawStateTable(RawStateTable&&) = default;
|
||||
@@ -101,7 +101,7 @@ private:
|
||||
boost::container::pmr::polymorphic_allocator<std::pair<key_type const, SleAction>>>;
|
||||
// monotonic_resource_ must outlive `items_`. Make a pointer so it may be
|
||||
// easily moved.
|
||||
std::unique_ptr<boost::container::pmr::monotonic_buffer_resource> monotonicResource_;
|
||||
std::unique_ptr<boost::container::pmr::monotonic_buffer_resource> monotonic_resource_;
|
||||
items_t items_;
|
||||
|
||||
XRPAmount dropsDestroyed_{0};
|
||||
|
||||
@@ -4,38 +4,8 @@
|
||||
#include <xrpl/protocol/Rules.h>
|
||||
#include <xrpl/protocol/st.h>
|
||||
|
||||
#include <string_view>
|
||||
|
||||
namespace xrpl {
|
||||
|
||||
/**
|
||||
* Broker cover preclaim precision guard (fixCleanup3_2_0).
|
||||
*
|
||||
* Prevents a "silent sub-ULP no-op" where a deposit, withdrawal, or clawback
|
||||
* amount is so small that it rounds to zero at `sfCoverAvailable`'s scale.
|
||||
* Without this guard, both the pseudo trust-line and `sfCoverAvailable` would
|
||||
* identically absorb the rounded zero, resulting in a successful transaction
|
||||
* (tesSUCCESS) where no funds actually moved.
|
||||
*
|
||||
* @param view Read view (rules used for amendment gating).
|
||||
* @param sleBroker The loan broker SLE (read-only).
|
||||
* @param vaultAsset The underlying vault asset (the broker's cover asset).
|
||||
* @param amount The effective subtraction/addition amount.
|
||||
* @param j Journal for logging.
|
||||
* @param logPrefix Transactor name for log diagnostics.
|
||||
*
|
||||
* @return `tecPRECISION_LOSS` if the request rounds to zero at cover scale.
|
||||
* `tesSUCCESS` if the amendment is disabled or the request is safely supra-ULP.
|
||||
*/
|
||||
[[nodiscard]] TER
|
||||
canApplyToBrokerCover(
|
||||
ReadView const& view,
|
||||
SLE::const_ref sleBroker,
|
||||
Asset const& vaultAsset,
|
||||
STAmount const& amount,
|
||||
beast::Journal j,
|
||||
std::string_view logPrefix);
|
||||
|
||||
// Lending protocol has dependencies, so capture them here.
|
||||
bool
|
||||
checkLendingProtocolDependencies(Rules const& rules, STTx const& tx);
|
||||
@@ -203,21 +173,6 @@ getAssetsTotalScale(SLE::const_ref vaultSle)
|
||||
return scale(vaultSle->at(sfAssetsTotal), vaultSle->at(sfAsset));
|
||||
}
|
||||
|
||||
// Compute the minimum required broker cover, rounded consistently.
|
||||
// DebtTotal is a broker-level aggregate maintained at vault scale, so the
|
||||
// rounding must also use vault scale — never an individual loan's scale.
|
||||
inline Number
|
||||
minimumBrokerCover(Number const& debtTotal, TenthBips32 coverRateMinimum, SLE::const_ref vaultSle)
|
||||
{
|
||||
XRPL_ASSERT(
|
||||
vaultSle && vaultSle->getType() == ltVAULT, "xrpl::minimumBrokerCover : valid Vault sle");
|
||||
NumberRoundModeGuard const mg(Number::RoundingMode::Upward);
|
||||
return roundToAsset(
|
||||
vaultSle->at(sfAsset),
|
||||
tenthBipsOfValue(debtTotal, coverRateMinimum),
|
||||
getAssetsTotalScale(vaultSle));
|
||||
}
|
||||
|
||||
TER
|
||||
checkLoanGuards(
|
||||
Asset const& vaultAsset,
|
||||
@@ -461,7 +416,6 @@ loanAccruedInterest(
|
||||
|
||||
ExtendedPaymentComponents
|
||||
computeOverpaymentComponents(
|
||||
Rules const& rules,
|
||||
Asset const& asset,
|
||||
int32_t const loanScale,
|
||||
Number const& overpayment,
|
||||
|
||||
@@ -27,18 +27,14 @@ isGlobalFrozen(ReadView const& view, MPTIssue const& mptIssue);
|
||||
isIndividualFrozen(ReadView const& view, AccountID const& account, MPTIssue const& mptIssue);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
MPTIssue const& mptIssue,
|
||||
std::uint8_t depth = 0);
|
||||
isFrozen(ReadView const& view, AccountID const& account, MPTIssue const& mptIssue, int depth = 0);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isAnyFrozen(
|
||||
ReadView const& view,
|
||||
std::initializer_list<AccountID> const& accounts,
|
||||
MPTIssue const& mptIssue,
|
||||
std::uint8_t depth = 0);
|
||||
int depth = 0);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
@@ -92,7 +88,7 @@ requireAuth(
|
||||
MPTIssue const& mptIssue,
|
||||
AccountID const& account,
|
||||
AuthType authType = AuthType::Legacy,
|
||||
std::uint8_t depth = 0);
|
||||
int depth = 0);
|
||||
|
||||
/** Enforce account has MPToken to match its authorization.
|
||||
*
|
||||
@@ -108,78 +104,23 @@ enforceMPTokenAuthorization(
|
||||
XRPAmount const& priorBalance,
|
||||
beast::Journal j);
|
||||
|
||||
/** Resolve the underlying asset of a vault share.
|
||||
*
|
||||
* Reads sfReferenceHolding from @p sleShareIssuance to determine which
|
||||
* asset the vault wraps. @p sleHolding must be the SLE that
|
||||
* sfReferenceHolding points to — either an ltMPTOKEN (returns its
|
||||
* MPTIssue) or an ltRIPPLE_STATE (returns its low/high Issue).
|
||||
*
|
||||
* @pre Both SLEs must exist and @p sleHolding must be of type ltMPTOKEN
|
||||
* or ltRIPPLE_STATE. Passing any other type is undefined behaviour.
|
||||
* @param sleShareIssuance MPTokenIssuance SLE for the vault share token.
|
||||
* @param sleHolding SLE referenced by sfReferenceHolding.
|
||||
* @return The underlying Asset (MPTIssue or Issue).
|
||||
*/
|
||||
[[nodiscard]] Asset
|
||||
assetOfHolding(SLE const& sleShareIssuance, SLE const& sleHolding);
|
||||
|
||||
/** Check whether @p to may receive the given MPT from @p from.
|
||||
*
|
||||
* The check passes when any of the following is true:
|
||||
* - @p waive is WaiveMPTCanTransfer::Yes (recovery-path exemption), or
|
||||
* - @p from or @p to is the issuer, or
|
||||
* - lsfMPTCanTransfer is set on the MPTokenIssuance.
|
||||
*
|
||||
* For vault shares (MPTokenIssuances that carry sfReferenceHolding) the
|
||||
* check recurses into the underlying asset's transferability. This
|
||||
* recursion is defensive; vault-of-vault-shares is rejected at vault
|
||||
* creation, so in practice depth never exceeds 1.
|
||||
*
|
||||
* @param view Ledger state to read from.
|
||||
* @param mptIssue The MPT issuance being transferred.
|
||||
* @param from Sending account.
|
||||
* @param to Receiving account.
|
||||
* @param waive WaiveMPTCanTransfer::Yes skips the lsfMPTCanTransfer
|
||||
* check. Use for recovery paths (e.g. unwinding SAV or
|
||||
* Lending Protocol positions after an issuer revokes
|
||||
* transferability).
|
||||
* @param depth Recursion depth; bounded at kMaxAssetCheckDepth.
|
||||
* @return tesSUCCESS if the transfer is allowed, tecNO_AUTH otherwise.
|
||||
/** Check if the destination account is allowed
|
||||
* to receive MPT. Return tecNO_AUTH if it doesn't
|
||||
* and tesSUCCESS otherwise.
|
||||
*/
|
||||
[[nodiscard]] TER
|
||||
canTransfer(
|
||||
ReadView const& view,
|
||||
MPTIssue const& mptIssue,
|
||||
AccountID const& from,
|
||||
AccountID const& to,
|
||||
WaiveMPTCanTransfer waive = WaiveMPTCanTransfer::No,
|
||||
std::uint8_t depth = 0);
|
||||
|
||||
/** Check whether @p asset may be traded on the DEX.
|
||||
*
|
||||
* For IOU assets the check delegates to the existing offer/AMM freeze
|
||||
* logic. For MPT assets it checks lsfMPTCanTrade on the MPTokenIssuance.
|
||||
* Vault shares recurse into the underlying asset's tradability via
|
||||
* sfReferenceHolding; depth is bounded at kMaxAssetCheckDepth.
|
||||
*
|
||||
* @param view Ledger state to read from.
|
||||
* @param asset The asset to check.
|
||||
* @param depth Recursion depth; bounded at kMaxAssetCheckDepth.
|
||||
* @return tesSUCCESS if trading is allowed, tecNO_PERMISSION otherwise.
|
||||
*/
|
||||
[[nodiscard]] TER
|
||||
canTrade(ReadView const& view, Asset const& asset, std::uint8_t depth = 0);
|
||||
|
||||
/** Convenience to combine canTrade/Transfer. Returns tesSUCCESS if Asset is Issue.
|
||||
*/
|
||||
[[nodiscard]] TER
|
||||
canMPTTradeAndTransfer(
|
||||
ReadView const& v,
|
||||
Asset const& asset,
|
||||
AccountID const& from,
|
||||
AccountID const& to);
|
||||
|
||||
/** Check if Asset can be traded on DEX. return tecNO_PERMISSION
|
||||
* if it doesn't and tesSUCCESS otherwise.
|
||||
*/
|
||||
[[nodiscard]] TER
|
||||
canTrade(ReadView const& view, Asset const& asset);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// Empty holding operations (MPT-specific)
|
||||
@@ -286,4 +227,17 @@ issuerFundsToSelfIssue(ReadView const& view, MPTIssue const& issue);
|
||||
void
|
||||
issuerSelfDebitHookMPT(ApplyView& view, MPTIssue const& issue, std::uint64_t amount);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// MPT DEX
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/* Return true if a transaction is allowed for the specified MPT/account. The
|
||||
* function checks MPTokenIssuance and MPToken objects flags to determine if the
|
||||
* transaction is allowed.
|
||||
*/
|
||||
TER
|
||||
checkMPTTxAllowed(ReadView const& v, TxType tx, Asset const& asset, AccountID const& accountID);
|
||||
|
||||
} // namespace xrpl
|
||||
|
||||
@@ -93,7 +93,7 @@ isFrozen(ReadView const& view, AccountID const& account, Issue const& issue)
|
||||
// Overload with depth parameter for uniformity with MPTIssue version.
|
||||
// The depth parameter is ignored for IOUs since they don't have vault recursion.
|
||||
[[nodiscard]] inline bool
|
||||
isFrozen(ReadView const& view, AccountID const& account, Issue const& issue, std::uint8_t /*depth*/)
|
||||
isFrozen(ReadView const& view, AccountID const& account, Issue const& issue, int /*depth*/)
|
||||
{
|
||||
return isFrozen(view, account, issue);
|
||||
}
|
||||
@@ -110,7 +110,7 @@ isDeepFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
Issue const& issue,
|
||||
std::uint8_t = 0 /*ignored*/)
|
||||
int = 0 /*ignored*/)
|
||||
{
|
||||
return isDeepFrozen(view, account, issue.currency, issue.account);
|
||||
}
|
||||
|
||||
@@ -34,15 +34,6 @@ enum class WaiveTransferFee : bool { No = false, Yes };
|
||||
/** Controls whether accountSend is allowed to overflow OutstandingAmount **/
|
||||
enum class AllowMPTOverflow : bool { No = false, Yes };
|
||||
|
||||
/** Controls whether canTransfer enforces lsfMPTCanTransfer on MPTs.
|
||||
*
|
||||
* Default is No (enforce). Use Yes at call sites that must remain available
|
||||
* even when an MPT issuer has cleared lsfMPTCanTransfer - for example,
|
||||
* unwinding existing positions in SAV or the Lending Protocol. Has no
|
||||
* effect on the IOU branch of canTransfer.
|
||||
*/
|
||||
enum class WaiveMPTCanTransfer : bool { No = false, Yes };
|
||||
|
||||
/* Check if MPToken (for MPT) or trust line (for IOU) exists:
|
||||
* - StrongAuth - before checking if authorization is required
|
||||
* - WeakAuth
|
||||
@@ -63,26 +54,16 @@ enum class AuthType { StrongAuth, WeakAuth, Legacy };
|
||||
[[nodiscard]] bool
|
||||
isGlobalFrozen(ReadView const& view, Asset const& asset);
|
||||
|
||||
[[nodiscard]] TER
|
||||
checkGlobalFrozen(ReadView const& view, Asset const& asset);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isIndividualFrozen(ReadView const& view, AccountID const& account, Asset const& asset);
|
||||
|
||||
[[nodiscard]] TER
|
||||
checkIndividualFrozen(ReadView const& view, AccountID const& account, Asset const& asset);
|
||||
|
||||
/**
|
||||
* isFrozen check is recursive for MPT shares in a vault, descending to
|
||||
* assets in the vault, up to maxAssetCheckDepth recursion depth. This is
|
||||
* purely defensive, as we currently do not allow such vaults to be created.
|
||||
*/
|
||||
[[nodiscard]] bool
|
||||
isFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
Asset const& asset,
|
||||
std::uint8_t depth = 0);
|
||||
isFrozen(ReadView const& view, AccountID const& account, Asset const& asset, int depth = 0);
|
||||
|
||||
[[nodiscard]] TER
|
||||
checkFrozen(ReadView const& view, AccountID const& account, Issue const& issue);
|
||||
@@ -104,14 +85,14 @@ isAnyFrozen(
|
||||
ReadView const& view,
|
||||
std::initializer_list<AccountID> const& accounts,
|
||||
Asset const& asset,
|
||||
std::uint8_t depth = 0);
|
||||
int depth = 0);
|
||||
|
||||
[[nodiscard]] bool
|
||||
isDeepFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
MPTIssue const& mptIssue,
|
||||
std::uint8_t depth = 0);
|
||||
int depth = 0);
|
||||
|
||||
/**
|
||||
* isFrozen check is recursive for MPT shares in a vault, descending to
|
||||
@@ -119,11 +100,7 @@ isDeepFrozen(
|
||||
* purely defensive, as we currently do not allow such vaults to be created.
|
||||
*/
|
||||
[[nodiscard]] bool
|
||||
isDeepFrozen(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
Asset const& asset,
|
||||
std::uint8_t depth = 0);
|
||||
isDeepFrozen(ReadView const& view, AccountID const& account, Asset const& asset, int depth = 0);
|
||||
|
||||
[[nodiscard]] TER
|
||||
checkDeepFrozen(ReadView const& view, AccountID const& account, MPTIssue const& mptIssue);
|
||||
@@ -257,13 +234,7 @@ requireAuth(
|
||||
AuthType authType = AuthType::Legacy);
|
||||
|
||||
[[nodiscard]] TER
|
||||
canTransfer(
|
||||
ReadView const& view,
|
||||
Asset const& asset,
|
||||
AccountID const& from,
|
||||
AccountID const& to,
|
||||
WaiveMPTCanTransfer waive = WaiveMPTCanTransfer::No,
|
||||
std::uint8_t depth = 0);
|
||||
canTransfer(ReadView const& view, Asset const& asset, AccountID const& from, AccountID const& to);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <xrpl/ledger/ReadView.h>
|
||||
#include <xrpl/protocol/AccountID.h>
|
||||
#include <xrpl/protocol/STAmount.h>
|
||||
#include <xrpl/protocol/STLedgerEntry.h>
|
||||
|
||||
@@ -45,14 +43,6 @@ sharesToAssetsDeposit(
|
||||
/** Controls whether to truncate shares instead of rounding. */
|
||||
enum class TruncateShares : bool { No = false, Yes = true };
|
||||
|
||||
/** Controls whether the withdraw conversion helpers
|
||||
(assetsToSharesWithdraw and sharesToAssetsWithdraw) subtract
|
||||
sfLossUnrealized from sfAssetsTotal before computing the exchange rate.
|
||||
The default (No) applies the standard discounted rate; Yes is used when
|
||||
the redeemer is the sole remaining shareholder.
|
||||
*/
|
||||
enum class WaiveUnrealizedLoss : bool { No = false, Yes = true };
|
||||
|
||||
/** From the perspective of a vault, return the number of shares to demand from
|
||||
the depositor when they ask to withdraw a fixed amount of assets. Since
|
||||
shares are MPT this number is integral, and it will be rounded to nearest
|
||||
@@ -62,8 +52,6 @@ enum class WaiveUnrealizedLoss : bool { No = false, Yes = true };
|
||||
@param issuance The MPTokenIssuance SLE for the vault's shares.
|
||||
@param assets The amount of assets to convert.
|
||||
@param truncate Whether to truncate instead of rounding.
|
||||
@param waive Whether to waive the unrealized-loss discount when computing
|
||||
the exchange rate.
|
||||
|
||||
@return The number of shares, or nullopt on error.
|
||||
*/
|
||||
@@ -72,8 +60,7 @@ assetsToSharesWithdraw(
|
||||
std::shared_ptr<SLE const> const& vault,
|
||||
std::shared_ptr<SLE const> const& issuance,
|
||||
STAmount const& assets,
|
||||
TruncateShares truncate = TruncateShares::No,
|
||||
WaiveUnrealizedLoss waive = WaiveUnrealizedLoss::No);
|
||||
TruncateShares truncate = TruncateShares::No);
|
||||
|
||||
/** From the perspective of a vault, return the number of assets to give the
|
||||
depositor when they redeem a fixed amount of shares. Note, since shares are
|
||||
@@ -82,8 +69,6 @@ assetsToSharesWithdraw(
|
||||
@param vault The vault SLE.
|
||||
@param issuance The MPTokenIssuance SLE for the vault's shares.
|
||||
@param shares The amount of shares to convert.
|
||||
@param waive Whether to waive (i.e. not subtract) the vault's unrealized
|
||||
loss when computing the exchange rate.
|
||||
|
||||
@return The number of assets, or nullopt on error.
|
||||
*/
|
||||
@@ -91,22 +76,6 @@ assetsToSharesWithdraw(
|
||||
sharesToAssetsWithdraw(
|
||||
std::shared_ptr<SLE const> const& vault,
|
||||
std::shared_ptr<SLE const> const& issuance,
|
||||
STAmount const& shares,
|
||||
WaiveUnrealizedLoss waive = WaiveUnrealizedLoss::No);
|
||||
|
||||
/** Returns true iff `account` holds all of the vault's outstanding shares —
|
||||
i.e. is the sole remaining shareholder. Returns false if the account
|
||||
holds no shares or fewer than the total outstanding.
|
||||
|
||||
@param view The ledger view.
|
||||
@param account The candidate sole shareholder.
|
||||
@param issuance The MPTokenIssuance SLE for the vault's shares; provides
|
||||
both the share MPTID and the outstanding-amount total.
|
||||
*/
|
||||
[[nodiscard]] bool
|
||||
isSoleShareholder(
|
||||
ReadView const& view,
|
||||
AccountID const& account,
|
||||
std::shared_ptr<SLE const> const& issuance);
|
||||
STAmount const& shares);
|
||||
|
||||
} // namespace xrpl
|
||||
|
||||
@@ -21,13 +21,13 @@ public:
|
||||
bool sslVerify,
|
||||
beast::Journal j,
|
||||
boost::asio::ssl::context_base::method method = boost::asio::ssl::context::sslv23)
|
||||
: sslContext_{method}, j_(j), verify_{sslVerify}
|
||||
: ssl_context_{method}, j_(j), verify_{sslVerify}
|
||||
{
|
||||
boost::system::error_code ec;
|
||||
|
||||
if (sslVerifyFile.empty())
|
||||
{
|
||||
registerSSLCerts(sslContext_, ec, j_);
|
||||
registerSSLCerts(ssl_context_, ec, j_);
|
||||
|
||||
if (ec && sslVerifyDir.empty())
|
||||
{
|
||||
@@ -37,12 +37,12 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
sslContext_.load_verify_file(sslVerifyFile);
|
||||
ssl_context_.load_verify_file(sslVerifyFile);
|
||||
}
|
||||
|
||||
if (!sslVerifyDir.empty())
|
||||
{
|
||||
sslContext_.add_verify_path(sslVerifyDir, ec);
|
||||
ssl_context_.add_verify_path(sslVerifyDir, ec);
|
||||
|
||||
if (ec)
|
||||
{
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
boost::asio::ssl::context&
|
||||
context()
|
||||
{
|
||||
return sslContext_;
|
||||
return ssl_context_;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool
|
||||
@@ -153,7 +153,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
boost::asio::ssl::context sslContext_;
|
||||
boost::asio::ssl::context ssl_context_;
|
||||
beast::Journal const j_;
|
||||
bool const verify_;
|
||||
};
|
||||
|
||||
@@ -83,6 +83,10 @@ public:
|
||||
virtual Status
|
||||
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) = 0;
|
||||
|
||||
/** Fetch a batch synchronously. */
|
||||
virtual std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) = 0;
|
||||
|
||||
/** Store a single object.
|
||||
Depending on the implementation this may happen immediately
|
||||
or deferred using a scheduled task.
|
||||
|
||||
@@ -131,10 +131,6 @@ public:
|
||||
std::uint32_t ledgerSeq,
|
||||
std::function<void(std::shared_ptr<NodeObject> const&)>&& callback);
|
||||
|
||||
/** Remove expired entries from the positive and negative caches. */
|
||||
virtual void
|
||||
sweep() = 0;
|
||||
|
||||
/** Gather statistics pertaining to read and write activities.
|
||||
*
|
||||
* @param obj Json object reference into which to place counters.
|
||||
|
||||
@@ -22,32 +22,6 @@ public:
|
||||
beast::Journal j)
|
||||
: Database(scheduler, readThreads, config, j), backend_(std::move(backend))
|
||||
{
|
||||
std::optional<int> cacheSize, cacheAge;
|
||||
|
||||
if (config.exists("cache_size"))
|
||||
{
|
||||
cacheSize = get<int>(config, "cache_size");
|
||||
if (cacheSize.value() < 0)
|
||||
Throw<std::runtime_error>("Specified negative value for cache_size");
|
||||
}
|
||||
|
||||
if (config.exists("cache_age"))
|
||||
{
|
||||
cacheAge = get<int>(config, "cache_age");
|
||||
if (cacheAge.value() < 0)
|
||||
Throw<std::runtime_error>("Specified negative value for cache_age");
|
||||
}
|
||||
|
||||
if (cacheSize.has_value() || cacheAge.has_value())
|
||||
{
|
||||
cache_ = std::make_shared<TaggedCache<uint256, NodeObject>>(
|
||||
"DatabaseNodeImp",
|
||||
cacheSize.value_or(0),
|
||||
std::chrono::minutes(cacheAge.value_or(0)),
|
||||
stopwatch(),
|
||||
j);
|
||||
}
|
||||
|
||||
XRPL_ASSERT(
|
||||
backend_,
|
||||
"xrpl::NodeStore::DatabaseNodeImp::DatabaseNodeImp : non-null "
|
||||
@@ -93,19 +67,16 @@ public:
|
||||
backend_->sync();
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<NodeObject>>
|
||||
fetchBatch(std::vector<uint256> const& hashes);
|
||||
|
||||
void
|
||||
asyncFetch(
|
||||
uint256 const& hash,
|
||||
std::uint32_t ledgerSeq,
|
||||
std::function<void(std::shared_ptr<NodeObject> const&)>&& callback) override;
|
||||
|
||||
void
|
||||
sweep() override;
|
||||
|
||||
private:
|
||||
// Cache for database objects. This cache is not always initialized. Check
|
||||
// for null before using.
|
||||
std::shared_ptr<TaggedCache<uint256, NodeObject>> cache_;
|
||||
// Persistent key/value storage
|
||||
std::shared_ptr<Backend> backend_;
|
||||
|
||||
|
||||
@@ -55,9 +55,6 @@ public:
|
||||
void
|
||||
sync() override;
|
||||
|
||||
void
|
||||
sweep() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Backend> writableBackend_;
|
||||
std::shared_ptr<Backend> archiveBackend_;
|
||||
|
||||
@@ -140,8 +140,8 @@ private:
|
||||
using issue_hasher = std::hash<xrpl::Issue>;
|
||||
using mptissue_hasher = std::hash<xrpl::MPTIssue>;
|
||||
|
||||
issue_hasher mIssueHasher_;
|
||||
mptissue_hasher mMptissueHasher_;
|
||||
issue_hasher m_issue_hasher_;
|
||||
mptissue_hasher m_mptissue_hasher_;
|
||||
|
||||
public:
|
||||
explicit hash() = default;
|
||||
@@ -151,11 +151,11 @@ public:
|
||||
{
|
||||
return asset.visit(
|
||||
[&](xrpl::Issue const& issue) {
|
||||
value_type const result(mIssueHasher_(issue));
|
||||
value_type const result(m_issue_hasher_(issue));
|
||||
return result;
|
||||
},
|
||||
[&](xrpl::MPTIssue const& issue) {
|
||||
value_type const result(mMptissueHasher_(issue));
|
||||
value_type const result(m_mptissue_hasher_(issue));
|
||||
return result;
|
||||
});
|
||||
}
|
||||
@@ -170,8 +170,8 @@ private:
|
||||
using asset_hasher = std::hash<xrpl::Asset>;
|
||||
using uint256_hasher = xrpl::uint256::hasher;
|
||||
|
||||
asset_hasher issueHasher_;
|
||||
uint256_hasher uint256Hasher_;
|
||||
asset_hasher issue_hasher_;
|
||||
uint256_hasher uint256_hasher_;
|
||||
|
||||
public:
|
||||
hash() = default;
|
||||
@@ -182,11 +182,11 @@ public:
|
||||
value_type
|
||||
operator()(argument_type const& value) const
|
||||
{
|
||||
value_type result(issueHasher_(value.in));
|
||||
boost::hash_combine(result, issueHasher_(value.out));
|
||||
value_type result(issue_hasher_(value.in));
|
||||
boost::hash_combine(result, issue_hasher_(value.out));
|
||||
|
||||
if (value.domain)
|
||||
boost::hash_combine(result, uint256Hasher_(*value.domain));
|
||||
boost::hash_combine(result, uint256_hasher_(*value.domain));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -172,24 +172,24 @@ struct ErrorInfo
|
||||
{
|
||||
// Default ctor needed to produce an empty std::array during constexpr eval.
|
||||
constexpr ErrorInfo()
|
||||
: code(RpcUnknown), token("unknown"), message("An unknown error code."), httpStatus(200)
|
||||
: code(RpcUnknown), token("unknown"), message("An unknown error code."), http_status(200)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr ErrorInfo(ErrorCodeI code, char const* token, char const* message)
|
||||
: code(code), token(token), message(message), httpStatus(200)
|
||||
: code(code), token(token), message(message), http_status(200)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr ErrorInfo(ErrorCodeI code, char const* token, char const* message, int httpStatus)
|
||||
: code(code), token(token), message(message), httpStatus(httpStatus)
|
||||
: code(code), token(token), message(message), http_status(httpStatus)
|
||||
{
|
||||
}
|
||||
|
||||
ErrorCodeI code;
|
||||
json::StaticString token;
|
||||
json::StaticString message;
|
||||
int httpStatus;
|
||||
int http_status;
|
||||
};
|
||||
|
||||
/** Returns an ErrorInfo that reflects the error code. */
|
||||
|
||||
@@ -122,17 +122,4 @@ private:
|
||||
std::optional<Rules> saved_;
|
||||
};
|
||||
|
||||
class NumberSO;
|
||||
class NumberMantissaScaleGuard;
|
||||
|
||||
bool
|
||||
useRulesGuards(Rules const& rules);
|
||||
|
||||
void
|
||||
createGuards(
|
||||
Rules const& rules,
|
||||
std::optional<NumberSO>& stNumberSO,
|
||||
std::optional<CurrentTransactionRulesGuard>& rulesGuard,
|
||||
std::optional<NumberMantissaScaleGuard>& mantissaScaleGuard);
|
||||
|
||||
} // namespace xrpl
|
||||
|
||||
@@ -365,8 +365,8 @@ using SF_XCHAIN_BRIDGE = TypedField<STXChainBridge>;
|
||||
#define UNTYPED_SFIELD(sfName, stiSuffix, fieldValue, ...) extern SField const sfName;
|
||||
#define TYPED_SFIELD(sfName, stiSuffix, fieldValue, ...) extern SF_##stiSuffix const sfName;
|
||||
|
||||
extern SField const sfInvalid; // NOLINT(readability-identifier-naming)
|
||||
extern SField const sfGeneric; // NOLINT(readability-identifier-naming)
|
||||
extern SField const kSfInvalid;
|
||||
extern SField const kSfGeneric;
|
||||
|
||||
#include <xrpl/protocol/detail/sfields.macro>
|
||||
|
||||
|
||||
@@ -3,13 +3,11 @@
|
||||
#include <xrpl/basics/CountedObject.h>
|
||||
#include <xrpl/basics/LocalValue.h>
|
||||
#include <xrpl/basics/Number.h>
|
||||
#include <xrpl/beast/utility/Journal.h>
|
||||
#include <xrpl/beast/utility/instrumentation.h>
|
||||
#include <xrpl/protocol/Asset.h>
|
||||
#include <xrpl/protocol/IOUAmount.h>
|
||||
#include <xrpl/protocol/Issue.h>
|
||||
#include <xrpl/protocol/MPTAmount.h>
|
||||
#include <xrpl/protocol/Protocol.h>
|
||||
#include <xrpl/protocol/SField.h>
|
||||
#include <xrpl/protocol/STBase.h>
|
||||
#include <xrpl/protocol/Serializer.h>
|
||||
@@ -186,23 +184,6 @@ public:
|
||||
[[nodiscard]] STAmount const&
|
||||
value() const noexcept;
|
||||
|
||||
/**
|
||||
* Checks if this amount evaluates to zero when constrained to a specific
|
||||
* accounting scale.
|
||||
* For XRP and MPT `roundToScale` is a no-op, returns true only when the amount itself is zero.
|
||||
* The `scale` argument is ignored in that case.
|
||||
* For IOU, the amount is rounded to the given scale using Number::RoundingMode::ToNearest mode
|
||||
* and the result is checked for zero; if `scale <= exponent()`, `roundToScale` short-circuits
|
||||
* and returns the value unchanged, so this returns false for any non-zero amount.
|
||||
*
|
||||
* @param scale The target accounting scale to evaluate against.
|
||||
* @return `true` if this amount rounds to zero at the given scale, `false` otherwise.
|
||||
*
|
||||
* @see roundToScale
|
||||
*/
|
||||
[[nodiscard]] bool
|
||||
isZeroAtScale(int scale) const;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
//
|
||||
// Operators
|
||||
@@ -559,7 +540,7 @@ STAmount::fromNumber(A const& a, Number const& number)
|
||||
return STAmount{asset, intValue, 0, negative};
|
||||
}
|
||||
|
||||
auto const [mantissa, exponent] = working.normalizeToRange<kMinValue, kMaxValue>();
|
||||
auto const [mantissa, exponent] = working.normalizeToRange(kMinValue, kMaxValue);
|
||||
|
||||
return STAmount{asset, mantissa, exponent, negative};
|
||||
}
|
||||
@@ -594,25 +575,12 @@ STAmount::value() const noexcept
|
||||
return *this;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool
|
||||
inline bool
|
||||
isLegalNet(STAmount const& value)
|
||||
{
|
||||
return !value.native() || (value.mantissa() <= STAmount::kMaxNativeN);
|
||||
}
|
||||
|
||||
[[nodiscard]] inline bool
|
||||
isLegalMPT(STAmount const& value)
|
||||
{
|
||||
return !value.holds<MPTIssue>() ||
|
||||
(!value.negative() && value.exponent() == 0 && value.mantissa() <= kMaxMpTokenAmount);
|
||||
}
|
||||
|
||||
/* Check recursively if an object has invalid MPTAmount or XRPAmount in STAmount field.
|
||||
* Calls isLegalNet() and isLegalMPT().
|
||||
*/
|
||||
[[nodiscard]] bool
|
||||
hasInvalidAmount(STBase const& field, beast::Journal j);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
//
|
||||
// Operators
|
||||
|
||||
@@ -24,7 +24,7 @@ public:
|
||||
STBlob(SField const& f, void const* data, std::size_t size);
|
||||
STBlob(SField const& f, Buffer&& b);
|
||||
STBlob(SField const& n);
|
||||
STBlob(SerialIter&, SField const& name = sfGeneric);
|
||||
STBlob(SerialIter&, SField const& name = kSfGeneric);
|
||||
|
||||
[[nodiscard]] std::size_t
|
||||
size() const;
|
||||
|
||||
@@ -21,8 +21,8 @@ class STPathElement final : public CountedObject<STPathElement>
|
||||
PathAsset assetID_;
|
||||
AccountID issuerID_;
|
||||
|
||||
bool isOffer_;
|
||||
std::size_t hashValue_;
|
||||
bool is_offer_;
|
||||
std::size_t hash_value_;
|
||||
|
||||
public:
|
||||
// Bitwise values (typeCurrency | typeMPT)
|
||||
@@ -235,9 +235,9 @@ private:
|
||||
|
||||
// ------------ STPathElement ------------
|
||||
|
||||
inline STPathElement::STPathElement() : type_(TypeNone), isOffer_(true)
|
||||
inline STPathElement::STPathElement() : type_(TypeNone), is_offer_(true)
|
||||
{
|
||||
hashValue_ = getHash(*this);
|
||||
hash_value_ = getHash(*this);
|
||||
}
|
||||
|
||||
inline STPathElement::STPathElement(
|
||||
@@ -248,11 +248,11 @@ inline STPathElement::STPathElement(
|
||||
{
|
||||
if (!account)
|
||||
{
|
||||
isOffer_ = true;
|
||||
is_offer_ = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
isOffer_ = false;
|
||||
is_offer_ = false;
|
||||
accountID_ = *account;
|
||||
type_ |= TypeAccount;
|
||||
XRPL_ASSERT(
|
||||
@@ -272,7 +272,7 @@ inline STPathElement::STPathElement(
|
||||
XRPL_ASSERT(issuerID_ != noAccount(), "xrpl::STPathElement::STPathElement : issuer is set");
|
||||
}
|
||||
|
||||
hashValue_ = getHash(*this);
|
||||
hash_value_ = getHash(*this);
|
||||
}
|
||||
|
||||
inline STPathElement::STPathElement(
|
||||
@@ -284,9 +284,9 @@ inline STPathElement::STPathElement(
|
||||
, accountID_(account)
|
||||
, assetID_(asset)
|
||||
, issuerID_(issuer)
|
||||
, isOffer_(isXRP(accountID_))
|
||||
, is_offer_(isXRP(accountID_))
|
||||
{
|
||||
if (!isOffer_)
|
||||
if (!is_offer_)
|
||||
type_ |= TypeAccount;
|
||||
|
||||
if (forceAsset || !isXRP(assetID_))
|
||||
@@ -295,7 +295,7 @@ inline STPathElement::STPathElement(
|
||||
if (!isXRP(issuer))
|
||||
type_ |= TypeIssuer;
|
||||
|
||||
hashValue_ = getHash(*this);
|
||||
hash_value_ = getHash(*this);
|
||||
}
|
||||
|
||||
inline STPathElement::STPathElement(
|
||||
@@ -307,12 +307,12 @@ inline STPathElement::STPathElement(
|
||||
, accountID_(account)
|
||||
, assetID_(asset)
|
||||
, issuerID_(issuer)
|
||||
, isOffer_(isXRP(accountID_))
|
||||
, is_offer_(isXRP(accountID_))
|
||||
{
|
||||
assetID_.visit(
|
||||
[&](Currency const&) { type_ = type_ & (~Type::TypeMpt); },
|
||||
[&](MPTID const&) { type_ = type_ & (~Type::TypeCurrency); });
|
||||
hashValue_ = getHash(*this);
|
||||
hash_value_ = getHash(*this);
|
||||
}
|
||||
|
||||
inline auto
|
||||
@@ -324,7 +324,7 @@ STPathElement::getNodeType() const
|
||||
inline bool
|
||||
STPathElement::isOffer() const
|
||||
{
|
||||
return isOffer_;
|
||||
return is_offer_;
|
||||
}
|
||||
|
||||
inline bool
|
||||
@@ -404,7 +404,7 @@ STPathElement::getIssuerID() const
|
||||
inline bool
|
||||
STPathElement::operator==(STPathElement const& t) const
|
||||
{
|
||||
return (type_ & TypeAccount) == (t.type_ & TypeAccount) && hashValue_ == t.hashValue_ &&
|
||||
return (type_ & TypeAccount) == (t.type_ & TypeAccount) && hash_value_ == t.hash_value_ &&
|
||||
accountID_ == t.accountID_ && assetID_ == t.assetID_ && issuerID_ == t.issuerID_;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ enum class TxnSql : char {
|
||||
class STTx final : public STObject, public CountedObject<STTx>
|
||||
{
|
||||
uint256 tid_;
|
||||
TxType txType_;
|
||||
TxType tx_type_;
|
||||
|
||||
public:
|
||||
static constexpr std::size_t kMinMultiSigners = 1;
|
||||
@@ -187,7 +187,7 @@ inline STTx::STTx(SerialIter&& sit) // NOLINT(cppcoreguidelines-rvalue-referenc
|
||||
inline TxType
|
||||
STTx::getTxnType() const
|
||||
{
|
||||
return txType_;
|
||||
return tx_type_;
|
||||
}
|
||||
|
||||
inline Blob
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
// Add new amendments to the top of this list.
|
||||
// Keep it sorted in reverse chronological order.
|
||||
|
||||
XRPL_FIX (Cleanup3_2_0, Supported::Yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (Cleanup3_2_0, Supported::No, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(MPTokensV2, Supported::No, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (Cleanup3_1_3, Supported::Yes, VoteBehavior::DefaultYes)
|
||||
XRPL_FIX (BatchInnerSigs, Supported::No, VoteBehavior::DefaultNo)
|
||||
|
||||
@@ -400,7 +400,6 @@ LEDGER_ENTRY(ltMPTOKEN_ISSUANCE, 0x007e, MPTokenIssuance, mpt_issuance, ({
|
||||
{sfPreviousTxnLgrSeq, SoeRequired},
|
||||
{sfDomainID, SoeOptional},
|
||||
{sfMutableFlags, SoeDefault},
|
||||
{sfReferenceHolding, SoeOptional},
|
||||
}))
|
||||
|
||||
/** A ledger object which tracks MPToken
|
||||
@@ -592,7 +591,7 @@ LEDGER_ENTRY(ltLOAN, 0x0089, Loan, loan, ({
|
||||
// LoanBroker.ManagementFeeRate
|
||||
// The unrounded true total fee still owed to the broker.
|
||||
//
|
||||
// Note the "True" values may differ significantly from the tracked
|
||||
// Note the the "True" values may differ significantly from the tracked
|
||||
// rounded values.
|
||||
{sfPaymentRemaining, SoeDefault},
|
||||
{sfPeriodicPayment, SoeRequired},
|
||||
|
||||
@@ -205,7 +205,6 @@ TYPED_SFIELD(sfParentBatchID, UINT256, 36)
|
||||
TYPED_SFIELD(sfLoanBrokerID, UINT256, 37,
|
||||
SField::kSmdPseudoAccount | SField::kSmdDefault)
|
||||
TYPED_SFIELD(sfLoanID, UINT256, 38)
|
||||
TYPED_SFIELD(sfReferenceHolding, UINT256, 39)
|
||||
|
||||
// number (common)
|
||||
TYPED_SFIELD(sfNumber, NUMBER, 1)
|
||||
|
||||
@@ -688,7 +688,6 @@ TRANSACTION(ttLEDGER_STATE_FIX, 53, LedgerStateFix,
|
||||
({
|
||||
{sfLedgerFixType, SoeRequired},
|
||||
{sfOwner, SoeOptional},
|
||||
{sfBookDirectory, SoeOptional},
|
||||
}))
|
||||
|
||||
/** This transaction type creates a MPTokensIssuance instance */
|
||||
|
||||
@@ -15,8 +15,8 @@ Generation requires a one-time setup step to create a virtual environment
|
||||
and install Python dependencies, followed by running the generation target:
|
||||
|
||||
```bash
|
||||
cmake --build . --target setup_code_gen # create venv and install dependencies (once)
|
||||
cmake --build . --target code_gen # generate code
|
||||
cmake --build . --target setup_code_gen # create venv and install dependencies (once)
|
||||
cmake --build . --target code_gen # generate code
|
||||
```
|
||||
|
||||
By default, `CODEGEN_VENV_DIR` points to `.venv` in the project root. The
|
||||
|
||||
@@ -278,30 +278,6 @@ public:
|
||||
{
|
||||
return this->sle_->isFieldPresent(sfMutableFlags);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get sfReferenceHolding (SoeOptional)
|
||||
* @return The field value, or std::nullopt if not present.
|
||||
*/
|
||||
[[nodiscard]]
|
||||
protocol_autogen::Optional<SF_UINT256::type::value_type>
|
||||
getReferenceHolding() const
|
||||
{
|
||||
if (hasReferenceHolding())
|
||||
return this->sle_->at(sfReferenceHolding);
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if sfReferenceHolding is present.
|
||||
* @return True if the field is present, false otherwise.
|
||||
*/
|
||||
[[nodiscard]]
|
||||
bool
|
||||
hasReferenceHolding() const
|
||||
{
|
||||
return this->sle_->isFieldPresent(sfReferenceHolding);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -493,17 +469,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set sfReferenceHolding (SoeOptional)
|
||||
* @return Reference to this builder for method chaining.
|
||||
*/
|
||||
MPTokenIssuanceBuilder&
|
||||
setReferenceHolding(std::decay_t<typename SF_UINT256::type::value_type> const& value)
|
||||
{
|
||||
object_[sfReferenceHolding] = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Build and return the completed MPTokenIssuance wrapper.
|
||||
* @param index The ledger entry index.
|
||||
|
||||
@@ -83,32 +83,6 @@ public:
|
||||
{
|
||||
return this->tx_->isFieldPresent(sfOwner);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get sfBookDirectory (SoeOptional)
|
||||
* @return The field value, or std::nullopt if not present.
|
||||
*/
|
||||
[[nodiscard]]
|
||||
protocol_autogen::Optional<SF_UINT256::type::value_type>
|
||||
getBookDirectory() const
|
||||
{
|
||||
if (hasBookDirectory())
|
||||
{
|
||||
return this->tx_->at(sfBookDirectory);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if sfBookDirectory is present.
|
||||
* @return True if the field is present, false otherwise.
|
||||
*/
|
||||
[[nodiscard]]
|
||||
bool
|
||||
hasBookDirectory() const
|
||||
{
|
||||
return this->tx_->isFieldPresent(sfBookDirectory);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -175,17 +149,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set sfBookDirectory (SoeOptional)
|
||||
* @return Reference to this builder for method chaining.
|
||||
*/
|
||||
LedgerStateFixBuilder&
|
||||
setBookDirectory(std::decay_t<typename SF_UINT256::type::value_type> const& value)
|
||||
{
|
||||
object_[sfBookDirectory] = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Build and return the LedgerStateFix wrapper.
|
||||
* @param publicKey The public key for signing.
|
||||
|
||||
@@ -21,7 +21,7 @@ struct Entry : public beast::List<Entry>::Node
|
||||
@param now Construction time of Entry.
|
||||
*/
|
||||
explicit Entry(clock_type::time_point const now)
|
||||
: refcount(0), localBalance(now), remoteBalance(0)
|
||||
: refcount(0), local_balance(now), remote_balance(0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ struct Entry : public beast::List<Entry>::Node
|
||||
int
|
||||
balance(clock_type::time_point const now)
|
||||
{
|
||||
return localBalance.value(now) + remoteBalance;
|
||||
return local_balance.value(now) + remote_balance;
|
||||
}
|
||||
|
||||
// Add a charge and return normalized balance
|
||||
@@ -54,7 +54,7 @@ struct Entry : public beast::List<Entry>::Node
|
||||
int
|
||||
add(int charge, clock_type::time_point const now)
|
||||
{
|
||||
return localBalance.add(charge, now) + remoteBalance;
|
||||
return local_balance.add(charge, now) + remote_balance;
|
||||
}
|
||||
|
||||
// The public key of the peer
|
||||
@@ -67,10 +67,10 @@ struct Entry : public beast::List<Entry>::Node
|
||||
int refcount;
|
||||
|
||||
// Exponentially decaying balance of resource consumption
|
||||
DecayingSample<kDecayWindowSeconds, clock_type> localBalance;
|
||||
DecayingSample<kDecayWindowSeconds, clock_type> local_balance;
|
||||
|
||||
// Normalized balance contribution from imports
|
||||
int remoteBalance;
|
||||
int remote_balance;
|
||||
|
||||
// Time of the last warning
|
||||
clock_type::time_point lastWarningTime;
|
||||
|
||||
@@ -25,11 +25,11 @@ struct Key
|
||||
std::size_t
|
||||
operator()(Key const& v) const
|
||||
{
|
||||
return addrHash_(v.address);
|
||||
return addr_hash_(v.address);
|
||||
}
|
||||
|
||||
private:
|
||||
beast::Uhash<> addrHash_;
|
||||
beast::Uhash<> addr_hash_;
|
||||
};
|
||||
|
||||
struct KeyEqual
|
||||
|
||||
@@ -194,34 +194,34 @@ public:
|
||||
|
||||
for (auto& inboundEntry : inbound_)
|
||||
{
|
||||
int const localBalance = inboundEntry.localBalance.value(now);
|
||||
if ((localBalance + inboundEntry.remoteBalance) >= threshold)
|
||||
int const localBalance = inboundEntry.local_balance.value(now);
|
||||
if ((localBalance + inboundEntry.remote_balance) >= threshold)
|
||||
{
|
||||
json::Value& entry = (ret[inboundEntry.toString()] = json::ValueType::Object);
|
||||
entry[jss::local] = localBalance;
|
||||
entry[jss::remote] = inboundEntry.remoteBalance;
|
||||
entry[jss::remote] = inboundEntry.remote_balance;
|
||||
entry[jss::type] = "inbound";
|
||||
}
|
||||
}
|
||||
for (auto& outboundEntry : outbound_)
|
||||
{
|
||||
int const localBalance = outboundEntry.localBalance.value(now);
|
||||
if ((localBalance + outboundEntry.remoteBalance) >= threshold)
|
||||
int const localBalance = outboundEntry.local_balance.value(now);
|
||||
if ((localBalance + outboundEntry.remote_balance) >= threshold)
|
||||
{
|
||||
json::Value& entry = (ret[outboundEntry.toString()] = json::ValueType::Object);
|
||||
entry[jss::local] = localBalance;
|
||||
entry[jss::remote] = outboundEntry.remoteBalance;
|
||||
entry[jss::remote] = outboundEntry.remote_balance;
|
||||
entry[jss::type] = "outbound";
|
||||
}
|
||||
}
|
||||
for (auto& adminEntry : admin_)
|
||||
{
|
||||
int const localBalance = adminEntry.localBalance.value(now);
|
||||
if ((localBalance + adminEntry.remoteBalance) >= threshold)
|
||||
int const localBalance = adminEntry.local_balance.value(now);
|
||||
if ((localBalance + adminEntry.remote_balance) >= threshold)
|
||||
{
|
||||
json::Value& entry = (ret[adminEntry.toString()] = json::ValueType::Object);
|
||||
entry[jss::local] = localBalance;
|
||||
entry[jss::remote] = adminEntry.remoteBalance;
|
||||
entry[jss::remote] = adminEntry.remote_balance;
|
||||
entry[jss::type] = "admin";
|
||||
}
|
||||
}
|
||||
@@ -242,7 +242,7 @@ public:
|
||||
for (auto& inboundEntry : inbound_)
|
||||
{
|
||||
Gossip::Item item;
|
||||
item.balance = inboundEntry.localBalance.value(now);
|
||||
item.balance = inboundEntry.local_balance.value(now);
|
||||
if (item.balance >= kMinimumGossipBalance)
|
||||
{
|
||||
item.address = inboundEntry.key->address;
|
||||
@@ -278,7 +278,7 @@ public:
|
||||
Import::Item item;
|
||||
item.balance = gossipItem.balance;
|
||||
item.consumer = newInboundEndpoint(gossipItem.address);
|
||||
item.consumer.entry().remoteBalance += item.balance;
|
||||
item.consumer.entry().remote_balance += item.balance;
|
||||
next.items.push_back(item);
|
||||
}
|
||||
}
|
||||
@@ -295,14 +295,14 @@ public:
|
||||
Import::Item item;
|
||||
item.balance = gossipItem.balance;
|
||||
item.consumer = newInboundEndpoint(gossipItem.address);
|
||||
item.consumer.entry().remoteBalance += item.balance;
|
||||
item.consumer.entry().remote_balance += item.balance;
|
||||
next.items.push_back(item);
|
||||
}
|
||||
|
||||
Import& prev(resultIt->second);
|
||||
for (auto& item : prev.items)
|
||||
{
|
||||
item.consumer.entry().remoteBalance -= item.balance;
|
||||
item.consumer.entry().remote_balance -= item.balance;
|
||||
}
|
||||
|
||||
std::swap(next, prev);
|
||||
@@ -345,7 +345,7 @@ public:
|
||||
for (auto itemIter(import.items.begin()); itemIter != import.items.end();
|
||||
++itemIter)
|
||||
{
|
||||
itemIter->consumer.entry().remoteBalance -= itemIter->balance;
|
||||
itemIter->consumer.entry().remote_balance -= itemIter->balance;
|
||||
}
|
||||
|
||||
iter = importTable_.erase(iter);
|
||||
@@ -520,8 +520,8 @@ public:
|
||||
item["count"] = entry.refcount;
|
||||
item["name"] = entry.toString();
|
||||
item["balance"] = entry.balance(now);
|
||||
if (entry.remoteBalance != 0)
|
||||
item["remote_balance"] = entry.remoteBalance;
|
||||
if (entry.remote_balance != 0)
|
||||
item["remote_balance"] = entry.remote_balance;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ struct Handoff
|
||||
bool moved = false;
|
||||
|
||||
// If response is set, this determines the keep alive
|
||||
bool keepAlive = false;
|
||||
bool keep_alive = false;
|
||||
|
||||
// When set, this will be sent back
|
||||
std::shared_ptr<Writer> response;
|
||||
|
||||
@@ -30,19 +30,19 @@ struct Port
|
||||
boost::asio::ip::address ip;
|
||||
std::uint16_t port = 0;
|
||||
std::set<std::string, boost::beast::iless> protocol;
|
||||
std::vector<boost::asio::ip::network_v4> adminNetsV4;
|
||||
std::vector<boost::asio::ip::network_v6> adminNetsV6;
|
||||
std::vector<boost::asio::ip::network_v4> secureGatewayNetsV4;
|
||||
std::vector<boost::asio::ip::network_v6> secureGatewayNetsV6;
|
||||
std::vector<boost::asio::ip::network_v4> admin_nets_v4;
|
||||
std::vector<boost::asio::ip::network_v6> admin_nets_v6;
|
||||
std::vector<boost::asio::ip::network_v4> secure_gateway_nets_v4;
|
||||
std::vector<boost::asio::ip::network_v6> secure_gateway_nets_v6;
|
||||
std::string user;
|
||||
std::string password;
|
||||
std::string adminUser;
|
||||
std::string adminPassword;
|
||||
std::string sslKey;
|
||||
std::string sslCert;
|
||||
std::string sslChain;
|
||||
std::string sslCiphers;
|
||||
boost::beast::websocket::permessage_deflate pmdOptions;
|
||||
std::string admin_user;
|
||||
std::string admin_password;
|
||||
std::string ssl_key;
|
||||
std::string ssl_cert;
|
||||
std::string ssl_chain;
|
||||
std::string ssl_ciphers;
|
||||
boost::beast::websocket::permessage_deflate pmd_options;
|
||||
std::shared_ptr<boost::asio::ssl::context> context;
|
||||
|
||||
// How many incoming connections are allowed on this
|
||||
@@ -50,7 +50,7 @@ struct Port
|
||||
int limit = 0;
|
||||
|
||||
// Websocket disconnects if send queue exceeds this limit
|
||||
std::uint16_t wsQueueLimit{};
|
||||
std::uint16_t ws_queue_limit{};
|
||||
|
||||
// Returns `true` if any websocket protocols are specified
|
||||
[[nodiscard]] bool
|
||||
@@ -78,22 +78,22 @@ struct ParsedPort
|
||||
std::set<std::string, boost::beast::iless> protocol;
|
||||
std::string user;
|
||||
std::string password;
|
||||
std::string adminUser;
|
||||
std::string adminPassword;
|
||||
std::string sslKey;
|
||||
std::string sslCert;
|
||||
std::string sslChain;
|
||||
std::string sslCiphers;
|
||||
boost::beast::websocket::permessage_deflate pmdOptions;
|
||||
std::string admin_user;
|
||||
std::string admin_password;
|
||||
std::string ssl_key;
|
||||
std::string ssl_cert;
|
||||
std::string ssl_chain;
|
||||
std::string ssl_ciphers;
|
||||
boost::beast::websocket::permessage_deflate pmd_options;
|
||||
int limit = 0;
|
||||
std::uint16_t wsQueueLimit{};
|
||||
std::uint16_t ws_queue_limit{};
|
||||
|
||||
std::optional<boost::asio::ip::address> ip;
|
||||
std::optional<std::uint16_t> port;
|
||||
std::vector<boost::asio::ip::network_v4> adminNetsV4;
|
||||
std::vector<boost::asio::ip::network_v6> adminNetsV6;
|
||||
std::vector<boost::asio::ip::network_v4> secureGatewayNetsV4;
|
||||
std::vector<boost::asio::ip::network_v6> secureGatewayNetsV6;
|
||||
std::vector<boost::asio::ip::network_v4> admin_nets_v4;
|
||||
std::vector<boost::asio::ip::network_v6> admin_nets_v6;
|
||||
std::vector<boost::asio::ip::network_v4> secure_gateway_nets_v4;
|
||||
std::vector<boost::asio::ip::network_v6> secure_gateway_nets_v6;
|
||||
};
|
||||
|
||||
void
|
||||
|
||||
@@ -58,13 +58,13 @@ protected:
|
||||
Handler& handler_;
|
||||
boost::asio::executor_work_guard<boost::asio::executor> work_;
|
||||
boost::asio::strand<boost::asio::executor> strand_;
|
||||
endpoint_type remoteAddress_;
|
||||
endpoint_type remote_address_;
|
||||
beast::Journal const journal_;
|
||||
|
||||
std::string id_;
|
||||
std::size_t nid_;
|
||||
|
||||
boost::asio::streambuf readBuf_;
|
||||
boost::asio::streambuf read_buf_;
|
||||
http_request_type message_;
|
||||
std::vector<Buffer> wq_;
|
||||
std::vector<Buffer> wq2_;
|
||||
@@ -73,9 +73,9 @@ protected:
|
||||
bool complete_ = false;
|
||||
boost::system::error_code ec_;
|
||||
|
||||
int requestCount_ = 0;
|
||||
std::size_t bytesIn_ = 0;
|
||||
std::size_t bytesOut_ = 0;
|
||||
int request_count_ = 0;
|
||||
std::size_t bytes_in_ = 0;
|
||||
std::size_t bytes_out_ = 0;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
@@ -151,7 +151,7 @@ protected:
|
||||
beast::IP::Endpoint
|
||||
remoteAddress() override
|
||||
{
|
||||
return beast::IPAddressConversion::fromAsio(remoteAddress_);
|
||||
return beast::IPAddressConversion::fromAsio(remote_address_);
|
||||
}
|
||||
|
||||
http_request_type&
|
||||
@@ -191,23 +191,23 @@ BaseHTTPPeer<Handler, Impl>::BaseHTTPPeer(
|
||||
, handler_(handler)
|
||||
, work_(boost::asio::make_work_guard(executor))
|
||||
, strand_(boost::asio::make_strand(executor))
|
||||
, remoteAddress_(std::move(remoteAddress))
|
||||
, remote_address_(std::move(remoteAddress))
|
||||
, journal_(journal)
|
||||
{
|
||||
readBuf_.commit(
|
||||
boost::asio::buffer_copy(readBuf_.prepare(boost::asio::buffer_size(buffers)), buffers));
|
||||
read_buf_.commit(
|
||||
boost::asio::buffer_copy(read_buf_.prepare(boost::asio::buffer_size(buffers)), buffers));
|
||||
static std::atomic<int> kSid;
|
||||
nid_ = ++kSid;
|
||||
id_ = std::string("#") + std::to_string(nid_) + " ";
|
||||
JLOG(journal_.trace()) << id_ << "accept: " << remoteAddress_.address();
|
||||
JLOG(journal_.trace()) << id_ << "accept: " << remote_address_.address();
|
||||
}
|
||||
|
||||
template <class Handler, class Impl>
|
||||
BaseHTTPPeer<Handler, Impl>::~BaseHTTPPeer()
|
||||
{
|
||||
handler_.onClose(session(), ec_);
|
||||
JLOG(journal_.trace()) << id_ << "destroyed: " << requestCount_
|
||||
<< ((requestCount_ == 1) ? " request" : " requests");
|
||||
JLOG(journal_.trace()) << id_ << "destroyed: " << request_count_
|
||||
<< ((request_count_ == 1) ? " request" : " requests");
|
||||
}
|
||||
|
||||
template <class Handler, class Impl>
|
||||
@@ -245,7 +245,7 @@ BaseHTTPPeer<Handler, Impl>::startTimer()
|
||||
boost::beast::get_lowest_layer(impl().stream_)
|
||||
.expires_after(
|
||||
std::chrono::seconds(
|
||||
remoteAddress_.address().is_loopback() ? kTimeoutSecondsLocal : kTimeoutSeconds));
|
||||
remote_address_.address().is_loopback() ? kTimeoutSecondsLocal : kTimeoutSeconds));
|
||||
}
|
||||
|
||||
// Convenience for discarding the error code
|
||||
@@ -274,7 +274,7 @@ BaseHTTPPeer<Handler, Impl>::doRead(yield_context doYield)
|
||||
complete_ = false;
|
||||
error_code ec;
|
||||
startTimer();
|
||||
boost::beast::http::async_read(impl().stream_, readBuf_, message_, doYield[ec]);
|
||||
boost::beast::http::async_read(impl().stream_, read_buf_, message_, doYield[ec]);
|
||||
cancelTimer();
|
||||
if (ec == boost::beast::http::error::end_of_stream)
|
||||
return doClose();
|
||||
@@ -296,7 +296,7 @@ BaseHTTPPeer<Handler, Impl>::onWrite(error_code const& ec, std::size_t bytesTran
|
||||
return onTimer();
|
||||
if (ec)
|
||||
return fail(ec, "write");
|
||||
bytesOut_ += bytesTransferred;
|
||||
bytes_out_ += bytesTransferred;
|
||||
{
|
||||
std::scoped_lock const lock(mutex_);
|
||||
wq2_.clear();
|
||||
|
||||
@@ -27,7 +27,7 @@ protected:
|
||||
|
||||
Port const& port_;
|
||||
Handler& handler_;
|
||||
endpoint_type remoteAddress_;
|
||||
endpoint_type remote_address_;
|
||||
beast::WrappedSink sink_;
|
||||
beast::Journal const j_;
|
||||
|
||||
@@ -65,7 +65,7 @@ BasePeer<Handler, Impl>::BasePeer(
|
||||
beast::Journal journal)
|
||||
: port_(port)
|
||||
, handler_(handler)
|
||||
, remoteAddress_(std::move(remoteAddress))
|
||||
, remote_address_(std::move(remoteAddress))
|
||||
, sink_(
|
||||
journal.sink(),
|
||||
[] {
|
||||
|
||||
@@ -42,15 +42,15 @@ private:
|
||||
/// The socket has been closed, or will close after the next write
|
||||
/// finishes. Do not do any more writes, and don't try to close
|
||||
/// again.
|
||||
bool doClose_ = false;
|
||||
bool do_close_ = false;
|
||||
boost::beast::websocket::close_reason cr_;
|
||||
waitable_timer timer_;
|
||||
bool closeOnTimer_ = false;
|
||||
bool pingActive_ = false;
|
||||
bool close_on_timer_ = false;
|
||||
bool ping_active_ = false;
|
||||
boost::beast::websocket::ping_data payload_;
|
||||
error_code ec_;
|
||||
std::function<void(boost::beast::websocket::frame_type, boost::beast::string_view)>
|
||||
controlCallback_;
|
||||
control_callback_;
|
||||
|
||||
public:
|
||||
template <class Body, class Headers>
|
||||
@@ -85,7 +85,7 @@ public:
|
||||
[[nodiscard]] boost::asio::ip::tcp::endpoint const&
|
||||
remoteEndpoint() const override
|
||||
{
|
||||
return this->remoteAddress_;
|
||||
return this->remote_address_;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -173,14 +173,14 @@ BaseWSPeer<Handler, Impl>::run()
|
||||
{
|
||||
if (!strand_.running_in_this_thread())
|
||||
return post(strand_, std::bind(&BaseWSPeer::run, impl().shared_from_this()));
|
||||
impl().ws_.set_option(port().pmdOptions);
|
||||
impl().ws_.set_option(port().pmd_options);
|
||||
// Must manage the control callback memory outside of the `control_callback`
|
||||
// function
|
||||
controlCallback_ =
|
||||
control_callback_ =
|
||||
std::bind(&BaseWSPeer::onPingPong, this, std::placeholders::_1, std::placeholders::_2);
|
||||
impl().ws_.control_callback(controlCallback_);
|
||||
impl().ws_.control_callback(control_callback_);
|
||||
startTimer();
|
||||
closeOnTimer_ = true;
|
||||
close_on_timer_ = true;
|
||||
impl().ws_.set_option(boost::beast::websocket::stream_base::decorator([](auto& res) {
|
||||
res.set(boost::beast::http::field::server, BuildInfo::getFullVersionString());
|
||||
}));
|
||||
@@ -198,9 +198,9 @@ BaseWSPeer<Handler, Impl>::send(std::shared_ptr<WSMsg> w)
|
||||
{
|
||||
if (!strand_.running_in_this_thread())
|
||||
return post(strand_, std::bind(&BaseWSPeer::send, impl().shared_from_this(), std::move(w)));
|
||||
if (doClose_)
|
||||
if (do_close_)
|
||||
return;
|
||||
if (wq_.size() > port().wsQueueLimit)
|
||||
if (wq_.size() > port().ws_queue_limit)
|
||||
{
|
||||
cr_.code = safeCast<decltype(cr_.code)>(boost::beast::websocket::close_code::policy_error);
|
||||
cr_.reason = "Policy error: client is too slow.";
|
||||
@@ -227,9 +227,9 @@ BaseWSPeer<Handler, Impl>::close(boost::beast::websocket::close_reason const& re
|
||||
{
|
||||
if (!strand_.running_in_this_thread())
|
||||
return post(strand_, [self = impl().shared_from_this(), reason] { self->close(reason); });
|
||||
if (doClose_)
|
||||
if (do_close_)
|
||||
return;
|
||||
doClose_ = true;
|
||||
do_close_ = true;
|
||||
if (wq_.empty())
|
||||
{
|
||||
impl().ws_.async_close(
|
||||
@@ -260,7 +260,7 @@ BaseWSPeer<Handler, Impl>::onWsHandshake(error_code const& ec)
|
||||
{
|
||||
if (ec)
|
||||
return fail(ec, "on_ws_handshake");
|
||||
closeOnTimer_ = false;
|
||||
close_on_timer_ = false;
|
||||
doRead();
|
||||
}
|
||||
|
||||
@@ -313,7 +313,7 @@ BaseWSPeer<Handler, Impl>::onWriteFin(error_code const& ec)
|
||||
if (ec)
|
||||
return fail(ec, "write_fin");
|
||||
wq_.pop_front();
|
||||
if (doClose_)
|
||||
if (do_close_)
|
||||
{
|
||||
impl().ws_.async_close(
|
||||
cr_,
|
||||
@@ -409,7 +409,7 @@ BaseWSPeer<Handler, Impl>::onPing(error_code const& ec)
|
||||
{
|
||||
if (ec == boost::asio::error::operation_aborted)
|
||||
return;
|
||||
pingActive_ = false;
|
||||
ping_active_ = false;
|
||||
if (!ec)
|
||||
return;
|
||||
fail(ec, "on_ping");
|
||||
@@ -426,7 +426,7 @@ BaseWSPeer<Handler, Impl>::onPingPong(
|
||||
boost::beast::string_view const p(payload_.begin());
|
||||
if (payload == p)
|
||||
{
|
||||
closeOnTimer_ = false;
|
||||
close_on_timer_ = false;
|
||||
JLOG(this->j_.trace()) << "got matching pong";
|
||||
}
|
||||
else
|
||||
@@ -444,11 +444,11 @@ BaseWSPeer<Handler, Impl>::onTimer(error_code ec)
|
||||
return;
|
||||
if (!ec)
|
||||
{
|
||||
if (!closeOnTimer_ || !pingActive_)
|
||||
if (!close_on_timer_ || !ping_active_)
|
||||
{
|
||||
startTimer();
|
||||
closeOnTimer_ = true;
|
||||
pingActive_ = true;
|
||||
close_on_timer_ = true;
|
||||
ping_active_ = true;
|
||||
// cryptographic is probably overkill..
|
||||
beast::rngfill(payload_.begin(), payload_.size(), cryptoPrng());
|
||||
impl().ws_.async_ping(
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
@@ -60,7 +61,7 @@ private:
|
||||
boost::asio::io_context& ioc_;
|
||||
stream_type stream_;
|
||||
socket_type& socket_;
|
||||
endpoint_type remoteAddress_;
|
||||
endpoint_type remote_address_;
|
||||
boost::asio::strand<boost::asio::io_context::executor_type> strand_;
|
||||
beast::Journal const j_;
|
||||
|
||||
@@ -89,19 +90,16 @@ private:
|
||||
acceptor_type acceptor_;
|
||||
boost::asio::strand<boost::asio::io_context::executor_type> strand_;
|
||||
bool ssl_{
|
||||
port_.protocol.contains("https") || port_.protocol.contains("wss") ||
|
||||
port_.protocol.contains("wss2") || port_.protocol.contains("peer")};
|
||||
port_.protocol.count("https") > 0 || port_.protocol.count("wss") > 0 ||
|
||||
port_.protocol.count("wss2") > 0 || port_.protocol.count("peer") > 0};
|
||||
bool plain_{
|
||||
port_.protocol.contains("http") || port_.protocol.contains("ws") ||
|
||||
(port_.protocol.contains("ws2"))};
|
||||
port_.protocol.count("http") > 0 || port_.protocol.count("ws") > 0 ||
|
||||
(port_.protocol.count("ws2") != 0u)};
|
||||
static constexpr std::chrono::milliseconds kInitialAcceptDelay{50};
|
||||
static constexpr std::chrono::milliseconds kMaxAcceptDelay{2000};
|
||||
std::chrono::milliseconds acceptDelay_{kInitialAcceptDelay};
|
||||
boost::asio::steady_timer backoffTimer_;
|
||||
static constexpr std::uint64_t kMaxUsedFdPercent = 70;
|
||||
static constexpr std::chrono::milliseconds kFdSampleInterval{250};
|
||||
clock_type::time_point fdSampleAt_;
|
||||
bool cachedThrottle_{false};
|
||||
std::chrono::milliseconds accept_delay_{kInitialAcceptDelay};
|
||||
boost::asio::steady_timer backoff_timer_;
|
||||
static constexpr double kFreeFdThreshold = 0.70;
|
||||
|
||||
struct FDStats
|
||||
{
|
||||
@@ -166,7 +164,7 @@ Door<Handler>::Detector::Detector(
|
||||
, ioc_(ioc)
|
||||
, stream_(std::move(stream))
|
||||
, socket_(stream_.socket())
|
||||
, remoteAddress_(std::move(remoteAddress))
|
||||
, remote_address_(std::move(remoteAddress))
|
||||
, strand_(boost::asio::make_strand(ioc_))
|
||||
, j_(j)
|
||||
{
|
||||
@@ -201,18 +199,18 @@ Door<Handler>::Detector::doDetect(boost::asio::yield_context doYield)
|
||||
if (ssl)
|
||||
{
|
||||
if (auto sp = ios().template emplace<SSLHTTPPeer<Handler>>(
|
||||
port_, handler_, ioc_, j_, remoteAddress_, buf.data(), std::move(stream_)))
|
||||
port_, handler_, ioc_, j_, remote_address_, buf.data(), std::move(stream_)))
|
||||
sp->run();
|
||||
return;
|
||||
}
|
||||
if (auto sp = ios().template emplace<PlainHTTPPeer<Handler>>(
|
||||
port_, handler_, ioc_, j_, remoteAddress_, buf.data(), std::move(stream_)))
|
||||
port_, handler_, ioc_, j_, remote_address_, buf.data(), std::move(stream_)))
|
||||
sp->run();
|
||||
return;
|
||||
}
|
||||
if (ec != boost::asio::error::operation_aborted)
|
||||
{
|
||||
JLOG(j_.trace()) << "Error detecting ssl: " << ec.message() << " from " << remoteAddress_;
|
||||
JLOG(j_.trace()) << "Error detecting ssl: " << ec.message() << " from " << remote_address_;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -281,8 +279,7 @@ Door<Handler>::Door(
|
||||
, ioc_(ioContext)
|
||||
, acceptor_(ioContext)
|
||||
, strand_(boost::asio::make_strand(ioContext))
|
||||
, backoffTimer_(ioContext)
|
||||
, fdSampleAt_(clock_type::now() - kFdSampleInterval)
|
||||
, backoff_timer_(ioContext)
|
||||
{
|
||||
reOpen();
|
||||
}
|
||||
@@ -305,7 +302,7 @@ Door<Handler>::close()
|
||||
return boost::asio::post(
|
||||
strand_, std::bind(&Door<Handler>::close, this->shared_from_this()));
|
||||
}
|
||||
backoffTimer_.cancel();
|
||||
backoff_timer_.cancel();
|
||||
error_code ec;
|
||||
acceptor_.close(ec);
|
||||
}
|
||||
@@ -341,11 +338,11 @@ Door<Handler>::doAccept(boost::asio::yield_context doYield)
|
||||
{
|
||||
if (shouldThrottleForFds())
|
||||
{
|
||||
JLOG(j_.warn()) << "Throttling do_accept for " << acceptDelay_.count() << "ms.";
|
||||
backoffTimer_.expires_after(acceptDelay_);
|
||||
backoff_timer_.expires_after(accept_delay_);
|
||||
boost::system::error_code tec;
|
||||
backoffTimer_.async_wait(doYield[tec]);
|
||||
acceptDelay_ = std::min(acceptDelay_ * 2, kMaxAcceptDelay);
|
||||
backoff_timer_.async_wait(doYield[tec]);
|
||||
accept_delay_ = std::min(accept_delay_ * 2, kMaxAcceptDelay);
|
||||
JLOG(j_.warn()) << "Throttling do_accept for " << accept_delay_.count() << "ms.";
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -362,17 +359,14 @@ Door<Handler>::doAccept(boost::asio::yield_context doYield)
|
||||
if (ec == boost::asio::error::no_descriptors ||
|
||||
ec == boost::asio::error::no_buffer_space)
|
||||
{
|
||||
char const* const cause = (ec == boost::asio::error::no_descriptors)
|
||||
? "too many open files"
|
||||
: "kernel buffer space exhausted";
|
||||
JLOG(j_.warn()) << "accept: " << cause << ". Pausing for " << acceptDelay_.count()
|
||||
<< "ms.";
|
||||
JLOG(j_.warn()) << "accept: Too many open files. Pausing for "
|
||||
<< accept_delay_.count() << "ms.";
|
||||
|
||||
backoffTimer_.expires_after(acceptDelay_);
|
||||
backoff_timer_.expires_after(accept_delay_);
|
||||
boost::system::error_code tec;
|
||||
backoffTimer_.async_wait(doYield[tec]);
|
||||
backoff_timer_.async_wait(doYield[tec]);
|
||||
|
||||
acceptDelay_ = std::min(acceptDelay_ * 2, kMaxAcceptDelay);
|
||||
accept_delay_ = std::min(accept_delay_ * 2, kMaxAcceptDelay);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -381,7 +375,7 @@ Door<Handler>::doAccept(boost::asio::yield_context doYield)
|
||||
continue;
|
||||
}
|
||||
|
||||
acceptDelay_ = kInitialAcceptDelay;
|
||||
accept_delay_ = kInitialAcceptDelay;
|
||||
|
||||
if (ssl_ && plain_)
|
||||
{
|
||||
@@ -434,15 +428,14 @@ Door<Handler>::shouldThrottleForFds()
|
||||
#if BOOST_OS_WINDOWS
|
||||
return false;
|
||||
#else
|
||||
auto const now = clock_type::now();
|
||||
if (now - fdSampleAt_ < kFdSampleInterval)
|
||||
return cachedThrottle_;
|
||||
|
||||
fdSampleAt_ = now;
|
||||
auto const stats = queryFdStats();
|
||||
cachedThrottle_ =
|
||||
stats && stats->limit > 0 && stats->used * 100 > stats->limit * kMaxUsedFdPercent;
|
||||
return cachedThrottle_;
|
||||
if (!stats || stats->limit == 0)
|
||||
return false;
|
||||
|
||||
auto const& s = *stats;
|
||||
auto const free = (s.limit > s.used) ? (s.limit - s.used) : 0ull;
|
||||
double const freeRatio = static_cast<double>(free) / static_cast<double>(s.limit);
|
||||
return freeRatio < kFreeFdThreshold;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user