Compare commits

..

2 Commits

Author SHA1 Message Date
Jingchen
ce31a7ed16 chore: Replace levelization shell script by python script (#6325)
The new python version is significantly faster.
2026-03-12 15:38:00 +00:00
tsinglua
91a23cf80b chore: Fix minor issues in the comments (#6535) 2026-03-12 11:15:30 -04:00
10 changed files with 353 additions and 232 deletions

View File

@@ -70,7 +70,7 @@ that `test` code should _never_ be included in `xrpl` or `xrpld` code.)
## Validation
The [levelization](generate.sh) script takes no parameters,
The [levelization](generate.py) script takes no parameters,
reads no environment variables, and can be run from any directory,
as long as it is in the expected location in the rippled repo.
It can be run at any time from within a checked out repo, and will
@@ -104,7 +104,7 @@ It generates many files of [results](results):
Github Actions workflow to test that levelization loops haven't
changed. Unfortunately, if changes are detected, it can't tell if
they are improvements or not, so if you have resolved any issues or
done anything else to improve levelization, run `levelization.sh`,
done anything else to improve levelization, run `generate.py`,
and commit the updated results.
The `loops.txt` and `ordering.txt` files relate the modules
@@ -128,7 +128,7 @@ The committed files hide the detailed values intentionally, to
prevent false alarms and merging issues, and because it's easy to
get those details locally.
1. Run `levelization.sh`
1. Run `generate.py`
2. Grep the modules in `paths.txt`.
- For example, if a cycle is found `A ~= B`, simply `grep -w
A .github/scripts/levelization/results/paths.txt | grep -w B`

335
.github/scripts/levelization/generate.py vendored Normal file
View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
"""
Usage: generate.py
This script takes no parameters, and can be called from any directory in the file system.
"""
import os
import re
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple, Set, Optional
# Compile regex patterns once at module level
INCLUDE_PATTERN = re.compile(r"^\s*#include.*/.*\.h")
INCLUDE_PATH_PATTERN = re.compile(r'[<"]([^>"]+)[>"]')
def dictionary_sort_key(s: str) -> str:
"""
Create a sort key that mimics 'sort -d' (dictionary order).
Dictionary order only considers blanks and alphanumeric characters.
This means punctuation like '.' is ignored during sorting.
"""
# Keep only alphanumeric characters and spaces
return "".join(c for c in s if c.isalnum() or c.isspace())
def get_level(file_path: str) -> str:
"""
Extract the level from a file path (second and third directory components).
Equivalent to bash: cut -d/ -f 2,3
Examples:
src/xrpld/app/main.cpp -> xrpld.app
src/libxrpl/protocol/STObject.cpp -> libxrpl.protocol
include/xrpl/basics/base_uint.h -> xrpl.basics
"""
parts = file_path.split("/")
# Get fields 2 and 3 (indices 1 and 2 in 0-based indexing)
if len(parts) >= 3:
level = f"{parts[1]}/{parts[2]}"
elif len(parts) >= 2:
level = f"{parts[1]}/toplevel"
else:
level = file_path
# If the "level" indicates a file, cut off the filename
if "." in level.split("/")[-1]: # Avoid Path object creation
# Use the "toplevel" label as a workaround for `sort`
# inconsistencies between different utility versions
level = level.rsplit("/", 1)[0] + "/toplevel"
return level.replace("/", ".")
def extract_include_level(include_line: str) -> Optional[str]:
"""
Extract the include path from an #include directive.
Gets the first two directory components from the include path.
Equivalent to bash: cut -d/ -f 1,2
Examples:
#include <xrpl/basics/base_uint.h> -> xrpl.basics
#include "xrpld/app/main/Application.h" -> xrpld.app
"""
# Remove everything before the quote or angle bracket
match = INCLUDE_PATH_PATTERN.search(include_line)
if not match:
return None
include_path = match.group(1)
parts = include_path.split("/")
# Get first two fields (indices 0 and 1)
if len(parts) >= 2:
include_level = f"{parts[0]}/{parts[1]}"
else:
include_level = include_path
# If the "includelevel" indicates a file, cut off the filename
if "." in include_level.split("/")[-1]: # Avoid Path object creation
include_level = include_level.rsplit("/", 1)[0] + "/toplevel"
return include_level.replace("/", ".")
def find_repository_directories(
start_path: Path, depth_limit: int = 10
) -> Tuple[Path, List[Path]]:
"""
Find the repository root by looking for src or include folders.
Walks up the directory tree from the start path.
"""
current = start_path.resolve()
# Walk up the directory tree
for _ in range(depth_limit): # Limit search depth to prevent infinite loops
src_path = current / "src"
include_path = current / "include"
# Check if this directory has src or include folders
has_src = src_path.exists()
has_include = include_path.exists()
if has_src or has_include:
return current, [src_path, include_path]
# Move up one level
parent = current.parent
if parent == current: # Reached filesystem root
break
current = parent
# If we couldn't find it, raise an error
raise RuntimeError(
"Could not find repository root. "
"Expected to find a directory containing 'src' and/or 'include' folders."
)
def main():
# Change to the script's directory
script_dir = Path(__file__).parent.resolve()
os.chdir(script_dir)
# Clean up and create results directory.
results_dir = script_dir / "results"
if results_dir.exists():
import shutil
shutil.rmtree(results_dir)
results_dir.mkdir()
# Find the repository root by searching for src and include directories.
try:
repo_root, scan_dirs = find_repository_directories(script_dir)
print(f"Found repository root: {repo_root}")
print(f"Scanning directories:")
for scan_dir in scan_dirs:
print(f" - {scan_dir.relative_to(repo_root)}")
except RuntimeError as e:
print(f"Error: {e}", file=sys.stderr)
sys.exit(1)
print("\nScanning for raw includes...")
# Find all #include directives
raw_includes: List[Tuple[str, str]] = []
rawincludes_file = results_dir / "rawincludes.txt"
# Write to file as we go to avoid storing everything in memory.
with open(rawincludes_file, "w", buffering=8192) as raw_f:
for dir_path in scan_dirs:
print(f" Scanning {dir_path.relative_to(repo_root)}...")
for file_path in dir_path.rglob("*"):
if not file_path.is_file():
continue
try:
rel_path_str = str(file_path.relative_to(repo_root))
# Read file with a large buffer for performance.
with open(
file_path,
"r",
encoding="utf-8",
errors="ignore",
buffering=8192,
) as f:
for line in f:
# Quick check before regex
if "#include" not in line or "boost" in line:
continue
if INCLUDE_PATTERN.match(line):
line_stripped = line.strip()
entry = f"{rel_path_str}:{line_stripped}\n"
print(entry, end="")
raw_f.write(entry)
raw_includes.append((rel_path_str, line_stripped))
except Exception as e:
print(f"Error reading {file_path}: {e}", file=sys.stderr)
# Build levelization paths and count directly (no need to sort first).
print("Build levelization paths")
path_counts: Dict[Tuple[str, str], int] = defaultdict(int)
for file_path, include_line in raw_includes:
include_level = extract_include_level(include_line)
if not include_level:
continue
level = get_level(file_path)
if level != include_level:
path_counts[(level, include_level)] += 1
# Sort and deduplicate paths (using dictionary order like bash 'sort -d').
print("Sort and deduplicate paths")
paths_file = results_dir / "paths.txt"
with open(paths_file, "w") as f:
# Sort using dictionary order: only alphanumeric and spaces matter
sorted_items = sorted(
path_counts.items(),
key=lambda x: (dictionary_sort_key(x[0][0]), dictionary_sort_key(x[0][1])),
)
for (level, include_level), count in sorted_items:
line = f"{count:7} {level} {include_level}\n"
print(line.rstrip())
f.write(line)
# Split into flat-file database
print("Split into flat-file database")
includes_dir = results_dir / "includes"
included_by_dir = results_dir / "included_by"
includes_dir.mkdir()
included_by_dir.mkdir()
# Batch writes by grouping data first to avoid repeated file opens.
includes_data: Dict[str, List[Tuple[str, int]]] = defaultdict(list)
included_by_data: Dict[str, List[Tuple[str, int]]] = defaultdict(list)
# Process in sorted order to match bash script behaviour (dictionary order).
sorted_items = sorted(
path_counts.items(),
key=lambda x: (dictionary_sort_key(x[0][0]), dictionary_sort_key(x[0][1])),
)
for (level, include_level), count in sorted_items:
includes_data[level].append((include_level, count))
included_by_data[include_level].append((level, count))
# Write all includes files in sorted order (dictionary order).
for level in sorted(includes_data.keys(), key=dictionary_sort_key):
entries = includes_data[level]
with open(includes_dir / level, "w") as f:
for include_level, count in entries:
line = f"{include_level} {count}\n"
print(line.rstrip())
f.write(line)
# Write all included_by files in sorted order (dictionary order).
for include_level in sorted(included_by_data.keys(), key=dictionary_sort_key):
entries = included_by_data[include_level]
with open(included_by_dir / include_level, "w") as f:
for level, count in entries:
line = f"{level} {count}\n"
print(line.rstrip())
f.write(line)
# Search for loops
print("Search for loops")
loops_file = results_dir / "loops.txt"
ordering_file = results_dir / "ordering.txt"
loops_found: Set[Tuple[str, str]] = set()
# Pre-load all include files into memory to avoid repeated I/O.
# This is the biggest optimisation - we were reading files repeatedly in nested loops.
# Use list of tuples to preserve file order.
includes_cache: Dict[str, List[Tuple[str, int]]] = {}
includes_lookup: Dict[str, Dict[str, int]] = {} # For fast lookup
# Note: bash script uses 'for source in *' which uses standard glob sorting,
# NOT dictionary order. So we use standard sorted() here, not dictionary_sort_key.
for include_file in sorted(includes_dir.iterdir(), key=lambda p: p.name):
if not include_file.is_file():
continue
includes_cache[include_file.name] = []
includes_lookup[include_file.name] = {}
with open(include_file, "r") as f:
for line in f:
parts = line.strip().split()
if len(parts) >= 2:
include_name = parts[0]
include_count = int(parts[1])
includes_cache[include_file.name].append(
(include_name, include_count)
)
includes_lookup[include_file.name][include_name] = include_count
with open(loops_file, "w", buffering=8192) as loops_f, open(
ordering_file, "w", buffering=8192
) as ordering_f:
# Use standard sorting to match bash glob expansion 'for source in *'.
for source in sorted(includes_cache.keys()):
source_includes = includes_cache[source]
for include, include_freq in source_includes:
# Check if include file exists and references source
if include not in includes_lookup:
continue
source_freq = includes_lookup[include].get(source)
if source_freq is not None:
# Found a loop
loop_key = tuple(sorted([source, include]))
if loop_key in loops_found:
continue
loops_found.add(loop_key)
loops_f.write(f"Loop: {source} {include}\n")
# If the counts are close, indicate that the two modules are
# on the same level, though they shouldn't be.
diff = include_freq - source_freq
if diff > 3:
loops_f.write(f" {source} > {include}\n\n")
elif diff < -3:
loops_f.write(f" {include} > {source}\n\n")
elif source_freq == include_freq:
loops_f.write(f" {include} == {source}\n\n")
else:
loops_f.write(f" {include} ~= {source}\n\n")
else:
ordering_f.write(f"{source} > {include}\n")
# Print results
print("\nOrdering:")
with open(ordering_file, "r") as f:
print(f.read(), end="")
print("\nLoops:")
with open(loops_file, "r") as f:
print(f.read(), end="")
if __name__ == "__main__":
main()

View File

@@ -1,130 +0,0 @@
#!/bin/bash
# Usage: generate.sh
# This script takes no parameters, reads no environment variables,
# and can be run from any directory, as long as it is in the expected
# location in the repo.
pushd $( dirname $0 )
if [ -v PS1 ]
then
# if the shell is interactive, clean up any flotsam before analyzing
git clean -ix
fi
# Ensure all sorting is ASCII-order consistently across platforms.
export LANG=C
rm -rfv results
mkdir results
includes="$( pwd )/results/rawincludes.txt"
pushd ../../..
echo Raw includes:
grep -r '^[ ]*#include.*/.*\.h' include src | \
grep -v boost | tee ${includes}
popd
pushd results
oldifs=${IFS}
IFS=:
mkdir includes
mkdir included_by
echo Build levelization paths
exec 3< ${includes} # open rawincludes.txt for input
while read -r -u 3 file include
do
level=$( echo ${file} | cut -d/ -f 2,3 )
# If the "level" indicates a file, cut off the filename
if [[ "${level##*.}" != "${level}" ]]
then
# Use the "toplevel" label as a workaround for `sort`
# inconsistencies between different utility versions
level="$( dirname ${level} )/toplevel"
fi
level=$( echo ${level} | tr '/' '.' )
includelevel=$( echo ${include} | sed 's/.*["<]//; s/[">].*//' | \
cut -d/ -f 1,2 )
if [[ "${includelevel##*.}" != "${includelevel}" ]]
then
# Use the "toplevel" label as a workaround for `sort`
# inconsistencies between different utility versions
includelevel="$( dirname ${includelevel} )/toplevel"
fi
includelevel=$( echo ${includelevel} | tr '/' '.' )
if [[ "$level" != "$includelevel" ]]
then
echo $level $includelevel | tee -a paths.txt
fi
done
echo Sort and deduplicate paths
sort -ds paths.txt | uniq -c | tee sortedpaths.txt
mv sortedpaths.txt paths.txt
exec 3>&- #close fd 3
IFS=${oldifs}
unset oldifs
echo Split into flat-file database
exec 4<paths.txt # open paths.txt for input
while read -r -u 4 count level include
do
echo ${include} ${count} | tee -a includes/${level}
echo ${level} ${count} | tee -a included_by/${include}
done
exec 4>&- #close fd 4
loops="$( pwd )/loops.txt"
ordering="$( pwd )/ordering.txt"
pushd includes
echo Search for loops
# Redirect stdout to a file
exec 4>&1
exec 1>"${loops}"
for source in *
do
if [[ -f "$source" ]]
then
exec 5<"${source}" # open for input
while read -r -u 5 include includefreq
do
if [[ -f $include ]]
then
if grep -q -w $source $include
then
if grep -q -w "Loop: $include $source" "${loops}"
then
continue
fi
sourcefreq=$( grep -w $source $include | cut -d\ -f2 )
echo "Loop: $source $include"
# If the counts are close, indicate that the two modules are
# on the same level, though they shouldn't be
if [[ $(( $includefreq - $sourcefreq )) -gt 3 ]]
then
echo -e " $source > $include\n"
elif [[ $(( $sourcefreq - $includefreq )) -gt 3 ]]
then
echo -e " $include > $source\n"
elif [[ $sourcefreq -eq $includefreq ]]
then
echo -e " $include == $source\n"
else
echo -e " $include ~= $source\n"
fi
else
echo "$source > $include" >> "${ordering}"
fi
fi
done
exec 5>&- #close fd 5
fi
done
exec 1>&4 #close fd 1
exec 4>&- #close fd 4
cat "${ordering}"
cat "${loops}"
popd
popd
popd

View File

@@ -20,7 +20,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Check levelization
run: .github/scripts/levelization/generate.sh
run: python .github/scripts/levelization/generate.py
- name: Check for differences
env:
MESSAGE: |
@@ -32,7 +32,7 @@ jobs:
removed from loops.txt, it's probably an improvement, while if
something was added, it's probably a regression.
Run '.github/scripts/levelization/generate.sh' in your repo, commit
Run '.github/scripts/levelization/generate.py' in your repo, commit
and push the changes. See .github/scripts/levelization/README.md for
more info.
run: |

3
.gitignore vendored
View File

@@ -75,6 +75,9 @@ DerivedData
/.claude
/CLAUDE.md
# Python
__pycache__
# Direnv's directory
/.direnv

View File

@@ -64,49 +64,6 @@
namespace xrpl {
// Feature names must not exceed this length (in characters, excluding the null terminator).
static constexpr std::size_t maxFeatureNameSize = 63;
// Reserve this exact feature-name length (in characters/bytes, excluding the null terminator)
// so that a 32-byte uint256 (for example, in WASM or other interop contexts) can be used
// as a compact, fixed-size feature selector without conflicting with human-readable names.
static constexpr std::size_t reservedFeatureNameSize = 32;
// Both validFeatureNameSize and validFeatureName are consteval functions that can be used in
// static_asserts to validate feature names at compile time. They are only used inside
// enforceValidFeatureName in Feature.cpp, but are exposed here for testing. The expected
// parameter `auto fn` is a constexpr lambda which returns a const char*, making it available
// for compile-time evaluation. Read more in https://accu.org/journals/overload/30/172/wu/
consteval auto
validFeatureNameSize(auto fn) -> bool
{
constexpr char const* n = fn();
// Note, std::strlen is not constexpr, we need to implement our own here.
constexpr std::size_t N = [](auto n) {
std::size_t ret = 0;
for (auto ptr = n; *ptr != '\0'; ret++, ++ptr)
;
return ret;
}(n);
return N != reservedFeatureNameSize && //
N <= maxFeatureNameSize;
}
consteval auto
validFeatureName(auto fn) -> bool
{
constexpr char const* n = fn();
// Prevent the use of visually confusable characters and enforce that feature names
// are always valid ASCII. This is needed because C++ allows Unicode identifiers.
// Characters below 0x20 are nonprintable control characters, and characters with the 0x80 bit
// set are non-ASCII (e.g. UTF-8 encoding of Unicode), so both are disallowed.
for (auto ptr = n; *ptr != '\0'; ++ptr)
{
if (*ptr & 0x80 || *ptr < 0x20)
return false;
}
return true;
}
enum class VoteBehavior : int { Obsolete = -1, DefaultNo = 0, DefaultYes };
enum class AmendmentSupport : int { Retired = -1, Supported = 0, Unsupported };

View File

@@ -395,20 +395,10 @@ featureToName(uint256 const& f)
#pragma push_macro("XRPL_RETIRE_FIX")
#undef XRPL_RETIRE_FIX
consteval auto
enforceValidFeatureName(auto fn) -> char const*
{
static_assert(validFeatureName(fn), "Invalid feature name");
static_assert(validFeatureNameSize(fn), "Invalid feature name size");
return fn();
}
#define XRPL_FEATURE(name, supported, vote) \
uint256 const feature##name = \
registerFeature(enforceValidFeatureName([] { return #name; }), supported, vote);
uint256 const feature##name = registerFeature(#name, supported, vote);
#define XRPL_FIX(name, supported, vote) \
uint256 const fix##name = \
registerFeature(enforceValidFeatureName([] { return "fix" #name; }), supported, vote);
uint256 const fix##name = registerFeature("fix" #name, supported, vote);
// clang-format off
#define XRPL_RETIRE_FEATURE(name) \

View File

@@ -129,7 +129,7 @@ public:
// TrustedPublisherServer must be accessed through a shared_ptr.
// This constructor is only public so std::make_shared has access.
// The function`make_TrustedPublisherServer` should be used to create
// The function `make_TrustedPublisherServer` should be used to create
// instances.
// The `futures` member is expected to be structured as
// effective / expiration time point pairs for use in version 2 UNLs
@@ -249,7 +249,7 @@ public:
{
error_code ec;
acceptor_.close(ec);
// TODO consider making this join
// TODO: consider making this join
// any running do_peer threads
}

View File

@@ -1474,7 +1474,7 @@ public:
// Signer {
// Account: "...",
// TxnSignature: "...",
// PublicKey: "...""
// PublicKey: "..."
// }
// Make one well formed Signer and several mal-formed ones. See
// whether the serializer lets the good one through and catches

View File

@@ -2,7 +2,6 @@
#include <xrpl/ledger/AmendmentTable.h>
#include <xrpl/protocol/Feature.h>
#include <xrpl/protocol/digest.h>
#include <xrpl/protocol/jss.h>
namespace xrpl {
@@ -169,18 +168,16 @@ class Feature_test : public beast::unit_test::suite
using namespace test::jtx;
Env env{*this};
std::string const name = "fixAMMOverflowOffer";
auto jrr = env.rpc("feature", name)[jss::result];
auto jrr = env.rpc("feature", "fixAMMOverflowOffer")[jss::result];
BEAST_EXPECTS(jrr[jss::status] == jss::success, "status");
jrr.removeMember(jss::status);
BEAST_EXPECT(jrr.size() == 1);
auto const expected = to_string(sha512Half(Slice(name.data(), name.size())));
char const sha[] = "12523DF04B553A0B1AD74F42DDB741DE8DC06A03FC089A0EF197E2A87F1D8107";
BEAST_EXPECT(expected == sha);
BEAST_EXPECT(jrr.isMember(expected));
BEAST_EXPECT(jrr.isMember(
"12523DF04B553A0B1AD74F42DDB741DE8DC06A03FC089A0EF197E"
"2A87F1D8107"));
auto feature = *(jrr.begin());
BEAST_EXPECTS(feature[jss::name] == name, "name");
BEAST_EXPECTS(feature[jss::name] == "fixAMMOverflowOffer", "name");
BEAST_EXPECTS(!feature[jss::enabled].asBool(), "enabled");
BEAST_EXPECTS(feature[jss::vetoed].isBool() && !feature[jss::vetoed].asBool(), "vetoed");
BEAST_EXPECTS(feature[jss::supported].asBool(), "supported");
@@ -189,37 +186,6 @@ class Feature_test : public beast::unit_test::suite
jrr = env.rpc("feature", "fMM")[jss::result];
BEAST_EXPECT(jrr[jss::error] == "badFeature");
BEAST_EXPECT(jrr[jss::error_message] == "Feature unknown or invalid.");
// Test feature name size checks
constexpr auto ok63Name = [] {
return "123456789012345678901234567890123456789012345678901234567890123";
};
static_assert(validFeatureNameSize(ok63Name));
constexpr auto bad64Name = [] {
return "1234567890123456789012345678901234567890123456789012345678901234";
};
static_assert(!validFeatureNameSize(bad64Name));
constexpr auto ok31Name = [] { return "1234567890123456789012345678901"; };
static_assert(validFeatureNameSize(ok31Name));
constexpr auto bad32Name = [] { return "12345678901234567890123456789012"; };
static_assert(!validFeatureNameSize(bad32Name));
constexpr auto ok33Name = [] { return "123456789012345678901234567890123"; };
static_assert(validFeatureNameSize(ok33Name));
// Test feature character set checks
constexpr auto okName = [] { return "AMM_123"; };
static_assert(validFeatureName(okName));
// First character is Greek Capital Alpha, visually confusable with ASCII 'A'
constexpr auto badName = [] { return "ΑMM_123"; };
static_assert(!validFeatureName(badName));
constexpr auto badEmoji = [] { return "🔥"; };
static_assert(!validFeatureName(badEmoji));
}
void