mirror of https://github.com/XRPLF/rippled.git
synced 2026-02-04 22:15:28 +00:00

Compare commits: tapanito/l... → a1q123456/...

3 Commits: 4796ed57a4, 73e5323859, 3d6c575f5c
.github/scripts/levelization/README.md vendored (6 changes)

@@ -70,7 +70,7 @@ that `test` code should _never_ be included in `xrpl` or `xrpld` code.)
 
 ## Validation
 
-The [levelization](generate.sh) script takes no parameters,
+The [levelization](generate.py) script takes no parameters,
 reads no environment variables, and can be run from any directory,
 as long as it is in the expected location in the rippled repo.
 It can be run at any time from within a checked out repo, and will
@@ -104,7 +104,7 @@ It generates many files of [results](results):
 Github Actions workflow to test that levelization loops haven't
 changed. Unfortunately, if changes are detected, it can't tell if
 they are improvements or not, so if you have resolved any issues or
-done anything else to improve levelization, run `levelization.sh`,
+done anything else to improve levelization, run `generate.py`,
 and commit the updated results.
 
 The `loops.txt` and `ordering.txt` files relate the modules
@@ -128,7 +128,7 @@ The committed files hide the detailed values intentionally, to
 prevent false alarms and merging issues, and because it's easy to
 get those details locally.
 
-1. Run `levelization.sh`
+1. Run `generate.py`
 2. Grep the modules in `paths.txt`.
    - For example, if a cycle is found `A ~= B`, simply `grep -w
      A .github/scripts/levelization/results/paths.txt | grep -w B`
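
As a worked illustration of step 2, the `grep` pipeline can be reproduced in a few lines of Python. This is a minimal sketch, not part of the commit: `results/paths.txt` is the file generated above, and the module names `xrpld.app` / `xrpld.core` are hypothetical stand-ins for `A` and `B`.

```python
# Minimal sketch of the grep pipeline above: print every line in
# paths.txt that mentions both modules of a reported cycle.
# "xrpld.app" and "xrpld.core" are hypothetical placeholder names.
from pathlib import Path

paths = Path(".github/scripts/levelization/results/paths.txt")
a, b = "xrpld.app", "xrpld.core"
for line in paths.read_text().splitlines():
    words = line.split()
    if a in words and b in words:  # like grep -w: whole words only
        print(line)
```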
.github/scripts/levelization/generate.py vendored (new file, 369 lines)

@@ -0,0 +1,369 @@
#!/usr/bin/env python3

"""
Usage: generate.py
This script takes no parameters, reads no environment variables,
and can be run from any directory, as long as it is in the expected
location in the repo.
"""

import os
import re
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple, Set, Optional


# Compile regex patterns once at module level
INCLUDE_PATTERN = re.compile(r"^\s*#include.*/.*\.h")
INCLUDE_PATH_PATTERN = re.compile(r'[<"]([^>"]+)[>"]')


def dictionary_sort_key(s: str) -> str:
    """
    Create a sort key that mimics 'sort -d' (dictionary order).
    Dictionary order only considers blanks and alphanumeric characters.
    This means punctuation like '.' is ignored during sorting.
    """
    # Keep only alphanumeric characters and spaces
    return "".join(c for c in s if c.isalnum() or c.isspace())


def get_level(file_path: str) -> str:
    """
    Extract the level from a file path (second and third directory components).
    Equivalent to bash: cut -d/ -f 2,3

    Examples:
        src/xrpld/app/main.cpp -> xrpld.app
        src/libxrpl/protocol/STObject.cpp -> libxrpl.protocol
        include/xrpl/basics/base_uint.h -> xrpl.basics
    """
    parts = file_path.split("/")

    # Get fields 2 and 3 (indices 1 and 2 in 0-based indexing)
    if len(parts) >= 3:
        level = f"{parts[1]}/{parts[2]}"
    elif len(parts) >= 2:
        level = f"{parts[1]}/toplevel"
    else:
        level = file_path

    # If the "level" indicates a file, cut off the filename
    if "." in level.split("/")[-1]:  # Avoid Path object creation
        # Use the "toplevel" label as a workaround for `sort`
        # inconsistencies between different utility versions
        level = level.rsplit("/", 1)[0] + "/toplevel"

    return level.replace("/", ".")


def extract_include_level(include_line: str) -> Optional[str]:
    """
    Extract the include path from an #include directive.
    Gets the first two directory components from the include path.
    Equivalent to bash: cut -d/ -f 1,2

    Examples:
        #include <xrpl/basics/base_uint.h> -> xrpl.basics
        #include "xrpld/app/main/Application.h" -> xrpld.app
    """
    # Remove everything before the quote or angle bracket
    match = INCLUDE_PATH_PATTERN.search(include_line)
    if not match:
        return None

    include_path = match.group(1)
    parts = include_path.split("/")

    # Get first two fields (indices 0 and 1)
    if len(parts) >= 2:
        include_level = f"{parts[0]}/{parts[1]}"
    else:
        include_level = include_path

    # If the "includelevel" indicates a file, cut off the filename
    if "." in include_level.split("/")[-1]:  # Avoid Path object creation
        include_level = include_level.rsplit("/", 1)[0] + "/toplevel"

    return include_level.replace("/", ".")


def find_repo_root(start_path: Path, depth_limit: int = 10) -> Path:
    """
    Find the repository root by looking for .git directory or src/include folders.
    Walks up the directory tree from the start path.
    """
    current = start_path.resolve()

    # Walk up the directory tree
    for _ in range(depth_limit):  # Limit search depth to prevent infinite loops
        # Check if this directory has src or include folders
        has_src = (current / "src").exists()
        has_include = (current / "include").exists()

        if has_src or has_include:
            return current

        # Check if this is a git repository root
        if (current / ".git").exists():
            # Check if it has src or include nearby
            if has_src or has_include:
                return current

        # Move up one level
        parent = current.parent
        if parent == current:  # Reached filesystem root
            break
        current = parent

    # If we couldn't find it, raise an error
    raise RuntimeError(
        "Could not find repository root. "
        "Expected to find a directory containing 'src' and/or 'include' folders."
    )


def get_scan_directories(repo_root: Path) -> List[Path]:
    """
    Get the list of directories to scan for include files.
    Returns paths that actually exist.
    """
    directories = []

    for dir_name in ["include", "src"]:
        dir_path = repo_root / dir_name
        if dir_path.exists() and dir_path.is_dir():
            directories.append(dir_path)

    if not directories:
        raise RuntimeError(f"No 'src' or 'include' directories found in {repo_root}")

    return directories


def main():
    # Change to the script's directory
    script_dir = Path(__file__).parent.resolve()
    os.chdir(script_dir)

    # If the shell is interactive, clean up any flotsam before analyzing
    # Match bash behavior: check if PS1 is set (indicates interactive shell)
    # When running a script, PS1 is not set even if stdin/stdout are TTYs
    if os.environ.get("PS1"):
        try:
            subprocess.run(["git", "clean", "-ix"], check=False, timeout=30)
        except (subprocess.TimeoutExpired, KeyboardInterrupt):
            print("Skipping git clean...")
        except Exception:
            # If git clean fails for any reason, just continue
            pass

    # Clean up and create results directory
    results_dir = script_dir / "results"
    if results_dir.exists():
        import shutil

        shutil.rmtree(results_dir)
    results_dir.mkdir()

    # Find the repository root by searching for src and include directories
    try:
        repo_root = find_repo_root(script_dir)
        scan_dirs = get_scan_directories(repo_root)

        print(f"Found repository root: {repo_root}")
        print(f"Scanning directories:")
        for scan_dir in scan_dirs:
            print(f" - {scan_dir.relative_to(repo_root)}")
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print("\nScanning for raw includes...")
    # Find all #include directives
    raw_includes: List[Tuple[str, str]] = []
    rawincludes_file = results_dir / "rawincludes.txt"

    # Write to file as we go to avoid storing everything in memory
    with open(rawincludes_file, "w", buffering=8192) as raw_f:
        for dir_path in scan_dirs:
            print(f" Scanning {dir_path.relative_to(repo_root)}...")

            for file_path in dir_path.rglob("*"):
                if not file_path.is_file():
                    continue

                try:
                    rel_path_str = str(file_path.relative_to(repo_root))

                    # Read file with larger buffer for better performance
                    with open(
                        file_path,
                        "r",
                        encoding="utf-8",
                        errors="ignore",
                        buffering=8192,
                    ) as f:
                        for line in f:
                            # Quick check before regex
                            if "#include" not in line or "boost" in line:
                                continue

                            if INCLUDE_PATTERN.match(line):
                                line_stripped = line.strip()
                                entry = f"{rel_path_str}:{line_stripped}\n"
                                print(entry, end="")
                                raw_f.write(entry)
                                raw_includes.append((rel_path_str, line_stripped))
                except Exception as e:
                    print(f"Error reading {file_path}: {e}", file=sys.stderr)

    # Build levelization paths and count directly (no need to sort first)
    print("Build levelization paths")
    path_counts: Dict[Tuple[str, str], int] = defaultdict(int)

    for file_path, include_line in raw_includes:
        level = get_level(file_path)
        include_level = extract_include_level(include_line)

        if include_level and level != include_level:
            path_counts[(level, include_level)] += 1

    # Sort and deduplicate paths (using dictionary order like bash 'sort -d')
    print("Sort and deduplicate paths")

    paths_file = results_dir / "paths.txt"
    with open(paths_file, "w") as f:
        # Sort using dictionary order: only alphanumeric and spaces matter
        sorted_items = sorted(
            path_counts.items(),
            key=lambda x: (dictionary_sort_key(x[0][0]), dictionary_sort_key(x[0][1])),
        )
        for (level, include_level), count in sorted_items:
            line = f"{count:7} {level} {include_level}\n"
            print(line.rstrip())
            f.write(line)

    # Split into flat-file database
    print("Split into flat-file database")
    includes_dir = results_dir / "includes"
    included_by_dir = results_dir / "included_by"
    includes_dir.mkdir()
    included_by_dir.mkdir()

    # Batch writes by grouping data first to avoid repeated file opens
    includes_data: Dict[str, List[Tuple[str, int]]] = defaultdict(list)
    included_by_data: Dict[str, List[Tuple[str, int]]] = defaultdict(list)

    # Process in sorted order to match bash script behavior (dictionary order)
    sorted_items = sorted(
        path_counts.items(),
        key=lambda x: (dictionary_sort_key(x[0][0]), dictionary_sort_key(x[0][1])),
    )
    for (level, include_level), count in sorted_items:
        includes_data[level].append((include_level, count))
        included_by_data[include_level].append((level, count))

    # Write all includes files in sorted order (dictionary order)
    for level in sorted(includes_data.keys(), key=dictionary_sort_key):
        entries = includes_data[level]
        with open(includes_dir / level, "w") as f:
            for include_level, count in entries:
                line = f"{include_level} {count}\n"
                print(line.rstrip())
                f.write(line)

    # Write all included_by files in sorted order (dictionary order)
    for include_level in sorted(included_by_data.keys(), key=dictionary_sort_key):
        entries = included_by_data[include_level]
        with open(included_by_dir / include_level, "w") as f:
            for level, count in entries:
                line = f"{level} {count}\n"
                print(line.rstrip())
                f.write(line)

    # Search for loops
    print("Search for loops")
    loops_file = results_dir / "loops.txt"
    ordering_file = results_dir / "ordering.txt"

    loops_found: Set[Tuple[str, str]] = set()

    # Pre-load all include files into memory to avoid repeated I/O
    # This is the biggest optimization - we were reading files repeatedly in nested loops
    # Use list of tuples to preserve file order
    includes_cache: Dict[str, List[Tuple[str, int]]] = {}
    includes_lookup: Dict[str, Dict[str, int]] = {}  # For fast lookup

    # Note: bash script uses 'for source in *' which uses standard glob sorting,
    # NOT dictionary order. So we use standard sorted() here, not dictionary_sort_key.
    for include_file in sorted(includes_dir.iterdir(), key=lambda p: p.name):
        if not include_file.is_file():
            continue

        includes_cache[include_file.name] = []
        includes_lookup[include_file.name] = {}
        with open(include_file, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 2:
                    include_name = parts[0]
                    include_count = int(parts[1])
                    includes_cache[include_file.name].append(
                        (include_name, include_count)
                    )
                    includes_lookup[include_file.name][include_name] = include_count

    with open(loops_file, "w", buffering=8192) as loops_f, open(
        ordering_file, "w", buffering=8192
    ) as ordering_f:

        # Use standard sorting to match bash glob expansion 'for source in *'
        for source in sorted(includes_cache.keys()):
            source_includes = includes_cache[source]

            for include, include_freq in source_includes:
                # Check if include file exists and references source
                if include not in includes_lookup:
                    continue

                source_freq = includes_lookup[include].get(source)

                if source_freq is not None:
                    # Found a loop
                    loop_key = tuple(sorted([source, include]))
                    if loop_key in loops_found:
                        continue
                    loops_found.add(loop_key)

                    loops_f.write(f"Loop: {source} {include}\n")

                    # If the counts are close, indicate that the two modules are
                    # on the same level, though they shouldn't be
                    diff = include_freq - source_freq
                    if diff > 3:
                        loops_f.write(f" {source} > {include}\n\n")
                    elif diff < -3:
                        loops_f.write(f" {include} > {source}\n\n")
                    elif source_freq == include_freq:
                        loops_f.write(f" {include} == {source}\n\n")
                    else:
                        loops_f.write(f" {include} ~= {source}\n\n")
                else:
                    ordering_f.write(f"{source} > {include}\n")

    # Print results
    print("\nOrdering:")
    with open(ordering_file, "r") as f:
        print(f.read(), end="")

    print("\nLoops:")
    with open(loops_file, "r") as f:
        print(f.read(), end="")


if __name__ == "__main__":
    main()
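
The path-normalization helpers above can be sanity-checked against the examples in their own docstrings. A minimal sketch, assuming generate.py is importable as a module named `generate` from the same directory (not part of the commit):

```python
# Sanity check of the helpers above; importing generate.py is safe
# because main() is guarded by __name__ == "__main__".
import generate

assert generate.get_level("src/xrpld/app/main.cpp") == "xrpld.app"
assert generate.get_level("include/xrpl/basics/base_uint.h") == "xrpl.basics"
assert (
    generate.extract_include_level("#include <xrpl/basics/base_uint.h>")
    == "xrpl.basics"
)
# Dictionary order ignores punctuation, so "xrpl.basics" keys as "xrplbasics"
assert generate.dictionary_sort_key("xrpl.basics") == "xrplbasics"
```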
.github/scripts/levelization/generate.sh vendored (130 lines deleted)

@@ -1,130 +0,0 @@
#!/bin/bash

# Usage: generate.sh
# This script takes no parameters, reads no environment variables,
# and can be run from any directory, as long as it is in the expected
# location in the repo.

pushd $( dirname $0 )

if [ -v PS1 ]
then
    # if the shell is interactive, clean up any flotsam before analyzing
    git clean -ix
fi

# Ensure all sorting is ASCII-order consistently across platforms.
export LANG=C

rm -rfv results
mkdir results
includes="$( pwd )/results/rawincludes.txt"
pushd ../../..
echo Raw includes:
grep -r '^[ ]*#include.*/.*\.h' include src | \
    grep -v boost | tee ${includes}
popd
pushd results

oldifs=${IFS}
IFS=:
mkdir includes
mkdir included_by
echo Build levelization paths
exec 3< ${includes} # open rawincludes.txt for input
while read -r -u 3 file include
do
    level=$( echo ${file} | cut -d/ -f 2,3 )
    # If the "level" indicates a file, cut off the filename
    if [[ "${level##*.}" != "${level}" ]]
    then
        # Use the "toplevel" label as a workaround for `sort`
        # inconsistencies between different utility versions
        level="$( dirname ${level} )/toplevel"
    fi
    level=$( echo ${level} | tr '/' '.' )

    includelevel=$( echo ${include} | sed 's/.*["<]//; s/[">].*//' | \
        cut -d/ -f 1,2 )
    if [[ "${includelevel##*.}" != "${includelevel}" ]]
    then
        # Use the "toplevel" label as a workaround for `sort`
        # inconsistencies between different utility versions
        includelevel="$( dirname ${includelevel} )/toplevel"
    fi
    includelevel=$( echo ${includelevel} | tr '/' '.' )

    if [[ "$level" != "$includelevel" ]]
    then
        echo $level $includelevel | tee -a paths.txt
    fi
done
echo Sort and deduplicate paths
sort -ds paths.txt | uniq -c | tee sortedpaths.txt
mv sortedpaths.txt paths.txt
exec 3>&- #close fd 3
IFS=${oldifs}
unset oldifs

echo Split into flat-file database
exec 4<paths.txt # open paths.txt for input
while read -r -u 4 count level include
do
    echo ${include} ${count} | tee -a includes/${level}
    echo ${level} ${count} | tee -a included_by/${include}
done
exec 4>&- #close fd 4

loops="$( pwd )/loops.txt"
ordering="$( pwd )/ordering.txt"
pushd includes
echo Search for loops
# Redirect stdout to a file
exec 4>&1
exec 1>"${loops}"
for source in *
do
    if [[ -f "$source" ]]
    then
        exec 5<"${source}" # open for input
        while read -r -u 5 include includefreq
        do
            if [[ -f $include ]]
            then
                if grep -q -w $source $include
                then
                    if grep -q -w "Loop: $include $source" "${loops}"
                    then
                        continue
                    fi
                    sourcefreq=$( grep -w $source $include | cut -d\  -f2 )
                    echo "Loop: $source $include"
                    # If the counts are close, indicate that the two modules are
                    # on the same level, though they shouldn't be
                    if [[ $(( $includefreq - $sourcefreq )) -gt 3 ]]
                    then
                        echo -e " $source > $include\n"
                    elif [[ $(( $sourcefreq - $includefreq )) -gt 3 ]]
                    then
                        echo -e " $include > $source\n"
                    elif [[ $sourcefreq -eq $includefreq ]]
                    then
                        echo -e " $include == $source\n"
                    else
                        echo -e " $include ~= $source\n"
                    fi
                else
                    echo "$source > $include" >> "${ordering}"
                fi
            fi
        done
        exec 5>&- #close fd 5
    fi
done
exec 1>&4 #close fd 1
exec 4>&- #close fd 4
cat "${ordering}"
cat "${loops}"
popd
popd
popd
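
Both the new Python script and the deleted bash script classify a loop with the same rule: a fixed threshold of 3 on the difference between the two modules' mutual include counts. A minimal sketch of that rule in isolation, with hypothetical module names and counts (only the thresholds come from the scripts above):

```python
# The loop-classification rule shared by generate.py and generate.sh.
def classify(source: str, include: str, include_freq: int, source_freq: int) -> str:
    diff = include_freq - source_freq
    if diff > 3:                          # source clearly above include
        return f"{source} > {include}"
    elif diff < -3:                       # include clearly above source
        return f"{include} > {source}"
    elif source_freq == include_freq:     # exactly tied
        return f"{include} == {source}"
    else:                                 # within 3: effectively same level
        return f"{include} ~= {source}"

print(classify("xrpld.app", "xrpld.core", 10, 2))  # xrpld.app > xrpld.core
print(classify("xrpld.app", "xrpld.core", 5, 4))   # xrpld.core ~= xrpld.app
```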
@@ -20,7 +20,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
       - name: Check levelization
-        run: .github/scripts/levelization/generate.sh
+        run: python .github/scripts/levelization/generate.py
       - name: Check for differences
         env:
           MESSAGE: |
@@ -32,7 +32,7 @@ jobs:
             removed from loops.txt, it's probably an improvement, while if
             something was added, it's probably a regression.
 
-            Run '.github/scripts/levelization/generate.sh' in your repo, commit
+            Run '.github/scripts/levelization/generate.py' in your repo, commit
             and push the changes. See .github/scripts/levelization/README.md for
             more info.
         run: |
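
The "Check for differences" step itself is truncated in this hunk; conceptually it fails the job when the regenerated results no longer match the committed ones. A hypothetical sketch of such a check, not the workflow's actual command:

```python
# Hypothetical "check for differences" step: fail if the committed
# levelization results no longer match what generate.py produced.
import subprocess
import sys

diff = subprocess.run(
    ["git", "diff", "--exit-code", ".github/scripts/levelization/results"],
    capture_output=True,
    text=True,
)
if diff.returncode != 0:  # git diff --exit-code returns 1 on differences
    print(diff.stdout)
    print("Levelization results changed; commit the updated files.")
    sys.exit(1)
```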
.gitignore vendored (3 changes)

@@ -71,3 +71,6 @@ DerivedData
 /.augment
 /.claude
 /CLAUDE.md
+
+# Python
+__pycache__