Revert "chore: use improved levelization script with threading and argparse"

This reverts commit 5c1d7d9ae9.
chore: use improved levelization script with threading and argparse
2026-03-20 19:42:22 +00:00 · 2026-03-13 12:33:19 +07:00 · 2026-03-13 12:13:39 +07:00 · 2026-03-13 12:08:27 +07:00
5 changed files with 291 additions and 135 deletions
--- a/.github/workflows/levelization.yml
+++ b/.github/workflows/levelization.yml
@@ -10,7 +10,7 @@ jobs:
    steps:
    - uses: actions/checkout@v3
    - name: Check levelization
-      run: Builds/levelization/levelization.sh
+      run: python Builds/levelization/levelization.py
    - name: Check for differences
      id: assert
      run: |
@@ -40,7 +40,7 @@ jobs:
          To fix it, you can do one of two things:
          1. Download and apply the patch generated as an artifact of this
             job to your repo, commit, and push.
-          2. Run './Builds/levelization/levelization.sh' in your repo,
+          2. Run 'python Builds/levelization/levelization.py' in your repo,
             commit, and push.
          See Builds/levelization/README.md for more info.
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,9 @@ Builds/levelization/results/paths.txt
 Builds/levelization/results/includes/
 Builds/levelization/results/includedby/
 # Python
 __pycache__
 # Ignore tmp directory.
 tmp
--- a/Builds/levelization/README.md
+++ b/Builds/levelization/README.md
@@ -50,7 +50,7 @@ that `test` code should *never* be included in `ripple` code.)
 ## Validation
-The [levelization.sh](levelization.sh) script takes no parameters,
+The [levelization.py](levelization.py) script takes no parameters,
 reads no environment variables, and can be run from any directory,
 as long as it is in the expected location in the rippled repo.
 It can be run at any time from within a checked out repo, and will
@@ -84,7 +84,7 @@ It generates many files of [results](results):
  GitHub Actions workflow to test that levelization loops haven't
  changed.  Unfortunately, if changes are detected, it can't tell if
  they are improvements or not, so if you have resolved any issues or
-  done anything else to improve levelization, run `levelization.sh`,
+  done anything else to improve levelization, run `levelization.py`,
  and commit the updated results.
 The  `loops.txt` and `ordering.txt` files relate the modules
@@ -108,7 +108,7 @@ The committed files hide the detailed values intentionally, to
 prevent false alarms and merging issues, and because it's easy to
 get those details locally.
-1. Run `levelization.sh`
+1. Run `levelization.py`
 2. Grep the modules in `paths.txt`.
   * For example, if a cycle is found `A ~= B`, simply `grep -w
     A Builds/levelization/results/paths.txt | grep -w B`
--- a/Builds/levelization/levelization.py
+++ b/Builds/levelization/levelization.py
@@ -0,0 +1,283 @@
 #!/usr/bin/env python3
 """
 Usage: levelization.py
 This script takes no parameters, and can be called from any directory in the file system.
 """
 import os
 import re
 import sys
 from collections import defaultdict
 from pathlib import Path
 # Compile regex patterns once at module level
 # Accepts a line that begins (after optional whitespace) with "#include" and
 # later contains a "/" followed somewhere by ".h". The pattern is not anchored
 # at the end, so any text may follow the ".h".
 INCLUDE_PATTERN = re.compile(r"^\s*#include.*/.*\.h")
 # Captures, as group 1, a run of characters containing neither '>' nor '"'
 # that is enclosed between a '<' or '"' and a '>' or '"'.
 INCLUDE_PATH_PATTERN = re.compile(r'[<"]([^>"]+)[>"]')
 def dictionary_sort_key(s):
    """
    Build a comparison key that approximates 'sort -d' (dictionary order).

    Characters that are neither alphanumeric nor whitespace are dropped; the
    surviving characters keep their original relative order.
    """
    kept = filter(lambda ch: ch.isspace() or ch.isalnum(), s)
    return "".join(kept)
 def get_level(file_path):
    """
    Derive the dotted level name of a source file from its "/"-separated path.

    The level is made of the second and third path components
    (like bash: cut -d/ -f 2,3). When the last of those components looks
    like a file name (it contains a "."), it is swapped for "toplevel".

    Examples:
        src/ripple/app/main.cpp -> ripple.app
        src/test/app/Import_test.cpp -> test.app
    """
    components = file_path.split("/")
    if len(components) == 1:
        level = file_path
    elif len(components) == 2:
        level = components[1] + "/toplevel"
    else:
        level = "/".join(components[1:3])

    *leading, final = level.split("/")
    if "." in final:
        # The "toplevel" label is a workaround for `sort` inconsistencies
        # between different utility versions.
        parent = "/".join(leading) if leading else level
        level = parent + "/toplevel"

    return ".".join(level.split("/"))
 def extract_include_level(include_line):
    """
    Derive the dotted level name targeted by an #include directive.

    The path found by INCLUDE_PATH_PATTERN is cut down to its first two
    components (like bash: cut -d/ -f 1,2). When the last of those looks like
    a file name (it contains a "."), it is swapped for "toplevel".

    Returns None when the line holds no delimited include path.

    Examples:
        #include <ripple/basics/base_uint.h> -> ripple.basics
        #include "ripple/app/main/Application.h" -> ripple.app
    """
    found = INCLUDE_PATH_PATTERN.search(include_line)
    if found is None:
        return None

    # Joining at most two components also covers the single-component case,
    # where the result is simply the include path itself.
    include_level = "/".join(found.group(1).split("/")[:2])

    *leading, final = include_level.split("/")
    if "." in final:
        parent = "/".join(leading) if leading else include_level
        include_level = parent + "/toplevel"

    return ".".join(include_level.split("/"))
 def find_repository_directories(start_path, depth_limit=10):
    """
    Find the repository root by looking for src or include folders.

    Starting at the resolved start_path and walking up through its parents,
    the first directory that contains a "src" and/or "include" *directory*
    is taken as the repository root. Regular files that merely happen to be
    named "src" or "include" are ignored, so they can neither be scanned nor
    stop the search early.

    Args:
        start_path: pathlib.Path at which the upward search begins.
        depth_limit: maximum number of directories to inspect, counting
            start_path itself.

    Returns:
        A tuple (root, dirs) where dirs lists the existing source folders of
        root, "src" first, then "include".

    Raises:
        RuntimeError: if no such directory is found within depth_limit levels
            or before the filesystem root is reached.
    """
    current = start_path.resolve()
    # `parents` ends at the filesystem root, which gives the same stopping
    # point as walking `.parent` until it no longer changes.
    candidates = [current, *current.parents][: max(depth_limit, 0)]
    for candidate in candidates:
        dirs = [
            folder
            for folder in (candidate / "src", candidate / "include")
            if folder.is_dir()
        ]
        if dirs:
            return candidate, dirs
    raise RuntimeError(
        "Could not find repository root. "
        "Expected to find a directory containing 'src' and/or 'include' folders."
    )
 def _read_include_lines(file_path):
    """
    Return the stripped include lines of one file that should be tracked.

    A line is kept when it contains "#include", does not contain "boost",
    and matches INCLUDE_PATTERN. Undecodable bytes are ignored instead of
    raising.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        return [
            line.strip()
            for line in f
            if "#include" in line
            and "boost" not in line
            and INCLUDE_PATTERN.match(line)
        ]


 def _collect_raw_includes(repo_root, scan_dirs, rawincludes_file):
    """
    Scan every file below scan_dirs and record its tracked include lines.

    Each hit is echoed to stdout, written to rawincludes_file as
    "<relative path>:<include line>", and returned as a
    (relative path, include line) tuple. Files that cannot be read are
    reported on stderr and skipped.
    """
    raw_includes = []
    with open(rawincludes_file, "w", encoding="utf-8", buffering=8192) as raw_f:
        for dir_path in scan_dirs:
            # Sorted so the output does not depend on the traversal order of
            # rglob, for which no ordering is documented.
            for file_path in sorted(dir_path.rglob("*")):
                if not file_path.is_file():
                    continue
                # get_level() splits on "/", so always use forward slashes,
                # whatever separator the operating system uses.
                rel_path_str = file_path.relative_to(repo_root).as_posix()
                try:
                    include_lines = _read_include_lines(file_path)
                except OSError as e:
                    print(f"Error reading {file_path}: {e}", file=sys.stderr)
                    continue
                for line_stripped in include_lines:
                    entry = f"{rel_path_str}:{line_stripped}\n"
                    print(entry, end="")
                    raw_f.write(entry)
                    raw_includes.append((rel_path_str, line_stripped))
    return raw_includes


 def _count_level_paths(raw_includes):
    """
    Count how often each level includes each other level.

    Returns a dict mapping (level, include_level) to the number of include
    lines; includes that stay within one level are not counted.
    """
    path_counts = defaultdict(int)
    for file_path, include_line in raw_includes:
        include_level = extract_include_level(include_line)
        if not include_level:
            continue
        level = get_level(file_path)
        if level != include_level:
            path_counts[(level, include_level)] += 1
    return path_counts


 def _write_flat_database(directory, grouped):
    """
    Write one file per key of grouped into directory.

    Every file holds one "<name> <count>" line per entry of its key; each
    line is also echoed to stdout.
    """
    for key in sorted(grouped, key=dictionary_sort_key):
        with open(directory / key, "w", encoding="utf-8") as f:
            for name, count in grouped[key]:
                line = f"{name} {count}\n"
                print(line.rstrip())
                f.write(line)


 def _write_loops_and_ordering(includes_data, loops_file, ordering_file):
    """
    Classify every dependency between two known levels.

    A pair of levels that include each other is written once to loops_file,
    together with a hint about which direction dominates; a one-way
    dependency is written to ordering_file as "<source> > <include>".
    Levels that include nothing themselves are skipped as targets.
    """
    lookup = {level: dict(entries) for level, entries in includes_data.items()}
    loops_found = set()
    with open(loops_file, "w", encoding="utf-8") as loops_f, open(
        ordering_file, "w", encoding="utf-8"
    ) as ordering_f:
        for source in sorted(lookup):
            for include, include_freq in includes_data[source]:
                if include not in lookup:
                    continue
                source_freq = lookup[include].get(source)
                if source_freq is None:
                    ordering_f.write(f"{source} > {include}\n")
                    continue
                # Report each mutual dependency once, whichever side is
                # visited first.
                loop_key = tuple(sorted([source, include]))
                if loop_key in loops_found:
                    continue
                loops_found.add(loop_key)
                loops_f.write(f"Loop: {source} {include}\n")
                # If the counts are close, the two levels are reported as
                # being on the same level.
                diff = include_freq - source_freq
                if diff > 3:
                    loops_f.write(f"  {source} > {include}\n\n")
                elif diff < -3:
                    loops_f.write(f"  {include} > {source}\n\n")
                elif source_freq == include_freq:
                    loops_f.write(f"  {include} == {source}\n\n")
                else:
                    loops_f.write(f"  {include} ~= {source}\n\n")


 def main():
    """
    Regenerate the levelization results next to this script.

    The "results" directory is wiped and rebuilt with rawincludes.txt,
    paths.txt, the includes/ and includedby/ flat-file databases, loops.txt
    and ordering.txt. Progress and results are echoed to stdout. Exits with
    status 1 when the repository root cannot be located.
    """
    script_dir = Path(__file__).parent.resolve()
    os.chdir(script_dir)

    # Clean up and create results directory.
    results_dir = script_dir / "results"
    if results_dir.exists():
        import shutil

        shutil.rmtree(results_dir)
    results_dir.mkdir()

    # Find the repository root.
    try:
        repo_root, scan_dirs = find_repository_directories(script_dir)
    except RuntimeError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Found repository root: {repo_root}")
    for scan_dir in scan_dirs:
        print(f"  Scanning: {scan_dir.relative_to(repo_root)}")

    # Find all #include directives.
    print("\nScanning for raw includes...")
    raw_includes = _collect_raw_includes(
        repo_root, scan_dirs, results_dir / "rawincludes.txt"
    )

    # Build levelization paths and count directly.
    print("Build levelization paths")
    path_counts = _count_level_paths(raw_includes)

    # Sort and deduplicate paths.
    print("Sort and deduplicate paths")
    sorted_items = sorted(
        path_counts.items(),
        key=lambda x: (dictionary_sort_key(x[0][0]), dictionary_sort_key(x[0][1])),
    )
    with open(results_dir / "paths.txt", "w", encoding="utf-8") as f:
        for (level, include_level), count in sorted_items:
            line = f"{count:7} {level} {include_level}\n"
            print(line.rstrip())
            f.write(line)

    # Split into flat-file database.
    print("Split into flat-file database")
    includes_dir = results_dir / "includes"
    includedby_dir = results_dir / "includedby"
    includes_dir.mkdir()
    includedby_dir.mkdir()
    includes_data = defaultdict(list)
    includedby_data = defaultdict(list)
    for (level, include_level), count in sorted_items:
        includes_data[level].append((include_level, count))
        includedby_data[include_level].append((level, count))
    _write_flat_database(includes_dir, includes_data)
    _write_flat_database(includedby_dir, includedby_data)

    # Search for loops, using the in-memory data instead of parsing the
    # files that were just written.
    print("Search for loops")
    loops_file = results_dir / "loops.txt"
    ordering_file = results_dir / "ordering.txt"
    _write_loops_and_ordering(includes_data, loops_file, ordering_file)

    # Print results.
    print("\nOrdering:")
    with open(ordering_file, "r", encoding="utf-8") as f:
        print(f.read(), end="")
    print("\nLoops:")
    with open(loops_file, "r", encoding="utf-8") as f:
        print(f.read(), end="")
 # Run the analysis only when this file is executed as a script, not when it
 # is imported as a module.
 if __name__ == "__main__":
    main()
--- a/Builds/levelization/levelization.sh
+++ b/Builds/levelization/levelization.sh
@@ -1,130 +0,0 @@
 #!/bin/bash
 # Usage: levelization.sh
 # This script takes no parameters, reads no environment variables,
 # and can be run from any directory, as long as it is in the expected
 # location in the repo.
 #
 # Overview of the stages below:
 #   1. grep all include lines under include/ and src/ into
 #      results/rawincludes.txt
 #   2. reduce every (file, include) pair to a (level, includelevel) pair
 #      and count the distinct pairs in results/paths.txt
 #   3. split paths.txt into one file per level under results/includes/
 #      and results/includedby/
 #   4. compare the files in results/includes/ pairwise to produce
 #      results/loops.txt and results/ordering.txt
 # Work from the directory that contains this script.
 pushd $( dirname $0 )
 if [ -v PS1 ]
 then
  # if the shell is interactive, clean up any flotsam before analyzing
  git clean -ix
 fi
 # Ensure all sorting is ASCII-order consistently across platforms.
 export LANG=C
 # Start from an empty results directory.
 rm -rfv results
 mkdir results
 includes="$( pwd )/results/rawincludes.txt"
 # Two levels up from the script directory, where include/ and src/ are
 # expected.
 pushd ../..
 echo Raw includes:
 # Keep include lines whose path contains a "/" and ".h", drop every line
 # mentioning boost, and both display and save the result.
 grep -r '^[ ]*#include.*/.*\.h' include src | \
    grep -v boost | tee ${includes}
 popd
 pushd results
 # grep -r prefixes each hit with "<file>:", so splitting on ":" lets `read`
 # separate the file name from the include line.
 oldifs=${IFS}
 IFS=:
 mkdir includes
 mkdir includedby
 echo Build levelization paths
 exec 3< ${includes} # open rawincludes.txt for input
 while read -r -u 3 file include
 do
    # The level of a file is made of its 2nd and 3rd path components.
    level=$( echo ${file} | cut -d/ -f 2,3 )
    # If the "level" indicates a file, cut off the filename
    if [[ "${level##*.}" != "${level}" ]]
    then
        # Use the "toplevel" label as a workaround for `sort`
        # inconsistencies between different utility versions
        level="$( dirname ${level} )/toplevel"
    fi
    level=$( echo ${level} | tr '/' '.' )
    # Strip everything up to the last '"' or '<' and everything from the
    # first remaining '"' or '>', then keep the first two path components.
    includelevel=$( echo ${include} | sed 's/.*["<]//; s/[">].*//' | \
        cut -d/ -f 1,2 )
    if [[ "${includelevel##*.}" != "${includelevel}" ]]
    then
        # Use the "toplevel" label as a workaround for `sort`
        # inconsistencies between different utility versions
        includelevel="$( dirname ${includelevel} )/toplevel"
    fi
    includelevel=$( echo ${includelevel} | tr '/' '.' )
    # Includes that stay within one level are not recorded.
    if [[ "$level" != "$includelevel" ]]
    then
        echo $level $includelevel | tee -a paths.txt
    fi
 done
 echo Sort and dedup paths
 # Dictionary-order stable sort, then collapse adjacent identical lines into
 # "<count> <level> <includelevel>".
 sort -ds paths.txt | uniq -c | tee sortedpaths.txt
 mv sortedpaths.txt paths.txt
 exec 3>&- #close fd 3
 IFS=${oldifs}
 unset oldifs
 echo Split into flat-file database
 exec 4<paths.txt # open paths.txt for input
 while read -r -u 4 count level include
 do
    # includes/<level> lists what <level> includes;
    # includedby/<include> lists which levels include <include>.
    echo ${include} ${count} | tee -a includes/${level}
    echo ${level} ${count} | tee -a includedby/${include}
 done
 exec 4>&- #close fd 4
 loops="$( pwd )/loops.txt"
 ordering="$( pwd )/ordering.txt"
 pushd includes
 echo Search for loops
 # Redirect stdout to a file
 # (fd 4 keeps a copy of the original stdout so it can be restored below)
 exec 4>&1
 exec 1>"${loops}"
 for source in *
 do
  if [[ -f "$source" ]]
  then
    exec 5<"${source}" # open for input
    while read -r -u 5 include includefreq
    do
      # Only levels that have their own file in includes/ are compared.
      if [[ -f $include ]]
      then
        # A loop exists when the included level's file mentions $source too.
        if grep -q -w $source $include
        then
          # Skip the pair if it was already reported from the other side.
          if grep -q -w "Loop: $include $source" "${loops}"
          then
            continue
          fi
          sourcefreq=$( grep -w $source $include | cut -d\  -f2 )
          echo "Loop: $source $include"
          # If the counts are close, indicate that the two modules are
          # on the same level, though they shouldn't be
          if [[ $(( $includefreq - $sourcefreq )) -gt 3 ]]
          then
              echo -e "  $source > $include\n"
          elif [[ $(( $sourcefreq - $includefreq )) -gt 3 ]]
          then
              echo -e "  $include > $source\n"
          elif [[ $sourcefreq -eq $includefreq ]]
          then
              echo -e "  $include == $source\n"
          else
              echo -e "  $include ~= $source\n"
          fi
        else
          # One-way dependency: $source sits above $include.
          echo "$source > $include" >> "${ordering}"
        fi
      fi
    done
    exec 5>&- #close fd 5
  fi
 done
 # Restore the original stdout saved in fd 4.
 exec 1>&4 #close fd 1
 exec 4>&- #close fd 4
 # Show both result files on the restored stdout.
 cat "${ordering}"
 cat "${loops}"
 popd
 popd
 popd
Author	SHA1	Message	Date
Nicholas Dudfield	823d41775a	Revert "chore: use improved levelization script with threading and argparse" This reverts commit `5c1d7d9ae9`.	2026-03-13 12:33:19 +07:00
Nicholas Dudfield	5c1d7d9ae9	chore: use improved levelization script with threading and argparse	2026-03-13 12:13:39 +07:00
Nicholas Dudfield	70d4d3ba81	chore: replace levelization shell script with python Backport of XRPLF/rippled#6325. The python version runs ~80x faster.	2026-03-13 12:08:27 +07:00