Test blake3

Add BLAKE3 and skip some unit tests
Fix a PR comment
2025-11-19 02:25:52 +00:00 · 2025-06-26 13:27:55 +01:00 · 2025-06-25 15:01:44 +01:00 · 2025-06-25 11:30:02 +01:00 · 2025-06-25 11:28:04 +01:00 · 2025-06-25 10:38:28 +01:00
513 changed files with 52969 additions and 14118 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,5 +1,5 @@
 ---
-Language: Cpp
+Language:        Cpp
 AccessModifierOffset: -4
 AlignAfterOpenBracket: AlwaysBreak
 AlignConsecutiveAssignments: false
@@ -19,52 +19,52 @@ AlwaysBreakTemplateDeclarations: true
 BinPackArguments: false
 BinPackParameters: false
 BraceWrapping:
-  AfterClass: true
+  AfterClass:      true
  AfterControlStatement: true
-  AfterEnum: false
-  AfterFunction: true
-  AfterNamespace: false
+  AfterEnum:       false
+  AfterFunction:   true
+  AfterNamespace:  false
  AfterObjCDeclaration: true
-  AfterStruct: true
-  AfterUnion: true
-  BeforeCatch: true
-  BeforeElse: true
-  IndentBraces: false
+  AfterStruct:     true
+  AfterUnion:      true
+  BeforeCatch:     true
+  BeforeElse:      true
+  IndentBraces:    false
 BreakBeforeBinaryOperators: false
 BreakBeforeBraces: Custom
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: true
-ColumnLimit: 80
-CommentPragmas: "^ IWYU pragma:"
+ColumnLimit:     80
+CommentPragmas:  '^ IWYU pragma:'
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 4
 ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
 DerivePointerAlignment: false
-DisableFormat: false
+DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [Q_FOREACH, BOOST_FOREACH]
-IncludeBlocks: Regroup
+ForEachMacros:   [ Q_FOREACH,  BOOST_FOREACH ]
+IncludeBlocks:   Regroup
 IncludeCategories:
-  - Regex: "^<(test)/"
-    Priority: 0
-  - Regex: "^<(xrpld)/"
-    Priority: 1
-  - Regex: "^<(xrpl)/"
-    Priority: 2
-  - Regex: "^<(boost)/"
-    Priority: 3
-  - Regex: "^.*/"
-    Priority: 4
-  - Regex: '^.*\.h'
-    Priority: 5
-  - Regex: ".*"
-    Priority: 6
-IncludeIsMainRegex: "$"
+  - Regex:           '^<(test)/'
+    Priority:        0
+  - Regex:           '^<(xrpld)/'
+    Priority:        1
+  - Regex:           '^<(xrpl)/'
+    Priority:        2
+  - Regex:           '^<(boost)/'
+    Priority:        3
+  - Regex:           '^.*/'
+    Priority:        4
+  - Regex:           '^.*\.h'
+    Priority:        5
+  - Regex:           '.*'
+    Priority:        6
+IncludeIsMainRegex: '$'
 IndentCaseLabels: true
 IndentFunctionDeclarationAfterType: false
 IndentRequiresClause: true
-IndentWidth: 4
+IndentWidth:     4
 IndentWrappedFunctionNames: false
 KeepEmptyLinesAtTheStartOfBlocks: false
 MaxEmptyLinesToKeep: 1
@@ -78,25 +78,20 @@ PenaltyBreakString: 1000
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Left
-ReflowComments: true
+ReflowComments:  true
 RequiresClausePosition: OwnLine
-SortIncludes: true
+SortIncludes:    true
 SpaceAfterCStyleCast: false
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
+SpacesInAngles:  false
 SpacesInContainerLiterals: true
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 8
-UseTab: Never
-QualifierAlignment: Right
---
-Language: JavaScript
---
-Language: Json
-IndentWidth: 2
+Standard:        Cpp11
+TabWidth:        8
+UseTab:          Never
+QualifierAlignment: Right
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -27,7 +27,7 @@ github_checks:
 parsers:
  cobertura:
    partials_as_hits: true
-    handle_missing_conditions: true
+    handle_missing_conditions : true

 slack_app: false

--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -11,4 +11,3 @@ b9d007813378ad0ff45660dc07285b823c7e9855
 fe9a5365b8a52d4acc42eb27369247e6f238a4f9
 9a93577314e6a8d4b4a8368cc9d2b15a5d8303e8
 552377c76f55b403a1c876df873a23d780fcc81c
-97f0747e103f13e26e45b731731059b32f7679ac
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -2,35 +2,30 @@
 name: Bug Report
 about: Create a report to help us improve rippled
 title: "[Title with short description] (Version: [rippled version])"
-labels: ""
-assignees: ""
---
+labels: ''
+assignees: ''

+---
 <!-- Please search existing issues to avoid creating duplicates.-->

 ## Issue Description
-
 <!--Provide a summary for your issue/bug.-->

 ## Steps to Reproduce
-
 <!--List in detail the exact steps to reproduce the unexpected behavior of the software.-->

 ## Expected Result
-
 <!--Explain in detail what behavior you expected to happen.-->

 ## Actual Result
-
 <!--Explain in detail what behavior actually happened.-->

 ## Environment
-
 <!--Please describe your environment setup (such as Ubuntu 18.04 with Boost 1.70).-->
 <!-- If you are using a formal release, please use the version returned by './rippled --version' as the version number-->
 <!-- If you are working off of develop, please add the git hash via 'git rev-parse HEAD'-->

 ## Supporting Files
-
 <!--If you have supporting files such as a log, feel free to post a link here using Github Gist.-->
 <!--Consider adding configuration files with private information removed via Github Gist. -->
+
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -3,23 +3,19 @@ name: Feature Request
 about: Suggest a new feature for the rippled project
 title: "[Title with short description] (Version: [rippled version])"
 labels: Feature Request
-assignees: ""
---
+assignees: ''

+---
 <!-- Please search existing issues to avoid creating duplicates.-->

 ## Summary
-
 <!-- Provide a summary to the feature request-->

 ## Motivation
-
 <!-- Why do we need this feature?-->

 ## Solution
-
 <!-- What is the solution?-->

 ## Paths Not Taken
-
 <!-- What other alternatives have been considered?-->
--- a/.github/actions/dependencies/action.yml
+++ b/.github/actions/dependencies/action.yml
@@ -2,37 +2,56 @@ name: dependencies
 inputs:
  configuration:
    required: true
-# Implicit inputs are the environment variables `build_dir`, CONAN_REMOTE_URL,
-# CONAN_REMOTE_USERNAME, and CONAN_REMOTE_PASSWORD. The latter two are only
-# used to upload newly built dependencies to the Conan remote.
+# An implicit input is the environment variable `build_dir`.
 runs:
  using: composite
  steps:
-    - name: add Conan remote
-      if: ${{ env.CONAN_REMOTE_URL != '' }}
+    - name: unlock Conan
+      shell: bash
+      run: conan remove --locks
+    - name: export custom recipes
+      shell: bash
+      run: |
+        conan config set general.revisions_enabled=1
+        conan export external/snappy snappy/1.1.10@
+        conan export external/rocksdb rocksdb/9.7.3@
+        conan export external/soci soci/4.0.3@
+        conan export external/nudb nudb/2.0.8@
+    - name: add Ripple Conan remote
      shell: bash
      run: |
-        echo "Adding Conan remote 'xrplf' at ${{ env.CONAN_REMOTE_URL }}."
-        conan remote add --index 0 --force xrplf ${{ env.CONAN_REMOTE_URL }}
-        echo "Listing Conan remotes."
        conan remote list
+        conan remote remove ripple || true
+        # Do not quote the URL. An empty string will be accepted (with
+        # a non-fatal warning), but a missing argument will not.
+        conan remote add ripple ${{ env.CONAN_URL }} --insert 0
+    - name: try to authenticate to Ripple Conan remote
+      id: remote
+      shell: bash
+      run: |
+        # `conan user` implicitly uses the environment variables
+        # CONAN_LOGIN_USERNAME_<REMOTE> and CONAN_PASSWORD_<REMOTE>.
+        # https://docs.conan.io/1/reference/commands/misc/user.html#using-environment-variables
+        # https://docs.conan.io/1/reference/env_vars.html#conan-login-username-conan-login-username-remote-name
+        # https://docs.conan.io/1/reference/env_vars.html#conan-password-conan-password-remote-name
+        echo outcome=$(conan user --remote ripple --password >&2 \
+          && echo success || echo failure) | tee ${GITHUB_OUTPUT}
+    - name: list missing binaries
+      id: binaries
+      shell: bash
+      # Print the list of dependencies that would need to be built locally.
+      # A non-empty list means we have "failed" to cache binaries remotely.
+      run: |
+        echo missing=$(conan info . --build missing --settings build_type=${{ inputs.configuration }} --json 2>/dev/null  | grep '^\[') | tee ${GITHUB_OUTPUT}
    - name: install dependencies
      shell: bash
      run: |
-        mkdir -p ${{ env.build_dir }}
-        cd ${{ env.build_dir }}
+        mkdir ${build_dir}
+        cd ${build_dir}
        conan install \
          --output-folder . \
          --build missing \
-          --options:host "&:tests=True" \
-          --options:host "&:xrpld=True" \
-          --settings:all build_type=${{ inputs.configuration }} \
+          --options tests=True \
+          --options xrpld=True \
+          --settings build_type=${{ inputs.configuration }} \
          ..
-    - name: upload dependencies
-      if: ${{ env.CONAN_REMOTE_URL != '' && env.CONAN_REMOTE_USERNAME != '' && env.CONAN_REMOTE_PASSWORD != '' && github.ref_type == 'branch' && github.ref_name == github.event.repository.default_branch }}
-      shell: bash
-      run: |
-        echo "Logging into Conan remote 'xrplf' at ${{ env.CONAN_REMOTE_URL }}."
-        conan remote login xrplf "${{ env.CONAN_REMOTE_USERNAME }}" --password "${{ env.CONAN_REMOTE_PASSWORD }}"
-        echo "Uploading dependencies."
-        conan upload '*' --confirm --check --remote xrplf
--- a/.github/workflows/clang-format.yml
+++ b/.github/workflows/clang-format.yml
@@ -9,25 +9,24 @@ jobs:
  check:
    if: ${{ github.event_name == 'push' || github.event.pull_request.draft != true || contains(github.event.pull_request.labels.*.name, 'DraftRunCI') }}
    runs-on: ubuntu-24.04
-    container: ghcr.io/xrplf/ci/tools-rippled-clang-format
+    env:
+      CLANG_VERSION: 18
    steps:
-      # For jobs running in containers, $GITHUB_WORKSPACE and ${{ github.workspace }} might not be the
-      # same directory. The actions/checkout step is *supposed* to checkout into $GITHUB_WORKSPACE and
-      # then add it to safe.directory (see instructions at https://github.com/actions/checkout)
-      # but that's apparently not happening for some container images. We can't be sure what is actually
-      # happening, so let's pre-emptively add both directories to safe.directory. There's a
-      # Github issue opened in 2022 and not resolved in 2025 https://github.com/actions/runner/issues/2058 ¯\_(ツ)_/¯
-      - run: |
-          git config --global --add safe.directory $GITHUB_WORKSPACE
-          git config --global --add safe.directory ${{ github.workspace }}
      - uses: actions/checkout@v4
-      - name: Format first-party sources
+      - name: Install clang-format
        run: |
-          clang-format --version
-          find include src tests -type f \( -name '*.cpp' -o -name '*.hpp' -o -name '*.h' -o -name '*.ipp' \) -exec clang-format -i {} +
+          codename=$( lsb_release --codename --short )
+          sudo tee /etc/apt/sources.list.d/llvm.list >/dev/null <<EOF
+          deb http://apt.llvm.org/${codename}/ llvm-toolchain-${codename}-${CLANG_VERSION} main
+          deb-src http://apt.llvm.org/${codename}/ llvm-toolchain-${codename}-${CLANG_VERSION} main
+          EOF
+          wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add
+          sudo apt-get update
+          sudo apt-get install clang-format-${CLANG_VERSION}
+      - name: Format first-party sources
+        run: find include src tests -type f \( -name '*.cpp' -o -name '*.hpp' -o -name '*.h' -o -name '*.ipp' \) -exec clang-format-${CLANG_VERSION} -i {} +
      - name: Check for differences
        id: assert
-        shell: bash
        run: |
          set -o pipefail
          git diff --exit-code | tee "clang-format.patch"
@@ -59,6 +58,6 @@ jobs:
               in your repo, commit, and push.
        run: |
          echo "${PREAMBLE}"
-          clang-format --version
+          clang-format-${CLANG_VERSION} --version
          echo "${SUGGESTION}"
          exit 1
--- a/.github/workflows/libxrpl.yml
+++ b/.github/workflows/libxrpl.yml
@@ -1,13 +1,13 @@
 name: Check libXRPL compatibility with Clio
 env:
-  CONAN_REMOTE_URL: https://conan.ripplex.io
-  CONAN_LOGIN_USERNAME_XRPLF: ${{ secrets.CONAN_REMOTE_USERNAME }}
-  CONAN_PASSWORD_XRPLF: ${{ secrets.CONAN_REMOTE_PASSWORD }}
+  CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/dev
+  CONAN_LOGIN_USERNAME_RIPPLE: ${{ secrets.CONAN_USERNAME }}
+  CONAN_PASSWORD_RIPPLE: ${{ secrets.CONAN_TOKEN }}
 on:
  pull_request:
    paths:
-      - "src/libxrpl/protocol/BuildInfo.cpp"
-      - ".github/workflows/libxrpl.yml"
+      - 'src/libxrpl/protocol/BuildInfo.cpp'
+      - '.github/workflows/libxrpl.yml'
    types: [opened, reopened, synchronize, ready_for_review]
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -29,7 +29,7 @@ jobs:
        with:
          ref: ${{ github.event.pull_request.head.sha || github.sha }}
          running-workflow-name: wait-for-check-regexp
-          check-regexp: "(dependencies|test).*linux.*" # Ignore windows and mac tests but make sure linux passes
+          check-regexp: '(dependencies|test).*linux.*' # Ignore windows and mac tests but make sure linux passes
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          wait-interval: 10
      - name: Checkout
@@ -43,20 +43,20 @@ jobs:
        shell: bash
        run: |
          conan export . ${{ steps.channel.outputs.channel }}
-      - name: Add Conan remote
+      - name: Add Ripple Conan remote
        shell: bash
        run: |
-          echo "Adding Conan remote 'xrplf' at ${{ env.CONAN_REMOTE_URL }}."
-          conan remote add xrplf ${{ env.CONAN_REMOTE_URL }} --insert 0 --force 
-          echo "Listing Conan remotes."
          conan remote list
+          conan remote remove ripple || true
+          # Do not quote the URL. An empty string will be accepted (with a non-fatal warning), but a missing argument will not.
+          conan remote add ripple ${{ env.CONAN_URL }} --insert 0
      - name: Parse new version
        id: version
        shell: bash
        run: |
          echo version="$(cat src/libxrpl/protocol/BuildInfo.cpp | grep "versionString =" \
            | awk -F '"' '{print $2}')" | tee ${GITHUB_OUTPUT}
-      - name: Try to authenticate to Conan remote
+      - name: Try to authenticate to Ripple Conan remote
        id: remote
        shell: bash
        run: |
@@ -64,7 +64,7 @@ jobs:
          # https://docs.conan.io/1/reference/commands/misc/user.html#using-environment-variables
          # https://docs.conan.io/1/reference/env_vars.html#conan-login-username-conan-login-username-remote-name
          # https://docs.conan.io/1/reference/env_vars.html#conan-password-conan-password-remote-name
-          echo outcome=$(conan user --remote xrplf --password >&2 \
+          echo outcome=$(conan user --remote ripple --password >&2 \
            && echo success || echo failure) | tee ${GITHUB_OUTPUT}
      - name: Upload new package
        id: upload
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -11,27 +11,13 @@ on:
      - release
      - master
      # Branches that opt-in to running
-      - "ci/**"
+      - 'ci/**'
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
-# This part of Conan configuration is specific to this workflow only; we do not want
-# to pollute conan/profiles directory with settings which might not work for others
-env:
-  CONAN_REMOTE_URL: https://conan.ripplex.io
-  CONAN_REMOTE_USERNAME: ${{ secrets.CONAN_REMOTE_USERNAME }}
-  CONAN_REMOTE_PASSWORD: ${{ secrets.CONAN_REMOTE_PASSWORD }}
-  # This part of the Conan configuration is specific to this workflow only; we
-  # do not want to pollute the 'conan/profiles' directory with settings that
-  # might not work for other workflows.
-  CONAN_GLOBAL_CONF: |
-    core.download:parallel={{os.cpu_count()}}
-    core.upload:parallel={{os.cpu_count()}}
-    tools.build:jobs={{ (os.cpu_count() * 4/5) | int }}
-    tools.build:verbosity=verbose
-    tools.compilation:verbosity=verbose

 jobs:
+
  test:
    if: ${{ github.event_name == 'push' || github.event.pull_request.draft != true || contains(github.event.pull_request.labels.*.name, 'DraftRunCI') }}
    strategy:
@@ -42,22 +28,23 @@ jobs:
          - Ninja
        configuration:
          - Release
-    runs-on: [self-hosted, macOS, mac-runner-m1]
+    runs-on: [self-hosted, macOS]
    env:
      # The `build` action requires these variables.
      build_dir: .build
      NUM_PROCESSORS: 12
    steps:
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: install Conan
        run: |
-          brew install conan
+          brew install conan@1
+          echo '/opt/homebrew/opt/conan@1/bin' >> $GITHUB_PATH
      - name: install Ninja
        if: matrix.generator == 'Ninja'
        run: brew install ninja
      - name: install python
-        run: |
+        run: | 
          if which python > /dev/null 2>&1; then
              echo "Python executable exists"
          else
@@ -88,12 +75,15 @@ jobs:
          sysctl -n hw.logicalcpu
          clang --version
      - name: configure Conan
-        run: |
-          echo "${CONAN_GLOBAL_CONF}" > $(conan config home)/global.conf
-          conan config install conan/profiles/ -tf $(conan config home)/profiles/
-          conan profile show
+        run : |
+          conan profile new default --detect || true
+          conan profile update settings.compiler.cppstd=20 default
      - name: build dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
+          CONAN_LOGIN_USERNAME_RIPPLE: ${{ secrets.CONAN_USERNAME }}
+          CONAN_PASSWORD_RIPPLE: ${{ secrets.CONAN_TOKEN }}
        with:
          configuration: ${{ matrix.configuration }}
      - name: build
@@ -106,7 +96,4 @@ jobs:
        run: |
          n=$(nproc)
          echo "Using $n test jobs"
-
-          cd ${build_dir}
-          ./rippled --unittest --unittest-jobs $n
-          ctest -j $n --output-on-failure
+          ${build_dir}/rippled --unittest --unittest-jobs $n
--- a/.github/workflows/missing-commits.yml
+++ b/.github/workflows/missing-commits.yml
@@ -12,49 +12,49 @@ jobs:
  up_to_date:
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Check for missing commits
-        id: commits
-        env:
-          SUGGESTION: |
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: Check for missing commits
+      id: commits
+      env:
+        SUGGESTION: |

-            If you are reading this, then the commits indicated above are
-            missing from "develop" and/or "release". Do a reverse-merge
-            as soon as possible. See CONTRIBUTING.md for instructions.
-        run: |
-          set -o pipefail
-          # Branches ordered by how "canonical" they are. Every commit in
-          # one branch should be in all the branches behind it
-          order=( master release develop )
-          branches=()
-          for branch in "${order[@]}"
-          do
-            # Check that the branches exist so that this job will work on
-            # forked repos, which don't necessarily have master and
-            # release branches.
-            if git ls-remote --exit-code --heads origin \
-              refs/heads/${branch} > /dev/null
-            then
-              branches+=( origin/${branch} )
-            fi
-          done
-
-          prior=()
-          for branch in "${branches[@]}"
-          do
-            if [[ ${#prior[@]} -ne 0 ]]
-            then
-              echo "Checking ${prior[@]} for commits missing from ${branch}"
-              git log --oneline --no-merges "${prior[@]}" \
-                ^$branch | tee -a "missing-commits.txt"
-              echo
-            fi
-            prior+=( "${branch}" )
-          done
-          if [[ $( cat missing-commits.txt | wc -l ) -ne 0 ]]
+          If you are reading this, then the commits indicated above are
+          missing from "develop" and/or "release". Do a reverse-merge
+          as soon as possible. See CONTRIBUTING.md for instructions.
+      run: |
+        set -o pipefail
+        # Branches ordered by how "canonical" they are. Every commit in
+        # one branch should be in all the branches behind it
+        order=( master release develop )
+        branches=()
+        for branch in "${order[@]}"
+        do
+          # Check that the branches exist so that this job will work on
+          # forked repos, which don't necessarily have master and
+          # release branches.
+          if git ls-remote --exit-code --heads origin \
+            refs/heads/${branch} > /dev/null
          then
-            echo "${SUGGESTION}"
-            exit 1
+            branches+=( origin/${branch} )
          fi
+        done
+
+        prior=()
+        for branch in "${branches[@]}"
+        do
+          if [[ ${#prior[@]} -ne 0 ]]
+          then
+            echo "Checking ${prior[@]} for commits missing from ${branch}"
+            git log --oneline --no-merges "${prior[@]}" \
+              ^$branch | tee -a "missing-commits.txt"
+            echo
+          fi
+          prior+=( "${branch}" )
+        done
+        if [[ $( cat missing-commits.txt | wc -l ) -ne 0 ]]
+        then
+          echo "${SUGGESTION}"
+          exit 1
+        fi
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -16,20 +16,6 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

-env:
-  CONAN_REMOTE_URL: https://conan.ripplex.io
-  CONAN_REMOTE_USERNAME: ${{ secrets.CONAN_REMOTE_USERNAME }}
-  CONAN_REMOTE_PASSWORD: ${{ secrets.CONAN_REMOTE_PASSWORD }}
-  # This part of the Conan configuration is specific to this workflow only; we
-  # do not want to pollute the 'conan/profiles' directory with settings that
-  # might not work for other workflows.
-  CONAN_GLOBAL_CONF: |
-    core.download:parallel={{ os.cpu_count() }}
-    core.upload:parallel={{ os.cpu_count() }}
-    tools.build:jobs={{ (os.cpu_count() * 4/5) | int }}
-    tools.build:verbosity=verbose
-    tools.compilation:verbosity=verbose
-
 # This workflow has multiple job matrixes.
 # They can be considered phases because most of the matrices ("test",
 # "coverage", "conan", ) depend on the first ("dependencies").
@@ -68,45 +54,59 @@ jobs:
          - Release
        include:
          - compiler: gcc
-            compiler_version: 12
-            distro: ubuntu
-            codename: jammy
+            profile:
+              version: 11
+              cc: /usr/bin/gcc
+              cxx: /usr/bin/g++
          - compiler: clang
-            compiler_version: 16
-            distro: debian
-            codename: bookworm
+            profile:
+              version: 14
+              cc: /usr/bin/clang-14
+              cxx: /usr/bin/clang++-14
    runs-on: [self-hosted, heavy]
-    container: ghcr.io/xrplf/ci/${{ matrix.distro }}-${{ matrix.codename }}:${{ matrix.compiler }}-${{ matrix.compiler_version }}
+    container: ghcr.io/xrplf/rippled-build-ubuntu:aaf5e3e
    env:
      build_dir: .build
    steps:
+      - name: upgrade conan
+        run: |
+          pip install --upgrade "conan<2"
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: check environment
        run: |
          echo ${PATH} | tr ':' '\n'
          lsb_release -a || true
-          ${{ matrix.compiler }}-${{ matrix.compiler_version }} --version
+          ${{ matrix.profile.cc }} --version
          conan --version
          cmake --version
          env | sort
      - name: configure Conan
        run: |
-          echo "${CONAN_GLOBAL_CONF}" >> $(conan config home)/global.conf
-          conan config install conan/profiles/ -tf $(conan config home)/profiles/
-          conan profile show
+          conan profile new default --detect
+          conan profile update settings.compiler.cppstd=20 default
+          conan profile update settings.compiler=${{ matrix.compiler }} default
+          conan profile update settings.compiler.version=${{ matrix.profile.version }} default
+          conan profile update settings.compiler.libcxx=libstdc++11 default
+          conan profile update env.CC=${{ matrix.profile.cc }} default
+          conan profile update env.CXX=${{ matrix.profile.cxx }} default
+          conan profile update conf.tools.build:compiler_executables='{"c": "${{ matrix.profile.cc }}", "cpp": "${{ matrix.profile.cxx }}"}' default
      - name: archive profile
        # Create this archive before dependencies are added to the local cache.
-        run: tar -czf conan.tar.gz -C ${CONAN_HOME} .
+        run: tar -czf conan.tar -C ~/.conan .
      - name: build dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
+          CONAN_LOGIN_USERNAME_RIPPLE: ${{ secrets.CONAN_USERNAME }}
+          CONAN_PASSWORD_RIPPLE: ${{ secrets.CONAN_TOKEN }}
        with:
          configuration: ${{ matrix.configuration }}
      - name: upload archive
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.platform }}-${{ matrix.compiler }}-${{ matrix.configuration }}
-          path: conan.tar.gz
+          path: conan.tar
          if-no-files-found: error

  test:
@@ -121,32 +121,26 @@ jobs:
        configuration:
          - Debug
          - Release
-        include:
-          - compiler: gcc
-            compiler_version: 12
-            distro: ubuntu
-            codename: jammy
-          - compiler: clang
-            compiler_version: 16
-            distro: debian
-            codename: bookworm
        cmake-args:
          -
          - "-Dunity=ON"
    needs: dependencies
    runs-on: [self-hosted, heavy]
-    container: ghcr.io/xrplf/ci/${{ matrix.distro }}-${{ matrix.codename }}:${{ matrix.compiler }}-${{ matrix.compiler_version }}
+    container: ghcr.io/xrplf/rippled-build-ubuntu:aaf5e3e
    env:
      build_dir: .build
    steps:
+      - name: upgrade conan
+        run: |
+          pip install --upgrade "conan<2"
      - name: download cache
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093
+        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.platform }}-${{ matrix.compiler }}-${{ matrix.configuration }}
      - name: extract cache
        run: |
-          mkdir -p ${CONAN_HOME}
-          tar -xzf conan.tar.gz -C ${CONAN_HOME}
+          mkdir -p ~/.conan
+          tar -xzf conan.tar -C ~/.conan
      - name: check environment
        run: |
          env | sort
@@ -154,9 +148,11 @@ jobs:
          conan --version
          cmake --version
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
        with:
          configuration: ${{ matrix.configuration }}
      - name: build
@@ -165,21 +161,9 @@ jobs:
          generator: Ninja
          configuration: ${{ matrix.configuration }}
          cmake-args: "-Dassert=TRUE -Dwerr=TRUE ${{ matrix.cmake-args }}"
-      - name: check linking
-        run: |
-          cd ${build_dir}
-          ldd ./rippled
-          if [ "$(ldd ./rippled | grep -E '(libstdc\+\+|libgcc)' | wc -l)" -eq 0 ]; then
-            echo 'The binary is statically linked.'
-          else
-            echo 'The binary is dynamically linked.'
-            exit 1
-          fi
      - name: test
        run: |
-          cd ${build_dir}
-          ./rippled --unittest --unittest-jobs $(nproc)
-          ctest -j $(nproc) --output-on-failure
+          ${build_dir}/rippled --unittest --unittest-jobs $(nproc)

  reference-fee-test:
    strategy:
@@ -196,18 +180,21 @@ jobs:
          - "-DUNIT_TEST_REFERENCE_FEE=1000"
    needs: dependencies
    runs-on: [self-hosted, heavy]
-    container: ghcr.io/xrplf/ci/ubuntu-jammy:gcc-12
+    container: ghcr.io/xrplf/rippled-build-ubuntu:aaf5e3e
    env:
      build_dir: .build
    steps:
+      - name: upgrade conan
+        run: |
+          pip install --upgrade "conan<2"
      - name: download cache
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093
+        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.platform }}-${{ matrix.compiler }}-${{ matrix.configuration }}
      - name: extract cache
        run: |
-          mkdir -p ${CONAN_HOME}
-          tar -xzf conan.tar.gz -C ${CONAN_HOME}
+          mkdir -p ~/.conan
+          tar -xzf conan.tar -C ~/.conan
      - name: check environment
        run: |
          env | sort
@@ -215,9 +202,11 @@ jobs:
          conan --version
          cmake --version
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
        with:
          configuration: ${{ matrix.configuration }}
      - name: build
@@ -228,9 +217,7 @@ jobs:
          cmake-args: "-Dassert=TRUE -Dwerr=TRUE ${{ matrix.cmake-args }}"
      - name: test
        run: |
-          cd ${build_dir}
-          ./rippled --unittest --unittest-jobs $(nproc)
-          ctest -j $(nproc) --output-on-failure
+          ${build_dir}/rippled --unittest --unittest-jobs $(nproc)

  coverage:
    strategy:
@@ -244,18 +231,23 @@ jobs:
          - Debug
    needs: dependencies
    runs-on: [self-hosted, heavy]
-    container: ghcr.io/xrplf/ci/ubuntu-jammy:gcc-12
+    container: ghcr.io/xrplf/rippled-build-ubuntu:aaf5e3e
    env:
      build_dir: .build
    steps:
+      - name: upgrade conan
+        run: |
+          pip install --upgrade "conan<2"
      - name: download cache
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093
+        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.platform }}-${{ matrix.compiler }}-${{ matrix.configuration }}
      - name: extract cache
        run: |
-          mkdir -p ${CONAN_HOME}
-          tar -xzf conan.tar.gz -C ${CONAN_HOME}
+          mkdir -p ~/.conan
+          tar -xzf conan.tar -C ~/.conan
+      - name: install gcovr
+        run: pip install "gcovr>=7,<9"
      - name: check environment
        run: |
          echo ${PATH} | tr ':' '\n'
@@ -263,11 +255,13 @@ jobs:
          cmake --version
          gcovr --version
          env | sort
-          ls ${CONAN_HOME}
+          ls ~/.conan
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
        with:
          configuration: ${{ matrix.configuration }}
      - name: build
@@ -289,7 +283,7 @@ jobs:
        run: |
          mv "${build_dir}/coverage.xml" ./
      - name: archive coverage report
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        uses: actions/upload-artifact@v4
        with:
          name: coverage.xml
          path: coverage.xml
@@ -311,23 +305,22 @@ jobs:
  conan:
    needs: dependencies
    runs-on: [self-hosted, heavy]
-    container:
-      image: ghcr.io/xrplf/ci/ubuntu-jammy:gcc-12
+    container: ghcr.io/xrplf/rippled-build-ubuntu:aaf5e3e
    env:
      build_dir: .build
-      platform: linux
-      compiler: gcc
-      compiler_version: 12
      configuration: Release
    steps:
+      - name: upgrade conan
+        run: |
+          pip install --upgrade "conan<2"
      - name: download cache
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093
+        uses: actions/download-artifact@v4
        with:
-          name: ${{ env.platform }}-${{ env.compiler }}-${{ env.configuration }}
+          name: linux-gcc-${{ env.configuration }}
      - name: extract cache
        run: |
-          mkdir -p ${CONAN_HOME}
-          tar -xzf conan.tar.gz -C ${CONAN_HOME}
+          mkdir -p ~/.conan
+          tar -xzf conan.tar -C ~/.conan
      - name: check environment
        run: |
          env | sort
@@ -335,88 +328,116 @@ jobs:
          conan --version
          cmake --version
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
        with:
          configuration: ${{ env.configuration }}
      - name: export
        run: |
-          conan export . --version head
+          version=$(conan inspect --raw version .)
+          reference="xrpl/${version}@local/test"
+          conan remove -f ${reference} || true
+          conan export . local/test
+          echo "reference=${reference}" >> "${GITHUB_ENV}"
      - name: build
        run: |
          cd tests/conan
-          mkdir ${build_dir} && cd ${build_dir}
-          conan install .. \
-            --settings:all build_type=${configuration} \
-            --output-folder . \
-            --build missing
+          mkdir ${build_dir}
+          cd ${build_dir}
+          conan install .. --output-folder . \
+            --require-override ${reference} --build missing
          cmake .. \
            -DCMAKE_TOOLCHAIN_FILE:FILEPATH=./build/${configuration}/generators/conan_toolchain.cmake \
            -DCMAKE_BUILD_TYPE=${configuration}
          cmake --build .
          ./example | grep '^[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+'

+  # NOTE: we do not reuse the dependencies built above because that job
+  # lags behind on compiler versions. Instrumentation requires clang
+  # version 16 or later.
+
  instrumentation-build:
-    needs: dependencies
-    runs-on: [self-hosted, heavy]
-    container: ghcr.io/xrplf/ci/debian-bookworm:clang-16
+    if: ${{ github.event_name == 'push' || github.event.pull_request.draft != true || contains(github.event.pull_request.labels.*.name, 'DraftRunCI') }}
    env:
-      build_dir: .build
+      CLANG_RELEASE: 16
+    strategy:
+      fail-fast: false
+    runs-on: [self-hosted, heavy]
+    container: debian:bookworm
    steps:
-      - name: download cache
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093
-        with:
-          name: linux-clang-Debug
+        - name: install prerequisites
+          env:
+            DEBIAN_FRONTEND: noninteractive
+          run: |
+            apt-get update
+            apt-get install --yes --no-install-recommends \
+              clang-${CLANG_RELEASE} clang++-${CLANG_RELEASE} \
+              python3-pip python-is-python3 make cmake git wget
+            apt-get clean
+            update-alternatives --install \
+              /usr/bin/clang clang /usr/bin/clang-${CLANG_RELEASE} 100 \
+              --slave /usr/bin/clang++ clang++ /usr/bin/clang++-${CLANG_RELEASE}
+            update-alternatives --auto clang
+            pip install --no-cache --break-system-packages "conan<2"

-      - name: extract cache
-        run: |
-          mkdir -p ${CONAN_HOME}
-          tar -xzf conan.tar.gz -C ${CONAN_HOME}
+        - name: checkout
+          uses: actions/checkout@v4

-      - name: check environment
-        run: |
-          echo ${PATH} | tr ':' '\n'
-          conan --version
-          cmake --version
-          env | sort
-          ls ${CONAN_HOME}
+        - name: prepare environment
+          run: |
+            mkdir ${GITHUB_WORKSPACE}/.build
+            echo "SOURCE_DIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
+            echo "BUILD_DIR=$GITHUB_WORKSPACE/.build" >> $GITHUB_ENV
+            echo "CC=/usr/bin/clang" >> $GITHUB_ENV
+            echo "CXX=/usr/bin/clang++" >> $GITHUB_ENV

-      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        - name: configure Conan
+          run: |
+            conan profile new --detect default
+            conan profile update settings.compiler=clang default
+            conan profile update settings.compiler.version=${CLANG_RELEASE} default
+            conan profile update settings.compiler.libcxx=libstdc++11 default
+            conan profile update settings.compiler.cppstd=20 default
+            conan profile update options.rocksdb=False default
+            conan profile update \
+              'conf.tools.build:compiler_executables={"c": "/usr/bin/clang", "cpp": "/usr/bin/clang++"}' default
+            conan profile update 'env.CXXFLAGS="-DBOOST_ASIO_DISABLE_CONCEPTS"' default
+            conan profile update 'conf.tools.build:cxxflags+=["-DBOOST_ASIO_DISABLE_CONCEPTS"]' default
+            conan export external/snappy snappy/1.1.10@
+            conan export external/soci soci/4.0.3@

-      - name: dependencies
-        uses: ./.github/actions/dependencies
-        with:
-          configuration: Debug
+        - name: build dependencies
+          run: |
+            cd ${BUILD_DIR}
+            conan install ${SOURCE_DIR} \
+              --output-folder ${BUILD_DIR} \
+              --install-folder ${BUILD_DIR} \
+              --build missing \
+              --settings build_type=Debug

-      - name: prepare environment
-        run: |
-          mkdir -p ${build_dir}
-          echo "SOURCE_DIR=$(pwd)" >> $GITHUB_ENV
-          echo "BUILD_DIR=$(pwd)/${build_dir}" >> $GITHUB_ENV
+        - name: build with instrumentation
+          run: |
+            cd ${BUILD_DIR}
+            cmake -S ${SOURCE_DIR} -B ${BUILD_DIR} \
+              -Dvoidstar=ON \
+              -Dtests=ON \
+              -Dxrpld=ON \
+              -DCMAKE_BUILD_TYPE=Debug \
+              -DSECP256K1_BUILD_BENCHMARK=OFF \
+              -DSECP256K1_BUILD_TESTS=OFF \
+              -DSECP256K1_BUILD_EXHAUSTIVE_TESTS=OFF \
+              -DCMAKE_TOOLCHAIN_FILE=${BUILD_DIR}/build/generators/conan_toolchain.cmake
+            cmake --build .  --parallel $(nproc)

-      - name: build with instrumentation
-        run: |
-          cd ${BUILD_DIR}
-          cmake -S ${SOURCE_DIR} -B ${BUILD_DIR} \
-            -Dvoidstar=ON \
-            -Dtests=ON \
-            -Dxrpld=ON \
-            -DCMAKE_BUILD_TYPE=Debug \
-            -DSECP256K1_BUILD_BENCHMARK=OFF \
-            -DSECP256K1_BUILD_TESTS=OFF \
-            -DSECP256K1_BUILD_EXHAUSTIVE_TESTS=OFF \
-            -DCMAKE_TOOLCHAIN_FILE=${BUILD_DIR}/build/generators/conan_toolchain.cmake
-          cmake --build .  --parallel $(nproc)
+        - name: verify instrumentation enabled
+          run: |
+            cd ${BUILD_DIR}
+            ./rippled --version | grep libvoidstar

-      - name: verify instrumentation enabled
-        run: |
-          cd ${BUILD_DIR}
-          ./rippled --version | grep libvoidstar
-
-      - name: run unit tests
-        run: |
-          cd ${BUILD_DIR}
-          ./rippled -u --unittest-jobs $(( $(nproc)/4 ))
-          ctest -j $(nproc) --output-on-failure
+        - name: run unit tests
+          run: |
+            cd ${BUILD_DIR}
+            ./rippled -u --unittest-jobs $(( $(nproc)/4 ))
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -12,27 +12,15 @@ on:
      - release
      - master
      # Branches that opt-in to running
-      - "ci/**"
+      - 'ci/**'

 # https://docs.github.com/en/actions/using-jobs/using-concurrency
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
-env:
-  CONAN_REMOTE_URL: https://conan.ripplex.io
-  CONAN_REMOTE_USERNAME: ${{ secrets.CONAN_REMOTE_USERNAME }}
-  CONAN_REMOTE_PASSWORD: ${{ secrets.CONAN_REMOTE_PASSWORD }}
-  # This part of the Conan configuration is specific to this workflow only; we
-  # do not want to pollute the 'conan/profiles' directory with settings that
-  # might not work for other workflows.
-  CONAN_GLOBAL_CONF: |
-    core.download:parallel={{os.cpu_count()}}
-    core.upload:parallel={{os.cpu_count()}}
-    tools.build:jobs=24
-    tools.build:verbosity=verbose
-    tools.compilation:verbosity=verbose

 jobs:
+
  test:
    if: ${{ github.event_name == 'push' || github.event.pull_request.draft != true || contains(github.event.pull_request.labels.*.name, 'DraftRunCI') }}
    strategy:
@@ -54,11 +42,11 @@ jobs:
      build_dir: .build
    steps:
      - name: checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@v4
      - name: choose Python
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
+        uses: actions/setup-python@v5
        with:
-          python-version: 3.13
+          python-version: 3.9
      - name: learn Python cache directory
        id: pip-cache
        shell: bash
@@ -66,12 +54,12 @@ jobs:
          python -m pip install --upgrade pip
          echo "dir=$(pip cache dir)" | tee ${GITHUB_OUTPUT}
      - name: restore Python cache directory
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684
+        uses: actions/cache@v4
        with:
-          path: ${{ steps.pip-cache.outputs.dir }}
-          key: ${{ runner.os }}-${{ hashFiles('.github/workflows/windows.yml') }}
+            path: ${{ steps.pip-cache.outputs.dir }}
+            key: ${{ runner.os }}-${{ hashFiles('.github/workflows/windows.yml') }}
      - name: install Conan
-        run: pip install wheel conan
+        run: pip install wheel 'conan<2'
      - name: check environment
        run: |
          dir env:
@@ -82,25 +70,30 @@ jobs:
      - name: configure Conan
        shell: bash
        run: |
-          echo "${CONAN_GLOBAL_CONF}" > $(conan config home)/global.conf
-          conan config install conan/profiles/ -tf $(conan config home)/profiles/
-          conan profile show
+          conan profile new default --detect
+          conan profile update settings.compiler.cppstd=20 default
+          conan profile update \
+            settings.compiler.runtime=MT${{ matrix.configuration.runtime }} \
+            default
      - name: build dependencies
        uses: ./.github/actions/dependencies
+        env:
+          CONAN_URL: http://18.143.149.228:8081/artifactory/api/conan/conan-non-prod
+          CONAN_LOGIN_USERNAME_RIPPLE: ${{ secrets.CONAN_USERNAME }}
+          CONAN_PASSWORD_RIPPLE: ${{ secrets.CONAN_TOKEN }}
        with:
          configuration: ${{ matrix.configuration.type }}
      - name: build
        uses: ./.github/actions/build
        with:
-          generator: "${{ matrix.version.generator }}"
+          generator: '${{ matrix.version.generator }}'
          configuration: ${{ matrix.configuration.type }}
          # Hard code for now. Move to the matrix if varied options are needed
-          cmake-args: "-Dassert=TRUE -Dwerr=TRUE -Dreporting=OFF -Dunity=ON"
+          cmake-args: '-Dassert=TRUE -Dwerr=TRUE -Dreporting=OFF -Dunity=ON'
          cmake-target: install
      - name: test
        shell: bash
        if: ${{ matrix.configuration.tests }}
        run: |
-          cd ${build_dir}/${{ matrix.configuration.type }}
-          ./rippled --unittest --unittest-jobs $(nproc)
-          ctest -j $(nproc) --output-on-failure
+          ${build_dir}/${{ matrix.configuration.type }}/rippled --unittest \
+              --unittest-jobs $(nproc)
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 # .pre-commit-config.yaml
 repos:
-  - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v18.1.8
-    hooks:
-      - id: clang-format
+- repo: https://github.com/pre-commit/mirrors-clang-format
+  rev: v18.1.3
+  hooks:
+  - id: clang-format
--- a/BUILD.md
+++ b/BUILD.md
@@ -3,29 +3,29 @@
 | These instructions assume you have a C++ development environment ready with Git, Python, Conan, CMake, and a C++ compiler. For help setting one up on Linux, macOS, or Windows, [see this guide](./docs/build/environment.md). |

 > These instructions also assume a basic familiarity with Conan and CMake.
-> If you are unfamiliar with Conan, you can read our
-> [crash course](./docs/build/conan.md) or the official [Getting Started][3]
-> walkthrough.
+> If you are unfamiliar with Conan,
+> you can read our [crash course](./docs/build/conan.md)
+> or the official [Getting Started][3] walkthrough.

 ## Branches

 For a stable release, choose the `master` branch or one of the [tagged
 releases](https://github.com/ripple/rippled/releases).

-```bash
+```
 git checkout master
 ```

 For the latest release candidate, choose the `release` branch.

-```bash
+```
 git checkout release
 ```

 For the latest set of untested features, or to contribute, choose the `develop`
 branch.

-```bash
+```
 git checkout develop
 ```

@@ -33,323 +33,176 @@ git checkout develop

 See [System Requirements](https://xrpl.org/system-requirements.html).

-Building rippled generally requires git, Python, Conan, CMake, and a C++
-compiler. Some guidance on setting up such a [C++ development environment can be
-found here](./docs/build/environment.md).
+Building rippled generally requires git, Python, Conan, CMake, and a C++ compiler. Some guidance on setting up such a [C++ development environment can be found here](./docs/build/environment.md).

- [Python 3.11](https://www.python.org/downloads/), or higher
- [Conan 2.17](https://conan.io/downloads.html)[^1], or higher
- [CMake 3.22](https://cmake.org/download/)[^2], or higher
+- [Python 3.7](https://www.python.org/downloads/)
+- [Conan 1.60](https://conan.io/downloads.html)[^1]
+- [CMake 3.16](https://cmake.org/download/)

-[^1]:
-    It is possible to build with Conan 1.60+, but the instructions are
-    significantly different, which is why we are not recommending it.
-
-[^2]:
-    CMake 4 is not yet supported by all dependencies required by this project.
-    If you are affected by this issue, follow [conan workaround for cmake
-    4](#workaround-for-cmake-4)
+[^1]: It is possible to build with Conan 2.x,
+but the instructions are significantly different,
+which is why we are not recommending it yet.
+Notably, the `conan profile update` command is removed in 2.x.
+Profiles must be edited by hand.

 `rippled` is written in the C++20 dialect and includes the `<concepts>` header.
 The [minimum compiler versions][2] required are:

-| Compiler    | Version   |
-| ----------- | --------- |
-| GCC         | 12        |
-| Clang       | 16        |
-| Apple Clang | 16        |
-| MSVC        | 19.44[^3] |
+| Compiler    | Version |
+|-------------|---------|
+| GCC         | 11      |
+| Clang       | 13      |
+| Apple Clang | 13.1.6  |
+| MSVC        | 19.23   |

 ### Linux

-The Ubuntu Linux distribution has received the highest level of quality
-assurance, testing, and support. We also support Red Hat and use Debian
-internally.
+The Ubuntu operating system has received the highest level of
+quality assurance, testing, and support.

-Here are [sample instructions for setting up a C++ development environment on
-Linux](./docs/build/environment.md#linux).
+Here are [sample instructions for setting up a C++ development environment on Linux](./docs/build/environment.md#linux).

 ### Mac

 Many rippled engineers use macOS for development.

-Here are [sample instructions for setting up a C++ development environment on
-macOS](./docs/build/environment.md#macos).
+Here are [sample instructions for setting up a C++ development environment on macOS](./docs/build/environment.md#macos).

 ### Windows

-Windows is used by some engineers for development only.
+Windows is not recommended for production use at this time.

-[^3]: Windows is not recommended for production use.
+- Additionally, 32-bit Windows development is not supported.
+
+[Boost]: https://www.boost.org/

 ## Steps

 ### Set Up Conan

-After you have a [C++ development environment](./docs/build/environment.md) ready with Git, Python,
-Conan, CMake, and a C++ compiler, you may need to set up your Conan profile.
+After you have a [C++ development environment](./docs/build/environment.md) ready with Git, Python, Conan, CMake, and a C++ compiler, you may need to set up your Conan profile.

-These instructions assume a basic familiarity with Conan and CMake. If you are
-unfamiliar with Conan, then please read [this crash course](./docs/build/conan.md) or the official
-[Getting Started][3] walkthrough.
+These instructions assume a basic familiarity with Conan and CMake.

-#### Default profile
+If you are unfamiliar with Conan, then please read [this crash course](./docs/build/conan.md) or the official [Getting Started][3] walkthrough.

-We recommend that you import the provided `conan/profiles/default` profile:
+You'll need at least one Conan profile:

-```bash
-conan config install conan/profiles/ -tf $(conan config home)/profiles/
+   ```
+   conan profile new default --detect
+   ```
+
+Update the compiler settings:
+
+   ```
+   conan profile update settings.compiler.cppstd=20 default
+   ```
+
+Configure Conan (1.x only) to use recipe revisions:
+
+   ```
+   conan config set general.revisions_enabled=1
+   ```
+
+**Linux** developers will commonly have a default Conan [profile][] that compiles
+with GCC and links with libstdc++.
+If you are linking with libstdc++ (see profile setting `compiler.libcxx`),
+then you will need to choose the `libstdc++11` ABI:
+
+   ```
+   conan profile update settings.compiler.libcxx=libstdc++11 default
+   ```
+
+
+Ensure interoperability between `boost::string_view` and `std::string_view` types:
+
+```
+conan profile update 'conf.tools.build:cxxflags+=["-DBOOST_BEAST_USE_STD_STRING_VIEW"]' default
+conan profile update 'env.CXXFLAGS="-DBOOST_BEAST_USE_STD_STRING_VIEW"' default
 ```

-You can check your Conan profile by running:
-
-```bash
-conan profile show
+If you have other flags in the `conf.tools.build` or `env.CXXFLAGS` sections, make sure to retain the existing flags and append the new ones. You can check them with:
+```
+conan profile show default
 ```

-#### Custom profile

-If the default profile does not work for you and you do not yet have a Conan
-profile, you can create one by running:
+**Windows** developers may need to use the x64 native build tools.
+An easy way to do that is to run the shortcut "x64 Native Tools Command
+Prompt" for the version of Visual Studio that you have installed.

-```bash
-conan profile detect
+   Windows developers must also build `rippled` and its dependencies for the x64
+   architecture:
+
+   ```
+   conan profile update settings.arch=x86_64 default
+   ```
+
+### Multiple compilers
+
+When `/usr/bin/g++` exists on a platform, it is the default C++ compiler. This
+default works for some users.
+
+However, if this compiler cannot build rippled or its dependencies, then you can
+install another compiler and set Conan and CMake to use it.
+Update the `conf.tools.build:compiler_executables` setting in order to set the correct variables (`CMAKE_<LANG>_COMPILER`) in the
+generated CMake toolchain file.
+For example, on Ubuntu 20, you may have gcc at `/usr/bin/gcc` and g++ at `/usr/bin/g++`; if that is the case, you can select those compilers with:
+```
+conan profile update 'conf.tools.build:compiler_executables={"c": "/usr/bin/gcc", "cpp": "/usr/bin/g++"}' default
 ```

-You may need to make changes to the profile to suit your environment. You can
-refer to the provided `conan/profiles/default` profile for inspiration, and you
-may also need to apply the required [tweaks](#conan-profile-tweaks) to this
-default profile.
+Replace `/usr/bin/gcc` and `/usr/bin/g++` with paths to the desired compilers.

-### Patched recipes
+This setting should select the compiler for dependencies as well,
+but not all of them have a Conan recipe that respects this setting (yet).
+For the rest, you can set these environment variables.
+Replace `<path>` with paths to the desired compilers:

-The recipes in Conan Center occasionally need to be patched for compatibility
-with the latest version of `rippled`. We maintain a fork of the Conan Center
-[here](https://github.com/XRPLF/conan-center-index/) containing the patches.
+- `conan profile update env.CC=<path> default`
+- `conan profile update env.CXX=<path> default`

-To ensure our patched recipes are used, you must add our Conan remote at a
-higher index than the default Conan Center remote, so it is consulted first. You
-can do this by running:
+Export our [Conan recipe for Snappy](./external/snappy).
+It does not explicitly link the C++ standard library,
+which allows you to statically link it with GCC, if you want.

-```bash
-conan remote add --index 0 xrplf "https://conan.ripplex.io"
-```
+   ```
+   # Conan 1.x
+   conan export external/snappy snappy/1.1.10@
+   # Conan 2.x
+   conan export --version 1.1.10 external/snappy
+   ```

-Alternatively, you can pull the patched recipes into the repository and use them
-locally:
+Export our [Conan recipe for RocksDB](./external/rocksdb).
+It does not override paths to dependencies when building with Visual Studio.

-```bash
-cd external
-git init
-git remote add origin git@github.com:XRPLF/conan-center-index.git
-git sparse-checkout init
-git sparse-checkout set recipes/snappy
-git sparse-checkout add recipes/soci
-git fetch origin master
-git checkout master
-conan export --version 1.1.10 recipes/snappy/all
-conan export --version 4.0.3 recipes/soci/all
-rm -rf .git
-```
+   ```
+   # Conan 1.x
+   conan export external/rocksdb rocksdb/9.7.3@
+   # Conan 2.x
+   conan export --version 9.7.3 external/rocksdb
+   ```

-In the case we switch to a newer version of a dependency that still requires a
-patch, it will be necessary for you to pull in the changes and re-export the
-updated dependencies with the newer version. However, if we switch to a newer
-version that no longer requires a patch, no action is required on your part, as
-the new recipe will be automatically pulled from the official Conan Center.
+Export our [Conan recipe for SOCI](./external/soci).
+It patches their CMake to correctly import its dependencies.

-### Conan profile tweaks
+   ```
+   # Conan 1.x
+   conan export external/soci soci/4.0.3@
+   # Conan 2.x
+   conan export --version 4.0.3 external/soci
+   ```

-#### Missing compiler version
+Export our [Conan recipe for NuDB](./external/nudb).
+It fixes some source files to add missing `#include`s.

-If you see an error similar to the following after running `conan profile show`:

-```bash
-ERROR: Invalid setting '17' is not a valid 'settings.compiler.version' value.
-Possible values are ['5.0', '5.1', '6.0', '6.1', '7.0', '7.3', '8.0', '8.1',
-'9.0', '9.1', '10.0', '11.0', '12.0', '13', '13.0', '13.1', '14', '14.0', '15',
-'15.0', '16', '16.0']
-Read "http://docs.conan.io/2/knowledge/faq.html#error-invalid-setting"
-```
-
-you need to amend the list of compiler versions in
-`$(conan config home)/settings.yml`, by appending the required version number(s)
-to the `version` array specific for your compiler. For example:
-
-```yaml
-apple-clang:
-  version:
-    [
-      "5.0",
-      "5.1",
-      "6.0",
-      "6.1",
-      "7.0",
-      "7.3",
-      "8.0",
-      "8.1",
-      "9.0",
-      "9.1",
-      "10.0",
-      "11.0",
-      "12.0",
-      "13",
-      "13.0",
-      "13.1",
-      "14",
-      "14.0",
-      "15",
-      "15.0",
-      "16",
-      "16.0",
-      "17",
-      "17.0",
-    ]
-```
-
-#### Multiple compilers
-
-If you have multiple compilers installed, make sure to select the one to use in
-your default Conan configuration **before** running `conan profile detect`, by
-setting the `CC` and `CXX` environment variables.
-
-For example, if you are running MacOS and have [homebrew
-LLVM@18](https://formulae.brew.sh/formula/llvm@18), and want to use it as a
-compiler in the new Conan profile:
-
-```bash
-export CC=$(brew --prefix llvm@18)/bin/clang
-export CXX=$(brew --prefix llvm@18)/bin/clang++
-conan profile detect
-```
-
-You should also explicitly set the path to the compiler in the profile file,
-which helps to avoid errors when `CC` and/or `CXX` are set and disagree with the
-selected Conan profile. For example:
-
-```text
-[conf]
-tools.build:compiler_executables={'c':'/usr/bin/gcc','cpp':'/usr/bin/g++'}
-```
-
-#### Multiple profiles
-
-You can manage multiple Conan profiles in the directory
-`$(conan config home)/profiles`, for example renaming `default` to a different
-name and then creating a new `default` profile for a different compiler.
-
-#### Select language
-
-The default profile created by Conan will typically select different C++ dialect
-than C++20 used by this project. You should set `20` in the profile line
-starting with `compiler.cppstd=`. For example:
-
-```bash
-sed -i.bak -e 's|^compiler\.cppstd=.*$|compiler.cppstd=20|' $(conan config home)/profiles/default
-```
-
-#### Select standard library in Linux
-
-**Linux** developers will commonly have a default Conan [profile][] that
-compiles with GCC and links with libstdc++. If you are linking with libstdc++
-(see profile setting `compiler.libcxx`), then you will need to choose the
-`libstdc++11` ABI:
-
-```bash
-sed -i.bak -e 's|^compiler\.libcxx=.*$|compiler.libcxx=libstdc++11|' $(conan config home)/profiles/default
-```
-
-#### Select architecture and runtime in Windows
-
-**Windows** developers may need to use the x64 native build tools. An easy way
-to do that is to run the shortcut "x64 Native Tools Command Prompt" for the
-version of Visual Studio that you have installed.
-
-Windows developers must also build `rippled` and its dependencies for the x64
-architecture:
-
-```bash
-sed -i.bak -e 's|^arch=.*$|arch=x86_64|' $(conan config home)/profiles/default
-```
-
-**Windows** developers also must select static runtime:
-
-```bash
-sed -i.bak -e 's|^compiler\.runtime=.*$|compiler.runtime=static|' $(conan config home)/profiles/default
-```
-
-#### Workaround for CMake 4
-
-If your system CMake is version 4 rather than 3, you may have to configure Conan
-profile to use CMake version 3 for dependencies, by adding the following two
-lines to your profile:
-
-```text
-[tool_requires]
-!cmake/*: cmake/[>=3 <4]
-```
-
-This will force Conan to download and use a locally cached CMake 3 version, and
-is needed because some of the dependencies used by this project do not support
-CMake 4.
-
-#### Clang workaround for grpc
-
-If your compiler is clang, version 19 or later, or apple-clang, version 17 or
-later, you may encounter a compilation error while building the `grpc`
-dependency:
-
-```text
-In file included from .../lib/promise/try_seq.h:26:
-.../lib/promise/detail/basic_seq.h:499:38: error: a template argument list is expected after a name prefixed by the template keyword [-Wmissing-template-arg-list-after-template-kw]
-  499 |                     Traits::template CallSeqFactory(f_, *cur_, std::move(arg)));
-      |                                      ^
-```
-
-The workaround for this error is to add two lines to profile:
-
-```text
-[conf]
-tools.build:cxxflags=['-Wno-missing-template-arg-list-after-template-kw']
-```
-
-#### Workaround for gcc 12
-
-If your compiler is gcc, version 12, and you have enabled `werr` option, you may
-encounter a compilation error such as:
-
-```text
-/usr/include/c++/12/bits/char_traits.h:435:56: error: 'void* __builtin_memcpy(void*, const void*, long unsigned int)' accessing 9223372036854775810 or more bytes at offsets [2, 9223372036854775807] and 1 may overlap up to 9223372036854775813 bytes at offset -3 [-Werror=restrict]
-  435 |         return static_cast<char_type*>(__builtin_memcpy(__s1, __s2, __n));
-      |                                        ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~
-cc1plus: all warnings being treated as errors
-```
-
-The workaround for this error is to add two lines to your profile:
-
-```text
-[conf]
-tools.build:cxxflags=['-Wno-restrict']
-```
-
-#### Workaround for clang 16
-
-If your compiler is clang, version 16, you may encounter compilation error such
-as:
-
-```text
-In file included from .../boost/beast/websocket/stream.hpp:2857:
-.../boost/beast/websocket/impl/read.hpp:695:17: error: call to 'async_teardown' is ambiguous
-                async_teardown(impl.role, impl.stream(),
-                ^~~~~~~~~~~~~~
-```
-
-The workaround for this error is to add two lines to your profile:
-
-```text
-[conf]
-tools.build:cxxflags=['-DBOOST_ASIO_DISABLE_CONCEPTS']
-```
+   ```
+   # Conan 1.x
+   conan export external/nudb nudb/2.0.8@
+   # Conan 2.x
+   conan export --version 2.0.8 external/nudb
+   ```

 ### Build and Test

@@ -371,70 +224,71 @@ tools.build:cxxflags=['-DBOOST_ASIO_DISABLE_CONCEPTS']

 2. Use conan to generate CMake files for every configuration you want to build:

-   ```
-   conan install .. --output-folder . --build missing --settings build_type=Release
-   conan install .. --output-folder . --build missing --settings build_type=Debug
-   ```
+    ```
+    conan install .. --output-folder . --build missing --settings build_type=Release
+    conan install .. --output-folder . --build missing --settings build_type=Debug
+    ```

-   To build Debug, in the next step, be sure to set `-DCMAKE_BUILD_TYPE=Debug`
+    To build Debug, in the next step, be sure to set `-DCMAKE_BUILD_TYPE=Debug`

-   For a single-configuration generator, e.g. `Unix Makefiles` or `Ninja`,
-   you only need to run this command once.
-   For a multi-configuration generator, e.g. `Visual Studio`, you may want to
-   run it more than once.
+    For a single-configuration generator, e.g. `Unix Makefiles` or `Ninja`,
+    you only need to run this command once.
+    For a multi-configuration generator, e.g. `Visual Studio`, you may want to
+    run it more than once.

-   Each of these commands should also have a different `build_type` setting.
-   A second command with the same `build_type` setting will overwrite the files
-   generated by the first. You can pass the build type on the command line with
-   `--settings build_type=$BUILD_TYPE` or in the profile itself,
-   under the section `[settings]` with the key `build_type`.
+    Each of these commands should also have a different `build_type` setting.
+    A second command with the same `build_type` setting will overwrite the files
+    generated by the first. You can pass the build type on the command line with
+    `--settings build_type=$BUILD_TYPE` or in the profile itself,
+    under the section `[settings]` with the key `build_type`.

-   If you are using a Microsoft Visual C++ compiler,
-   then you will need to ensure consistency between the `build_type` setting
-   and the `compiler.runtime` setting.
+    If you are using a Microsoft Visual C++ compiler,
+    then you will need to ensure consistency between the `build_type` setting
+    and the `compiler.runtime` setting.

-   When `build_type` is `Release`, `compiler.runtime` should be `MT`.
+    When `build_type` is `Release`, `compiler.runtime` should be `MT`.

-   When `build_type` is `Debug`, `compiler.runtime` should be `MTd`.
+    When `build_type` is `Debug`, `compiler.runtime` should be `MTd`.

-   ```
-   conan install .. --output-folder . --build missing --settings build_type=Release --settings compiler.runtime=MT
-   conan install .. --output-folder . --build missing --settings build_type=Debug --settings compiler.runtime=MTd
-   ```
+    ```
+    conan install .. --output-folder . --build missing --settings build_type=Release --settings compiler.runtime=MT
+    conan install .. --output-folder . --build missing --settings build_type=Debug --settings compiler.runtime=MTd
+    ```

 3. Configure CMake and pass the toolchain file generated by Conan, located at
   `$OUTPUT_FOLDER/build/generators/conan_toolchain.cmake`.

-   Single-config generators:
+    Single-config generators:

-   Pass the CMake variable [`CMAKE_BUILD_TYPE`][build_type]
-   and make sure it matches the one of the `build_type` settings
-   you chose in the previous step.
+    Pass the CMake variable [`CMAKE_BUILD_TYPE`][build_type]
+    and make sure it matches the one of the `build_type` settings
+    you chose in the previous step.

-   For example, to build Debug, in the next command, replace "Release" with "Debug"
+    For example, to build Debug, in the next command, replace "Release" with "Debug"

-   ```
-   cmake -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -Dxrpld=ON -Dtests=ON ..
-   ```
+    ```
+    cmake -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release -Dxrpld=ON -Dtests=ON ..
+    ```

-   Multi-config generators:

-   ```
-   cmake -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake -Dxrpld=ON -Dtests=ON  ..
-   ```
+    Multi-config generators:

-   **Note:** You can pass build options for `rippled` in this step.
+    ```
+    cmake -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake -Dxrpld=ON -Dtests=ON  ..
+    ```

-4. Build `rippled`.
+    **Note:** You can pass build options for `rippled` in this step.
+
+5. Build `rippled`.

   For a single-configuration generator, it will build whatever configuration
-   you passed for `CMAKE_BUILD_TYPE`. For a multi-configuration generator, you
-   must pass the option `--config` to select the build configuration.
+   you passed for `CMAKE_BUILD_TYPE`. For a multi-configuration generator,
+   you must pass the option `--config` to select the build configuration. 

   Single-config generators:

   ```
-   cmake --build .
+   cmake --build . -j $(nproc)
   ```

   Multi-config generators:
@@ -444,27 +298,24 @@ tools.build:cxxflags=['-DBOOST_ASIO_DISABLE_CONCEPTS']
   cmake --build . --config Debug
   ```

-5. Test rippled.
+6. Test rippled.

   Single-config generators:

   ```
-   ./rippled --unittest --unittest-jobs N
+   ./rippled --unittest
   ```

   Multi-config generators:

   ```
-   ./Release/rippled --unittest --unittest-jobs N
-   ./Debug/rippled --unittest --unittest-jobs N
+   ./Release/rippled --unittest
+   ./Debug/rippled --unittest
   ```

-   Replace the `--unittest-jobs` parameter N with the desired unit tests
-   concurrency. Recommended setting is half of the number of available CPU
-   cores.
+   The location of `rippled` in your build directory depends on your CMake
+   generator. Pass `--help` to see the rest of the command line options.

-   The location of `rippled` binary in your build directory depends on your
-   CMake generator. Pass `--help` to see the rest of the command line options.

 ## Coverage report

@@ -505,7 +356,7 @@ variable in `cmake`. The specific command line used to run the `gcovr` tool will
 displayed if the `CODE_COVERAGE_VERBOSE` variable is set.

 By default, the code coverage tool runs parallel unit tests with `--unittest-jobs`
-set to the number of available CPU cores. This may cause spurious test
+ set to the number of available CPU cores. This may cause spurious test
 errors on Apple. Developers can override the number of unit test jobs with
 the `coverage_test_parallelism` variable in `cmake`.

@@ -521,56 +372,45 @@ cmake --build . --target coverage
 After the `coverage` target is completed, the generated coverage report will be
 stored inside the build directory, as either of:

- file named `coverage.`_extension_, with a suitable extension for the report format, or
+- file named `coverage.`_extension_, with a suitable extension for the report format, or
 - directory named `coverage`, with the `index.html` and other files inside, for the `html-details` or `html-nested` report formats.

+
 ## Options

-| Option     | Default Value | Description                                                                |
-| ---------- | ------------- | -------------------------------------------------------------------------- |
-| `assert`   | OFF           | Enable assertions.                                                         |
-| `coverage` | OFF           | Prepare the coverage report.                                               |
-| `san`      | N/A           | Enable a sanitizer with Clang. Choices are `thread` and `address`.         |
-| `tests`    | OFF           | Build tests.                                                               |
-| `unity`    | OFF           | Configure a unity build.                                                   |
-| `xrpld`    | OFF           | Build the xrpld (`rippled`) application, and not just the libxrpl library. |
-| `werr`     | OFF           | Treat compilation warnings as errors                                       |
-| `wextra`   | OFF           | Enable additional compilation warnings                                     |
+| Option | Default Value | Description |
+| --- | ---| ---|
+| `assert` | OFF | Enable assertions. |
+| `coverage` | OFF | Prepare the coverage report. |
+| `san` | N/A | Enable a sanitizer with Clang. Choices are `thread` and `address`. |
+| `tests` | OFF | Build tests. |
+| `unity` | ON | Configure a unity build. |
+| `xrpld` | OFF | Build the xrpld (`rippled`) application, and not just the libxrpl library. |

 [Unity builds][5] may be faster for the first build
 (at the cost of much more memory) since they concatenate sources into fewer
 translation units. Non-unity builds may be faster for incremental builds,
 and can be helpful for detecting `#include` omissions.

+
 ## Troubleshooting

+
 ### Conan

 After any updates or changes to dependencies, you may need to do the following:

 1. Remove your build directory.
-2. Remove individual libraries from the Conan cache, e.g.
-
-   ```bash
-   conan remove 'grpc/*'
+2. Remove the Conan cache:
   ```
-
-   **or**
-
-   Remove all libraries from Conan cache:
-
-   ```bash
-   conan remove '*'
+   rm -rf ~/.conan/data
   ```
-
-3. Re-run [conan export](#patched-recipes) if needed.
 3. Re-run [conan install](#build-and-test).

-### `protobuf/port_def.inc` file not found

-If `cmake --build .` results in an error due to a missing a protobuf file, then
-you might have generated CMake files for a different `build_type` than the
-`CMAKE_BUILD_TYPE` you passed to Conan.
+### 'protobuf/port_def.inc' file not found
+
+If `cmake --build .` results in an error due to a missing protobuf file, then you might have generated CMake files for a different `build_type` than the `CMAKE_BUILD_TYPE` you passed to Conan.

 ```
 /rippled/.build/pb-xrpl.libpb/xrpl/proto/ripple.pb.h:10:10: fatal error: 'google/protobuf/port_def.inc' file not found
@@ -584,21 +424,70 @@ For example, if you want to build Debug:
 1. For conan install, pass `--settings build_type=Debug`
 2. For cmake, pass `-DCMAKE_BUILD_TYPE=Debug`

+
+### no std::result_of
+
+If your compiler version is recent enough to have removed `std::result_of` as
+part of C++20, e.g. Apple Clang 15.0, then you might need to add a preprocessor
+definition to your build.
+
+```
+conan profile update 'options.boost:extra_b2_flags="define=BOOST_ASIO_HAS_STD_INVOKE_RESULT"' default
+conan profile update 'env.CFLAGS="-DBOOST_ASIO_HAS_STD_INVOKE_RESULT"' default
+conan profile update 'env.CXXFLAGS="-DBOOST_ASIO_HAS_STD_INVOKE_RESULT"' default
+conan profile update 'conf.tools.build:cflags+=["-DBOOST_ASIO_HAS_STD_INVOKE_RESULT"]' default
+conan profile update 'conf.tools.build:cxxflags+=["-DBOOST_ASIO_HAS_STD_INVOKE_RESULT"]' default
+```
+
+
+### call to 'async_teardown' is ambiguous
+
+If you are compiling with an early version of Clang 16, then you might hit
+a [regression][6] when compiling C++20 that manifests as an [error in a Boost
+header][7]. You can work around it by adding this preprocessor definition:
+
+```
+conan profile update 'env.CXXFLAGS="-DBOOST_ASIO_DISABLE_CONCEPTS"' default
+conan profile update 'conf.tools.build:cxxflags+=["-DBOOST_ASIO_DISABLE_CONCEPTS"]' default
+```
+
+
+### recompile with -fPIC
+
+If you get a linker error suggesting that you recompile Boost with
+position-independent code, such as:
+
+```
+/usr/bin/ld.gold: error: /home/username/.conan/data/boost/1.77.0/_/_/package/.../lib/libboost_container.a(alloc_lib.o):
+  requires unsupported dynamic reloc 11; recompile with -fPIC
+```
+
+Conan most likely downloaded a bad binary distribution of the dependency.
+This seems to be a [bug][1] in Conan just for Boost 1.77.0 compiled with GCC
+for Linux. The solution is to build the dependency locally by passing
+`--build boost` when calling `conan install`.
+
+```
+conan install --build boost ...
+```
+
+
 ## Add a Dependency

 If you want to experiment with a new package, follow these steps:

 1. Search for the package on [Conan Center](https://conan.io/center/).
 2. Modify [`conanfile.py`](./conanfile.py):
-   - Add a version of the package to the `requires` property.
-   - Change any default options for the package by adding them to the
-     `default_options` property (with syntax `'$package:$option': $value`).
+    - Add a version of the package to the `requires` property.
+    - Change any default options for the package by adding them to the
+    `default_options` property (with syntax `'$package:$option': $value`).
 3. Modify [`CMakeLists.txt`](./CMakeLists.txt):
-   - Add a call to `find_package($package REQUIRED)`.
-   - Link a library from the package to the target `ripple_libs`
-     (search for the existing call to `target_link_libraries(ripple_libs INTERFACE ...)`).
+    - Add a call to `find_package($package REQUIRED)`.
+    - Link a library from the package to the target `ripple_libs`
+    (search for the existing call to `target_link_libraries(ripple_libs INTERFACE ...)`).
 4. Start coding! Don't forget to include whatever headers you need from the package.

+
 [1]: https://github.com/conan-io/conan-center-index/issues/13168
 [2]: https://en.cppreference.com/w/cpp/compiler_support/20
 [3]: https://docs.conan.io/en/latest/getting_started.html
--- a/Builds/levelization/README.md
+++ b/Builds/levelization/README.md
@@ -25,28 +25,28 @@ more dependencies listed later.
 **tl;dr:** The modules listed first are more independent than the modules
 listed later.

-| Level / Tier | Module(s)                                                                                                |
-| ------------ | -------------------------------------------------------------------------------------------------------- |
-| 01           | ripple/beast ripple/unity                                                                                |
-| 02           | ripple/basics                                                                                            |
-| 03           | ripple/json ripple/crypto                                                                                |
-| 04           | ripple/protocol                                                                                          |
-| 05           | ripple/core ripple/conditions ripple/consensus ripple/resource ripple/server                             |
-| 06           | ripple/peerfinder ripple/ledger ripple/nodestore ripple/net                                              |
-| 07           | ripple/shamap ripple/overlay                                                                             |
-| 08           | ripple/app                                                                                               |
-| 09           | ripple/rpc                                                                                               |
-| 10           | ripple/perflog                                                                                           |
-| 11           | test/jtx test/beast test/csf                                                                             |
-| 12           | test/unit_test                                                                                           |
-| 13           | test/crypto test/conditions test/json test/resource test/shamap test/peerfinder test/basics test/overlay |
-| 14           | test                                                                                                     |
-| 15           | test/net test/protocol test/ledger test/consensus test/core test/server test/nodestore                   |
-| 16           | test/rpc test/app                                                                                        |
+| Level / Tier | Module(s)                                     |
+|--------------|-----------------------------------------------|
+| 01           | ripple/beast ripple/unity
+| 02           | ripple/basics
+| 03           | ripple/json ripple/crypto
+| 04           | ripple/protocol
+| 05           | ripple/core ripple/conditions ripple/consensus ripple/resource ripple/server
+| 06           | ripple/peerfinder ripple/ledger ripple/nodestore ripple/net
+| 07           | ripple/shamap ripple/overlay
+| 08           | ripple/app
+| 09           | ripple/rpc
+| 10           | ripple/perflog
+| 11           | test/jtx test/beast test/csf
+| 12           | test/unit_test
+| 13           | test/crypto test/conditions test/json test/resource test/shamap test/peerfinder test/basics test/overlay
+| 14           | test
+| 15           | test/net test/protocol test/ledger test/consensus test/core test/server test/nodestore
+| 16           | test/rpc test/app

-(Note that `test` levelization is _much_ less important and _much_ less
+(Note that `test` levelization is *much* less important and *much* less
 strictly enforced than `ripple` levelization, other than the requirement
-that `test` code should _never_ be included in `ripple` code.)
+that `test` code should *never* be included in `ripple` code.)

 ## Validation

@@ -59,48 +59,48 @@ the rippled source. The only caveat is that it runs much slower
 under Windows than in Linux. It hasn't yet been tested under MacOS.
 It generates many files of [results](results):

- `rawincludes.txt`: The raw dump of the `#includes`
- `paths.txt`: A second dump grouping the source module
+* `rawincludes.txt`: The raw dump of the `#includes`
+* `paths.txt`: A second dump grouping the source module
  to the destination module, deduped, and with frequency counts.
- `includes/`: A directory where each file represents a module and
+* `includes/`: A directory where each file represents a module and
  contains a list of modules and counts that the module _includes_.
- `includedby/`: Similar to `includes/`, but the other way around. Each
+* `includedby/`: Similar to `includes/`, but the other way around. Each
  file represents a module and contains a list of modules and counts
  that _include_ the module.
- [`loops.txt`](results/loops.txt): A list of direct loops detected
+* [`loops.txt`](results/loops.txt): A list of direct loops detected
  between modules as they actually exist, as opposed to how they are
  desired as described above. In a perfect repo, this file will be
  empty.
  This file is committed to the repo, and is used by the [levelization
  Github workflow](../../.github/workflows/levelization.yml) to validate
  that nothing changed.
- [`ordering.txt`](results/ordering.txt): A list showing relationships
+* [`ordering.txt`](results/ordering.txt): A list showing relationships
  between modules where there are no loops as they actually exist, as
  opposed to how they are desired as described above.
  This file is committed to the repo, and is used by the [levelization
  Github workflow](../../.github/workflows/levelization.yml) to validate
  that nothing changed.
- [`levelization.yml`](../../.github/workflows/levelization.yml)
+* [`levelization.yml`](../../.github/workflows/levelization.yml)
  Github Actions workflow to test that levelization loops haven't
-  changed. Unfortunately, if changes are detected, it can't tell if
+  changed.  Unfortunately, if changes are detected, it can't tell if
  they are improvements or not, so if you have resolved any issues or
  done anything else to improve levelization, run `levelization.sh`,
  and commit the updated results.

-The `loops.txt` and `ordering.txt` files relate the modules
+The `loops.txt` and `ordering.txt` files relate the modules
 using comparison signs, which indicate the number of times each
 module is included in the other.

- `A > B` means that A should probably be at a higher level than B,
+* `A > B` means that A should probably be at a higher level than B,
  because B is included in A significantly more than A is included in B.
  These results can be included in both `loops.txt` and `ordering.txt`.
  Because `ordering.txt` only includes relationships where B is not
  included in A at all, it will only include these types of results.
- `A ~= B` means that A and B are included in each other a different
+* `A ~= B` means that A and B are included in each other a different
  number of times, but the values are so close that the script can't
  definitively say that one should be above the other. These results
  will only be included in `loops.txt`.
- `A == B` means that A and B include each other the same number of
+* `A == B` means that A and B include each other the same number of
  times, so the script has no clue which should be higher. These results
  will only be included in `loops.txt`.

@@ -110,5 +110,5 @@ get those details locally.

 1. Run `levelization.sh`
 2. Grep the modules in `paths.txt`.
-   - For example, if a cycle is found `A ~= B`, simply `grep -w
-A Builds/levelization/results/paths.txt | grep -w B`
+   * For example, if a cycle is found `A ~= B`, simply `grep -w
+     A Builds/levelization/results/paths.txt | grep -w B`
--- a/Builds/levelization/results/loops.txt
+++ b/Builds/levelization/results/loops.txt
@@ -10,6 +10,9 @@ Loop: xrpld.app xrpld.core
 Loop: xrpld.app xrpld.ledger
  xrpld.app > xrpld.ledger

+Loop: xrpld.app xrpld.net
+  xrpld.app > xrpld.net
+
 Loop: xrpld.app xrpld.overlay
  xrpld.overlay > xrpld.app

@@ -22,9 +25,15 @@ Loop: xrpld.app xrpld.rpc
 Loop: xrpld.app xrpld.shamap
  xrpld.app > xrpld.shamap

+Loop: xrpld.core xrpld.net
+  xrpld.net > xrpld.core
+
 Loop: xrpld.core xrpld.perflog
  xrpld.perflog == xrpld.core

+Loop: xrpld.net xrpld.rpc
+  xrpld.rpc ~= xrpld.net
+
 Loop: xrpld.overlay xrpld.rpc
  xrpld.rpc ~= xrpld.overlay

--- a/Builds/levelization/results/ordering.txt
+++ b/Builds/levelization/results/ordering.txt
@@ -2,8 +2,6 @@ libxrpl.basics > xrpl.basics
 libxrpl.crypto > xrpl.basics
 libxrpl.json > xrpl.basics
 libxrpl.json > xrpl.json
-libxrpl.net > xrpl.basics
-libxrpl.net > xrpl.net
 libxrpl.protocol > xrpl.basics
 libxrpl.protocol > xrpl.json
 libxrpl.protocol > xrpl.protocol
@@ -64,9 +62,9 @@ test.jtx > xrpl.basics
 test.jtx > xrpld.app
 test.jtx > xrpld.core
 test.jtx > xrpld.ledger
+test.jtx > xrpld.net
 test.jtx > xrpld.rpc
 test.jtx > xrpl.json
-test.jtx > xrpl.net
 test.jtx > xrpl.protocol
 test.jtx > xrpl.resource
 test.jtx > xrpl.server
@@ -111,6 +109,7 @@ test.rpc > test.toplevel
 test.rpc > xrpl.basics
 test.rpc > xrpld.app
 test.rpc > xrpld.core
+test.rpc > xrpld.net
 test.rpc > xrpld.overlay
 test.rpc > xrpld.rpc
 test.rpc > xrpl.json
@@ -133,9 +132,7 @@ test.shamap > xrpl.protocol
 test.toplevel > test.csf
 test.toplevel > xrpl.json
 test.unit_test > xrpl.basics
-tests.libxrpl > xrpl.basics
 xrpl.json > xrpl.basics
-xrpl.net > xrpl.basics
 xrpl.protocol > xrpl.basics
 xrpl.protocol > xrpl.json
 xrpl.resource > xrpl.basics
@@ -151,7 +148,6 @@ xrpld.app > xrpld.consensus
 xrpld.app > xrpld.nodestore
 xrpld.app > xrpld.perflog
 xrpld.app > xrpl.json
-xrpld.app > xrpl.net
 xrpld.app > xrpl.protocol
 xrpld.app > xrpl.resource
 xrpld.conditions > xrpl.basics
@@ -161,11 +157,14 @@ xrpld.consensus > xrpl.json
 xrpld.consensus > xrpl.protocol
 xrpld.core > xrpl.basics
 xrpld.core > xrpl.json
-xrpld.core > xrpl.net
 xrpld.core > xrpl.protocol
 xrpld.ledger > xrpl.basics
 xrpld.ledger > xrpl.json
 xrpld.ledger > xrpl.protocol
+xrpld.net > xrpl.basics
+xrpld.net > xrpl.json
+xrpld.net > xrpl.protocol
+xrpld.net > xrpl.resource
 xrpld.nodestore > xrpl.basics
 xrpld.nodestore > xrpld.core
 xrpld.nodestore > xrpld.unity
@@ -189,7 +188,6 @@ xrpld.rpc > xrpld.core
 xrpld.rpc > xrpld.ledger
 xrpld.rpc > xrpld.nodestore
 xrpld.rpc > xrpl.json
-xrpld.rpc > xrpl.net
 xrpld.rpc > xrpl.protocol
 xrpld.rpc > xrpl.resource
 xrpld.rpc > xrpl.server
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,15 +90,11 @@ set_target_properties(OpenSSL::SSL PROPERTIES
  INTERFACE_COMPILE_DEFINITIONS OPENSSL_NO_SSL2
 )
 set(SECP256K1_INSTALL TRUE)
-set(SECP256K1_BUILD_BENCHMARK FALSE)
-set(SECP256K1_BUILD_TESTS FALSE)
-set(SECP256K1_BUILD_EXHAUSTIVE_TESTS FALSE)
-set(SECP256K1_BUILD_CTIME_TESTS FALSE)
-set(SECP256K1_BUILD_EXAMPLES FALSE)
 add_subdirectory(external/secp256k1)
 add_library(secp256k1::secp256k1 ALIAS secp256k1)
 add_subdirectory(external/ed25519-donna)
 add_subdirectory(external/antithesis-sdk)
+add_subdirectory(external/blake3)
 find_package(gRPC REQUIRED)
 find_package(lz4 REQUIRED)
 # Target names with :: are not allowed in a generator expression.
@@ -129,6 +125,7 @@ target_link_libraries(ripple_libs INTERFACE
  secp256k1::secp256k1
  soci::soci
  SQLite::SQLite3
+  blake3
 )

 # Work around changes to Conan recipe for now.
@@ -149,8 +146,3 @@ set(PROJECT_EXPORT_SET RippleExports)
 include(RippledCore)
 include(RippledInstall)
 include(RippledValidatorKeys)
-
-if(tests)
-  include(CTest)
-  add_subdirectory(src/tests/libxrpl)
-endif()
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -8,12 +8,13 @@ We assume you are familiar with the general practice of [making
 contributions on GitHub][contrib]. This file includes only special
 instructions specific to this project.

+
 ## Before you start

 The following branches exist in the main project repository:

 - `develop`: The latest set of unreleased features, and the most common
-  starting point for contributions.
+    starting point for contributions.
 - `release`: The latest beta release or release candidate.
 - `master`: The latest stable release.
 - `gh-pages`: The documentation for this project, built by Doxygen.
@@ -26,18 +27,18 @@ In general, external contributions should be developed in your personal
 [fork][forking]. Contributions from developers with write permissions
 should be done in [the main repository][rippled] in a branch with
 a permitted prefix. Permitted prefixes are:
+* XLS-[a-zA-Z0-9]+/.+
+  * e.g. XLS-0033d/mpt-clarify-STEitherAmount
+* [GitHub username]/.+
+  * e.g. JoelKatz/fix-rpc-webhook-queue
+* [Organization name]/.+
+  * e.g. ripple/antithesis

- XLS-[a-zA-Z0-9]+/.+
-  - e.g. XLS-0033d/mpt-clarify-STEitherAmount
- [GitHub username]/.+
-  - e.g. JoelKatz/fix-rpc-webhook-queue
- [Organization name]/.+
-  - e.g. ripple/antithesis
-
-Regardless of where the branch is created, please open a _draft_ pull
+Regardless of where the branch is created, please open a *draft* pull
 request as soon as possible after pushing the branch to Github, to
 increase visibility, and ease feedback during the development process.

+
 ## Major contributions

 If your contribution is a major feature or breaking change, then you
@@ -54,8 +55,8 @@ responsibility of the XLS author to update the draft to match the final
 implementation when its corresponding pull request is merged, unless the
 author delegates that responsibility to others.

-## Before making a pull request

+## Before making a pull request
 (Or marking a draft pull request as ready.)

 Changes that alter transaction processing must be guarded by an
@@ -72,12 +73,11 @@ automatic test run by `rippled --unittest`.
 Otherwise, it must be a manual test.

 If you create new source files, they must be organized as follows:
-
- If the files are in any of the `libxrpl` modules, the headers (`.h`) must go
+* If the files are in any of the `libxrpl` modules, the headers (`.h`) must go
  under `include/xrpl`, and source (`.cpp`) files must go under
  `src/libxrpl`.
- All other non-test files must go under `src/xrpld`.
- All test source files must go under `src/test`.
+* All other non-test files must go under `src/xrpld`.
+* All test source files must go under `src/test`.

 The source must be formatted according to the style guide below.

@@ -87,17 +87,16 @@ Changes should be usually squashed down into a single commit.
 Some larger or more complicated change sets make more sense,
 and are easier to review if organized into multiple logical commits.
 Either way, all commits should fit the following criteria:
-
- Changes should be presented in a single commit or a logical
+* Changes should be presented in a single commit or a logical
  sequence of commits.
  Specifically, chronological commits that simply
  reflect the history of how the author implemented
  the change, "warts and all", are not useful to
  reviewers.
- Every commit should have a [good message](#good-commit-messages).
+* Every commit should have a [good message](#good-commit-messages)
  to explain the specific aspects of the change.
- Every commit should be signed.
- Every commit should be well-formed (builds successfully,
+* Every commit should be signed.
+* Every commit should be well-formed (builds successfully,
  unit tests passing), as this helps to resolve merge
  conflicts, and makes it easier to use `git bisect`
  to find bugs.
@@ -109,14 +108,13 @@ Refer to
 for general rules on writing a good commit message.

 tl;dr
-
 > 1. Separate subject from body with a blank line.
 > 2. Limit the subject line to 50 characters.
->    - [...]shoot for 50 characters, but consider 72 the hard limit.
+>    * [...]shoot for 50 characters, but consider 72 the hard limit.
 > 3. Capitalize the subject line.
 > 4. Do not end the subject line with a period.
 > 5. Use the imperative mood in the subject line.
->    - A properly formed Git commit subject line should always be able
+>    * A properly formed Git commit subject line should always be able
 >      to complete the following sentence: "If applied, this commit will
 >      _your subject line here_".
 > 6. Wrap the body at 72 characters.
@@ -124,17 +122,16 @@ tl;dr

 In addition to those guidelines, please add one of the following
 prefixes to the subject line if appropriate.
-
- `fix:` - The primary purpose is to fix an existing bug.
- `perf:` - The primary purpose is performance improvements.
- `refactor:` - The changes refactor code without affecting
+* `fix:` - The primary purpose is to fix an existing bug.
+* `perf:` - The primary purpose is performance improvements.
+* `refactor:` - The changes refactor code without affecting
  functionality.
- `test:` - The changes _only_ affect unit tests.
- `docs:` - The changes _only_ affect documentation. This can
+* `test:` - The changes _only_ affect unit tests.
+* `docs:` - The changes _only_ affect documentation. This can
  include code comments in addition to `.md` files like this one.
- `build:` - The changes _only_ affect the build process,
+* `build:` - The changes _only_ affect the build process,
  including CMake and/or Conan settings.
- `chore:` - Other tasks that don't affect the binary, but don't fit
+* `chore:` - Other tasks that don't affect the binary, but don't fit
  any of the other cases. e.g. formatting, git settings, updating
  Github Actions jobs.

@@ -146,10 +143,9 @@ unit tests for Feature X (#1234)`.

 In general, pull requests use `develop` as the base branch.
 The exceptions are
-
- Fixes and improvements to a release candidate use `release` as the
+* Fixes and improvements to a release candidate use `release` as the
  base.
- Hotfixes use `master` as the base.
+* Hotfixes use `master` as the base.

 If your changes are not quite ready, but you want to make it easily available
 for preliminary examination or review, you can create a "Draft" pull request.
@@ -186,11 +182,11 @@ meets a few criteria:
 2. All CI checks must be complete and passed. (One-off failures may
   be acceptable if they are related to a known issue.)
 3. The PR must have a [good commit message](#good-commit-messages).
-   - If the PR started with a good commit message, and it doesn't
+   * If the PR started with a good commit message, and it doesn't
     need to be updated, the author can indicate that in a comment.
-   - Any contributor, preferably the author, can leave a comment
+   * Any contributor, preferably the author, can leave a comment
     suggesting a commit message.
-   - If the author squashes and rebases the code in preparation for
+   * If the author squashes and rebases the code in preparation for
     merge, they should also ensure the commit message(s) are updated
     as well.
 4. The PR branch must be up to date with the base branch (usually
@@ -212,6 +208,7 @@ This is a non-exhaustive list of recommended style guidelines. These are
 not always strictly enforced and serve as a way to keep the codebase
 coherent rather than a set of _thou shalt not_ commandments.

+
 ## Formatting

 All code must conform to `clang-format` version 18,
@@ -240,7 +237,6 @@ To download the patch file:
 5. Commit and push.

 You can install a pre-commit hook to automatically run `clang-format` before every commit:
-
 ```
 pip3 install pre-commit
 pre-commit install
@@ -271,51 +267,49 @@ locations, where the reporting of contract violations on the Antithesis
 platform is either not possible or not useful.

 For this reason:
-
- The locations where `assert` or `assert(false)` contracts should continue to be used:
-  - `constexpr` functions
-  - unit tests i.e. files under `src/test`
-  - unit tests-related modules (files under `beast/test` and `beast/unit_test`)
- Outside of the listed locations, do not use `assert`; use `XRPL_ASSERT` instead,
+* The locations where `assert` or `assert(false)` contracts should continue to be used:
+  * `constexpr` functions
+  * unit tests i.e. files under `src/test`
+  * unit tests-related modules (files under `beast/test` and `beast/unit_test`)
+* Outside of the listed locations, do not use `assert`; use `XRPL_ASSERT` instead,
  giving it a unique name, with a short description of the contract.
- Outside of the listed locations, do not use `assert(false)`; use
+* Outside of the listed locations, do not use `assert(false)`; use
  `UNREACHABLE` instead, giving it a unique name, with a description of the
  condition being violated
- The contract name should start with a full name (including scope) of the
-  function, optionally a named lambda, followed by a colon `:` and a brief
+* The contract name should start with a full name (including scope) of the
+  function, optionally a named lambda, followed by a colon ` : ` and a brief
  (typically at most five words) description. `UNREACHABLE` contracts
  can use slightly longer descriptions. If there are multiple overloads of the
  function, use common sense to balance both brevity and unambiguity of the
  function name. NOTE: the purpose of the name is to provide a stable means of
  unique identification of every contract; for this reason try to avoid elements
  which can change in some obvious refactors or when reinforcing the condition.
- Contract description typically (except for `UNREACHABLE`) should describe the
+* Contract description typically (except for `UNREACHABLE`) should describe the
  _expected_ condition, as in "I assert that _expected_ is true".
- Contract description for `UNREACHABLE` should describe the _unexpected_
+* Contract description for `UNREACHABLE` should describe the _unexpected_
  situation which caused the line to have been reached.
- Example good name for an
+* Example good name for an
  `UNREACHABLE` macro `"Json::operator==(Value, Value) : invalid type"`; example
  good name for an `XRPL_ASSERT` macro `"Json::Value::asCString : valid type"`.
- Example **bad** name
+* Example **bad** name
  `"RFC1751::insert(char* s, int x, int start, int length) : length is greater than or equal zero"`
  (missing namespace, unnecessary full function signature, description too verbose).
  Good name: `"ripple::RFC1751::insert : minimum length"`.
- In **few** well-justified cases a non-standard name can be used, in which case a
+* In **few** well-justified cases a non-standard name can be used, in which case a
  comment should be placed to explain the rationale (example in `contract.cpp`)
- Do **not** rename a contract without a good reason (e.g. the name no longer
+* Do **not** rename a contract without a good reason (e.g. the name no longer
  reflects the location or the condition being checked)
- Do not use `std::unreachable`
- Do not put contracts where they can be violated by an external condition
+* Do not use `std::unreachable`
+* Do not put contracts where they can be violated by an external condition
  (e.g. timing, data payload before mandatory validation etc.) as this creates
  bogus bug reports (and causes crashes of Debug builds)

 ## Unit Tests
-
 To execute all unit tests:

-`rippled --unittest --unittest-jobs=<number of cores>`
+```rippled --unittest --unittest-jobs=<number of cores>```

-(Note: Using multiple cores on a Mac M1 can cause spurious test failures. The
+(Note: Using multiple cores on a Mac M1 can cause spurious test failures. The 
 cause is still under investigation. If you observe this problem, try specifying fewer jobs.)

 To run a specific set of test suites:
@@ -323,11 +317,10 @@ To run a specific set of test suites:
 ```
 rippled --unittest TestSuiteName
 ```
-
 Note: In this example, all tests with prefix `TestSuiteName` will be run, so if
-`TestSuiteName1` and `TestSuiteName2` both exist, then both tests will run.
-Alternatively, if the unit test name finds an exact match, it will stop
-doing partial matches, i.e. if a unit test with a title of `TestSuiteName`
+`TestSuiteName1` and `TestSuiteName2` both exist, then both tests will run. 
+Alternatively, if the unit test name finds an exact match, it will stop 
+doing partial matches, i.e. if a unit test with a title of `TestSuiteName` 
 exists, then no other unit test will be executed, apart from `TestSuiteName`.

 ## Avoid
@@ -343,6 +336,7 @@ exists, then no other unit test will be executed, apart from `TestSuiteName`.
   explanatory comments.
 8. Importing new libraries unless there is a very good reason to do so.

+
 ## Seek to

 9. Extend functionality of existing code rather than creating new code.
@@ -357,12 +351,14 @@ exists, then no other unit test will be executed, apart from `TestSuiteName`.
 14. Provide as many comments as you feel that a competent programmer
    would need to understand what your code does.

+
 # Maintainers

 Maintainers are ecosystem participants with elevated access to the repository.
 They are able to push new code, make decisions on when a release should be
 made, etc.

+
 ## Adding and removing

 New maintainers can be proposed by two existing maintainers, subject to a vote
@@ -377,41 +373,47 @@ A minimum of 60% agreement and 50% participation are required.
 The XRP Ledger Foundation will have the ability, for cause, to remove an
 existing maintainer without a vote.

+
 ## Current Maintainers

 Maintainers are users with maintain or admin access to the repo.

- [bthomee](https://github.com/bthomee) (Ripple)
- [intelliot](https://github.com/intelliot) (Ripple)
- [JoelKatz](https://github.com/JoelKatz) (Ripple)
- [legleux](https://github.com/legleux) (Ripple)
- [mankins](https://github.com/mankins) (XRP Ledger Foundation)
- [WietseWind](https://github.com/WietseWind) (XRPL Labs + XRP Ledger Foundation)
- [ximinez](https://github.com/ximinez) (Ripple)
+* [bthomee](https://github.com/bthomee) (Ripple)
+* [intelliot](https://github.com/intelliot) (Ripple)
+* [JoelKatz](https://github.com/JoelKatz) (Ripple)
+* [nixer89](https://github.com/nixer89) (XRP Ledger Foundation)
+* [RichardAH](https://github.com/RichardAH) (XRP Ledger Foundation)
+* [Silkjaer](https://github.com/Silkjaer) (XRP Ledger Foundation)
+* [WietseWind](https://github.com/WietseWind) (XRPL Labs + XRP Ledger Foundation)
+* [ximinez](https://github.com/ximinez) (Ripple)
+

 ## Current Code Reviewers

 Code Reviewers are developers who have the ability to review, approve, and
 in some cases merge source code changes.

- [a1q123456](https://github.com/a1q123456) (Ripple)
- [Bronek](https://github.com/Bronek) (Ripple)
- [bthomee](https://github.com/bthomee) (Ripple)
- [ckeshava](https://github.com/ckeshava) (Ripple)
- [dangell7](https://github.com/dangell7) (XRPL Labs)
- [godexsoft](https://github.com/godexsoft) (Ripple)
- [gregtatcam](https://github.com/gregtatcam) (Ripple)
- [kuznetsss](https://github.com/kuznetsss) (Ripple)
- [lmaisons](https://github.com/lmaisons) (Ripple)
- [mathbunnyru](https://github.com/mathbunnyru) (Ripple)
- [mvadari](https://github.com/mvadari) (Ripple)
- [oleks-rip](https://github.com/oleks-rip) (Ripple)
- [PeterChen13579](https://github.com/PeterChen13579) (Ripple)
- [pwang200](https://github.com/pwang200) (Ripple)
- [q73zhao](https://github.com/q73zhao) (Ripple)
- [shawnxie999](https://github.com/shawnxie999) (Ripple)
- [Tapanito](https://github.com/Tapanito) (Ripple)
- [ximinez](https://github.com/ximinez) (Ripple)
+* [HowardHinnant](https://github.com/HowardHinnant) (Ripple)
+* [scottschurr](https://github.com/scottschurr) (Ripple)
+* [seelabs](https://github.com/seelabs) (Ripple)
+* [Ed Hennis](https://github.com/ximinez) (Ripple)
+* [mvadari](https://github.com/mvadari) (Ripple)
+* [thejohnfreeman](https://github.com/thejohnfreeman) (Ripple)
+* [Bronek](https://github.com/Bronek) (Ripple)
+* [manojsdoshi](https://github.com/manojsdoshi) (Ripple)
+* [godexsoft](https://github.com/godexsoft) (Ripple)
+* [mDuo13](https://github.com/mDuo13) (Ripple)
+* [ckniffen](https://github.com/ckniffen) (Ripple)
+* [arihantkothari](https://github.com/arihantkothari) (Ripple)
+* [pwang200](https://github.com/pwang200) (Ripple)
+* [sophiax851](https://github.com/sophiax851) (Ripple)
+* [shawnxie999](https://github.com/shawnxie999) (Ripple)
+* [gregtatcam](https://github.com/gregtatcam) (Ripple)
+* [mtrippled](https://github.com/mtrippled) (Ripple)
+* [ckeshava](https://github.com/ckeshava) (Ripple)
+* [nbougalis](https://github.com/nbougalis) None
+* [RichardAH](https://github.com/RichardAH) (XRPL Labs + XRP Ledger Foundation)
+* [dangell7](https://github.com/dangell7) (XRPL Labs)

 Developers not on this list are able and encouraged to submit feedback
 on pending code changes (open pull requests).
@@ -421,7 +423,6 @@ on pending code changes (open pull requests).
 These instructions assume you have your git upstream remotes configured
 to avoid accidental pushes to the main repo, and a remote group
 specifying both of them. e.g.
-
 ```
 $ git remote -v | grep upstream
 upstream        https://github.com/XRPLF/rippled.git (fetch)
@@ -436,7 +437,6 @@ upstream upstream-push
 You can use the [setup-upstreams] script to set this up.

 It also assumes you have a default gpg signing key set up in git. e.g.
-
 ```
 $ git config user.signingkey
 968479A1AFF927E37D1A566BB5690EEEBB952194
@@ -461,8 +461,8 @@ the suggested commit message, or modify it as needed.
 #### Slightly more complicated pull requests

 Some pull requests need to be pushed to `develop` as more than one
-commit. A PR author may _request_ to merge as separate commits. They
-must _justify_ why separate commits are needed, and _specify_ how they
+commit. A PR author may *request* to merge as separate commits. They
+must *justify* why separate commits are needed, and *specify* how they
 would like the commits to be merged. If you disagree with the author,
 discuss it with them directly.

@@ -471,22 +471,20 @@ fast forward only merge (`--ff-only`) on the command line and push to
 `develop`.

 Some examples of when separate commits are worthwhile are:
-
 1. PRs where source files are reorganized in multiple steps.
-2. PRs where the commits are mostly independent and _could_ be separate
+2. PRs where the commits are mostly independent and *could* be separate
   PRs, but are pulled together into one PR under a commit theme or
   issue.
 3. PRs that are complicated enough that `git bisect` would not be much
   help if it determined this PR introduced a problem.

 Either way, check that:
-
- The commits are based on the current tip of `develop`.
- The commits are clean: No merge commits (except when reverse
+* The commits are based on the current tip of `develop`.
+* The commits are clean: No merge commits (except when reverse
  merging), no "[FOLD]" or "fixup!" messages.
- All commits are signed. If the commits are not signed by the author, use
+* All commits are signed. If the commits are not signed by the author, use
  `git commit --amend -S` to sign them yourself.
- At least one (but preferably all) of the commits has the PR number
+* At least one (but preferably all) of the commits has the PR number
  in the commit message.

 The "Create a merge commit" and "Rebase and merge" options should be
@@ -504,13 +502,13 @@ Rippled uses a linear workflow model that can be summarized as:
 1. In between releases, developers work against the `develop` branch.
 2. Periodically, a maintainer will build and tag a beta version from
   `develop`, which is pushed to `release`.
-   - Betas are usually released every two to three weeks, though that
+   * Betas are usually released every two to three weeks, though that
     schedule can vary depending on progress, availability, and other
     factors.
 3. When the changes in `develop` are considered stable and mature enough
   to be ready to release, a release candidate (RC) is built and tagged
   from `develop`, and merged to `release`.
-   - Further development for that release (primarily fixes) then
+   * Further development for that release (primarily fixes) then
     continues against `release`, while other development continues on
     `develop`. Effectively, `release` is forked from `develop`. Changes
     to `release` must be reverse merged to `develop`.
@@ -545,7 +543,6 @@ Rippled uses a linear workflow model that can be summarized as:
         the version number, etc.

      The workflow may look something like:
-
 ```
 git fetch --multiple upstreams user1 user2 user3 [...]
 git checkout -B release-next --no-track upstream/develop
@@ -584,9 +581,8 @@ This includes, betas, and the first release candidate (RC).

 1. If you didn't create one [preparing the `develop`
   branch](#preparing-the-develop-branch), ensure there is no old
-   `release-next` branch hanging around. Then make a `release-next`
+   `release-next` branch hanging around.  Then make a `release-next`
   branch that only changes the version number. e.g.
-
 ```
 git fetch upstreams

@@ -607,30 +603,25 @@ git push upstream-push
 git fetch upstreams
 git branch --set-upstream-to=upstream/release-next
 ```
-
-You can also use the [update-version] script. 2. Create a Pull Request for `release-next` with **`develop`** as
-the base branch.
-
-1.  Use the title "[TRIVIAL] Set version to X.X.X-bX".
-2.  Instead of the default description template, use the following:
-
+   You can also use the [update-version] script.
+2. Create a Pull Request for `release-next` with **`develop`** as
+   the base branch.
+   1. Use the title "[TRIVIAL] Set version to X.X.X-bX".
+   2. Instead of the default description template, use the following:
 ```
 ## High Level Overview of Change

 This PR only changes the version number. It will be merged as
 soon as Github CI actions successfully complete.
 ```
-
 3. Wait for CI to successfully complete, and get someone to approve
   the PR. (It is safe to ignore known CI issues.)
 4. Push the updated `develop` branch using your `release-next`
   branch. **Do not use the Github UI. It's important to preserve
   commit IDs.**
-
 ```
 git push upstream-push release-next:develop
 ```
-
 5. In the unlikely event that the push fails because someone has merged
   something else in the meantime, rebase your branch onto the updated
   `develop` branch, push again, and go back to step 3.
@@ -639,25 +630,22 @@ git push upstream-push release-next:develop
 7. Once this is done, forward progress on `develop` can continue
   (other PRs may be merged).
 8. Now create a Pull Request for `release-next` with **`release`** as
-   the base branch. Instead of the default template, reuse and update
+   the base branch.  Instead of the default template, reuse and update
   the message from the previous release. Include the following verbiage
   somewhere in the description:
-
 ```
 The base branch is `release`. [All releases (including
 betas)](https://github.com/XRPLF/rippled/blob/develop/CONTRIBUTING.md#before-you-start)
 go in `release`. This PR branch will be pushed directly to `release` (not
 squashed or rebased, and not using the GitHub UI).
 ```
-
 7. Sign-offs for the three platforms (Linux, Mac, Windows) usually occur
   offline, but at least one approval will be needed on the PR.
-   - If issues are discovered during testing, simply abandon the
-     release. It's easy to start a new release, it should be easy to
+   * If issues are discovered during testing, simply abandon the
+     release.  It's easy to start a new release, it should be easy to
     abandon one. **DO NOT REUSE THE VERSION NUMBER.** e.g. If you
     abandon 2.4.0-b1, the next attempt will be 2.4.0-b2.
 8. Once everything is ready to go, push to `release`.
-
 ```
 git fetch upstreams

@@ -678,28 +666,23 @@ git log -1 --oneline
 # Other branches, including some from upstream-push, may also be
 # present.
 ```
-
 9. Tag the release, too.
-
 ```
 git tag <version number>
 git push upstream-push <version number>
 ```
-
 10. Delete the `release-next` branch on the repo. Use the Github UI or:
-
 ```
 git push --delete upstream-push release-next
 ```
-
 11. Finally [create a new release on
    Github](https://github.com/XRPLF/rippled/releases).

 #### Release candidates after the first

 Once the first release candidate is [merged into
-release](#making-the-release), then `release` and `develop` _are allowed
-to diverge_.
+release](#making-the-release), then `release` and `develop` *are allowed
+to diverge*.

 If a bug or issue is discovered in a version that has a release
 candidate being tested, any fix and new version will need to be applied
@@ -707,7 +690,7 @@ against `release`, then reverse-merged to `develop`. This helps keep git
 history as linear as possible.

 A `release-next` branch will be created from `release`, and any further
-work for that release must be based on `release-next`. Specifically,
+work for that release must be based on `release-next`.  Specifically,
 PRs must use `release-next` as the base, and those PRs will be merged
 directly to `release-next` when approved. Changes should be restricted
 to bug fixes, but other changes may be necessary from time to time.
@@ -730,21 +713,17 @@ Once the RC is merged and tagged, it needs to be reverse merged into
 1. Create a branch, based on `upstream/develop`.
   The branch name is not important, but could include "mergeNNNrcN".
   E.g. For release A.B.C-rcD, use `mergeABCrcD`.
-
 ```
 git fetch upstreams

 git checkout --no-track -b mergeABCrcD upstream/develop
 ```
-
 2. Merge `release` into your branch.
-
 ```
 # I like the "--edit --log --verbose" parameters, but they are
 # not required.
 git merge upstream/release
 ```
-
 3. `BuildInfo.cpp` will have a conflict with the version number.
   Resolve it with the version from `develop` - the higher version.
 4. Push your branch to your repo (or `upstream` if you have permission),
@@ -752,27 +731,22 @@ git merge upstream/release
   simply indicate that this is a merge of the RC. The "Context" should
   summarize the changes from the RC. Include the following text
   prominently:
-
 ```
 This PR must be merged manually using a push. Do not use the Github UI.
 ```
-
 5. Depending on the complexity of the changes, and/or merge conflicts,
   the PR may need a thorough review, or just a sign-off that the
   merge was done correctly.
 6. If `develop` is updated before this PR is merged, do not merge
   `develop` back into your branch. Instead rebase preserving merges,
   or do the merge again. (See also the `rerere` git config setting.)
-
 ```
 git rebase --rebase-merges upstream/develop
 # OR
 git reset --hard upstream/develop
 git merge upstream/release
 ```
-
 7. When the PR is ready, push it to `develop`.
-
 ```
 git fetch upstreams

@@ -783,9 +757,9 @@ git push upstream-push mergeABCrcD:develop

 git fetch upstreams
 ```
-
 Development on `develop` can proceed as normal.

+
 #### Final releases

 A final release is any release that is not a beta or RC, such as 2.2.0.
@@ -799,7 +773,7 @@ internally as if they were RCs (at minimum, ensuring unit tests pass,
 and the app starts, syncs, and stops cleanly across all three
 platforms.)

-_If in doubt, make an RC first._
+*If in doubt, make an RC first.*

 The process for building a final release is very similar to [the process
 for building a beta](#making-the-release), except the code will be
@@ -811,23 +785,20 @@ moving from `release` to `master` instead of from `develop` to
   number. As above, or using the
   [update-version] script.
 2. Create a Pull Request for `master-next` with **`master`** as
-   the base branch. Instead of the default template, reuse and update
+   the base branch.  Instead of the default template, reuse and update
   the message from the previous final release. Include the following verbiage
   somewhere in the description:
-
 ```
 The base branch is `master`. This PR branch will be pushed directly to
 `release` and `master` (not squashed or rebased, and not using the
 GitHub UI).
 ```
-
 7. Sign-offs for the three platforms (Linux, Mac, Windows) usually occur
   offline, but at least one approval will be needed on the PR.
-   - If issues are discovered during testing, close the PR, delete
+   * If issues are discovered during testing, close the PR, delete
     `master-next`, and move development back to `release`, [issuing
     more RCs as necessary](#release-candidates-after-the-first)
 8. Once everything is ready to go, push to `release` and `master`.
-
 ```
 git fetch upstreams

@@ -850,20 +821,15 @@ git log -1 --oneline
 # Other branches, including some from upstream-push, may also be
 # present.
 ```
-
 9. Tag the release, too.
-
 ```
 git tag <version number>
 git push upstream-push <version number>
 ```
-
 10. Delete the `master-next` branch on the repo. Use the Github UI or:
-
 ```
 git push --delete upstream-push master-next
 ```
-
 11. [Create a new release on
    Github](https://github.com/XRPLF/rippled/releases). Be sure that
    "Set as the latest release" is checked.
@@ -890,13 +856,11 @@ any branch. When it's ready to merge, jump to step 3 using your branch
 instead of `master-next`.

 1. Create a `master-next` branch from `master`.
-
 ```
 git checkout --no-track -b master-next upstream/master
 git push upstream-push
 git fetch upstreams
 ```
-
 2. Open any PRs for the pending hotfix using `master-next` as the base,
   so they can be merged directly into it. Unlike `develop`, though,
   `master-next` can be thrown away and recreated if necessary.
@@ -904,22 +868,19 @@ git fetch upstreams
   steps as above, or use the
   [update-version] script.
 4. Create a Pull Request for `master-next` with **`master`** as
-   the base branch. Instead of the default template, reuse and update
+   the base branch.  Instead of the default template, reuse and update
   the message from the previous final release. Include the following verbiage
   somewhere in the description:
-
 ```
 The base branch is `master`. This PR branch will be pushed directly to
 `master` (not squashed or rebased, and not using the GitHub UI).
 ```
-
 7. Sign-offs for the three platforms (Linux, Mac, Windows) usually occur
   offline, but at least one approval will be needed on the PR.
-   - If issues are discovered during testing, update `master-next` as
+   * If issues are discovered during testing, update `master-next` as
     needed, but ensure that the changes are properly squashed, and the
     version setting commit remains last
 8. Once everything is ready to go, push to `master` **only**.
-
 ```
 git fetch upstreams

@@ -940,20 +901,15 @@ git log -1 --oneline
 # Other branches, including some from upstream-push, may also be
 # present.
 ```
-
 9. Tag the release, too.
-
 ```
 git tag <version number>
 git push upstream-push <version number>
 ```
-
 9. Delete the `master-next` branch on the repo.
-
 ```
 git push --delete upstream-push master-next
 ```
-
 10. [Create a new release on
    Github](https://github.com/XRPLF/rippled/releases). Be sure that
    "Set as the latest release" is checked.
@@ -965,21 +921,17 @@ Once the hotfix is released, it needs to be reverse merged into
 1. Create a branch in your own repo, based on `upstream/develop`.
   The branch name is not important, but could include "mergeNNN".
   E.g. For release 2.2.3, use `merge223`.
-
 ```
 git fetch upstreams

 git checkout --no-track -b merge223 upstream/develop
 ```
-
 2. Merge master into your branch.
-
 ```
 # I like the "--edit --log --verbose" parameters, but they are
 # not required.
 git merge upstream/master
 ```
-
 3. `BuildInfo.cpp` will have a conflict with the version number.
   Resolve it with the version from `develop` - the higher version.
 4. Push your branch to your repo, and open a normal PR against
@@ -987,27 +939,22 @@ git merge upstream/master
   is a merge of the hotfix version. The "Context" should summarize
   the changes from the hotfix. Include the following text
   prominently:
-
 ```
 This PR must be merged manually using a --ff-only merge. Do not use the Github UI.
 ```
-
 5. Depending on the complexity of the hotfix, and/or merge conflicts,
   the PR may need a thorough review, or just a sign-off that the
   merge was done correctly.
 6. If `develop` is updated before this PR is merged, do not merge
   `develop` back into your branch. Instead rebase preserving merges,
   or do the merge again. (See also the `rerere` git config setting.)
-
 ```
 git rebase --rebase-merges upstream/develop
 # OR
 git reset --hard upstream/develop
 git merge upstream/master
 ```
-
 7. When the PR is ready, push it to `develop`.
-
 ```
 git fetch upstreams

@@ -1016,7 +963,6 @@ git log --show-signature "upstream/develop..HEAD"

 git push upstream-push HEAD:develop
 ```
-
 Development on `develop` can proceed as normal. It is recommended to
 create a beta (or RC) immediately to ensure that everything worked as
 expected.
@@ -1031,13 +977,12 @@ a significant fraction of users, which would necessitate a hotfix / point
 release to that version as well as any later versions.

 This scenario would follow the same basic procedure as above,
-except that _none_ of `develop`, `release`, or `master`
+except that *none* of `develop`, `release`, or `master`
 would be touched during the release process.

 In this example, consider if version 2.1.1 needed to be patched.

 1. Create two branches in the main (`upstream`) repo.
-
 ```
 git fetch upstreams

@@ -1051,7 +996,6 @@ git push upstream-push

 git fetch upstreams
 ```
-
 2. Work continues as above, except using `master-2.1.2` as
   the base branch for any merging, packaging, etc.
 3. After the release is tagged and packages are built, you could
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,4 +1,4 @@
-ISC License
+ISC License 

 Copyright (c) 2011, Arthur Britto, David Schwartz, Jed McCaleb, Vinnie Falco, Bob Way, Eric Lombrozo, Nikolaos D. Bougalis, Howard Hinnant.
 Copyright (c) 2012-2020, the XRP Ledger developers.
@@ -14,3 +14,4 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
--- a/README.md
+++ b/README.md
@@ -5,19 +5,17 @@
 The [XRP Ledger](https://xrpl.org/) is a decentralized cryptographic ledger powered by a network of peer-to-peer nodes. The XRP Ledger uses a novel Byzantine Fault Tolerant consensus algorithm to settle and record transactions in a secure distributed database without a central operator.

 ## XRP
-
 [XRP](https://xrpl.org/xrp.html) is a public, counterparty-free asset native to the XRP Ledger, and is designed to bridge the many different currencies in use worldwide. XRP is traded on the open-market and is available for anyone to access. The XRP Ledger was created in 2012 with a finite supply of 100 billion units of XRP.

 ## rippled
-
 The server software that powers the XRP Ledger is called `rippled` and is available in this repository under the permissive [ISC open-source license](LICENSE.md). The `rippled` server software is written primarily in C++ and runs on a variety of platforms. The `rippled` server software can run in several modes depending on its [configuration](https://xrpl.org/rippled-server-modes.html).

 If you are interested in running an **API Server** (including a **Full History Server**), take a look at [Clio](https://github.com/XRPLF/clio). (rippled Reporting Mode has been replaced by Clio.)

 ### Build from Source

- [Read the build instructions in `BUILD.md`](BUILD.md)
- If you encounter any issues, please [open an issue](https://github.com/XRPLF/rippled/issues)
+* [Read the build instructions in `BUILD.md`](BUILD.md)
+* If you encounter any issues, please [open an issue](https://github.com/XRPLF/rippled/issues)

 ## Key Features of the XRP Ledger

@@ -37,6 +35,7 @@ If you are interested in running an **API Server** (including a **Full History S
 [Modern Features for Smart Contracts]: https://xrpl.org/xrp-ledger-overview.html#modern-features-for-smart-contracts
 [On-Ledger Decentralized Exchange]: https://xrpl.org/xrp-ledger-overview.html#on-ledger-decentralized-exchange

+
 ## Source Code

 Here are some good places to start learning the source code:
@@ -48,7 +47,7 @@ Here are some good places to start learning the source code:
 ### Repository Contents

 | Folder     | Contents                                         |
-| :--------- | :----------------------------------------------- |
+|:-----------|:-------------------------------------------------|
 | `./bin`    | Scripts and data files for Ripple integrators.   |
 | `./Builds` | Platform-specific guides for building `rippled`. |
 | `./docs`   | Source documentation files and doxygen config.   |
@@ -58,14 +57,15 @@ Here are some good places to start learning the source code:
 Some of the directories under `src` are external repositories included using
 git-subtree. See those directories' README files for more details.

+
 ## Additional Documentation

- [XRP Ledger Dev Portal](https://xrpl.org/)
- [Setup and Installation](https://xrpl.org/install-rippled.html)
- [Source Documentation (Doxygen)](https://xrplf.github.io/rippled/)
+* [XRP Ledger Dev Portal](https://xrpl.org/)
+* [Setup and Installation](https://xrpl.org/install-rippled.html)
+* [Source Documentation (Doxygen)](https://xrplf.github.io/rippled/)

 ## See Also

- [Clio API Server for the XRP Ledger](https://github.com/XRPLF/clio)
- [Mailing List for Release Announcements](https://groups.google.com/g/ripple-server)
- [Learn more about the XRP Ledger (YouTube)](https://www.youtube.com/playlist?list=PLJQ55Tj1hIVZtJ_JdTvSum2qMTsedWkNi)
+* [Clio API Server for the XRP Ledger](https://github.com/XRPLF/clio)
+* [Mailing List for Release Announcements](https://groups.google.com/g/ripple-server)
+* [Learn more about the XRP Ledger (YouTube)](https://www.youtube.com/playlist?list=PLJQ55Tj1hIVZtJ_JdTvSum2qMTsedWkNi)
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,6 +2,7 @@

 For more details on operating an XRP Ledger server securely, please visit https://xrpl.org/manage-the-rippled-server.html.

+
 # Security Policy

 ## Supported Versions
@@ -76,14 +77,13 @@ The amount paid varies dramatically. Vulnerabilities that are harmless on their

 To report a qualifying bug, please send a detailed report to:

-| Email Address | bugs@ripple.com                                     |
-| :-----------: | :-------------------------------------------------- |
-| Short Key ID  | `0xC57929BE`                                        |
-|  Long Key ID  | `0xCD49A0AFC57929BE`                                |
-|  Fingerprint  | `24E6 3B02 37E0 FA9C 5E96 8974 CD49 A0AF C579 29BE` |
-
-The full PGP key for this address, which is also available on several key servers (e.g. on [keyserver.ubuntu.com](https://keyserver.ubuntu.com)), is:
+|Email Address|bugs@ripple.com                                      |
+|:-----------:|:----------------------------------------------------|
+|Short Key ID | `0xC57929BE`                                        |
+|Long Key ID  | `0xCD49A0AFC57929BE`                                |
+|Fingerprint  | `24E6 3B02 37E0 FA9C 5E96 8974 CD49 A0AF C579 29BE` |

+The full PGP key for this address, which is also available on several key servers (e.g. on [keyserver.ubuntu.com](https://keyserver.ubuntu.com)), is: 
 ```
 -----BEGIN PGP PUBLIC KEY BLOCK-----
 mQINBFUwGHYBEAC0wpGpBPkd8W1UdQjg9+cEFzeIEJRaoZoeuJD8mofwI5Ejnjdt
--- a/bin/browser.js
+++ b/bin/browser.js
@@ -0,0 +1,470 @@
+#!/usr/bin/node
+//
+// ledger?l=L
+// transaction?h=H
+// ledger_entry?l=L&h=H
+// account?l=L&a=A
+// directory?l=L&dir_root=H&i=I
+// directory?l=L&o=A&i=I     // owner directory
+// offer?l=L&offer=H
+// offer?l=L&account=A&i=I
+// ripple_state?l=L&a=A&b=A&c=C
+// account_lines?l=L&a=A
+//
+// A=address
+// C=currency 3 letter code
+// H=hash
+// I=index
+// L=current | closed | validated | index | hash
+//
+
+var async     = require("async");
+var extend    = require("extend");
+var http      = require("http");
+var url       = require("url");
+
+var Remote    = require("ripple-lib").Remote;
+
+var program   = process.argv[1];
+
+var httpd_response = function (res, opts) {
+  var self=this;
+
+  res.statusCode = opts.statusCode;
+  res.end(
+    "<HTML>"
+      + "<HEAD><TITLE>Title</TITLE></HEAD>"
+      + "<BODY BACKGROUND=\"#FFFFFF\">"
+      + "State:" + self.state
+      + "<UL>"
+      + "<LI><A HREF=\"/\">home</A>"
+      + "<LI>" + html_link('r4EM4gBQfr1QgQLXSPF4r7h84qE9mb6iCC')
+//      + "<LI><A HREF=\""+test+"\">rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh</A>"
+      + "<LI><A HREF=\"/ledger\">ledger</A>"
+      + "</UL>"
+      + (opts.body || '')
+      + '<HR><PRE>'
+      + (opts.url || '')
+      + '</PRE>'
+      + "</BODY>"
+      + "</HTML>"
+    );
+};
+
+var html_link = function (generic) {
+  return '<A HREF="' + build_uri({ type: 'account', account: generic}) + '">' + generic + '</A>';
+};
+
+// Build a link to a type.
+var build_uri = function (params, opts) {
+  var c;
+
+  if (params.type === 'account') {
+    c = {
+        pathname: 'account',
+        query: {
+          l: params.ledger,
+          a: params.account,
+        },
+      };
+
+  } else if (params.type === 'ledger') {
+    c = {
+        pathname: 'ledger',
+        query: {
+          l: params.ledger,
+        },
+      };
+
+  } else if (params.type === 'transaction') {
+    c = {
+        pathname: 'transaction',
+        query: {
+          h: params.hash,
+        },
+      };
+  } else {
+    c = {};
+  }
+
+  opts  = opts || {};
+
+  c.protocol  = "http";
+  c.hostname  = opts.hostname || self.base.hostname;
+  c.port      = opts.port || self.base.port;
+
+  return url.format(c);
+};
+
+var build_link = function (item, link) {
+console.log(link);
+  return "<A HREF=" + link + ">" + item + "</A>";
+};
+
+var rewrite_field = function (type, obj, field, opts) {
+  if (field in obj) {
+    obj[field]  = rewrite_type(type, obj[field], opts);
+  }
+};
+
+var rewrite_type = function (type, obj, opts) {
+  if ('amount' === type) {
+    if ('string' === typeof obj) {
+      // XRP.
+      return '<B>' + obj + '</B>';
+
+    } else {
+      rewrite_field('address', obj, 'issuer', opts);
+
+      return obj; 
+    }
+    return build_link(
+      obj,
+      build_uri({
+          type: 'account',
+          account: obj
+        }, opts)
+    );
+  }
+  if ('address' === type) {
+    return build_link(
+      obj,
+      build_uri({
+          type: 'account',
+          account: obj
+        }, opts)
+    );
+  }
+  else if ('ledger' === type) {
+    return build_link(
+      obj,
+      build_uri({
+          type: 'ledger',
+          ledger: obj,
+        }, opts)
+      );
+  }
+  else if ('node' === type) {
+    // A node
+    if ('PreviousTxnID' in obj)
+      obj.PreviousTxnID      = rewrite_type('transaction', obj.PreviousTxnID, opts);
+
+    if ('Offer' === obj.LedgerEntryType) {
+      if ('NewFields' in obj) {
+        if ('TakerGets' in obj.NewFields)
+          obj.NewFields.TakerGets = rewrite_type('amount', obj.NewFields.TakerGets, opts);
+
+        if ('TakerPays' in obj.NewFields)
+          obj.NewFields.TakerPays = rewrite_type('amount', obj.NewFields.TakerPays, opts);
+      }
+    }
+
+    obj.LedgerEntryType  = '<B>' + obj.LedgerEntryType + '</B>';
+
+    return obj;
+  }
+  else if ('transaction' === type) {
+    // Reference to a transaction.
+    return build_link(
+      obj,
+      build_uri({
+          type: 'transaction',
+          hash: obj
+        }, opts)
+      );
+  }
+
+  return 'ERROR: ' + type;
+};
+
+var rewrite_object = function (obj, opts) {
+  var out = extend({}, obj);
+
+  rewrite_field('address', out, 'Account', opts);
+
+  rewrite_field('ledger', out, 'parent_hash', opts);
+  rewrite_field('ledger', out, 'ledger_index', opts);
+  rewrite_field('ledger', out, 'ledger_current_index', opts);
+  rewrite_field('ledger', out, 'ledger_hash', opts);
+
+  if ('ledger' in obj) {
+    // It's a ledger header.
+    out.ledger  = rewrite_object(out.ledger, opts);
+
+    if ('ledger_hash' in out.ledger)
+      out.ledger.ledger_hash = '<B>' + out.ledger.ledger_hash + '</B>';
+
+    delete out.ledger.hash;
+    delete out.ledger.totalCoins;
+  }
+
+  if ('TransactionType' in obj) {
+    // It's a transaction.
+    out.TransactionType = '<B>' + obj.TransactionType + '</B>';
+
+    rewrite_field('amount', out, 'TakerGets', opts);
+    rewrite_field('amount', out, 'TakerPays', opts);
+    rewrite_field('ledger', out, 'inLedger', opts);
+
+    out.meta.AffectedNodes = out.meta.AffectedNodes.map(function (node) {
+        var kind  = 'CreatedNode' in node
+          ? 'CreatedNode'
+          : 'ModifiedNode' in node
+            ? 'ModifiedNode'
+            : 'DeletedNode' in node
+              ? 'DeletedNode'
+              : undefined;
+        
+        if (kind) {
+          node[kind]  = rewrite_type('node', node[kind], opts);
+        }
+        return node;
+      });
+  }
+  else if ('node' in obj && 'LedgerEntryType' in obj.node) {
+    // Its a ledger entry.
+
+    if (obj.node.LedgerEntryType === 'AccountRoot') {
+      rewrite_field('address', out.node, 'Account', opts);
+      rewrite_field('transaction', out.node, 'PreviousTxnID', opts);
+      rewrite_field('ledger', out.node, 'PreviousTxnLgrSeq', opts);
+    }
+
+    out.node.LedgerEntryType = '<B>' + out.node.LedgerEntryType + '</B>';
+  }
+
+  return out;
+};
+
+// Attach a transaction history to an AccountRoot ledger entry.
+//
+// obj:  a ledger_entry reply; obj.node is the ledger entry.
+// opts: must provide opts.remote, used to issue the follow-up requests.
+// done: called once when finished (with an error argument only when one of
+//       the balance lookups reports one through async.forEach).
+//
+// For an AccountRoot, obj.history is filled with { tx_hash, tx_ledger }
+// entries found by following PreviousTxnID backwards, and each entry then
+// gets a Balance (or an `error`) from an account_info lookup at that ledger.
+// Any other entry type is left untouched.
+var augment_object = function (obj, opts, done) {
+  if (obj.node.LedgerEntryType == 'AccountRoot') {
+    var   tx_hash   = obj.node.PreviousTxnID;
+    var   tx_ledger = obj.node.PreviousTxnLgrSeq;
+
+    obj.history                 = [];
+
+    async.whilst(
+      // Keep going while there is an earlier transaction to fetch.
+      function () { return tx_hash; },
+      function (callback) {
+// console.log("augment_object: request: %s %s", tx_hash, tx_ledger);
+        opts.remote.request_tx(tx_hash)
+          .on('success', function (m) {
+              // Cleared first so the loop stops unless this transaction
+              // points at an earlier one for the same account.
+              tx_hash   = undefined;
+              tx_ledger = undefined;
+
+//console.log("augment_object: ", JSON.stringify(m));
+              m.meta.AffectedNodes.filter(function(n) {
+// console.log("augment_object: ", JSON.stringify(n));
+// if (n.ModifiedNode)
+// console.log("augment_object: %s %s %s %s %s %s/%s", 'ModifiedNode' in n, n.ModifiedNode && (n.ModifiedNode.LedgerEntryType === 'AccountRoot'), n.ModifiedNode && n.ModifiedNode.FinalFields && (n.ModifiedNode.FinalFields.Account === obj.node.Account), Object.keys(n)[0], n.ModifiedNode && (n.ModifiedNode.LedgerEntryType), obj.node.Account, n.ModifiedNode && n.ModifiedNode.FinalFields && n.ModifiedNode.FinalFields.Account);
+// if ('ModifiedNode' in n && n.ModifiedNode.LedgerEntryType === 'AccountRoot')
+// {
+//   console.log("***: ", JSON.stringify(m));
+//   console.log("***: ", JSON.stringify(n));
+// }
+                  // Only this account's own AccountRoot modification.
+                  return 'ModifiedNode' in n
+                    && n.ModifiedNode.LedgerEntryType === 'AccountRoot'
+                    && n.ModifiedNode.FinalFields
+                    && n.ModifiedNode.FinalFields.Account === obj.node.Account;
+                })
+              .forEach(function (n) {
+                  // The node's previous-transaction pointer is both the next
+                  // transaction to fetch and the history entry recorded.
+                  tx_hash   = n.ModifiedNode.PreviousTxnID;
+                  tx_ledger = n.ModifiedNode.PreviousTxnLgrSeq;
+
+                  obj.history.push({
+                      tx_hash:    tx_hash,
+                      tx_ledger:  tx_ledger
+                    });
+console.log("augment_object: next: %s %s", tx_hash, tx_ledger);
+                });
+
+              callback();
+            })
+          .on('error', function (m) {
+              callback(m);
+            })
+          .request();
+      },
+      function (err) {
+        if (err) {
+          // The error is not forwarded: done() is called without arguments.
+          done();
+        }
+        else {
+          // Look up the balance at each ledger recorded in the history.
+          async.forEach(obj.history, function (o, callback) {
+              opts.remote.request_account_info(obj.node.Account)
+                .ledger_index(o.tx_ledger)
+                .on('success', function (m) {
+//console.log("augment_object: ", JSON.stringify(m));
+                    o.Balance       = m.account_data.Balance;
+//                    o.account_data  = m.account_data;
+                    callback();
+                  })
+                .on('error', function (m) {
+                    // Recorded on the entry; the iteration still continues.
+                    o.error = m;
+                    callback();
+                  })
+                .request();
+            },
+            function (err) {
+              done(err);
+            });
+        }
+      });
+  }
+  else {
+    done();
+  }
+};
+
+if (process.argv.length < 4 || process.argv.length > 7) {
+  console.log("Usage: %s ws_ip ws_port [<ip> [<port> [<start>]]]", program);
+}
+else {
+  var ws_ip   = process.argv[2];
+  var ws_port = process.argv[3];
+  var ip      = process.argv.length > 4 ? process.argv[4] : "127.0.0.1";
+  var port    = process.argv.length > 5 ? process.argv[5] : "8080";
+
+// console.log("START");
+  var self  = this;
+  
+  var remote  = (new Remote({
+                    websocket_ip: ws_ip,
+                    websocket_port: ws_port,
+                    trace: false
+                  }))
+                  .on('state', function (m) {
+                      console.log("STATE: %s", m);
+
+                      self.state   = m;
+                    })
+//                  .once('ledger_closed', callback)
+                  .connect()
+                  ;
+
+  self.base = {
+      hostname: ip,
+      port:     port,
+      remote:   remote,
+    };
+
+// console.log("SERVE");
+  var server  = http.createServer(function (req, res) {
+      var input = "";
+
+      req.setEncoding();
+
+      req.on('data', function (buffer) {
+          // console.log("DATA: %s", buffer);
+          input = input + buffer;
+        });
+
+      req.on('end', function () {
+          // console.log("URL: %s", req.url);
+          // console.log("HEADERS: %s", JSON.stringify(req.headers, undefined, 2));
+
+          var _parsed = url.parse(req.url, true);
+          var _url    = JSON.stringify(_parsed, undefined, 2);
+
+          // console.log("HEADERS: %s", JSON.stringify(_parsed, undefined, 2));
+          if (_parsed.pathname === "/account") {
+              var request = remote
+                .request_ledger_entry('account_root')
+                .ledger_index(-1)
+                .account_root(_parsed.query.a)
+                .on('success', function (m) {
+                    // console.log("account_root: %s", JSON.stringify(m, undefined, 2));
+
+                    augment_object(m, self.base, function() {
+                      httpd_response(res,
+                          {
+                            statusCode: 200,
+                            url: _url,
+                            body: "<PRE>"
+                              + JSON.stringify(rewrite_object(m, self.base), undefined, 2)
+                              + "</PRE>"
+                          });
+                    });
+                  })
+                .request();
+
+          } else if (_parsed.pathname === "/ledger") {
+            var request = remote
+              .request_ledger(undefined, { expand: true, transactions: true })
+              .on('success', function (m) {
+                  // console.log("Ledger: %s", JSON.stringify(m, undefined, 2));
+
+                  httpd_response(res,
+                      {
+                        statusCode: 200,
+                        url: _url,
+                        body: "<PRE>"
+                          + JSON.stringify(rewrite_object(m, self.base), undefined, 2)
+                          +"</PRE>"
+                      });
+                })
+
+            if (_parsed.query.l && _parsed.query.l.length === 64) {
+              request.ledger_hash(_parsed.query.l);
+            }
+            else if (_parsed.query.l) {
+              request.ledger_index(Number(_parsed.query.l));
+            }
+            else {
+              request.ledger_index(-1);
+            }
+
+            request.request();
+
+          } else if (_parsed.pathname === "/transaction") {
+              var request = remote
+                .request_tx(_parsed.query.h)
+//                .request_transaction_entry(_parsed.query.h)
+//              .ledger_select(_parsed.query.l)
+                .on('success', function (m) {
+                    // console.log("transaction: %s", JSON.stringify(m, undefined, 2));
+
+                    httpd_response(res,
+                        {
+                          statusCode: 200,
+                          url: _url,
+                          body: "<PRE>"
+                            + JSON.stringify(rewrite_object(m, self.base), undefined, 2)
+                            +"</PRE>"
+                        });
+                  })
+                .on('error', function (m) {
+                    httpd_response(res,
+                        {
+                          statusCode: 200,
+                          url: _url,
+                          body: "<PRE>"
+                            + 'ERROR: ' + JSON.stringify(m, undefined, 2)
+                            +"</PRE>"
+                        });
+                  })
+                .request();
+
+          } else {
+            var test  = build_uri({
+                type: 'account',
+                ledger: 'closed',
+                account: 'rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh',
+              }, self.base);
+
+            httpd_response(res,
+                {
+                  statusCode: req.url === "/" ? 200 : 404,
+                  url: _url,
+                });
+          }
+        });
+    });
+
+  server.listen(port, ip, undefined,
+    function () {
+      console.log("Listening at: http://%s:%s", ip, port);
+    });
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/debug_local_sign.js
+++ b/bin/debug_local_sign.js
@@ -0,0 +1,64 @@
+// Debugging aid: ask the server to sign a fixed Payment, sign the same
+// transaction locally, and print both results for comparison.
+var ripple = require('ripple-lib');
+
+// Seed and address of the signing account.
+var v = {
+  seed: "snoPBrXtMeMyMHUVTgbuqAfg1SUTb",
+  addr: "rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"
+};
+
+var remote = ripple.Remote.from_config({
+  "trusted" : true,
+  "websocket_ip" : "127.0.0.1",
+  "websocket_port" : 5006,
+  "websocket_ssl" : false,
+  "local_signing" : true
+});
+
+// The transaction that is signed both ways.
+var tx_json = {
+	"Account" : v.addr,
+	"Amount" : "10000000",
+	"Destination" : "rEu2ULPiEQm1BAL8pYzmXnNX1aFX9sCks",
+	"Fee" : "10",
+	"Flags" : 0,
+	"Sequence" : 3,
+	"TransactionType" : "Payment"
+
+  //"SigningPubKey": '0396941B22791A448E5877A44CE98434DB217D6FB97D63F0DAD23BE49ED45173C9'
+};
+
+remote.on('connected', function () {
+  // Server-side signature.
+  var req = remote.request_sign(v.seed, tx_json);
+  req.message.debug_signing = true;
+  req.on('success', function (result) {
+    console.log("SERVER RESULT");
+    console.log(result);
+
+    // Local signature of the same transaction.
+    var sim = {};
+    var tx = remote.transaction();
+    tx.tx_json = tx_json;
+    tx._secret = v.seed;
+    tx.complete();
+    // Serialized before sign() so the unsigned form can be reported too.
+    var unsigned = tx.serialize().to_hex();
+    tx.sign();
+
+    sim.tx_blob = tx.serialize().to_hex();
+    sim.tx_json = tx.tx_json;
+    sim.tx_signing_hash = tx.signing_hash().to_hex();
+    sim.tx_unsigned = unsigned;
+
+    console.log("\nLOCAL RESULT");
+    console.log(sim);
+
+    // NOTE(review): connect(false) presumably disconnects - confirm against ripple-lib.
+    remote.connect(false);
+  });
+  req.on('error', function (err) {
+    if (err.error === "remoteError" && err.remote.error === "srcActNotFound") {
+      console.log("Please fund account "+v.addr+" to run this test.");
+    } else {
+      console.log('error', err);
+    }
+    remote.connect(false);
+  });
+  req.request();
+
+});
+remote.connect();
--- a/bin/email_hash.js
+++ b/bin/email_hash.js
@@ -0,0 +1,18 @@
+#!/usr/bin/node
+//
+// Returns a Gravatar style hash as per: http://en.gravatar.com/site/implement/hash/
+//
+
+if (3 != process.argv.length) {
+  process.stderr.write("Usage: " + process.argv[1] + " email_address\n\nReturns gravatar style hash.\n");
+  process.exit(1);
+
+} else {
+  var md5 = require('crypto').createHash('md5');
+
+  md5.update(process.argv[2].trim().toLowerCase());
+
+  process.stdout.write(md5.digest('hex') + "\n");
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/flash_policy.js
+++ b/bin/flash_policy.js
@@ -0,0 +1,31 @@
+#!/usr/bin/node
+//
+// This program allows IE 9 ripple-clients to make websocket connections to
+// rippled using flash.  As IE 9 does not have websocket support, this is
+// required if you wish to support IE 9 ripple-clients.
+//
+// http://www.lightsphere.com/dev/articles/flash_socket_policy.html
+//
+// For better security, be sure to set the Port below to the port of your
+// [websocket_public_port].
+//
+
+var net	    = require("net"),
+    port    = "*",
+    domains = ["*:"+port]; // Domain:Port
+
+net.createServer(
+  function(socket) {
+    socket.write("<?xml version='1.0' ?>\n");
+    socket.write("<!DOCTYPE cross-domain-policy SYSTEM 'http://www.macromedia.com/xml/dtds/cross-domain-policy.dtd'>\n");
+    socket.write("<cross-domain-policy>\n");
+    domains.forEach(
+      function(domain) {
+        var parts = domain.split(':');
+        socket.write("\t<allow-access-from domain='" + parts[0] + "' to-ports='" + parts[1] + "' />\n");
+      }
+    );
+    socket.write("</cross-domain-policy>\n");
+    socket.end();
+  }
+).listen(843);
--- a/bin/getRippledInfo
+++ b/bin/getRippledInfo
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+
+# This script generates information about your rippled installation
+# and system. It can be used to help debug issues that you may face
+# in your installation. While this script endeavors to not display any 
+# sensitive information, it is recommended that you read the output
+# before sharing with any third parties.
+
+
+# Defaults; both can be overridden on the command line.
+rippled_exe=/opt/ripple/bin/rippled
+conf_file=/etc/opt/ripple/rippled.cfg
+
+# Options:
+#   -e <path>  rippled executable to query
+#   -c <path>  rippled configuration file to sanitize
+# The leading ':' in the optstring selects silent error reporting: an unknown
+# option arrives as '?' and an option missing its argument arrives as ':'.
+while getopts ":e:c:" opt; do
+    case $opt in
+        e)
+            rippled_exe=${OPTARG}
+            ;;
+        c)
+            conf_file=${OPTARG}
+            ;;
+        :)
+            echo "Option -$OPTARG requires an argument" >&2
+            exit 1
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            # Exit statuses are 0-255, so 'exit -1' is out of range.
+            exit 1
+            ;;
+    esac
+done
+
+tmp_loc=$(mktemp -d --tmpdir ripple_info.XXXXX)
+chmod 751 ${tmp_loc}
+awk_prog=${tmp_loc}/cfg.awk
+summary_out=${tmp_loc}/rippled_info.md
+printf "# rippled report info\n\n> generated at %s\n" "$(date -R)" > ${summary_out}
+
+# Append a "## <title>" section to the report, followed by everything read
+# from stdin with each line indented by four spaces.
+#   $*    - section title
+#   stdin - section body
+function log_section {
+    printf "\n## %s\n" "$*" >> "${summary_out}"
+
+    # IFS= keeps each line's own leading and trailing whitespace (for
+    # example the indentation of tool output); the -n test keeps a final
+    # line that has no trailing newline.
+    while IFS= read -r l || [[ -n $l ]]; do
+        echo "    $l" >> "${summary_out}"
+    done
+}
+
+function join_by {
+    local IFS="$1"; shift; echo "$*";
+}
+
+# Add a sanitized copy of the config file to the report, and report the disk
+# usage of the database path found in it.
+if [[ -f ${conf_file} ]] ; then
+    # Config sections that are dropped from the report entirely.
+    exclude=( ips ips_fixed node_seed validation_seed validator_token )
+    cleaned_conf=${tmp_loc}/cleaned_rippled_cfg.txt
+    # The awk program below copies the config to OUT_FILE, skipping comment
+    # lines and the excluded sections and replacing the value of any key
+    # matching "password" with <redacted>.  On stdout it prints only the
+    # first non-blank line that follows the [database_path] header.
+    cat << 'EOP' >> ${awk_prog}
+    BEGIN {FS="[[:space:]]*=[[:space:]]*"; skip=0; db_path=""; print > OUT_FILE; split(exl,exa,"|")}
+    /^#/ {next}
+    save==2 && /^[[:space:]]*$/ {next}
+    /^\[.+\]$/ {
+      section=tolower(gensub(/^\[[[:space:]]*([a-zA-Z_]+)[[:space:]]*\]$/, "\\1", "g"))
+      skip = 0
+      for (i in exa) {
+        if (section == exa[i])
+          skip = 1
+      }
+      if (section == "database_path")
+        save = 1
+    }
+    skip==1 {next}
+    save==2 {save=0; db_path=$0}
+    save==1 {save=2}
+    $1 ~ /password/ {$0=$1"=<redacted>"}
+    {print >> OUT_FILE}
+    END {print db_path}
+EOP
+
+    # sed first replaces any word made of 's' plus 28 alphanumerics with
+    # <redactedsecret> and trims each line; awk's stdout (the database path)
+    # is captured in db.
+    db=$(\
+        sed -r -e 's/\<s[[:alnum:]]{28}\>/<redactedsecret>/g;s/^[[:space:]]*//;s/[[:space:]]*$//' ${conf_file} |\
+        awk -v OUT_FILE=${cleaned_conf} -v exl="$(join_by '|' "${exclude[@]}")" -f ${awk_prog})
+    rm ${awk_prog}
+    cat ${cleaned_conf} | log_section "cleaned config file"
+    rm ${cleaned_conf}
+    echo "${db}"  | log_section "database path"
+    df ${db}      | log_section "df: database"
+fi
+
+# Send output from this script to a log file
+## this captures any messages
+## or errors from the script itself
+
+log_file=${tmp_loc}/get_info.log
+# fd 3 keeps the original stdout for the final message; from here on stdout
+# and stderr are appended to the log file.
+exec 3>&1 1>>${log_file} 2>&1
+
+## Send all stdout files to /tmp
+
+# server_info is only requested when the executable exists and pgrep finds a
+# rippled process.
+if [[ -x ${rippled_exe} ]] ; then
+    pgrep rippled && \
+    ${rippled_exe} --conf ${conf_file} \
+    -- server_info                  | log_section "server info"
+fi
+
+cat /proc/meminfo                   | log_section "meminfo"
+cat /proc/swaps                     | log_section "swap space"
+ulimit -a                           | log_section "ulimit"
+
+# Hardware: lshw when available, otherwise the combined output of several
+# individual tools.
+if command -v lshw >/dev/null 2>&1 ; then
+    lshw    2>/dev/null             | log_section "hardware info"
+else
+    lscpu                           >  ${tmp_loc}/hw_info.txt
+    hwinfo                          >> ${tmp_loc}/hw_info.txt
+    lspci                           >> ${tmp_loc}/hw_info.txt
+    lsblk                           >> ${tmp_loc}/hw_info.txt
+    cat ${tmp_loc}/hw_info.txt | log_section "hardware info"
+    rm ${tmp_loc}/hw_info.txt
+fi
+
+if command -v iostat >/dev/null 2>&1 ; then
+    iostat -t -d -x 2 6             | log_section "iostat"
+fi
+
+df -h                               | log_section "free disk space"
+# Match each mounted /dev device against the block devices in /sys/block and
+# classify it by that device's "rotational" flag.
+drives=($(df | awk '$1 ~ /^\/dev\// {print $1}' | xargs -n 1 basename))
+block_devs=($(ls /sys/block/))
+for d in "${drives[@]}"; do
+    for dev in "${block_devs[@]}"; do
+        #echo "D: [$d], DEV: [$dev]"
+        if [[ $d =~ $dev ]]; then
+            # this file (if exists) has 0 for SSD and 1 for HDD
+            if [[ "$(cat /sys/block/${dev}/queue/rotational 2>/dev/null)" == 0 ]] ; then
+                echo "${d} : SSD" >> ${tmp_loc}/is_ssd.txt
+            else
+                echo "${d} : NO SSD" >> ${tmp_loc}/is_ssd.txt
+            fi
+        fi
+    done
+done
+
+if [[ -f ${tmp_loc}/is_ssd.txt ]] ; then
+    cat ${tmp_loc}/is_ssd.txt | log_section "SSD"
+    rm ${tmp_loc}/is_ssd.txt
+fi
+
+# Everything this script itself printed since the redirection above.
+cat ${log_file} | log_section "script log"
+
+# tee writes the closing message to fd 3 (the original stdout) as well as to
+# the current stdout, which is the log file.
+cat << MSG | tee /dev/fd/3
+####################################################
+  rippled info has been gathered. Please copy the
+  contents of ${summary_out}
+  to a github gist at https://gist.github.com/
+
+  PLEASE REVIEW THIS FILE FOR ANY SENSITIVE DATA
+  BEFORE POSTING! We have tried our best to omit
+  any sensitive information from this file, but you
+  should verify before posting.
+####################################################
+MSG
+
--- a/bin/hexify.js
+++ b/bin/hexify.js
@@ -0,0 +1,23 @@
+#!/usr/bin/node
+//
+// Returns hex of lowercasing a string.
+//
+
+var stringToHex = function (s) {
+  return Array.prototype.map.call(s, function (c) {
+      var b = c.charCodeAt(0);
+
+      return b < 16 ? "0" + b.toString(16) : b.toString(16);
+    }).join("");
+};
+
+if (3 != process.argv.length) {
+  process.stderr.write("Usage: " + process.argv[1] + " string\n\nReturns hex of lowercasing string.\n");
+  process.exit(1);
+
+} else {
+
+  process.stdout.write(stringToHex(process.argv[2].toLowerCase()) + "\n");
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/jsonrpc_request.js
+++ b/bin/jsonrpc_request.js
@@ -0,0 +1,42 @@
+#!/usr/bin/node
+//
+// This is a tool to issue JSON-RPC requests from the command line.
+//
+// This can be used to test a JSON-RPC server.
+//
+// Requires: npm simple-jsonrpc
+//
+
+var jsonrpc   = require('simple-jsonrpc');
+
+var program   = process.argv[1];
+
+// Exactly three arguments are expected: the server URL, the method name and
+// the parameters as a JSON document.
+if (5 !== process.argv.length) {
+  console.log("Usage: %s <URL> <method> <json>", program);
+}
+else {
+  var url       = process.argv[2];
+  var method    = process.argv[3];
+  var json_raw  = process.argv[4];
+  var json;
+
+  // Report malformed JSON, then rethrow so the process stops with the error.
+  try {
+    json      = JSON.parse(json_raw);
+  }
+  catch (e) {
+      console.log("JSON parse error: %s", e.message);
+      throw e;
+  }
+
+  var client  = jsonrpc.client(url);
+
+  // Both the result and the error are pretty-printed to stdout.
+  client.call(method, json,
+    function (result) {
+      console.log(JSON.stringify(result, undefined, 2));
+    },
+    function (error) {
+      console.log(JSON.stringify(error, undefined, 2));
+    });
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/jsonrpc_server.js
+++ b/bin/jsonrpc_server.js
@@ -0,0 +1,68 @@
+#!/usr/bin/node
+//
+// This is a tool to listen for JSON-RPC requests at an IP and port.
+//
+// This will report the request to console and echo back the request as the response.
+//
+
+var http      = require("http");
+
+var program   = process.argv[1];
+
+if (4 !== process.argv.length) {
+  console.log("Usage: %s <ip> <port>", program);
+}
+else {
+  var ip      = process.argv[2];
+  var port    = process.argv[3];
+
+  var server  = http.createServer(function (req, res) {
+      console.log("CONNECT");
+      var input = "";
+
+      req.setEncoding();
+
+      req.on('data', function (buffer) {
+          // console.log("DATA: %s", buffer);
+          input = input + buffer;
+        });
+
+      req.on('end', function () {
+          // console.log("END");
+
+          var json_req;
+
+          console.log("URL: %s", req.url);
+          console.log("HEADERS: %s", JSON.stringify(req.headers, undefined, 2));
+
+          try {
+            json_req = JSON.parse(input);
+
+            console.log("REQ: %s", JSON.stringify(json_req, undefined, 2));
+          }
+          catch (e) {
+            console.log("BAD JSON: %s", e.message);
+
+            json_req = { error : e.message }
+          }
+
+          res.statusCode = 200;
+          res.end(JSON.stringify({
+              jsonrpc: "2.0",
+              result: { request : json_req },
+              id: req.id
+            }));
+        });
+
+      req.on('close', function () {
+          console.log("CLOSE");
+        });
+    });
+
+  server.listen(port, ip, undefined,
+    function () {
+      console.log("Listening at: %s:%s", ip, port);
+    });
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/physical.sh
+++ b/bin/physical.sh
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+set -o errexit
+
+# marker_base is the reference commit; an optional first argument names a
+# different marker commit, which must descend from marker_base and be an
+# ancestor of HEAD.
+marker_base=985c80fbc6131f3a8cedd0da7e8af98dfceb13c7
+marker_commit=${1:-${marker_base}}
+
+# The substitutions are quoted so that a `git merge-base` that prints nothing
+# makes the comparison fail.  Unquoted, the empty result turned `[` into a
+# syntax error, which `if` treats as false - silently skipping the check.
+if [ "$(git merge-base "${marker_commit}" "${marker_base}")" != "${marker_base}" ]; then
+  echo "first marker commit not an ancestor: ${marker_commit}"
+  exit 1
+fi
+
+if [ "$(git merge-base "${marker_commit}" HEAD)" != "$(git rev-parse --verify "${marker_commit}")" ]; then
+  echo "given marker commit not an ancestor: ${marker_commit}"
+  exit 1
+fi
+
+# Relocate Builds/CMake to cmake and record the move as its own commit.
+if [[ -e Builds/CMake ]]; then
+  echo 'move CMake'
+  git mv Builds/CMake cmake
+  git add --update .
+  git commit --author 'Pretty Printer <cpp@ripple.com>' -m 'Move CMake directory'
+fi
+
+if [ -e src/ripple ]; then
+
+  echo move protocol buffers
+  mkdir -p include/xrpl
+  if [ -e src/ripple/proto ]; then
+    git mv src/ripple/proto include/xrpl
+  fi
+
+  # Print the src/ripple paths listed between the "BEGIN $1" and "END $1"
+  # markers of RippledCore.cmake as it exists at ${marker_commit}.
+  #   $1 - marker name, e.g. 'LIBXRPL HEADERS'
+  # The leading "src/ripple/" is removed from each path, as is any trailing
+  # run of characters other than lowercase letters.
+  extract_list() {
+    git show ${marker_commit}:Builds/CMake/RippledCore.cmake | \
+    awk "/END ${1}/ { p = 0 } p && /src\/ripple/; /BEGIN ${1}/ { p = 1 }" | \
+    sed -e 's#src/ripple/##' -e 's#[^a-z]\+$##'
+  }
+
+  # Move files from one source root to another with `git mv`.
+  #   $1    - old root (e.g. src/ripple)
+  #   $2    - new root
+  #   $3    - directory name that replaces a trailing `impl` component
+  #   $4... - file paths relative to the old root
+  # Files missing from the old root are skipped.  A trailing `details`
+  # directory is dropped, and a trailing `impl` directory is renamed to $3.
+  move_files() {
+    # Locals keep this function from overwriting the caller's `files` and
+    # `dir` variables.
+    local oldroot="$1"; shift
+    local newroot="$1"; shift
+    local detail="$1"; shift
+    local file dir
+    for file in "$@"; do
+      if [ ! -e "${oldroot}/${file}" ]; then
+        continue
+      fi
+      dir=$(dirname "${file}")
+      if [ "$(basename "${dir}")" == 'details' ]; then
+        dir=$(dirname "${dir}")
+      fi
+      if [ "$(basename "${dir}")" == 'impl' ]; then
+        dir="$(dirname "${dir}")/${detail}"
+      fi
+      mkdir -p "${newroot}/${dir}"
+      git mv "${oldroot}/${file}" "${newroot}/${dir}"
+    done
+  }
+
+  echo move libxrpl headers
+  files=$(extract_list 'LIBXRPL HEADERS')
+  files+=(
+    basics/SlabAllocator.h
+
+    beast/asio/io_latency_probe.h
+    beast/container/aged_container.h
+    beast/container/aged_container_utility.h
+    beast/container/aged_map.h
+    beast/container/aged_multimap.h
+    beast/container/aged_multiset.h
+    beast/container/aged_set.h
+    beast/container/aged_unordered_map.h
+    beast/container/aged_unordered_multimap.h
+    beast/container/aged_unordered_multiset.h
+    beast/container/aged_unordered_set.h
+    beast/container/detail/aged_associative_container.h
+    beast/container/detail/aged_container_iterator.h
+    beast/container/detail/aged_ordered_container.h
+    beast/container/detail/aged_unordered_container.h
+    beast/container/detail/empty_base_optimization.h
+    beast/core/LockFreeStack.h
+    beast/insight/Collector.h
+    beast/insight/Counter.h
+    beast/insight/CounterImpl.h
+    beast/insight/Event.h
+    beast/insight/EventImpl.h
+    beast/insight/Gauge.h
+    beast/insight/GaugeImpl.h
+    beast/insight/Group.h
+    beast/insight/Groups.h
+    beast/insight/Hook.h
+    beast/insight/HookImpl.h
+    beast/insight/Insight.h
+    beast/insight/Meter.h
+    beast/insight/MeterImpl.h
+    beast/insight/NullCollector.h
+    beast/insight/StatsDCollector.h
+    beast/test/fail_counter.h
+    beast/test/fail_stream.h
+    beast/test/pipe_stream.h
+    beast/test/sig_wait.h
+    beast/test/string_iostream.h
+    beast/test/string_istream.h
+    beast/test/string_ostream.h
+    beast/test/test_allocator.h
+    beast/test/yield_to.h
+    beast/utility/hash_pair.h
+    beast/utility/maybe_const.h
+    beast/utility/temp_dir.h
+
+    # included by only json/impl/json_assert.h
+    json/json_errors.h
+
+    protocol/PayChan.h
+    protocol/RippleLedgerHash.h
+    protocol/messages.h
+    protocol/st.h
+  )
+  files+=(
+    basics/README.md
+    crypto/README.md
+    json/README.md
+    protocol/README.md
+    resource/README.md
+  )
+  move_files src/ripple include/xrpl detail ${files[@]}
+
+  echo move libxrpl sources
+  files=$(extract_list 'LIBXRPL SOURCES')
+  move_files src/ripple src/libxrpl "" ${files[@]}
+
+  echo check leftovers
+  # Top-level directories of include/xrpl that also still exist under
+  # src/ripple; any file remaining in them was missed by the moves above.
+  dirs=$(cd include/xrpl; ls -d */)
+  dirs=$(cd src/ripple; ls -d ${dirs} 2>/dev/null || true)
+  files="$(cd src/ripple; find ${dirs} -type f)"
+  if [ -n "${files}" ]; then
+    echo "leftover files:"
+    echo ${files}
+    # A bare `exit` returns the status of the echo above (0), which reports
+    # this failure as success.
+    exit 1
+  fi
+
+  echo remove empty directories
+  # NOTE(review): the directory names are listed relative to src/ripple, but
+  # the existence test and rmdir below run from the current directory -
+  # confirm whether this loop is meant to take effect (src/ripple is removed
+  # with rm -rf later in the script).
+  empty_dirs="$(cd src/ripple; find ${dirs} -depth -type d)"
+  for dir in ${empty_dirs[@]}; do
+    if [ -e ${dir} ]; then
+      rmdir ${dir}
+    fi
+  done
+
+  echo move xrpld sources
+  # The XRPLD source list plus every remaining header, document and diagram
+  # found under src/ripple.
+  files=$(
+    extract_list 'XRPLD SOURCES'
+    cd src/ripple
+    find * -regex '.*\.\(h\|ipp\|md\|pu\|uml\|png\)'
+  )
+  move_files src/ripple src/xrpld detail ${files[@]}
+
+  # Nothing may be left behind under src/ripple.
+  files="$(cd src/ripple; find . -type f)"
+  if [ -n "${files}" ]; then
+    echo "leftover files:"
+    echo ${files}
+    # A bare `exit` returns the status of the echo above (0), which reports
+    # this failure as success.
+    exit 1
+  fi
+
+fi
+
+rm -rf src/ripple
+
+echo rename .hpp to .h
+# The path is passed to the inner shell as $1 rather than pasted into the
+# script text, so file names containing quotes or `$` cannot be interpreted
+# as shell code.
+find include src -name '*.hpp' -exec bash -c 'git mv "$1" "${1%hpp}h"' _ {} \;
+
+echo move PerfLog.h
+if [ -e include/xrpl/basics/PerfLog.h ]; then
+  git mv include/xrpl/basics/PerfLog.h src/xrpld/perflog
+fi
+
+# sed programs shared by the two rewrite passes below.
+# Make sure all protobuf includes have the correct prefix.
+protobuf_replace='s:^#include\s*["<].*org/xrpl\([^">]\+\)[">]:#include <xrpl/proto/org/xrpl\1>:'
+# Make sure first-party includes use angle brackets and .h extension.
+ripple_replace='s:include\s*["<]ripple/\(.*\)\.h\(pp\)\?[">]:include <ripple/\1.h>:'
+# Prefix <beast/...> includes with xrpl/.
+beast_replace='s:include\s*<beast/:include <xrpl/beast/:'
+# Rename impl directories to detail.
+impl_rename='s:\(<xrpl.*\)/impl\(/details\)\?/:\1/detail/:'
+
+# In libxrpl every <ripple/...> include becomes <xrpl/...>.
+echo rewrite includes in libxrpl
+find include/xrpl src/libxrpl -type f -exec sed -i \
+  -e "${protobuf_replace}" \
+  -e "${ripple_replace}" \
+  -e "${beast_replace}" \
+  -e 's:^#include <ripple/:#include <xrpl/:' \
+  -e "${impl_rename}" \
+  {} +
+
+# In xrpld and the tests an include becomes <xrpl/...> only when its first
+# directory exists under include/xrpl; PerfLog.h and every other remaining
+# <ripple/...> include become <xrpld/...>.
+echo rewrite includes in xrpld
+# # https://www.baeldung.com/linux/join-multiple-lines
+libxrpl_dirs="$(cd include/xrpl; ls -d1 */ | sed 's:/$::')"
+# libxrpl_dirs='a\nb\nc\n'
+readarray -t libxrpl_dirs <<< "${libxrpl_dirs}"
+# libxrpl_dirs=(a b c)
+libxrpl_dirs=$(printf -v txt '%s\\|' "${libxrpl_dirs[@]}"; echo "${txt%\\|}")
+# libxrpl_dirs='a\|b\|c'
+find src/xrpld src/test -type f -exec sed -i \
+  -e "${protobuf_replace}" \
+  -e "${ripple_replace}" \
+  -e "${beast_replace}" \
+  -e "s:^#include <ripple/basics/PerfLog.h>:#include <xrpld/perflog/PerfLog.h>:" \
+  -e "s:^#include <ripple/\(${libxrpl_dirs}\)/:#include <xrpl/\1/:" \
+  -e 's:^#include <ripple/:#include <xrpld/:' \
+  -e "${impl_rename}" \
+  {} +
+
+git commit -m 'Rearrange sources' --author 'Pretty Printer <cpp@ripple.com>'
+find include src -type f \( -name '*.cpp' -o -name '*.h' -o -name '*.ipp' \) -exec clang-format-10 -i {} +
+git add --update .
+git commit -m 'Rewrite includes' --author 'Pretty Printer <cpp@ripple.com>'
+./Builds/levelization/levelization.sh
+git add --update .
+git commit -m 'Recompute loops' --author 'Pretty Printer <cpp@ripple.com>'
--- a/bin/rlint.js
+++ b/bin/rlint.js
@@ -0,0 +1,252 @@
+#!/usr/bin/node
+
+var async       = require('async');
+var Remote      = require('ripple-lib').Remote;
+var Transaction = require('ripple-lib').Transaction;
+var UInt160     = require('ripple-lib').UInt160;
+var Amount      = require('ripple-lib').Amount;
+
+var book_key = function (book) {
+  return book.taker_pays.currency
+    + ":" + book.taker_pays.issuer
+    + ":" + book.taker_gets.currency
+    + ":" + book.taker_gets.issuer;
+};
+
+var book_key_cross = function (book) {
+  return book.taker_gets.currency
+    + ":" + book.taker_gets.issuer
+    + ":" + book.taker_pays.currency
+    + ":" + book.taker_pays.issuer;
+};
+
+var ledger_verify = function (ledger) {
+  var dir_nodes = ledger.accountState.filter(function (entry) {
+      return entry.LedgerEntryType === 'DirectoryNode'    // Only directories
+        && entry.index === entry.RootIndex                // Only root nodes
+        && 'TakerGetsCurrency' in entry;                  // Only offer directories
+    });
+
+  var books = {};
+
+  dir_nodes.forEach(function (node) {
+      var book = {
+        taker_gets: {
+            currency: UInt160.from_generic(node.TakerGetsCurrency).to_json(),
+            issuer: UInt160.from_generic(node.TakerGetsIssuer).to_json()
+          },
+        taker_pays: {
+          currency: UInt160.from_generic(node.TakerPaysCurrency).to_json(),
+          issuer: UInt160.from_generic(node.TakerPaysIssuer).to_json()
+        },
+        quality: Amount.from_quality(node.RootIndex),
+        index: node.RootIndex
+      };
+
+      books[book_key(book)] = book;
+
+//      console.log(JSON.stringify(node, undefined, 2));
+    });
+
+//  console.log(JSON.stringify(dir_entry, undefined, 2));
+  console.log("#%s books: %s", ledger.ledger_index, Object.keys(books).length);
+
+  Object.keys(books).forEach(function (key) {
+      var book        = books[key];
+      var key_cross   = book_key_cross(book);
+      var book_cross  = books[key_cross];
+
+      if (book && book_cross && !book_cross.done)
+      {
+        var book_cross_quality_inverted = Amount.from_json("1.0/1/1").divide(book_cross.quality);
+
+        if (book_cross_quality_inverted.compareTo(book.quality) >= 0)
+        {
+          // Crossing books
+          console.log("crossing: #%s :: %s :: %s :: %s :: %s :: %s :: %s", ledger.ledger_index, key, book.quality.to_text(), book_cross.quality.to_text(), book_cross_quality_inverted.to_text(),
+            book.index, book_cross.index);
+        }
+
+        book_cross.done = true;
+      }
+    });
+
+  var ripple_selfs  = {};
+
+  var accounts  = {};
+  var counts    = {};
+
+  ledger.accountState.forEach(function (entry) {
+      if (entry.LedgerEntryType === 'Offer')
+      {
+        counts[entry.Account] = (counts[entry.Account] || 0) + 1;
+      }
+      else if (entry.LedgerEntryType === 'RippleState')
+      {
+        if (entry.Flags & (0x10000 | 0x40000))
+        {
+          counts[entry.LowLimit.issuer]   = (counts[entry.LowLimit.issuer] || 0) + 1;
+        }
+
+        if (entry.Flags & (0x20000 | 0x80000))
+        {
+          counts[entry.HighLimit.issuer]  = (counts[entry.HighLimit.issuer] || 0) + 1;
+        }
+
+        if (entry.HighLimit.issuer === entry.LowLimit.issuer)
+          ripple_selfs[entry.Account] = entry;
+      }
+      else if (entry.LedgerEntryType == 'AccountRoot')
+      {
+        accounts[entry.Account] = entry;
+      }
+    });
+
+  var low               = 0;  // Accounts with too low a count.
+  var high              = 0;
+  var missing_accounts  = 0;  // Objects with no referencing account.
+  var missing_objects   = 0;  // Accounts specifying an object but having none.
+
+  Object.keys(counts).forEach(function (account) {
+      if (account in accounts)
+      {
+        if (counts[account] !== accounts[account].OwnerCount)
+        {
+          if (counts[account] < accounts[account].OwnerCount)
+          {
+            high  += 1;
+            console.log("%s: high count %s/%s", account, counts[account], accounts[account].OwnerCount);
+          }
+          else
+          {
+            low   += 1;
+            console.log("%s: low count %s/%s", account, counts[account], accounts[account].OwnerCount);
+          }
+        }
+      }
+      else
+      {
+        missing_accounts  += 1;
+
+        console.log("%s: missing : count %s", account, counts[account]);
+      }
+    });
+
+  Object.keys(accounts).forEach(function (account) {
+      if (!('OwnerCount' in accounts[account]))
+      {
+          console.log("%s: bad entry : %s", account, JSON.stringify(accounts[account], undefined, 2));
+      }
+      else if (!(account in counts) && accounts[account].OwnerCount)
+      {
+          missing_objects += 1;
+
+          console.log("%s: no objects : %s/%s", account, 0, accounts[account].OwnerCount);
+      }
+    });
+
+  if (low)
+    console.log("counts too low = %s", low);
+
+  if (high)
+    console.log("counts too high = %s", high);
+
+  if (missing_objects)
+    console.log("missing_objects = %s", missing_objects);
+
+  if (missing_accounts)
+    console.log("missing_accounts = %s", missing_accounts);
+
+  if (Object.keys(ripple_selfs).length)
+    console.log("RippleState selfs = %s", Object.keys(ripple_selfs).length);
+
+};
+
+var ledger_request = function (remote, ledger_index, done) {
+ remote.request_ledger(undefined, {
+      accounts: true,
+      expand: true,
+    })
+  .ledger_index(ledger_index)
+  .on('success', function (m) {
+      // console.log("ledger: ", ledger_index);
+      // console.log("ledger: ", JSON.stringify(m, undefined, 2));
+      done(m.ledger);
+    })
+  .on('error', function (m) {
+      console.log("error");
+      done();
+    })
+  .request();
+};
+
+var usage = function () {
+  console.log("rlint.js _websocket_ip_ _websocket_port_ ");
+};
+
+var finish = function (remote) {
+  remote.disconnect();
+
+  // XXX Because remote.disconnect() doesn't work:
+  process.exit();
+};
+
+// Entry point. process.argv[0] and [1] are the node binary and the script
+// path, so the first user argument is process.argv[2].
+console.log("args: ", process.argv.length);
+console.log("args: ", process.argv);
+
+// Fewer than two user arguments: the websocket address is incomplete.
+if (process.argv.length < 4) {
+  usage();
+}
+else {
+  var remote  = Remote.from_config({
+        websocket_ip:   process.argv[2],
+        websocket_port: process.argv[3],
+      })
+    // Work starts on the first 'ledger_closed' event only (`once`).
+    .once('ledger_closed', function (m) {
+        console.log("ledger_closed: ", JSON.stringify(m, undefined, 2));
+
+        // One extra argument: verify that single ledger.
+        if (process.argv.length === 5) {
+          var ledger_index  = process.argv[4];
+
+          ledger_request(remote, ledger_index, function (l) {
+              if (l) {
+                ledger_verify(l);
+              }
+
+              finish(remote);
+            });
+
+        // Two extra arguments: verify the inclusive range, newest first.
+        } else if (process.argv.length === 6) {
+          var ledger_start  = Number(process.argv[4]);
+          var ledger_end    = Number(process.argv[5]);
+          var ledger_cursor = ledger_end;
+
+          async.whilst(
+            // Continue while the cursor is inside [ledger_start, ledger_end].
+            function () {
+              return ledger_start <= ledger_cursor && ledger_cursor <=ledger_end;
+            },
+            function (callback) {
+              // console.log(ledger_cursor);
+
+              ledger_request(remote, ledger_cursor, function (l) {
+                  // A failed request yields no ledger; it is skipped, not retried.
+                  if (l) {
+                    ledger_verify(l);
+                  }
+
+                  --ledger_cursor;
+
+                  callback();
+                });
+            },
+            function (error) {
+              finish(remote);
+            });
+
+        // No ledger argument, or more than two: nothing to verify.
+        } else {
+          finish(remote);
+        }
+      })
+    .connect();
+}
+
+// vim:sw=2:sts=2:ts=8:et
--- a/bin/sh/install-vcpkg.sh
+++ b/bin/sh/install-vcpkg.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+set -exu
+
+: ${TRAVIS_BUILD_DIR:=""}
+: ${VCPKG_DIR:=".vcpkg"}
+export VCPKG_ROOT=${VCPKG_DIR}
+: ${VCPKG_DEFAULT_TRIPLET:="x64-windows-static"}
+
+export VCPKG_DEFAULT_TRIPLET
+
+EXE="vcpkg"
+if [[ -z ${COMSPEC:-} ]]; then
+    EXE="${EXE}.exe"
+fi
+
+if [[ -d "${VCPKG_DIR}" && -x "${VCPKG_DIR}/${EXE}" && -d "${VCPKG_DIR}/installed" ]] ; then
+    echo "Using cached vcpkg at ${VCPKG_DIR}"
+    ${VCPKG_DIR}/${EXE} list
+else
+    if [[ -d "${VCPKG_DIR}" ]] ; then
+        rm -rf "${VCPKG_DIR}"
+    fi
+    git clone --branch 2021.04.30 https://github.com/Microsoft/vcpkg.git ${VCPKG_DIR}
+    pushd ${VCPKG_DIR}
+    BSARGS=()
+    if [[ "$(uname)" == "Darwin" ]] ; then
+        BSARGS+=(--allowAppleClang)
+    fi
+    if [[ -z ${COMSPEC:-} ]]; then
+        chmod +x ./bootstrap-vcpkg.sh
+        time ./bootstrap-vcpkg.sh "${BSARGS[@]}"
+    else
+        time ./bootstrap-vcpkg.bat
+    fi
+    popd
+fi
+
+# TODO: bring boost in this way as well ?
+# NOTE: can pin specific ports to a commit/version like this:
+#    git checkout <SOME COMMIT HASH> ports/boost
+if [ $# -eq 0 ]; then
+    echo "No extra packages specified..."
+    PKGS=()
+else
+    PKGS=( "$@" )
+fi
+for LIB in "${PKGS[@]}"; do
+    time ${VCPKG_DIR}/${EXE} --clean-after-build install ${LIB}
+done
+
+
--- a/bin/sh/setup-msvc.sh
+++ b/bin/sh/setup-msvc.sh
@@ -0,0 +1,40 @@
+
+# NOTE: must be sourced from a shell so it can export vars
+
+cat << BATCH > ./getenv.bat
+CALL %*
+ENV
+BATCH
+
+while read line ; do
+  IFS='"' read x path arg <<<"${line}"
+  if [ -f "${path}" ] ; then
+    echo "FOUND: $path"
+    export VCINSTALLDIR=$(./getenv.bat "${path}" ${arg} | grep "^VCINSTALLDIR=" | sed -E "s/^VCINSTALLDIR=//g")
+    if [ "${VCINSTALLDIR}" != "" ] ; then
+      echo "USING ${VCINSTALLDIR}"
+      export LIB=$(./getenv.bat "${path}" ${arg} | grep "^LIB=" | sed -E "s/^LIB=//g")
+      export LIBPATH=$(./getenv.bat "${path}" ${arg} | grep "^LIBPATH=" | sed -E "s/^LIBPATH=//g")
+      export INCLUDE=$(./getenv.bat "${path}" ${arg} | grep "^INCLUDE=" | sed -E "s/^INCLUDE=//g")
+      ADDPATH=$(./getenv.bat "${path}" ${arg} | grep "^PATH=" | sed -E "s/^PATH=//g")
+      export PATH="${ADDPATH}:${PATH}"
+      break
+    fi
+  fi
+done <<EOL
+"C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build/vcvarsall.bat" x86_amd64
+"C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Auxiliary/Build/vcvarsall.bat" x86_amd64
+"C:/Program Files (x86)/Microsoft Visual Studio/2017/BuildTools/VC/Auxiliary/Build/vcvarsall.bat" x86_amd64
+"C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Auxiliary/Build/vcvarsall.bat" x86_amd64
+"C:/Program Files (x86)/Microsoft Visual Studio 15.0/VC/vcvarsall.bat" amd64
+"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/vcvarsall.bat" amd64
+"C:/Program Files (x86)/Microsoft Visual Studio 13.0/VC/vcvarsall.bat" amd64
+"C:/Program Files (x86)/Microsoft Visual Studio 12.0/VC/vcvarsall.bat" amd64
+EOL
+# TODO: update the list above as needed to support newer versions of msvc tools
+
+rm -f getenv.bat
+
+if [ "${VCINSTALLDIR}" = "" ] ; then
+  echo "No compatible visual studio found!"
+fi
--- a/bin/start_sync_stop.py
+++ b/bin/start_sync_stop.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+"""A script to test rippled in an infinite loop of start-sync-stop.
+
+- Requires Python 3.7+.
+- Can be stopped with SIGINT.
+- Has no dependencies outside the standard library.
+"""
+
+import sys
+
+assert sys.version_info.major == 3 and sys.version_info.minor >= 7
+
+import argparse
+import asyncio
+import configparser
+import contextlib
+import json
+import logging
+import os
+from pathlib import Path
+import platform
+import subprocess
+import time
+import urllib.error
+import urllib.request
+
+# Enable asynchronous subprocesses on Windows. The default changed in 3.8.
+# https://docs.python.org/3.7/library/asyncio-platforms.html#subprocess-support-on-windows
+if (platform.system() == 'Windows' and sys.version_info.major == 3
+        and sys.version_info.minor < 8):
+    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+DEFAULT_EXE = 'rippled'
+DEFAULT_CONFIGURATION_FILE = 'rippled.cfg'
+# Number of seconds to wait before forcefully terminating.
+PATIENCE = 120
+# Number of contiguous seconds in a sync state to be considered synced.
+DEFAULT_SYNC_DURATION = 60
+# Number of seconds between polls of state.
+DEFAULT_POLL_INTERVAL = 5
+SYNC_STATES = ('full', 'validating', 'proposing')
+
+
+def read_config(config_file):
+    # strict = False: Allow duplicate keys, e.g. [rpc_startup].
+    # allow_no_value = True: Allow keys with no values. Generally, these
+    # instances use the "key" as the value, and the section name is the key,
+    # e.g. [debug_logfile].
+    # delimiters = ('='): Allow ':' as a character in Windows paths. Some of
+    # our "keys" are actually values, and we don't want to split them on ':'.
+    config = configparser.ConfigParser(
+        strict=False,
+        allow_no_value=True,
+        delimiters=('='),
+    )
+    config.read(config_file)
+    return config
+
+
+def to_list(value, separator=','):
+    """Parse a list from a delimited string value."""
+    return [s.strip() for s in value.split(separator) if s]
+
+
+def find_log_file(config_file):
+    """Try to figure out what log file the user has chosen. Raises all kinds
+    of exceptions if there is any possibility of ambiguity."""
+    config = read_config(config_file)
+    values = list(config['debug_logfile'].keys())
+    if len(values) < 1:
+        raise ValueError(
+            f'no [debug_logfile] in configuration file: {config_file}')
+    if len(values) > 1:
+        raise ValueError(
+            f'too many [debug_logfile] in configuration file: {config_file}')
+    return values[0]
+
+
+def find_http_port(config_file):
+    config = read_config(config_file)
+    names = list(config['server'].keys())
+    for name in names:
+        server = config[name]
+        if 'http' in to_list(server.get('protocol', '')):
+            return int(server['port'])
+    raise ValueError(f'no server in [server] for "http" protocol')
+
+
+@contextlib.asynccontextmanager
+async def rippled(exe=DEFAULT_EXE, config_file=DEFAULT_CONFIGURATION_FILE):
+    """A context manager for a rippled process."""
+    # Start the server.
+    process = await asyncio.create_subprocess_exec(
+        str(exe),
+        '--conf',
+        str(config_file),
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    logging.info(f'rippled started with pid {process.pid}')
+    try:
+        yield process
+    finally:
+        # Ask it to stop.
+        logging.info(f'asking rippled (pid: {process.pid}) to stop')
+        start = time.time()
+        process.terminate()
+
+        # Wait nicely.
+        try:
+            await asyncio.wait_for(process.wait(), PATIENCE)
+        except asyncio.TimeoutError:
+            # Ask the operating system to kill it.
+            logging.warning(f'killing rippled ({process.pid})')
+            try:
+                process.kill()
+            except ProcessLookupError:
+                pass
+
+        code = await process.wait()
+        end = time.time()
+        logging.info(
+            f'rippled stopped after {end - start:.1f} seconds with code {code}'
+        )
+
+
+async def sync(
+        port,
+        *,
+        duration=DEFAULT_SYNC_DURATION,
+        interval=DEFAULT_POLL_INTERVAL,
+):
+    """Poll rippled on an interval until it has been synced for a duration."""
+    start = time.perf_counter()
+    while (time.perf_counter() - start) < duration:
+        await asyncio.sleep(interval)
+
+        request = urllib.request.Request(
+            f'http://127.0.0.1:{port}',
+            data=json.dumps({
+                'method': 'server_state'
+            }).encode(),
+            headers={'Content-Type': 'application/json'},
+        )
+        with urllib.request.urlopen(request) as response:
+            try:
+                body = json.loads(response.read())
+            except urllib.error.HTTPError as cause:
+                logging.warning(f'server_state returned not JSON: {cause}')
+                start = time.perf_counter()
+                continue
+
+        try:
+            state = body['result']['state']['server_state']
+        except KeyError as cause:
+            logging.warning(f'server_state response missing key: {cause.key}')
+            start = time.perf_counter()
+            continue
+        logging.info(f'server_state: {state}')
+        if state not in SYNC_STATES:
+            # Require a contiguous sync state.
+            start = time.perf_counter()
+
+
+async def loop(test,
+               *,
+               exe=DEFAULT_EXE,
+               config_file=DEFAULT_CONFIGURATION_FILE):
+    """
+    Start-test-stop rippled in an infinite loop.
+
+    Moves log to a different file after each iteration.
+    """
+    log_file = find_log_file(config_file)
+    id = 0
+    while True:
+        logging.info(f'iteration: {id}')
+        async with rippled(exe, config_file) as process:
+            start = time.perf_counter()
+            exited = asyncio.create_task(process.wait())
+            tested = asyncio.create_task(test())
+            # Try to sync as long as the process is running.
+            done, pending = await asyncio.wait(
+                {exited, tested},
+                return_when=asyncio.FIRST_COMPLETED,
+            )
+            if done == {exited}:
+                code = exited.result()
+                logging.warning(
+                    f'server halted for unknown reason with code {code}')
+            else:
+                assert done == {tested}
+                assert tested.exception() is None
+            end = time.perf_counter()
+            logging.info(f'synced after {end - start:.0f} seconds')
+        os.replace(log_file, f'debug.{id}.log')
+        id += 1
+
+
+logging.basicConfig(
+    format='%(asctime)s %(levelname)-8s %(message)s',
+    level=logging.INFO,
+    datefmt='%Y-%m-%d %H:%M:%S',
+)
+
+parser = argparse.ArgumentParser(
+    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument(
+    'rippled',
+    type=Path,
+    nargs='?',
+    default=DEFAULT_EXE,
+    help='Path to rippled.',
+)
+parser.add_argument(
+    '--conf',
+    type=Path,
+    default=DEFAULT_CONFIGURATION_FILE,
+    help='Path to configuration file.',
+)
+parser.add_argument(
+    '--duration',
+    type=int,
+    default=DEFAULT_SYNC_DURATION,
+    help='Number of contiguous seconds required in a synchronized state.',
+)
+parser.add_argument(
+    '--interval',
+    type=int,
+    default=DEFAULT_POLL_INTERVAL,
+    help='Number of seconds to wait between polls of state.',
+)
+args = parser.parse_args()
+
+port = find_http_port(args.conf)
+
+
+def test():
+    """Return a new ``sync`` coroutine configured from the parsed arguments."""
+    return sync(port, duration=args.duration, interval=args.interval)
+
+
+try:
+    asyncio.run(loop(test, exe=args.rippled, config_file=args.conf))
+except KeyboardInterrupt:
+    # Squelch the message. This is a normal mode of exit.
+    pass
--- a/bin/stop-test.js
+++ b/bin/stop-test.js
@@ -0,0 +1,133 @@
+/* -------------------------------- REQUIRES -------------------------------- */
+
+var child = require("child_process");
+var assert = require("assert");
+
+/* --------------------------------- CONFIG --------------------------------- */
+
+if (process.argv[2] == null) {
+  [
+   'Usage: ',
+   '',
+   '  `node bin/stop-test.js i,j [rippled_path] [rippled_conf]`',
+   '',
+   '  Launch rippled and stop it after n seconds for all n in [i, j}',
+   '  For all even values of n launch rippled with `--fg`',
+   '  For values of n where n % 3 == 0 launch rippled with `--fg`\n',
+   'Examples: ',
+   '',
+   '  $ node bin/stop-test.js 5,10',
+   ('  $ node bin/stop-test.js 1,4 ' +
+      'build/clang.debug/rippled $HOME/.confs/rippled.cfg')
+   ]
+      .forEach(function(l){console.log(l)});
+
+  process.exit();
+} else {
+  var testRange = process.argv[2].split(',').map(Number);
+  var rippledPath = process.argv[3] || 'build/rippled'
+  var rippledConf = process.argv[4] || 'rippled.cfg'
+}
+
+var options = {
+  env: process.env,
+  stdio: 'ignore' // we could dump the child io when it fails abnormally
+};
+
+// default args
+var conf_args = ['--conf='+rippledConf];
+var start_args  = conf_args.concat([/*'--net'*/])
+var stop_args = conf_args.concat(['stop']);
+
+/* --------------------------------- HELPERS -------------------------------- */
+
+function start(args) {
+    return child.spawn(rippledPath, args, options);
+}
+function stop(rippled) { child.execFile(rippledPath, stop_args, options)}
+function secs_l8r(ms, f) {setTimeout(f, ms * 1000); }
+
+function show_results_and_exit(results) {
+  console.log(JSON.stringify(results, undefined, 2));
+  process.exit();
+}
+
+var timeTakes = function (range) {
+  function sumRange(n) {return (n+1) * n /2}
+  var ret = sumRange(range[1]);
+  if (range[0] > 1) {
+    ret = ret - sumRange(range[0] - 1)
+  }
+  var stopping = (range[1] - range[0]) * 0.5;
+  return ret + stopping;
+}
+
+/* ---------------------------------- TEST ---------------------------------- */
+
+console.log("Test will take ~%s seconds", timeTakes(testRange));
+
+(function oneTest(n /* seconds */, results) {
+  if (n >= testRange[1]) {
+    // show_results_and_exit(results);
+    console.log(JSON.stringify(results, undefined, 2));
+    oneTest(testRange[0], []);
+    return;
+  }
+
+  var args = start_args;
+  if (n % 2 == 0) {args = args.concat(['--fg'])}
+  if (n % 3 == 0) {args = args.concat(['--net'])}
+
+  var result = {args: args, alive_for: n};
+  results.push(result);
+
+  console.log("\nLaunching `%s` with `%s` for %d seconds",
+                rippledPath, JSON.stringify(args), n);
+
+  rippled = start(args);
+  console.log("Rippled pid: %d", rippled.pid);
+
+  // defaults
+  var b4StopSent = false;
+  var stopSent = false;
+  var stop_took = null;
+
+  rippled.once('exit', function(){
+    if (!stopSent && !b4StopSent) {
+      console.warn('\nRippled exited itself b4 stop issued');
+      process.exit();
+    };
+
+    // The io handles close AFTER exit, may have implications for
+    // `stdio:'inherit'` option to `child.spawn`.
+    rippled.once('close', function() {
+      result.stop_took = (+new Date() - stop_took) / 1000; // seconds
+      console.log("Stopping after %d seconds took %s seconds",
+                   n, result.stop_took);
+      oneTest(n+1, results);
+    });
+  });
+
+  secs_l8r(n, function(){
+    console.log("Stopping rippled after %d seconds", n);
+
+    // possible race here ?
+    // seems highly unlikely, but I was having issues at one point
+    b4StopSent=true;
+    stop_took = (+new Date());
+    // when does `exit` actually get sent?
+    stop();
+    stopSent=true;
+
+    // Sometimes we want to attach with a debugger.
+    if (process.env.ABORT_TESTS_ON_STALL != null) {
+      // We wait 30 seconds, and if it hasn't stopped, we abort the process
+      secs_l8r(30, function() {
+        if (result.stop_took == null) {
+          console.log("rippled has stalled");
+          process.exit();
+        };
+      });
+    }
+  })
+}(testRange[0], []));
--- a/bin/update_binformat.js
+++ b/bin/update_binformat.js
@@ -0,0 +1,119 @@
+/**
+ * bin/update_binformat.js
+ *
+ * This unholy abomination of a script generates the JavaScript file
+ * src/js/bintypes.js from various parts of the C++ source code.
+ *
+ * This should *NOT* be part of any automatic build process unless the C++
+ * source data are brought into a more easily parseable format. Until then,
+ * simply run this script manually and fix as needed.
+ */
+
+// XXX: Process LedgerFormats.(h|cpp) as well.
+
+var filenameProto = __dirname + '/../src/cpp/ripple/SerializeProto.h',
+    filenameTxFormatsH = __dirname + '/../src/cpp/ripple/TransactionFormats.h',
+    filenameTxFormats = __dirname + '/../src/cpp/ripple/TransactionFormats.cpp';
+
+var fs = require('fs');
+
+var output = [];
+
+// Stage 1: Get the field types and codes from SerializeProto.h
+var types = {},
+    fields = {};
+String(fs.readFileSync(filenameProto)).split('\n').forEach(function (line) {
+  line = line.replace(/^\s+|\s+$/g, '').replace(/\s+/g, '');
+  if (!line.length || line.slice(0, 2) === '//' || line.slice(-1) !== ')') return;
+
+  var tmp = line.slice(0, -1).split('('),
+      type = tmp[0],
+      opts = tmp[1].split(',');
+
+  if (type === 'TYPE') types[opts[1]] = [opts[0], +opts[2]];
+  else if (type === 'FIELD') fields[opts[0]] = [types[opts[1]][0], +opts[2]];
+});
+
+output.push('var ST = require("./serializedtypes");');
+output.push('');
+output.push('var REQUIRED = exports.REQUIRED = 0,');
+output.push('    OPTIONAL = exports.OPTIONAL = 1,');
+output.push('    DEFAULT  = exports.DEFAULT  = 2;');
+output.push('');
+
+function pad(s, n) { while (s.length < n) s += ' '; return s; }
+function padl(s, n) { while (s.length < n) s = ' '+s; return s; }
+
+Object.keys(types).forEach(function (type) {
+  output.push(pad('ST.'+types[type][0]+'.id', 25) + ' = '+types[type][1]+';');
+});
+output.push('');
+
+// Stage 2: Get the transaction type IDs from TransactionFormats.h
+var ttConsts = {};
+String(fs.readFileSync(filenameTxFormatsH)).split('\n').forEach(function (line) {
+  var regex = /tt([A-Z_]+)\s+=\s+([0-9-]+)/;
+  var match = line.match(regex);
+  if (match) ttConsts[match[1]] = +match[2];
+});
+
+// Stage 3: Get the transaction formats from TransactionFormats.cpp
+var base = [],
+    sections = [],
+    current = base;
+String(fs.readFileSync(filenameTxFormats)).split('\n').forEach(function (line) {
+  line = line.replace(/^\s+|\s+$/g, '').replace(/\s+/g, '');
+
+  var d_regex = /DECLARE_TF\(([A-Za-z]+),tt([A-Z_]+)/;
+  var d_match = line.match(d_regex);
+
+  var s_regex = /SOElement\(sf([a-z]+),SOE_(REQUIRED|OPTIONAL|DEFAULT)/i;
+  var s_match = line.match(s_regex);
+
+  if (d_match) sections.push(current = [d_match[1], ttConsts[d_match[2]]]);
+  else if (s_match) current.push([s_match[1], s_match[2]]);
+});
+
+function removeFinalComma(arr) {
+  arr[arr.length-1] = arr[arr.length-1].slice(0, -1);
+}
+
+output.push('var base = [');
+base.forEach(function (field) {
+  var spec = fields[field[0]];
+  output.push('  [ '+
+              pad("'"+field[0]+"'", 21)+', '+
+              pad(field[1], 8)+', '+
+              padl(""+spec[1], 2)+', '+
+              'ST.'+pad(spec[0], 3)+
+              ' ],');
+});
+removeFinalComma(output);
+output.push('];');
+output.push('');
+
+
+output.push('exports.tx = {');
+sections.forEach(function (section) {
+  var name = section.shift(),
+      ttid = section.shift();
+
+  output.push('  '+name+': ['+ttid+'].concat(base, [');
+  section.forEach(function (field) {
+    var spec = fields[field[0]];
+    output.push('    [ '+
+                pad("'"+field[0]+"'", 21)+', '+
+                pad(field[1], 8)+', '+
+                padl(""+spec[1], 2)+', '+
+                'ST.'+pad(spec[0], 3)+
+                ' ],');
+  });
+  removeFinalComma(output);
+  output.push('  ]),');
+});
+removeFinalComma(output);
+output.push('};');
+output.push('');
+
+console.log(output.join('\n'));
+
--- a/cmake/RippledCompiler.cmake
+++ b/cmake/RippledCompiler.cmake
@@ -90,16 +90,28 @@ if (MSVC)
      -errorreport:none
      -machine:X64)
 else ()
+  # HACK : because these need to come first, before any warning demotion
+  string (APPEND CMAKE_CXX_FLAGS " -Wall -Wdeprecated")
+  if (wextra)
+    string (APPEND CMAKE_CXX_FLAGS " -Wextra -Wno-unused-parameter")
+  endif ()
+  # not MSVC
  target_compile_options (common
    INTERFACE
-      -Wall
-      -Wdeprecated
-      $<$<BOOL:${is_clang}>:-Wno-deprecated-declarations>
-      $<$<BOOL:${wextra}>:-Wextra -Wno-unused-parameter>
      $<$<BOOL:${werr}>:-Werror>
-      -fstack-protector
+      $<$<COMPILE_LANGUAGE:CXX>:
+        -frtti
+        -Wnon-virtual-dtor
+      >
      -Wno-sign-compare
-      -Wno-unused-but-set-variable
+      -Wno-char-subscripts
+      -Wno-format
+      -Wno-unused-local-typedefs
+      -fstack-protector
+      $<$<BOOL:${is_gcc}>:
+        -Wno-unused-but-set-variable
+        -Wno-deprecated
+      >
      $<$<NOT:$<CONFIG:Debug>>:-fno-strict-aliasing>
      # tweak gcc optimization for debug
      $<$<AND:$<BOOL:${is_gcc}>,$<CONFIG:Debug>>:-O0>
--- a/cmake/RippledCore.cmake
+++ b/cmake/RippledCore.cmake
@@ -64,6 +64,7 @@ target_link_libraries(xrpl.imports.main
    secp256k1::secp256k1
    xrpl.libpb
    xxHash::xxhash
+    blake3
    $<$<BOOL:${voidstar}>:antithesis-sdk-cpp>
 )

@@ -99,15 +100,6 @@ target_link_libraries(xrpl.libxrpl.protocol PUBLIC
 add_module(xrpl resource)
 target_link_libraries(xrpl.libxrpl.resource PUBLIC xrpl.libxrpl.protocol)

-# Level 06
-add_module(xrpl net)
-target_link_libraries(xrpl.libxrpl.net PUBLIC 
-  xrpl.libxrpl.basics
-  xrpl.libxrpl.json
-  xrpl.libxrpl.protocol
-  xrpl.libxrpl.resource
-)
-
 add_module(xrpl server)
 target_link_libraries(xrpl.libxrpl.server PUBLIC xrpl.libxrpl.protocol)

@@ -130,7 +122,6 @@ target_link_modules(xrpl PUBLIC
  protocol
  resource
  server
-  net
 )

 # All headers in libxrpl are in modules.
--- a/cmake/RippledInstall.cmake
+++ b/cmake/RippledInstall.cmake
@@ -19,7 +19,6 @@ install (
    xrpl.libxrpl.protocol
    xrpl.libxrpl.resource
    xrpl.libxrpl.server
-    xrpl.libxrpl.net
    xrpl.libxrpl
    antithesis-sdk-cpp
  EXPORT RippleExports
--- a/cmake/RippledSettings.cmake
+++ b/cmake/RippledSettings.cmake
@@ -18,7 +18,7 @@ if(tests)
  endif()
 endif()

-option(unity "Creates a build using UNITY support in cmake." OFF)
+option(unity "Creates a build using UNITY support in cmake. This is the default" ON)
 if(unity)
  if(NOT is_ci)
    set(CMAKE_UNITY_BUILD_BATCH_SIZE 15 CACHE STRING "")
--- a/cmake/deps/Boost.cmake
+++ b/cmake/deps/Boost.cmake
@@ -2,6 +2,7 @@ find_package(Boost 1.82 REQUIRED
  COMPONENTS
    chrono
    container
+    context
    coroutine
    date_time
    filesystem
@@ -23,7 +24,7 @@ endif()

 target_link_libraries(ripple_boost
  INTERFACE
-    Boost::headers
+    Boost::boost
    Boost::chrono
    Boost::container
    Boost::coroutine
--- a/cmake/xrpl_add_test.cmake
+++ b/cmake/xrpl_add_test.cmake
@@ -1,41 +0,0 @@
-include(isolate_headers)
-
-function(xrpl_add_test name)
-  set(target ${PROJECT_NAME}.test.${name})
-
-  file(GLOB_RECURSE sources CONFIGURE_DEPENDS
-  "${CMAKE_CURRENT_SOURCE_DIR}/${name}/*.cpp"
-  "${CMAKE_CURRENT_SOURCE_DIR}/${name}.cpp"
-  )
-  add_executable(${target} EXCLUDE_FROM_ALL ${ARGN} ${sources})
-
-  isolate_headers(
-    ${target}
-    "${CMAKE_SOURCE_DIR}"
-    "${CMAKE_SOURCE_DIR}/tests/${name}"
-    PRIVATE
-  )
-
-  # Make sure the test isn't optimized away in unity builds
-  set_target_properties(${target} PROPERTIES
-    UNITY_BUILD_MODE GROUP
-    UNITY_BUILD_BATCH_SIZE 0)  # Adjust as needed
-
-  add_test(NAME ${target} COMMAND ${target})
-  set_tests_properties(
-    ${target} PROPERTIES
-    FIXTURES_REQUIRED ${target}_fixture
-  )
-
-  add_test(
-    NAME ${target}.build
-    COMMAND
-      ${CMAKE_COMMAND}
-      --build ${CMAKE_BINARY_DIR}
-      --config $<CONFIG>
-      --target ${target}
-  )
-  set_tests_properties(${target}.build PROPERTIES
-    FIXTURES_SETUP ${target}_fixture
-  )
-endfunction()
--- a/conan/profiles/default
+++ b/conan/profiles/default
@@ -1,34 +0,0 @@
-{% set os = detect_api.detect_os() %}
-{% set arch = detect_api.detect_arch() %}
-{% set compiler, version, compiler_exe = detect_api.detect_default_compiler() %}
-{% set compiler_version = version %}
-{% if os == "Linux" %}
-{% set compiler_version = detect_api.default_compiler_version(compiler, version) %}
-{% endif %}
-
-[settings]
-os={{ os }}
-arch={{ arch }}
-build_type=Debug
-compiler={{compiler}}
-compiler.version={{ compiler_version }}
-compiler.cppstd=20
-{% if os == "Windows" %}
-compiler.runtime=static
-{% else %}
-compiler.libcxx={{detect_api.detect_libcxx(compiler, version, compiler_exe)}}
-{% endif %}
-
-[conf]
-{% if compiler == "clang" and compiler_version >= 19 %}
-tools.build:cxxflags=['-Wno-missing-template-arg-list-after-template-kw']
-{% endif %}
-{% if compiler == "apple-clang" and compiler_version >= 17 %}
-tools.build:cxxflags=['-Wno-missing-template-arg-list-after-template-kw']
-{% endif %}
-{% if compiler == "gcc" and compiler_version < 13 %}
-tools.build:cxxflags=['-Wno-restrict']
-{% endif %}
-
-[tool_requires]
-!cmake/*: cmake/[>=3 <4]
--- a/conanfile.py
+++ b/conanfile.py
@@ -25,19 +25,15 @@ class Xrpl(ConanFile):

    requires = [
        'grpc/1.50.1',
-        'libarchive/3.8.1',
-        'nudb/2.0.9',
-        'openssl/1.1.1w',
+        'libarchive/3.7.6',
+        'nudb/2.0.8',
+        'openssl/1.1.1v',
        'soci/4.0.3',
        'zlib/1.3.1',
    ]

-    test_requires = [
-        'doctest/2.4.11',
-    ]
-
    tool_requires = [
-        'protobuf/3.21.12',
+        'protobuf/3.21.9',
    ]

    default_options = {
@@ -89,13 +85,12 @@ class Xrpl(ConanFile):
    }

    def set_version(self):
-        if self.version is None:
-            path = f'{self.recipe_folder}/src/libxrpl/protocol/BuildInfo.cpp'
-            regex = r'versionString\s?=\s?\"(.*)\"'
-            with open(path, encoding='utf-8') as file:
-                matches = (re.search(regex, line) for line in file)
-                match = next(m for m in matches if m)
-                self.version = match.group(1)
+        path = f'{self.recipe_folder}/src/libxrpl/protocol/BuildInfo.cpp'
+        regex = r'versionString\s?=\s?\"(.*)\"'
+        with open(path, 'r') as file:
+            matches = (re.search(regex, line) for line in file)
+            match = next(m for m in matches if m)
+            self.version = match.group(1)

    def configure(self):
        if self.settings.compiler == 'apple-clang':
@@ -104,19 +99,20 @@ class Xrpl(ConanFile):
    def requirements(self):
        # Conan 2 requires transitive headers to be specified
        transitive_headers_opt = {'transitive_headers': True} if conan_version.split('.')[0] == '2' else {}
-        self.requires('boost/1.86.0', force=True, **transitive_headers_opt)
-        self.requires('date/3.0.4', **transitive_headers_opt)
+        self.requires('boost/1.83.0', force=True, **transitive_headers_opt)
+        self.requires('date/3.0.3', **transitive_headers_opt)
        self.requires('lz4/1.10.0', force=True)
-        self.requires('protobuf/3.21.12', force=True)
-        self.requires('sqlite3/3.49.1', force=True)
+        self.requires('protobuf/3.21.9', force=True)
+        self.requires('sqlite3/3.47.0', force=True)
        if self.options.jemalloc:
            self.requires('jemalloc/5.3.0')
        if self.options.rocksdb:
-            self.requires('rocksdb/10.0.1')
-        self.requires('xxhash/0.8.3', **transitive_headers_opt)
+            self.requires('rocksdb/9.7.3')
+        self.requires('xxhash/0.8.2', **transitive_headers_opt)

    exports_sources = (
        'CMakeLists.txt',
+        'bin/getRippledInfo',
        'cfg/*',
        'cmake/*',
        'external/*',
@@ -167,17 +163,7 @@ class Xrpl(ConanFile):
        # `include/`, not `include/ripple/proto/`.
        libxrpl.includedirs = ['include', 'include/ripple/proto']
        libxrpl.requires = [
-            'boost::headers',
-            'boost::chrono',
-            'boost::container',
-            'boost::coroutine',
-            'boost::date_time',
-            'boost::filesystem',
-            'boost::json',
-            'boost::program_options',
-            'boost::regex',
-            'boost::system',
-            'boost::thread',
+            'boost::boost',
            'date::date',
            'grpc::grpc++',
            'libarchive::libarchive',
--- a/docs/0001-negative-unl/README.md
+++ b/docs/0001-negative-unl/README.md
@@ -30,7 +30,7 @@ the ledger (so the entire network has the same view). This will help the network
 see which validators are **currently** unreliable, and adjust their quorum
 calculation accordingly.

-_Improving the liveness of the network is the main motivation for the negative UNL._
+*Improving the liveness of the network is the main motivation for the negative UNL.*

 ### Targeted Faults

@@ -53,17 +53,16 @@ even if the number of remaining validators gets to 60%. Say we have a network
 with 10 validators on the UNL and everything is operating correctly. The quorum
 required for this network would be 8 (80% of 10). When validators fail, the
 quorum required would be as low as 6 (60% of 10), which is the absolute
-**_minimum quorum_**. We need the absolute minimum quorum to be strictly greater
+***minimum quorum***. We need the absolute minimum quorum to be strictly greater
 than 50% of the original UNL so that there cannot be two partitions of
 well-behaved nodes headed in different directions. We arbitrarily choose 60% as
 the minimum quorum to give a margin of safety.

 Consider these events in the absence of negative UNL:
-
 1. 1:00pm - validator1 fails, votes vs. quorum: 9 >= 8, we have quorum
 1. 3:00pm - validator2 fails, votes vs. quorum: 8 >= 8, we have quorum
 1. 5:00pm - validator3 fails, votes vs. quorum: 7 < 8, we don’t have quorum
-   - **network cannot validate new ledgers with 3 failed validators**
+    * **network cannot validate new ledgers with 3 failed validators**

 We're below 80% agreement, so new ledgers cannot be validated. This is how the
 XRP Ledger operates today, but if the negative UNL was enabled, the events would
@@ -71,20 +70,18 @@ happen as follows. (Please note that the events below are from a simplified
 version of our protocol.)

 1. 1:00pm - validator1 fails, votes vs. quorum: 9 >= 8, we have quorum
-1. 1:40pm - network adds validator1 to negative UNL, quorum changes to ceil(9 \* 0.8), or 8
+1. 1:40pm - network adds validator1 to negative UNL, quorum changes to ceil(9 * 0.8), or 8
 1. 3:00pm - validator2 fails, votes vs. quorum: 8 >= 8, we have quorum
-1. 3:40pm - network adds validator2 to negative UNL, quorum changes to ceil(8 \* 0.8), or 7
+1. 3:40pm - network adds validator2 to negative UNL, quorum changes to ceil(8 * 0.8), or 7
 1. 5:00pm - validator3 fails, votes vs. quorum: 7 >= 7, we have quorum
-1. 5:40pm - network adds validator3 to negative UNL, quorum changes to ceil(7 \* 0.8), or 6
+1. 5:40pm - network adds validator3 to negative UNL, quorum changes to ceil(7 * 0.8), or 6
 1. 7:00pm - validator4 fails, votes vs. quorum: 6 >= 6, we have quorum
-   - **network can still validate new ledgers with 4 failed validators**
+    * **network can still validate new ledgers with 4 failed validators**

 ## External Interactions

 ### Message Format Changes
-
 This proposal will:
-
 1. add a new pseudo-transaction type
 1. add the negative UNL to the ledger data structure.

@@ -92,20 +89,19 @@ Any tools or systems that rely on the format of this data will have to be
 updated.

 ### Amendment
-
 This feature **will** need an amendment to activate.

 ## Design

 This section discusses the following topics about the Negative UNL design:

- [Negative UNL protocol overview](#Negative-UNL-Protocol-Overview)
- [Validator reliability measurement](#Validator-Reliability-Measurement)
- [Format Changes](#Format-Changes)
- [Negative UNL maintenance](#Negative-UNL-Maintenance)
- [Quorum size calculation](#Quorum-Size-Calculation)
- [Filter validation messages](#Filter-Validation-Messages)
- [High level sequence diagram of code
+* [Negative UNL protocol overview](#Negative-UNL-Protocol-Overview)
+* [Validator reliability measurement](#Validator-Reliability-Measurement)
+* [Format Changes](#Format-Changes)
+* [Negative UNL maintenance](#Negative-UNL-Maintenance)
+* [Quorum size calculation](#Quorum-Size-Calculation)
+* [Filter validation messages](#Filter-Validation-Messages)
+* [High level sequence diagram of code
  changes](#High-Level-Sequence-Diagram-of-Code-Changes)

 ### Negative UNL Protocol Overview
@@ -118,9 +114,9 @@ with V in their UNL adjust the quorum and V’s validation message is not counte
 when verifying if a ledger is fully validated. V’s flow of messages and network
 interactions, however, will remain the same.

-We define the **\*effective UNL** = original UNL - negative UNL\*, and the
-**_effective quorum_** as the quorum of the _effective UNL_. And we set
-_effective quorum = Ceiling(80% _ effective UNL)\*.
+We define the ***effective UNL** = original UNL - negative UNL*, and the
+***effective quorum*** as the quorum of the *effective UNL*. And we set
+*effective quorum = Ceiling(80% * effective UNL)*.

 ### Validator Reliability Measurement

@@ -130,16 +126,16 @@ measure about its validators, but we have chosen ledger validation messages.
 This is because every validator shall send one and only one signed validation
 message per ledger. This keeps the measurement simple and removes
 timing/clock-sync issues. A node will measure the percentage of agreeing
-validation messages (_PAV_) received from each validator on the node's UNL. Note
+validation messages (*PAV*) received from each validator on the node's UNL. Note
 that the node will only count the validation messages that agree with its own
 validations.

 We define the **PAV** as the **P**ercentage of **A**greed **V**alidation
 messages received for the last N ledgers, where N = 256 by default.

-When the PAV drops below the **_low-water mark_**, the validator is considered
+When the PAV drops below the ***low-water mark***, the validator is considered
 unreliable, and is a candidate to be disabled by being added to the negative
-UNL. A validator must have a PAV higher than the **_high-water mark_** to be
+UNL. A validator must have a PAV higher than the ***high-water mark*** to be
 re-enabled. The validator is re-enabled by removing it from the negative UNL. In
 the implementation, we plan to set the low-water mark as 50% and the high-water
 mark as 80%.
@@ -147,24 +143,22 @@ mark as 80%.
 ### Format Changes

 The negative UNL component in a ledger contains three fields.
-
- **_NegativeUNL_**: The current negative UNL, a list of unreliable validators.
- **_ToDisable_**: The validator to be added to the negative UNL on the next
+* ***NegativeUNL***: The current negative UNL, a list of unreliable validators.
+* ***ToDisable***: The validator to be added to the negative UNL on the next
  flag ledger.
- **_ToReEnable_**: The validator to be removed from the negative UNL on the
+* ***ToReEnable***: The validator to be removed from the negative UNL on the
  next flag ledger.

-All three fields are optional. When the _ToReEnable_ field exists, the
-_NegativeUNL_ field cannot be empty.
+All three fields are optional. When the *ToReEnable* field exists, the
+*NegativeUNL* field cannot be empty.

-A new pseudo-transaction, **_UNLModify_**, is added. It has three fields
-
- **_Disabling_**: A flag indicating whether the modification is to disable or
+A new pseudo-transaction, ***UNLModify***, is added. It has three fields
+* ***Disabling***: A flag indicating whether the modification is to disable or
  to re-enable a validator.
- **_Seq_**: The ledger sequence number.
- **_Validator_**: The validator to be disabled or re-enabled.
+* ***Seq***: The ledger sequence number.
+* ***Validator***: The validator to be disabled or re-enabled.

-There would be at most one _disable_ `UNLModify` and one _re-enable_ `UNLModify`
+There would be at most one *disable* `UNLModify` and one *re-enable* `UNLModify`
 transaction per flag ledger. The full machinery is described further on.

 ### Negative UNL Maintenance
@@ -173,19 +167,19 @@ The negative UNL can only be modified on the flag ledgers. If a validator's
 reliability status changes, it takes two flag ledgers to modify the negative
 UNL. Let's see an example of the algorithm:

- Ledger seq = 100: A validator V goes offline.
- Ledger seq = 256: This is a flag ledger, and V's reliability measurement _PAV_
+* Ledger seq = 100: A validator V goes offline.
+* Ledger seq = 256: This is a flag ledger, and V's reliability measurement *PAV*
  is lower than the low-water mark. Other validators add `UNLModify`
  pseudo-transactions `{true, 256, V}` to the transaction set which goes through
  the consensus. Then the pseudo-transaction is applied to the negative UNL
  ledger component by setting `ToDisable = V`.
- Ledger seq = 257 ~ 511: The negative UNL ledger component is copied from the
+* Ledger seq = 257 ~ 511: The negative UNL ledger component is copied from the
  parent ledger.
- Ledger seq=512: This is a flag ledger, and the negative UNL is updated
+* Ledger seq=512: This is a flag ledger, and the negative UNL is updated
  `NegativeUNL = NegativeUNL + ToDisable`.

 The negative UNL may have up to `MaxNegativeListed = floor(original UNL * 25%)`
-validators. The 25% is because of 75% \* 80% = 60%, where 75% = 100% - 25%, 80%
+validators. The 25% is because of 75% * 80% = 60%, where 75% = 100% - 25%, 80%
 is the quorum of the effective UNL, and 60% is the absolute minimum quorum of
 the original UNL. Adding more than 25% validators to the negative UNL does not
 improve the liveness of the network, because adding more validators to the
@@ -193,43 +187,52 @@ negative UNL cannot lower the effective quorum.

 The following is the detailed algorithm:

- **If** the ledger seq = x is a flag ledger
-  1.  Compute `NegativeUNL = NegativeUNL + ToDisable - ToReEnable` if they
-      exist in the parent ledger
+* **If** the ledger seq = x is a flag ledger

-  1.  Try to find a candidate to disable if `sizeof NegativeUNL < MaxNegativeListed`
+    1. Compute `NegativeUNL = NegativeUNL + ToDisable - ToReEnable` if they
+    exist in the parent ledger

-  1.  Find a validator V that has a _PAV_ lower than the low-water
-      mark, but is not in `NegativeUNL`.
+    1. Try to find a candidate to disable if `sizeof NegativeUNL < MaxNegativeListed`:

-  1.  If two or more are found, their public keys are XORed with the hash
-      of the parent ledger and the one with the lowest XOR result is chosen.
-  1.  If V is found, create a `UNLModify` pseudo-transaction
-      `TxDisableValidator = {true, x, V}`
-  1.  Try to find a candidate to re-enable if `sizeof NegativeUNL > 0`:
-      1. Find a validator U that is in `NegativeUNL` and has a _PAV_ higher
-         than the high-water mark.
-      1. If U is not found, try to find one in `NegativeUNL` but not in the
-         local _UNL_.
-      1. If two or more are found, their public keys are XORed with the hash
-         of the parent ledger and the one with the lowest XOR result is chosen.
-      1. If U is found, create a `UNLModify` pseudo-transaction
-         `TxReEnableValidator = {false, x, U}`
+        1. Find a validator V that has a *PAV* lower than the low-water
+        mark, but is not in `NegativeUNL`.

-  1.  If any `UNLModify` pseudo-transactions are created, add them to the
-      transaction set. The transaction set goes through the consensus algorithm.
-  1.  If have enough support, the `UNLModify` pseudo-transactions remain in the
-      transaction set agreed by the validators. Then the pseudo-transactions are
-      applied to the ledger:
+        1. If two or more are found, their public keys are XORed with the hash
+        of the parent ledger and the one with the lowest XOR result is chosen.
+				
+        1. If V is found, create a `UNLModify` pseudo-transaction
+        `TxDisableValidator = {true, x, V}`
+				
+    1. Try to find a candidate to re-enable if `sizeof NegativeUNL > 0`:
+		
+        1. Find a validator U that is in `NegativeUNL` and has a *PAV* higher
+        than the high-water mark.
+				
+        1. If U is not found, try to find one in `NegativeUNL` but not in the
+        local *UNL*.
+				
+        1. If two or more are found, their public keys are XORed with the hash
+        of the parent ledger and the one with the lowest XOR result is chosen.
+				
+        1. If U is found, create a `UNLModify` pseudo-transaction
+        `TxReEnableValidator = {false, x, U}`
+				
+    1. If any `UNLModify` pseudo-transactions are created, add them to the
+    transaction set. The transaction set goes through the consensus algorithm.
+		
+    1. If they have enough support, the `UNLModify` pseudo-transactions remain in the
+    transaction set agreed by the validators. Then the pseudo-transactions are
+    applied to the ledger:
+		
+        1. If there is a `TxDisableValidator`, set `ToDisable=TxDisableValidator.V`.
+        Else clear `ToDisable`.
+				
+        1. If there is a `TxReEnableValidator`, set
+        `ToReEnable=TxReEnableValidator.U`. Else clear `ToReEnable`.
+				
+* **Else** (not a flag ledger)

-          1. If have `TxDisableValidator`, set `ToDisable=TxDisableValidator.V`.
-          Else clear `ToDisable`.
-
-          1. If have `TxReEnableValidator`, set
-          `ToReEnable=TxReEnableValidator.U`. Else clear `ToReEnable`.
-
- **Else** (not a flag ledger)
-  1. Copy the negative UNL ledger component from the parent ledger
+    1. Copy the negative UNL ledger component from the parent ledger

 The negative UNL is stored on each ledger because we don't know when a validator
 may reconnect to the network. If the negative UNL was stored only on every flag
@@ -270,26 +273,31 @@ not counted when checking if the ledger is fully validated.
 The diagram below is the sequence of one round of consensus. Classes and
 components with non-trivial changes are colored green.

- The `ValidatorList` class is modified to compute the quorum of the effective
+* The `ValidatorList` class is modified to compute the quorum of the effective
  UNL.

- The `Validations` class provides an interface for querying the validation
+* The `Validations` class provides an interface for querying the validation
  messages from trusted validators.

- The `ConsensusAdaptor` component:
-  - The `RCLConsensus::Adaptor` class is modified for creating `UNLModify`
-    Pseudo-Transactions.
-  - The `Change` class is modified for applying `UNLModify`
-    Pseudo-Transactions.
-  - The `Ledger` class is modified for creating and adjusting the negative UNL
-    ledger component.
-  - The `LedgerMaster` class is modified for filtering out validation messages
-    from negative UNL validators when verifying if a ledger is fully
-    validated.
+* The `ConsensusAdaptor` component:
+
+    * The `RCLConsensus::Adaptor` class is modified for creating `UNLModify`
+      Pseudo-Transactions.
+		
+    * The `Change` class is modified for applying `UNLModify`
+      Pseudo-Transactions.
+		
+    * The `Ledger` class is modified for creating and adjusting the negative UNL
+      ledger component.
+		
+    * The `LedgerMaster` class is modified for filtering out validation messages
+      from negative UNL validators when verifying if a ledger is fully
+      validated.

 ![Sequence diagram](./negativeUNL_highLevel_sequence.png?raw=true "Negative UNL
 Changes")

+
 ## Roads Not Taken

 ### Use a Mechanism Like Fee Voting to Process UNLModify Pseudo-Transactions
@@ -303,7 +311,7 @@ and different quorums for the same ledger. As a result, the network's safety is
 impacted.

 This updated version does not impact safety though operates a bit more slowly.
-The negative UNL modifications in the _UNLModify_ pseudo-transaction approved by
+The negative UNL modifications in the *UNLModify* pseudo-transaction approved by
 the consensus will take effect at the next flag ledger. The extra time of the
 256 ledgers should be enough for nodes to be in sync of the negative UNL
 modifications.
@@ -326,28 +334,29 @@ expiration approach cannot be simply applied.
 ### Validator Reliability Measurement and Flag Ledger Frequency

 If the ledger time is about 4.5 seconds and the low-water mark is 50%, then in
-the worst case, it takes 48 minutes _((0.5 _ 256 + 256 + 256) _ 4.5 / 60 = 48)_
+the worst case, it takes 48 minutes *((0.5 * 256 + 256 + 256) * 4.5 / 60 = 48)*
 to put an offline validator on the negative UNL. We considered lowering the flag
 ledger frequency so that the negative UNL can be more responsive. We also
 considered decoupling the reliability measurement and flag ledger frequency to
 be more flexible. In practice, however, their benefits are not clear.

+
 ## New Attack Vectors

 A group of malicious validators may try to frame a reliable validator and put it
 on the negative UNL. But they cannot succeed. Because:

 1. A reliable validator sends a signed validation message every ledger. A
-   sufficient peer-to-peer network will propagate the validation messages to other
-   validators. The validators will decide if another validator is reliable or not
-   only by its local observation of the validation messages received. So an honest
-   validator’s vote on another validator’s reliability is accurate.
+sufficient peer-to-peer network will propagate the validation messages to other
+validators. The validators will decide if another validator is reliable or not
+only by its local observation of the validation messages received. So an honest
+validator’s vote on another validator’s reliability is accurate.

 1. Given the votes are accurate, and one vote per validator, an honest validator
-   will not create a UNLModify transaction of a reliable validator.
+will not create a UNLModify transaction of a reliable validator.

 1. A validator can be added to a negative UNL only through a UNLModify
-   transaction.
+transaction.

 Assuming the group of malicious validators is less than the quorum, they cannot
 frame a reliable validator.
@@ -356,32 +365,32 @@ frame a reliable validator.

 The bullet points below briefly summarize the current proposal:

- The motivation of the negative UNL is to improve the liveness of the network.
+* The motivation of the negative UNL is to improve the liveness of the network.

- The targeted faults are the ones frequently observed in the production
+* The targeted faults are the ones frequently observed in the production
  network.

- Validators propose negative UNL candidates based on their local measurements.
+* Validators propose negative UNL candidates based on their local measurements.

- The absolute minimum quorum is 60% of the original UNL.
+* The absolute minimum quorum is 60% of the original UNL.

- The format of the ledger is changed, and a new _UNLModify_ pseudo-transaction
+* The format of the ledger is changed, and a new *UNLModify* pseudo-transaction
  is added. Any tools or systems that rely on the format of these data will have
  to be updated.

- The negative UNL can only be modified on the flag ledgers.
+* The negative UNL can only be modified on the flag ledgers.

- At most one validator can be added to the negative UNL at a flag ledger.
+* At most one validator can be added to the negative UNL at a flag ledger.

- At most one validator can be removed from the negative UNL at a flag ledger.
+* At most one validator can be removed from the negative UNL at a flag ledger.

- If a validator's reliability status changes, it takes two flag ledgers to
+* If a validator's reliability status changes, it takes two flag ledgers to
  modify the negative UNL.

- The quorum is the larger of 80% of the effective UNL and 60% of the original
+* The quorum is the larger of 80% of the effective UNL and 60% of the original
  UNL.

- If a validator is on the negative UNL, its validation messages are ignored
+* If a validator is on the negative UNL, its validation messages are ignored
  when the local node verifies if a ledger is fully validated.

 ## FAQ
@@ -406,7 +415,7 @@ lower quorum size while keeping the network safe.
 validator removed from the negative UNL? </h3>

 A validator’s reliability is measured by other validators. If a validator
-becomes unreliable, at a flag ledger, other validators propose _UNLModify_
+becomes unreliable, at a flag ledger, other validators propose *UNLModify*
 pseudo-transactions which vote to add the validator to the negative UNL during
 the consensus session. If agreed, the validator is added to the negative UNL at
 the next flag ledger. The mechanism of removing a validator from the negative
@@ -414,32 +423,32 @@ UNL is the same.

 ### Question: Given a negative UNL, what happens if the UNL changes?

-Answer: Let’s consider the cases:
+Answer: Let’s consider the cases: 

-1.  A validator is added to the UNL, and it is already in the negative UNL. This
-    case could happen when not all the nodes have the same UNL. Note that the
-    negative UNL on the ledger lists unreliable nodes that are not necessarily the
-    validators for everyone.
+1. A validator is added to the UNL, and it is already in the negative UNL. This
+case could happen when not all the nodes have the same UNL. Note that the
+negative UNL on the ledger lists unreliable nodes that are not necessarily the
+validators for everyone.

-        In this case, the liveness is affected negatively. Because the minimum
-        quorum could be larger but the usable validators are not increased.
+    In this case, the liveness is affected negatively. Because the minimum
+    quorum could be larger but the usable validators are not increased.

-1.  A validator is removed from the UNL, and it is in the negative UNL.
+1. A validator is removed from the UNL, and it is in the negative UNL.

    In this case, the liveness is affected positively. Because the quorum could
    be smaller but the usable validators are not reduced.

-1.  A validator is added to the UNL, and it is not in the negative UNL.
-1.  A validator is removed from the UNL, and it is not in the negative UNL.
-
+1. A validator is added to the UNL, and it is not in the negative UNL.
+1. A validator is removed from the UNL, and it is not in the negative UNL.
+	
    Case 3 and 4 are not affected by the negative UNL protocol.

-### Question: Can we simply lower the quorum to 60% without the negative UNL?
+### Question: Can we simply lower the quorum to 60% without the negative UNL? 

 Answer: No, because the negative UNL approach is safer.

-First let’s compare the two approaches intuitively, (1) the _negative UNL_
-approach, and (2) _lower quorum_: simply lowering the quorum from 80% to 60%
+First let’s compare the two approaches intuitively, (1) the *negative UNL*
+approach, and (2) *lower quorum*: simply lowering the quorum from 80% to 60%
 without the negative UNL. The negative UNL approach uses consensus to come up
 with a list of unreliable validators, which are then removed from the effective
 UNL temporarily. With this approach, the list of unreliable validators is agreed
@@ -453,75 +462,75 @@ Next we compare the two approaches quantitatively with examples, and apply
 Theorem 8 of [Analysis of the XRP Ledger Consensus
 Protocol](https://arxiv.org/abs/1802.07242) paper:

-_XRP LCP guarantees fork safety if **O<sub>i,j</sub> > n<sub>j</sub> / 2 +
+*XRP LCP guarantees fork safety if **O<sub>i,j</sub> > n<sub>j</sub> / 2 +
 n<sub>i</sub> − q<sub>i</sub> + t<sub>i,j</sub>** for every pair of nodes
-P<sub>i</sub>, P<sub>j</sub>,_
+P<sub>i</sub>, P<sub>j</sub>,*

-where _O<sub>i,j</sub>_ is the overlapping requirement, n<sub>j</sub> and
+where *O<sub>i,j</sub>* is the overlapping requirement, n<sub>j</sub> and
 n<sub>i</sub> are UNL sizes, q<sub>i</sub> is the quorum size of P<sub>i</sub>,
-_t<sub>i,j</sub> = min(t<sub>i</sub>, t<sub>j</sub>, O<sub>i,j</sub>)_, and
+*t<sub>i,j</sub> = min(t<sub>i</sub>, t<sub>j</sub>, O<sub>i,j</sub>)*, and
 t<sub>i</sub> and t<sub>j</sub> are the numbers of faults that can be tolerated by
 P<sub>i</sub> and P<sub>j</sub>.

-We denote _UNL<sub>i</sub>_ as _P<sub>i</sub>'s UNL_, and _|UNL<sub>i</sub>|_ as
-the size of _P<sub>i</sub>'s UNL_.
+We denote *UNL<sub>i</sub>* as *P<sub>i</sub>'s UNL*, and *|UNL<sub>i</sub>|* as
+the size of *P<sub>i</sub>'s UNL*.

-Assuming _|UNL<sub>i</sub>| = |UNL<sub>j</sub>|_, let's consider the following
+Assuming *|UNL<sub>i</sub>| = |UNL<sub>j</sub>|*, let's consider the following
 three cases:

-1.  With 80% quorum and 20% faults, _O<sub>i,j</sub> > 100% / 2 + 100% - 80% +
-    20% = 90%_. I.e. fork safety requires > 90% UNL overlaps. This is one of the
-    results in the analysis paper.
+1. With 80% quorum and 20% faults, *O<sub>i,j</sub> > 100% / 2 + 100% - 80% +
+20% = 90%*. I.e. fork safety requires > 90% UNL overlaps. This is one of the
+results in the analysis paper.

-1.  If the quorum is 60%, the relationship between the overlapping requirement
-    and the faults that can be tolerated is _O<sub>i,j</sub> > 90% +
-    t<sub>i,j</sub>_. Under the same overlapping condition (i.e. 90%), to guarantee
-    the fork safety, the network cannot tolerate any faults. So under the same
-    overlapping condition, if the quorum is simply lowered, the network can tolerate
-    fewer faults.
+1. If the quorum is 60%, the relationship between the overlapping requirement
+and the faults that can be tolerated is *O<sub>i,j</sub> > 90% +
+t<sub>i,j</sub>*. Under the same overlapping condition (i.e. 90%), to guarantee
+the fork safety, the network cannot tolerate any faults. So under the same
+overlapping condition, if the quorum is simply lowered, the network can tolerate
+fewer faults.

-1.  With the negative UNL approach, we want to argue that the inequation
-    _O<sub>i,j</sub> > n<sub>j</sub> / 2 + n<sub>i</sub> − q<sub>i</sub> +
-    t<sub>i,j</sub>_ is always true to guarantee fork safety, while the negative UNL
-    protocol runs, i.e. the effective quorum is lowered without weakening the
-    network's fault tolerance. To make the discussion easier, we rewrite the
-    inequation as _O<sub>i,j</sub> > n<sub>j</sub> / 2 + (n<sub>i</sub> −
-    q<sub>i</sub>) + min(t<sub>i</sub>, t<sub>j</sub>)_, where O<sub>i,j</sub> is
-    dropped from the definition of t<sub>i,j</sub> because _O<sub>i,j</sub> >
-    min(t<sub>i</sub>, t<sub>j</sub>)_ always holds under the parameters we will
-    use. Assuming a validator V is added to the negative UNL, now let's consider the
-    4 cases:
+1. With the negative UNL approach, we want to argue that the inequation
+*O<sub>i,j</sub> > n<sub>j</sub> / 2 + n<sub>i</sub> − q<sub>i</sub> +
+t<sub>i,j</sub>* is always true to guarantee fork safety, while the negative UNL
+protocol runs, i.e. the effective quorum is lowered without weakening the
+network's fault tolerance. To make the discussion easier, we rewrite the
+inequation as *O<sub>i,j</sub> > n<sub>j</sub> / 2 + (n<sub>i</sub> −
+q<sub>i</sub>) + min(t<sub>i</sub>, t<sub>j</sub>)*, where O<sub>i,j</sub> is
+dropped from the definition of t<sub>i,j</sub> because *O<sub>i,j</sub> >
+min(t<sub>i</sub>, t<sub>j</sub>)* always holds under the parameters we will
+use. Assuming a validator V is added to the negative UNL, now let's consider the
+4 cases:

-        1. V is not on UNL<sub>i</sub> nor UNL<sub>j</sub>
+    1. V is not on UNL<sub>i</sub> nor UNL<sub>j</sub>

-            The inequation holds because none of the variables change.
+        The inequation holds because none of the variables change.

-        1. V is on UNL<sub>i</sub> but not on UNL<sub>j</sub>
+    1. V is on UNL<sub>i</sub> but not on UNL<sub>j</sub>

-            The value of *(n<sub>i</sub> − q<sub>i</sub>)* is smaller. The value of
-            *min(t<sub>i</sub>, t<sub>j</sub>)* could be smaller too. Other
-            variables do not change. Overall, the left side of the inequation does
-            not change, but the right side is smaller. So the inequation holds.
+        The value of *(n<sub>i</sub> − q<sub>i</sub>)* is smaller. The value of
+        *min(t<sub>i</sub>, t<sub>j</sub>)* could be smaller too. Other
+        variables do not change. Overall, the left side of the inequation does
+        not change, but the right side is smaller. So the inequation holds.
+    
+    1. V is not on UNL<sub>i</sub> but on UNL<sub>j</sub>

-        1. V is not on UNL<sub>i</sub> but on UNL<sub>j</sub>
+        The value of *n<sub>j</sub> / 2* is smaller. The value of
+        *min(t<sub>i</sub>, t<sub>j</sub>)* could be smaller too. Other
+        variables do not change. Overall, the left side of the inequation does
+        not change, but the right side is smaller. So the inequation holds.
+    
+    1. V is on both UNL<sub>i</sub> and UNL<sub>j</sub>

-            The value of *n<sub>j</sub> / 2* is smaller. The value of
-            *min(t<sub>i</sub>, t<sub>j</sub>)* could be smaller too. Other
-            variables do not change. Overall, the left side of the inequation does
-            not change, but the right side is smaller. So the inequation holds.
+        The value of *O<sub>i,j</sub>* is reduced by 1. The values of
+        *n<sub>j</sub> / 2*, *(n<sub>i</sub> − q<sub>i</sub>)*, and
+        *min(t<sub>i</sub>, t<sub>j</sub>)* are reduced by 0.5, 0.2, and 1
+        respectively. The right side is reduced by 1.7. Overall, the left side
+        of the inequation is reduced by 1, and the right side is reduced by 1.7.
+        So the inequation holds.

-        1. V is on both UNL<sub>i</sub> and UNL<sub>j</sub>
-
-            The value of *O<sub>i,j</sub>* is reduced by 1. The values of
-            *n<sub>j</sub> / 2*, *(n<sub>i</sub> − q<sub>i</sub>)*, and
-            *min(t<sub>i</sub>, t<sub>j</sub>)* are reduced by 0.5, 0.2, and 1
-            respectively. The right side is reduced by 1.7. Overall, the left side
-            of the inequation is reduced by 1, and the right side is reduced by 1.7.
-            So the inequation holds.
-
-        The inequation holds for all the cases. So with the negative UNL approach,
-        the network's fork safety is preserved, while the quorum is lowered that
-        increases the network's liveness.
+    The inequation holds for all the cases. So with the negative UNL approach,
+    the network's fork safety is preserved, while the quorum is lowered, which
+    increases the network's liveness.

 <h3> Question: We have observed that occasionally a validator wanders off on its
 own chain. How is this case handled by the negative UNL algorithm? </h3>
@@ -556,11 +565,11 @@ will be used after that. We want to see the test cases still pass with real
 network delay. A test case specifies:

 1. a UNL with a different number of validators for different test cases,
-1. a network with zero or more non-validator nodes,
+1. a network with zero or more non-validator nodes, 
 1. a sequence of validator reliability change events (by killing/restarting
   nodes, or by running modified rippled that does not send all validation
   messages),
-1. the correct outcomes.
+1. the correct outcomes. 

 For all the test cases, the correct outcomes are verified by examining logs. We
 will grep the log to see if the correct negative UNLs are generated, and whether
@@ -570,7 +579,6 @@ timing parameters of rippled will be changed to have faster ledger time. Most if
 not all test cases do not need client transactions.

 For example, the test cases for the prototype:
-
 1. A 10-validator UNL.
 1. The network does not have other nodes.
 1. The validators will be started from the genesis. Once they start to produce
@@ -579,11 +587,11 @@ For example, the test cases for the prototype:
 1. A sequence of events (or the lack of events) such as a killed validator is
   added to the negative UNL.

-#### Roads Not Taken: Test with Extended CSF
+#### Roads Not Taken: Test with Extended CSF 

 We considered testing with the current unit test framework, specifically the
 [Consensus Simulation
 Framework](https://github.com/ripple/rippled/blob/develop/src/test/csf/README.md)
 (CSF). However, the CSF currently can only test the generic consensus algorithm
 as in the paper: [Analysis of the XRP Ledger Consensus
-Protocol](https://arxiv.org/abs/1802.07242).
+Protocol](https://arxiv.org/abs/1802.07242).
--- a/docs/0010-ledger-replay/README.md
+++ b/docs/0010-ledger-replay/README.md
@@ -82,9 +82,7 @@ pattern and the way coroutines are implemented, where every yield saves the spot
 in the code where it left off and every resume jumps back to that spot.

 ### Sequence Diagram
-
 ![Sequence diagram](./ledger_replay_sequence.png?raw=true "A successful ledger replay")

 ### Class Diagram
-
 ![Class diagram](./ledger_replay_classes.png?raw=true "Ledger replay classes")
--- a/docs/CheatSheet.md
+++ b/docs/CheatSheet.md
@@ -16,5 +16,5 @@
 ## Function

 - Minimize external dependencies
-  - Pass options in the ctor instead of using theConfig
-  - Use as few other classes as possible
+  * Pass options in the ctor instead of using theConfig
+  * Use as few other classes as possible
--- a/docs/CodingStyle.md
+++ b/docs/CodingStyle.md
@@ -1,18 +1,18 @@
 # Coding Standards

-Coding standards used here gradually evolve and propagate through
+Coding standards used here gradually evolve and propagate through 
 code reviews. Some aspects are enforced more strictly than others.

 ## Rules

-These rules only apply to our own code. We can't enforce any sort of
+These rules only apply to our own code. We can't enforce any sort of 
 style on the external repositories and libraries we include. The best
 guideline is to maintain the standards that are used in those libraries.

- Tab inserts 4 spaces. No tab characters.
- Braces are indented in the [Allman style][1].
- Modern C++ principles. No naked `new` or `delete`.
- Line lengths limited to 80 characters. Exceptions limited to data and tables.
+* Tab inserts 4 spaces. No tab characters.
+* Braces are indented in the [Allman style][1].
+* Modern C++ principles. No naked ```new``` or ```delete```.
+* Line lengths limited to 80 characters. Exceptions limited to data and tables.

 ## Guidelines

@@ -21,17 +21,17 @@ why you're doing it. Think, use common sense, and consider that this
 your changes will probably need to be maintained long after you've
 moved on to other projects.

- Use white space and blank lines to guide the eye and keep your intent clear.
- Put private data members at the top of a class, and the 6 public special
-  members immediately after, in the following order:
-  - Destructor
-  - Default constructor
-  - Copy constructor
-  - Copy assignment
-  - Move constructor
-  - Move assignment
- Don't over-inline by defining large functions within the class
-  declaration, not even for template classes.
+* Use white space and blank lines to guide the eye and keep your intent clear.
+* Put private data members at the top of a class, and the 6 public special
+members immediately after, in the following order:
+  * Destructor
+  * Default constructor
+  * Copy constructor
+  * Copy assignment
+  * Move constructor
+  * Move assignment
+* Don't over-inline by defining large functions within the class
+declaration, not even for template classes.

 ## Formatting

@@ -39,44 +39,44 @@ The goal of source code formatting should always be to make things as easy to
 read as possible. White space is used to guide the eye so that details are not
 overlooked. Blank lines are used to separate code into "paragraphs."

- Always place a space before and after all binary operators,
+* Always place a space before and after all binary operators,
  especially assignments (`operator=`).
- The `!` operator should be preceded by a space, but not followed by one.
- The `~` operator should be preceded by a space, but not followed by one.
- The `++` and `--` operators should have no spaces between the operator and
+* The `!` operator should be preceded by a space, but not followed by one.
+* The `~` operator should be preceded by a space, but not followed by one.
+* The `++` and `--` operators should have no spaces between the operator and
  the operand.
- A space never appears before a comma, and always appears after a comma.
- Don't put spaces after a parenthesis. A typical member function call might
+* A space never appears before a comma, and always appears after a comma.
+* Don't put spaces after a parenthesis. A typical member function call might
  look like this: `foobar (1, 2, 3);`
- In general, leave a blank line before an `if` statement.
- In general, leave a blank line after a closing brace `}`.
- Do not place code on the same line as any opening or
+* In general, leave a blank line before an `if` statement.
+* In general, leave a blank line after a closing brace `}`.
+* Do not place code on the same line as any opening or
  closing brace.
- Do not write `if` statements all-on-one-line. The exception to this is when
+* Do not write `if` statements all-on-one-line. The exception to this is when
  you've got a sequence of similar `if` statements, and are aligning them all
  vertically to highlight their similarities.
- In an `if-else` statement, if you surround one half of the statement with
+* In an `if-else` statement, if you surround one half of the statement with
  braces, you also need to put braces around the other half, to match.
- When writing a pointer type, use this spacing: `SomeObject* myObject`.
+* When writing a pointer type, use this spacing: `SomeObject* myObject`.
  Technically, a more correct spacing would be `SomeObject *myObject`, but
  it makes more sense for the asterisk to be grouped with the type name,
  since being a pointer is part of the type, not the variable name. The only
  time that this can lead to any problems is when you're declaring multiple
  pointers of the same type in the same statement - which leads on to the next
  rule:
- When declaring multiple pointers, never do so in a single statement, e.g.
+* When declaring multiple pointers, never do so in a single statement, e.g.
  `SomeObject* p1, *p2;` - instead, always split them out onto separate lines
  and write the type name again, to make it quite clear what's going on, and
  avoid the danger of missing out any vital asterisks.
- The previous point also applies to references, so always put the `&` next to
+* The previous point also applies to references, so always put the `&` next to
  the type rather than the variable, e.g. `void foo (Thing const& thing)`. And
  don't put a space on both sides of the `*` or `&` - always put a space after
  it, but never before it.
- The word `const` should be placed to the right of the thing that it modifies,
+* The word `const` should be placed to the right of the thing that it modifies,
  for consistency. For example `int const` refers to an int which is const.
  `int const*` is a pointer to an int which is const. `int *const` is a const
  pointer to an int.
- Always place a space in between the template angle brackets and the type
+* Always place a space in between the template angle brackets and the type
  name. Template code is already hard enough to read!

 [1]: http://en.wikipedia.org/wiki/Indent_style#Allman_style
--- a/docs/HeapProfiling.md
+++ b/docs/HeapProfiling.md
@@ -31,7 +31,7 @@ and header under /opt/local/include:

    $ scons clang profile-jemalloc=/opt/local

---
+----------------------

 ## Using the jemalloc library from within the code

@@ -60,3 +60,4 @@ Linking against the jemalloc library will override
 the system's default `malloc()` and related functions with jemalloc's
 implementation. This is the case even if the code is not instrumented
 to use jemalloc's specific API.
+
--- a/docs/README.md
+++ b/docs/README.md
@@ -7,6 +7,7 @@ Install these dependencies:
 - [Doxygen](http://www.doxygen.nl): All major platforms have [official binary
  distributions](http://www.doxygen.nl/download.html#srcbin), or you can
  build from [source](http://www.doxygen.nl/download.html#srcbin).
+
  - MacOS: We recommend installing via Homebrew: `brew install doxygen`.
    The executable will be installed in `/usr/local/bin` which is already
    in the default `PATH`.
@@ -20,15 +21,18 @@ Install these dependencies:
    $ ln -s /Applications/Doxygen.app/Contents/Resources/doxygen /usr/local/bin/doxygen
    ```

- [PlantUML](http://plantuml.com):
+- [PlantUML](http://plantuml.com): 
+
  1. Install a functioning Java runtime, if you don't already have one.
  2. Download [`plantuml.jar`](http://sourceforge.net/projects/plantuml/files/plantuml.jar/download).

 - [Graphviz](https://www.graphviz.org):
+
  - Linux: Install from your package manager.
  - Windows: Use an [official installer](https://graphviz.gitlab.io/_pages/Download/Download_windows.html).
  - MacOS: Install via Homebrew: `brew install graphviz`.

+
 ## Docker

 Instead of installing the above dependencies locally, you can use the official
@@ -36,16 +40,14 @@ build environment Docker image, which has all of them installed already.

 1. Install [Docker](https://docs.docker.com/engine/installation/)
 2. Pull the image:
-
-```
-sudo docker pull rippleci/rippled-ci-builder:2944b78d22db
-```
-
+  ```
+  sudo docker pull rippleci/rippled-ci-builder:2944b78d22db
+  ```
 3. Run the image from the project folder:
+  ```
+  sudo docker run -v $PWD:/opt/rippled --rm rippleci/rippled-ci-builder:2944b78d22db
+  ```

-```
-sudo docker run -v $PWD:/opt/rippled --rm rippleci/rippled-ci-builder:2944b78d22db
-```

 ## Build

--- a/docs/build/conan.md
+++ b/docs/build/conan.md
@@ -5,6 +5,7 @@ we should first understand _why_ we use Conan,
 and to understand that,
 we need to understand how we use CMake.

+
 ### CMake

 Technically, you don't need CMake to build this project.
@@ -32,9 +33,9 @@ Parameters include:
 - where to find the compiler and linker
 - where to find dependencies, e.g. libraries and headers
 - how to link dependencies, e.g. any special compiler or linker flags that
-  need to be used with them, including preprocessor definitions
+    need to be used with them, including preprocessor definitions
 - how to compile translation units, e.g. with optimizations, debug symbols,
-  position-independent code, etc.
+    position-independent code, etc.
 - on Windows, which runtime library to link with

 For some of these parameters, like the build system and compiler,
@@ -53,6 +54,7 @@ Most humans prefer to put them into a configuration file, once, that
 CMake can read every time it is configured.
 For CMake, that file is a [toolchain file][toolchain].

+
 ### Conan

 These next few paragraphs on Conan are going to read much like the ones above
@@ -77,10 +79,10 @@ Those files include:

 - A single toolchain file.
 - For every dependency, a CMake [package configuration file][pcf],
-  [package version file][pvf], and for every build type, a package
-  targets file.
-  Together, these files implement version checking and define `IMPORTED`
-  targets for the dependencies.
+    [package version file][pvf], and for every build type, a package
+    targets file.
+    Together, these files implement version checking and define `IMPORTED`
+    targets for the dependencies.

 The toolchain file itself amends the search path
 ([`CMAKE_PREFIX_PATH`][prefix_path]) so that [`find_package()`][find_package]
--- a/docs/build/depend.md
+++ b/docs/build/depend.md
@@ -2,7 +2,8 @@ We recommend two different methods to depend on libxrpl in your own [CMake][]
 project.
 Both methods add a CMake library target named `xrpl::libxrpl`.

-## Conan requirement
+
+## Conan requirement 

 The first method adds libxrpl as a [Conan][] requirement.
 With this method, there is no need for a Git [submodule][].
@@ -47,6 +48,7 @@ cmake \
 cmake --build . --parallel
 ```

+
 ## CMake subdirectory

 The second method adds the [rippled][] project as a CMake
@@ -88,6 +90,7 @@ cmake \
 cmake --build . --parallel
 ```

+
 [add_subdirectory]: https://cmake.org/cmake/help/latest/command/add_subdirectory.html
 [submodule]: https://git-scm.com/book/en/v2/Git-Tools-Submodules
 [rippled]: https://github.com/ripple/rippled
--- a/docs/build/environment.md
+++ b/docs/build/environment.md
@@ -5,39 +5,42 @@ platforms: Linux, macOS, or Windows.

 [BUILD.md]: ../../BUILD.md

+
 ## Linux

 Package ecosystems vary across Linux distributions,
 so there is no one set of instructions that will work for every Linux user.
-The instructions below are written for Debian 12 (Bookworm).
+These instructions are written for Ubuntu 22.04.
+They are largely copied from the [script][1] used to configure our Docker
+container for continuous integration.
+That script handles many more responsibilities.
+These instructions are just the bare minimum to build one configuration of
+rippled.
+You can check that codebase for other Linux distributions and versions.
+If you cannot find yours there,
+then we hope that these instructions can at least guide you in the right
+direction.

 ```
-export GCC_RELEASE=12
-sudo apt update
-sudo apt install --yes gcc-${GCC_RELEASE} g++-${GCC_RELEASE} python3-pip \
-  python-is-python3 python3-venv python3-dev curl wget ca-certificates \
-  git build-essential cmake ninja-build libc6-dev
-sudo pip install --break-system-packages conan
+apt update
+apt install --yes curl git libssl-dev pipx python3.10-dev python3-pip make g++-11 libprotobuf-dev protobuf-compiler

-sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc-${GCC_RELEASE} 999
-sudo update-alternatives --install \
-  /usr/bin/gcc gcc /usr/bin/gcc-${GCC_RELEASE} 100 \
-  --slave /usr/bin/g++ g++ /usr/bin/g++-${GCC_RELEASE} \
-  --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-${GCC_RELEASE} \
-  --slave /usr/bin/gcc-nm gcc-nm /usr/bin/gcc-nm-${GCC_RELEASE} \
-  --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-${GCC_RELEASE} \
-  --slave /usr/bin/gcov gcov /usr/bin/gcov-${GCC_RELEASE} \
-  --slave /usr/bin/gcov-tool gcov-tool /usr/bin/gcov-tool-${GCC_RELEASE} \
-  --slave /usr/bin/gcov-dump gcov-dump /usr/bin/gcov-dump-${GCC_RELEASE} \
-  --slave /usr/bin/lto-dump lto-dump /usr/bin/lto-dump-${GCC_RELEASE}
-sudo update-alternatives --auto cc
-sudo update-alternatives --auto gcc
+curl --location --remote-name \
+  "https://github.com/Kitware/CMake/releases/download/v3.25.1/cmake-3.25.1.tar.gz"
+tar -xzf cmake-3.25.1.tar.gz
+rm cmake-3.25.1.tar.gz
+cd cmake-3.25.1
+./bootstrap --parallel=$(nproc)
+make --jobs $(nproc)
+make install
+cd ..
+
+pipx install 'conan<2'
+pipx ensurepath
 ```

-If you use different Linux distribution, hope the instruction above can guide
-you in the right direction. We try to maintain compatibility with all recent
-compiler releases, so if you use a rolling distribution like e.g. Arch or CentOS
-then there is a chance that everything will "just work".
+[1]: https://github.com/thejohnfreeman/rippled-docker/blob/master/ubuntu-22.04/install.sh
+

 ## macOS

@@ -50,33 +53,6 @@ minimum required (see [BUILD.md][]).
 clang --version
 ```

-### Install Xcode Specific Version (Optional)
-
-If you develop other applications using XCode you might be consistently updating to the newest version of Apple Clang.
-This will likely cause issues building rippled. You may want to install a specific version of Xcode:
-
-1. **Download Xcode**
-   - Visit [Apple Developer Downloads](https://developer.apple.com/download/more/)
-   - Sign in with your Apple Developer account
-   - Search for an Xcode version that includes **Apple Clang (Expected Version)**
-   - Download the `.xip` file
-
-2. **Install and Configure Xcode**
-
-   ```bash
-   # Extract the .xip file and rename for version management
-   # Example: Xcode_16.2.app
-
-   # Move to Applications directory
-   sudo mv Xcode_16.2.app /Applications/
-
-   # Set as default toolchain (persistent)
-   sudo xcode-select -s /Applications/Xcode_16.2.app/Contents/Developer
-
-   # Set as environment variable (temporary)
-   export DEVELOPER_DIR=/Applications/Xcode_16.2.app/Contents/Developer
-   ```
-
 The command line developer tools should include Git too:

 ```
@@ -96,10 +72,10 @@ and use it to install Conan:
 brew update
 brew install xz
 brew install pyenv
-pyenv install 3.11
-pyenv global 3.11
+pyenv install 3.10-dev
+pyenv global 3.10-dev
 eval "$(pyenv init -)"
-pip install 'conan'
+pip install 'conan<2'
 ```

 Install CMake with Homebrew too:
--- a/docs/build/install.md
+++ b/docs/build/install.md
@@ -6,6 +6,7 @@ like CentOS.
 Installing from source is an option for all platforms,
 and the only supported option for installing custom builds.

+
 ## From source

 From a source build, you can install rippled and libxrpl using CMake's
@@ -20,23 +21,25 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and

 [1]: https://cmake.org/cmake/help/latest/variable/CMAKE_INSTALL_PREFIX.html

+
 ## With the APT package manager

-1.  Update repositories:
+1. Update repositories:

        sudo apt update -y

-2.  Install utilities:
+2. Install utilities:

        sudo apt install -y apt-transport-https ca-certificates wget gnupg

-3.  Add Ripple's package-signing GPG key to your list of trusted keys:
+3. Add Ripple's package-signing GPG key to your list of trusted keys:

        sudo mkdir /usr/local/share/keyrings/
        wget -q -O - "https://repos.ripple.com/repos/api/gpg/key/public" | gpg --dearmor > ripple-key.gpg
        sudo mv ripple-key.gpg /usr/local/share/keyrings

-4.  Check the fingerprint of the newly-added key:
+
+4. Check the fingerprint of the newly-added key:

        gpg /usr/local/share/keyrings/ripple-key.gpg

@@ -48,34 +51,37 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and
        uid           TechOps Team at Ripple <techops+rippled@ripple.com>
        sub   rsa3072 2019-02-14 [E] [expires: 2026-02-17]

+
    In particular, make sure that the fingerprint matches. (In the above example, the fingerprint is on the third line, starting with `C001`.)

-5.  Add the appropriate Ripple repository for your operating system version:
+5. Add the appropriate Ripple repository for your operating system version:

         echo "deb [signed-by=/usr/local/share/keyrings/ripple-key.gpg] https://repos.ripple.com/repos/rippled-deb focal stable" | \
             sudo tee -a /etc/apt/sources.list.d/ripple.list

     The above example is appropriate for **Ubuntu 20.04 Focal Fossa**. For other operating systems, replace the word `focal` with one of the following:
+
     - `jammy` for **Ubuntu 22.04 Jammy Jellyfish**
     - `bionic` for **Ubuntu 18.04 Bionic Beaver**
     - `bullseye` for **Debian 11 Bullseye**
     - `buster` for **Debian 10 Buster**

     If you want access to development or pre-release versions of `rippled`, use one of the following instead of `stable`:
+
     - `unstable` - Pre-release builds ([`release` branch](https://github.com/ripple/rippled/tree/release))
     - `nightly` - Experimental/development builds ([`develop` branch](https://github.com/ripple/rippled/tree/develop))

     **Warning:** Unstable and nightly builds may be broken at any time. Do not use these builds for production servers.

-6.  Fetch the Ripple repository.
+6. Fetch the Ripple repository.

         sudo apt -y update

-7.  Install the `rippled` software package:
+7. Install the `rippled` software package:

         sudo apt -y install rippled

-8.  Check the status of the `rippled` service:
+8. Check the status of the `rippled` service:

         systemctl status rippled.service

@@ -83,22 +89,24 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and

         sudo systemctl start rippled.service

-9.  Optional: allow `rippled` to bind to privileged ports.
+9. Optional: allow `rippled` to bind to privileged ports.

    This allows you to serve incoming API requests on port 80 or 443. (If you want to do so, you must also update the config file's port settings.)

        sudo setcap 'cap_net_bind_service=+ep' /opt/ripple/bin/rippled

+
 ## With the YUM package manager

-1.  Install the Ripple RPM repository:
+1. Install the Ripple RPM repository:

    Choose the appropriate RPM repository for the stability of releases you want:
+
    - `stable` for the latest production release (`master` branch)
    - `unstable` for pre-release builds (`release` branch)
    - `nightly` for experimental/development builds (`develop` branch)

-    _Stable_
+    *Stable*

        cat << REPOFILE | sudo tee /etc/yum.repos.d/ripple.repo
        [ripple-stable]
@@ -110,7 +118,7 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and
        gpgkey=https://repos.ripple.com/repos/rippled-rpm/stable/repodata/repomd.xml.key
        REPOFILE

-    _Unstable_
+    *Unstable*

        cat << REPOFILE | sudo tee /etc/yum.repos.d/ripple.repo
        [ripple-unstable]
@@ -122,7 +130,7 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and
        gpgkey=https://repos.ripple.com/repos/rippled-rpm/unstable/repodata/repomd.xml.key
        REPOFILE

-    _Nightly_
+    *Nightly*

        cat << REPOFILE | sudo tee /etc/yum.repos.d/ripple.repo
        [ripple-nightly]
@@ -134,18 +142,18 @@ The default [prefix][1] is typically `/usr/local` on Linux and macOS and
        gpgkey=https://repos.ripple.com/repos/rippled-rpm/nightly/repodata/repomd.xml.key
        REPOFILE

-2.  Fetch the latest repo updates:
+2. Fetch the latest repo updates:

        sudo yum -y update

-3.  Install the new `rippled` package:
+3. Install the new `rippled` package:

        sudo yum install -y rippled

-4.  Configure the `rippled` service to start on boot:
+4. Configure the `rippled` service to start on boot:

        sudo systemctl enable rippled.service

-5.  Start the `rippled` service:
+5. Start the `rippled` service:

        sudo systemctl start rippled.service
--- a/docs/consensus.md
+++ b/docs/consensus.md
@@ -3,7 +3,7 @@
 **This section is a work in progress!!**

 Consensus is the task of reaching agreement within a distributed system in the
-presence of faulty or even malicious participants. This document outlines the
+presence of faulty or even malicious participants.  This document outlines the
 [XRP Ledger Consensus Algorithm](https://arxiv.org/abs/1802.07242)
 as implemented in [rippled](https://github.com/ripple/rippled), but
 focuses on its utility as a generic consensus algorithm independent of the
@@ -15,38 +15,38 @@ collectively trusted subnetworks.
 ## Distributed Agreement

 A challenge for distributed systems is reaching agreement on changes in shared
-state. For the Ripple network, the shared state is the current ledger--account
-information, account balances, order books and other financial data. We will
+state.  For the Ripple network, the shared state is the current ledger--account
+information, account balances, order books and other financial data.  We will
 refer to shared distributed state as a *ledger* throughout the remainder of this
 document.

 ![Ledger Chain](images/consensus/ledger_chain.png "Ledger Chain")

 As shown above, new ledgers are made by applying a set of transactions to the
-prior ledger. For the Ripple network, transactions include payments,
+prior ledger.  For the Ripple network, transactions include payments,
 modification of account settings, updates to offers and more.

 In a centralized system, generating the next ledger is trivial since there is a
 single unique arbiter of which transactions to include and how to apply them to
-a ledger. For decentralized systems, participants must resolve disagreements on
+a ledger.  For decentralized systems, participants must resolve disagreements on
 the set of transactions to include, the order to apply those transactions, and
-even the resulting ledger after applying the transactions. This is even more
+even the resulting ledger after applying the transactions.  This is even more
 difficult when some participants are faulty or malicious.

-The Ripple network is a decentralized and **trust-full** network. Anyone is free
+The Ripple network is a decentralized and **trust-full** network.  Anyone is free
 to join and participants are free to choose a subset of peers that are
 collectively trusted to not collude in an attempt to defraud the participant.
 Leveraging this network of trust, the Ripple algorithm has two main components.

- _Consensus_ in which network participants agree on the transactions to apply
+* *Consensus* in which network participants agree on the transactions to apply
  to a prior ledger, based on the positions of their chosen peers.
- _Validation_ in which network participants agree on what ledger was
+* *Validation* in which network participants agree on what ledger was
  generated, based on the ledgers generated by chosen peers.

 These phases are continually repeated to process transactions submitted to the
 network, generating successive ledgers and giving rise to the blockchain ledger
-history depicted below. In this diagram, time is flowing to the right, but
-links between ledgers point backward to the parent. Also note the alternate
+history depicted below.  In this diagram, time is flowing to the right, but
+links between ledgers point backward to the parent.  Also note the alternate
 Ledger 2 that was generated by some participants, but which failed validation
 and was abandoned.

@@ -54,7 +54,7 @@ and was abandoned.

 The remainder of this section describes the Consensus and Validation algorithms
 in more detail and is meant as a companion guide to understanding the generic
-implementation in `rippled`. The document **does not** discuss correctness,
+implementation in `rippled`.  The document **does not** discuss correctness,
 fault-tolerance or liveness properties of the algorithms or the full details of
 how they integrate within `rippled` to support the Ripple Consensus Ledger.

@@ -62,76 +62,76 @@ how they integrate within `rippled` to support the Ripple Consensus Ledger.

 ### Definitions

- The _ledger_ is the shared distributed state. Each ledger has a unique ID to
-  distinguish it from all other ledgers. During consensus, the _previous_,
-  _prior_ or _last-closed_ ledger is the most recent ledger seen by consensus
+* The *ledger* is the shared distributed state.  Each ledger has a unique ID to
+  distinguish it from all other ledgers.  During consensus, the *previous*,
+  *prior* or *last-closed* ledger is the most recent ledger seen by consensus
  and is the basis upon which it will build the next ledger.
- A _transaction_ is an instruction for an atomic change in the ledger state. A
+* A *transaction* is an instruction for an atomic change in the ledger state.  A
  unique ID distinguishes a transaction from other transactions.
- A _transaction set_ is a set of transactions under consideration by consensus.
-  The goal of consensus is to reach agreement on this set. The generic
+* A *transaction set* is a set of transactions under consideration by consensus.
+  The goal of consensus is to reach agreement on this set.  The generic
  consensus algorithm does not rely on an ordering of transactions within the
  set, nor does it specify how to apply a transaction set to a ledger to
-  generate a new ledger. A unique ID distinguishes a set of transactions from
+  generate a new ledger.  A unique ID distinguishes a set of transactions from
  all other sets of transactions.
- A _node_ is one of the distributed actors running the consensus algorithm. It
+* A *node* is one of the distributed actors running the consensus algorithm.  It
  has a unique ID to distinguish it from all other nodes.
- A _peer_ of a node is another node that it has chosen to follow and which it
-  believes will not collude with other chosen peers. The choice of peers is not
+* A *peer* of a node is another node that it has chosen to follow and which it
+  believes will not collude with other chosen peers.  The choice of peers is not
  symmetric, since participants can decide on their chosen sets independently.
- A /position/ is the current belief of the next ledger's transaction set and
+* A *position* is the current belief of the next ledger's transaction set and
  close time. Position can refer to the node's own position or the position of a
  peer.
- A _proposal_ is one of a sequence of positions a node shares during consensus.
+* A *proposal* is one of a sequence of positions a node shares during consensus.
  An initial proposal contains the starting position taken by a node before it
-  considers any peer positions. If a node subsequently updates its position in
-  response to its peers, it will issue an updated proposal. A proposal is
+  considers any peer positions.  If a node subsequently updates its position in
+  response to its peers, it will issue an updated proposal.  A proposal is
  uniquely identified by the ID of the proposing node, the ID of the position
  taken, the ID of the prior ledger the proposal is for, and the sequence number
  of the proposal.
- A _dispute_ is a transaction that is either not part of a node's position or
+* A *dispute* is a transaction that is either not part of a node's position or
  not in a peer's position. During consensus, the node will add or remove
  disputed transactions from its position based on that transaction's support
  amongst its peers.

 Note that most types have an ID as a lightweight identifier of instances of that
-type. Consensus often operates on the IDs directly since the underlying type is
-potentially expensive to share over the network. For example, proposal's only
-contain the ID of the position of a peer. Since many peers likely have the same
+type.  Consensus often operates on the IDs directly since the underlying type is
+potentially expensive to share over the network.  For example, proposals only
+contain the ID of the position of a peer.  Since many peers likely have the same
 position, this reduces the need to send the full transaction set multiple times.
 Instead, a node can request the transaction set from the network if necessary.

-### Overview
+### Overview 

 ![Consensus Overview](images/consensus/consensus_overview.png "Consensus Overview")

 The diagram above is an overview of the consensus process from the perspective
-of a single participant. Recall that during a single consensus round, a node is
+of a single participant.  Recall that during a single consensus round, a node is
 trying to agree with its peers on which transactions to apply to its prior
-ledger when generating the next ledger. It also attempts to agree on the
-[network time when the ledger closed](#effective_close_time). There are
+ledger when generating the next ledger.  It also attempts to agree on the
+[network time when the ledger closed](#effective_close_time).  There are
 3 main phases to a consensus round:

- A call to `startRound` places the node in the `Open` phase. In this phase,
-  the node is waiting for transactions to include in its open ledger.
- At some point, the node will `Close` the open ledger and transition to the
-  `Establish` phase. In this phase, the node shares/receives peer proposals on
-  which transactions should be accepted in the closed ledger.
- At some point, the node determines it has reached consensus with its peers on
-  which transactions to include. It transitions to the `Accept` phase. In this
-  phase, the node works on applying the transactions to the prior ledger to
-  generate a new closed ledger. Once the new ledger is completed, the node shares
-  the validated ledger hash with the network and makes a call to `startRound` to
-  start the cycle again for the next ledger.
+* A call to `startRound` places the node in the `Open` phase.  In this phase,
+the node is waiting for transactions to include in its open ledger.
+* At some point, the node will `Close` the open ledger and transition to the
+`Establish` phase.  In this phase, the node shares/receives peer proposals on
+which transactions should be accepted in the closed ledger.
+* At some point, the node determines it has reached consensus with its peers on
+which transactions to include. It transitions to the `Accept` phase. In this
+phase, the node works on applying the transactions to the prior ledger to
+generate a new closed ledger. Once the new ledger is completed, the node shares
+the validated ledger hash with the network and makes a call to `startRound` to
+start the cycle again for the next ledger.

 Throughout, a heartbeat timer calls `timerEntry` at a regular frequency to drive
 the process forward. Although the `startRound` call occurs at arbitrary times
 based on when the initial round began and the time it takes to apply
 transactions, the transitions from `Open` to `Establish` and `Establish` to
-`Accept` only occur during calls to `timerEntry`. Similarly, transactions can
+`Accept` only occur during calls to `timerEntry`.  Similarly, transactions can
 arrive at arbitrary times, independent of the heartbeat timer. Transactions
 received after the `Open` to `Close` transition and not part of peer proposals
-won't be considered until the next consensus round. They are represented above
+won't be considered until the next consensus round.  They are represented above
 by the light green triangles.

 Peer proposals are issued by a node during a `timerEntry` call, but since peers
@@ -139,16 +139,16 @@ do not synchronize `timerEntry` calls, they are received by other peers at
 arbitrary times. Peer proposals are only considered if received prior to the
 `Establish` to `Accept` transition, and only if the peer is working on the same
 prior ledger. Peer proposals received after consensus is reached will not be
-meaningful and are represented above by the circle with the X in it. Only
+meaningful and are represented above by the circle with the X in it.  Only
 proposals from chosen peers are considered.

-### Effective Close Time ### {#effective_close_time}
-
+### Effective Close Time ###         {#effective_close_time}
+    
 In addition to agreeing on a transaction set, each consensus round tries to
-agree on the time the ledger closed. Each node calculates its own close time
-when it closes the open ledger. This exact close time is rounded to the nearest
-multiple of the current _effective close time resolution_. It is this
-_effective close time_ that nodes seek to agree on. This allows servers to
+agree on the time the ledger closed.  Each node calculates its own close time
+when it closes the open ledger.  This exact close time is rounded to the nearest
+multiple of the current *effective close time resolution*.  It is this
+*effective close time* that nodes seek to agree on. This allows servers to
 derive a common time for a ledger without the need for perfectly synchronized
 clocks. As depicted below, the 3 pink arrows represent exact close times from 3
 consensus nodes that round to the same effective close time given the current
@@ -158,9 +158,9 @@ different effective close time given the current resolution.
 ![Effective Close Time](images/consensus/EffCloseTime.png "Effective Close Time")

 The effective close time is part of the node's position and is shared with peers
-in its proposals. Just like the position on the consensus transaction set, a
+in its proposals.  Just like the position on the consensus transaction set, a
 node will update its close time position in response to its peers' effective
-close time positions. Peers can agree to disagree on the close time, in which
+close time positions.  Peers can agree to disagree on the close time, in which
 case the effective close time is taken as 1 second past the prior close.

 The close time resolution is itself dynamic, decreasing (coarser) resolution in
@@ -173,12 +173,12 @@ reach close time consensus.
 Internally, a node operates under one of the following consensus modes. Either
 of the first two modes may be chosen when a consensus round starts.

- _Proposing_ indicates the node is a full-fledged consensus participant. It
+* *Proposing* indicates the node is a full-fledged consensus participant.  It
  takes on positions and sends proposals to its peers.
- _Observing_ indicates the node is a passive consensus participant. It
+* *Observing* indicates the node is a passive consensus participant.  It
  maintains a position internally, but does not propose that position to its
  peers. Instead, it receives peer proposals and updates its position
-  to track the majority of its peers. This may be preferred if the node is only
+  to track the majority of its peers.  This may be preferred if the node is only
  being used to track the state of the network or during a start-up phase while
  it is still synchronizing with the network.

@@ -186,14 +186,14 @@ The other two modes are set internally during the consensus round when the node
 believes it is no longer working on the dominant ledger chain based on peer
 validations. It checks this on every call to `timerEntry`.

- _Wrong Ledger_ indicates the node is not working on the correct prior ledger
-  and does not have it available. It requests that ledger from the network, but
-  continues to work towards consensus this round while waiting. If it had been
-  _proposing_, it will send a special "bowout" proposal to its peers to indicate
+* *Wrong Ledger* indicates the node is not working on the correct prior ledger
+  and does not have it available.  It requests that ledger from the network, but
+  continues to work towards consensus this round while waiting.  If it had been
+  *proposing*, it will send a special "bowout" proposal to its peers to indicate
  its change in mode for the rest of this round. For the duration of the round,
  it defers to peer positions for determining the consensus outcome as if it
-  were just _observing_.
- _Switch Ledger_ indicates that the node has acquired the correct prior ledger
+  were just *observing*.
+* *Switch Ledger* indicates that the node has acquired the correct prior ledger
  from the network. Although it now has the correct prior ledger, the fact that
  it had the wrong one at some point during this round means it is likely behind
  and should defer to peer positions for determining the consensus outcome.
@@ -201,7 +201,7 @@ validations. It checks this on every call to `timerEntry`.
 ![Consensus Modes](images/consensus/consensus_modes.png "Consensus Modes")

 Once either wrong ledger or switch ledger are reached, the node cannot
-return to proposing or observing until the next consensus round. However,
+return to proposing or observing until the next consensus round.  However,
 the node could change its view of the correct prior ledger, so going from
 switch ledger to wrong ledger and back again is possible.

@@ -215,16 +215,16 @@ decide how best to generate the next ledger once it declares consensus.
 ### Phases

 As depicted in the overview diagram, consensus is best viewed as a progression
-through 3 phases. There are 4 public methods of the generic consensus algorithm
+through 3 phases.  There are 4 public methods of the generic consensus algorithm
 that determine this progression

- `startRound` begins a consensus round.
- `timerEntry` is called at a regular frequency (`LEDGER_MIN_CLOSE`) and is the
-  only call to consensus that can change the phase from `Open` to `Establish`
+* `startRound` begins a consensus round.
+* `timerEntry` is called at a regular frequency (`LEDGER_MIN_CLOSE`) and is the
+  only call to consensus that can change the phase from `Open` to `Establish`
  or `Accept`.
- `peerProposal` is called whenever a peer proposal is received and is what
+* `peerProposal` is called whenever a peer proposal is received and is what
  allows a node to update its position in a subsequent `timerEntry` call.
- `gotTxSet` is called when a transaction set is received from the network. This
+* `gotTxSet` is called when a transaction set is received from the network. This
  is typically in response to a prior request from the node to acquire the
  transaction set corresponding to a disagreeing peer's position.

@@ -234,13 +234,13 @@ actions are taken in response to these calls.
 #### Open

 The `Open` phase is a quiescent period to allow transactions to build up in the
-node's open ledger. The duration is a trade-off between latency and throughput.
+node's open ledger.  The duration is a trade-off between latency and throughput.
 A shorter window reduces the latency to generating the next ledger, but also
 reduces transaction throughput due to fewer transactions accepted into the
 ledger.

 A call to `startRound` would forcibly begin the next consensus round, skipping
-completion of the current round. This is not expected during normal operation.
+completion of the current round.  This is not expected during normal operation.
 Calls to `peerProposal` or `gotTxSet` simply store the proposal or transaction
 set for use in the coming `Establish` phase.

@@ -254,27 +254,28 @@ the ledger.
 Under normal circumstances, the open ledger period ends when one of the following
 is true

- if there are transactions in the open ledger and more than `LEDGER_MIN_CLOSE`
-  have elapsed. This is the typical behavior.
- if there are no open transactions and a suitably longer idle interval has
-  elapsed. This increases the opportunity to get some transaction into
+* if there are transactions in the open ledger and more than `LEDGER_MIN_CLOSE`
+  have elapsed.  This is the typical behavior.
+* if there are no open transactions and a suitably longer idle interval has
+  elapsed.  This increases the opportunity to get some transaction into
  the next ledger and avoids doing useless work closing an empty ledger.
- if more than half the number of prior round peers have already closed or finished
+* if more than half the number of prior round peers have already closed or finished
  this round. This indicates the node is falling behind and needs to catch up.

+
 When closing the ledger, the node takes its initial position based on the
 transactions in the open ledger and uses the current time as
-its initial close time estimate. If in the proposing mode, the node shares its
-initial position with peers. Now that the node has taken a position, it will
-consider any peer positions for this round that arrived earlier. The node
+its initial close time estimate.  If in the proposing mode, the node shares its
+initial position with peers.  Now that the node has taken a position, it will
+consider any peer positions for this round that arrived earlier.  The node
 generates disputed transactions for each transaction not in common with a peer's
-position. The node also records the vote of each peer for each disputed
+position.  The node also records the vote of each peer for each disputed
 transaction.

-In the example below, we suppose our node has closed with transactions 1,2 and 3. It creates disputes
+In the example below, we suppose our node has closed with transactions 1,2 and 3.  It creates disputes
 for transactions 2,3 and 4, since at least one peer position differs on each.

-##### disputes ##### {#disputes_image}
+##### disputes #####     {#disputes_image}

 ![Disputes](images/consensus/disputes.png "Disputes")

@@ -285,22 +286,22 @@ exchanges proposals with peers in an attempt to reach agreement on the consensus
 transactions and effective close time.

 A call to `startRound` would forcibly begin the next consensus round, skipping
-completion of the current round. This is not expected during normal operation.
+completion of the current round.  This is not expected during normal operation.
 Calls to `peerProposal` or `gotTxSet` that reflect new positions will generate
 disputed transactions for any new disagreements and will update the peer's vote
 for all disputed transactions.

 A call to `timerEntry` first checks that the node is working from the correct
-prior ledger. If not, the node will update the mode and request the correct
-ledger. Otherwise, the node updates the node's position and considers whether
-to switch to the `Accepted` phase and declare consensus reached. However, at
-least `LEDGER_MIN_CONSENSUS` time must have elapsed before doing either. This
+prior ledger. If not, the node will update the mode and request the correct
+ledger.  Otherwise, the node updates the node's position and considers whether
+to switch to the `Accept` phase and declare consensus reached.  However, at
+least `LEDGER_MIN_CONSENSUS` time must have elapsed before doing either.  This
 allows peers an opportunity to take an initial position and share it.

 ##### Update Position

 In order to achieve consensus, the node is looking for a transaction set that is
-supported by a super-majority of peers. The node works towards this set by
+supported by a super-majority of peers.  The node works towards this set by
 adding or removing disputed transactions from its position based on an
 increasing threshold for inclusion.

@@ -309,23 +310,23 @@ increasing threshold for inclusion.
 By starting with a lower threshold, a node initially allows a wide set of
 transactions into its position. If the establish round continues and the node is
 "stuck", a higher threshold can focus on accepting transactions with the most
-support. The constants that define the thresholds and durations at which the
+support.  The constants that define the thresholds and durations at which the
 thresholds change are given by `AV_XXX_CONSENSUS_PCT` and
 `AV_XXX_CONSENSUS_TIME` respectively, where `XXX` is `INIT`,`MID`,`LATE` and
-`STUCK`. The effective close time position is updated using the same
+`STUCK`.  The effective close time position is updated using the same
 thresholds.

 Given the [example disputes above](#disputes_image) and an initial threshold
 of 50%, our node would retain its position since transaction 1 was not in
-dispute and transactions 2 and 3 have 75% support. Since its position did not
-change, it would not need to send a new proposal to peers. Peer C would not
+dispute and transactions 2 and 3 have 75% support.  Since its position did not
+change, it would not need to send a new proposal to peers.  Peer C would not
 change either. Peer A would add transaction 3 to its position and Peer B would
 remove transaction 4 from its position; both would then send an updated
 position.

 Conversely, if the diagram reflected a later call to `timerEntry` that occurs in
 the stuck region with a threshold of say 95%, our node would remove transactions
-2 and 3 from its candidate set and send an updated position. Likewise, all the
+2 and 3 from its candidate set and send an updated position.  Likewise, all the
 other peers would end up with only transaction 1 in their position.

 Lastly, if our node were not in the proposing mode, it would not include its own
@@ -335,7 +336,7 @@ our node would maintain its position of transactions 1, 2 and 3.
 ##### Checking Consensus

 After updating its position, the node checks for supermajority agreement with
-its peers on its current position. This agreement is of the exact transaction
+its peers on its current position.  This agreement is of the exact transaction
 set, not just the support of individual transactions. That is, if our position
 is a subset of a peer's position, that counts as a disagreement. Also recall
 that effective close time agreement allows a supermajority of participants
@@ -343,10 +344,10 @@ agreeing to disagree.

 Consensus is declared when the following 3 clauses are true:

- `LEDGER_MIN_CONSENSUS` time has elapsed in the establish phase
- At least 75% of the prior round proposers have proposed OR this establish
+* `LEDGER_MIN_CONSENSUS` time has elapsed in the establish phase
+* At least 75% of the prior round proposers have proposed OR this establish
  phase is `LEDGER_MIN_CONSENSUS` longer than the last round's establish phase
- `minimumConsensusPercentage` of ourself and our peers share the same position
+* `minimumConsensusPercentage` of ourself and our peers share the same position

 The middle condition ensures slower peers have a chance to share positions, but
 prevents waiting too long on peers that have disconnected. Additionally, a node
@@ -363,22 +364,22 @@ logic.
 Once consensus is reached (or moved on), the node switches to the `Accept` phase
 and signals to the implementing code that the round is complete. That code is
 responsible for using the consensus transaction set to generate the next ledger
-and calling `startRound` to begin the next round. The implementation has total
+and calling `startRound` to begin the next round.  The implementation has total
 freedom on ordering transactions, deciding what to do if consensus moved on,
 determining whether to retry or abandon local transactions that did not make the
 consensus set and updating any internal state based on the consensus progress.

 #### Accept

-The `Accept` phase is the terminal phase of the consensus algorithm. Calls to
+The `Accept` phase is the terminal phase of the consensus algorithm.  Calls to
 `timerEntry`, `peerProposal` and `gotTxSet` will not change the internal
-consensus state while in the accept phase. The expectation is that the
+consensus state while in the accept phase.  The expectation is that the
 application specific code is working to generate the new ledger based on the
 consensus outcome. Once complete, that code should make a call to `startRound`
 to kick off the next consensus round. The `startRound` call includes the new
 prior ledger, prior ledger ID and whether the round should begin in the
-proposing or observing mode. After setting some initial state, the phase
-transitions to `Open`. The node will also check if the provided prior ledger
+proposing or observing mode.  After setting some initial state, the phase
+transitions to `Open`.  The node will also check if the provided prior ledger
 and ID are correct, updating the mode and requesting the proper ledger from the
 network if necessary.

@@ -447,9 +448,9 @@ struct TxSet
 ### Ledger

 The `Ledger` type represents the state shared amongst the
-distributed participants. Notice that the details of how the next ledger is
+distributed participants.  Notice that the details of how the next ledger is
 generated from the prior ledger and the consensus accepted transaction set is
-not part of the interface. Within the generic code, this type is primarily used
+not part of the interface.  Within the generic code, this type is primarily used
 to know that peers are working on the same tip of the ledger chain and to
 provide some basic timing data for consensus.

@@ -625,7 +626,7 @@ struct Adaptor

    // Called when consensus operating mode changes
    void onModeChange(ConsensusMode before, ConsensusMode after);
-
+    
    // Called when ledger closes.  Implementation should generate an initial Result
    // with position based on the current open ledger's transactions.
    ConsensusResult onClose(Ledger const &, Ledger const & prev, ConsensusMode mode);
@@ -656,24 +657,27 @@ struct Adaptor
 The implementing class hides many details of the peer communication
 model from the generic code.

- The `share` member functions are responsible for sharing the given type with a
+* The `share` member functions are responsible for sharing the given type with a
  node's peers, but are agnostic to the mechanism. Ideally, messages are delivered
-  faster than `LEDGER_GRANULARITY`.
- The generic code does not specify how transactions are submitted by clients,
+  faster than `LEDGER_GRANULARITY`. 
+* The generic code does not specify how transactions are submitted by clients,
  propagated through the network or stored in the open ledger. Indeed, the open
  ledger is only conceptual from the perspective of the generic code---the
  initial position and transaction set are opaquely generated in a
  `Consensus::Result` instance returned from the `onClose` callback.
- The calls to `acquireLedger` and `acquireTxSet` only have non-trivial return
-  if the ledger or transaction set of interest is available. The implementing
+* The calls to `acquireLedger` and `acquireTxSet` only have non-trivial return
+  if the ledger or transaction set of interest is available.  The implementing
  class is free to block while acquiring, or return the empty option while
-  servicing the request asynchronously. Due to legacy reasons, the two calls
+  servicing the request asynchronously.  Due to legacy reasons, the two calls
  are not symmetric. `acquireTxSet` requires the host application to call
  `gotTxSet` when an asynchronous `acquire` completes. Conversely,
  `acquireLedger` will be called again later by the consensus code if it still
  desires the ledger with the hope that the asynchronous acquisition is
  complete.

+
 ## Validation

 Coming Soon!
+
+
--- a/external/README.md
+++ b/external/README.md
@@ -1,10 +1,14 @@
 # External Conan recipes

-The subdirectories in this directory contain copies of external libraries used
-by rippled.
+The subdirectories in this directory contain either copies or Conan recipes
+of external libraries used by rippled.
+The Conan recipes include patches we have not yet pushed upstream.

-| Folder           | Upstream                                                       | Description                                                                                  |
-| :--------------- | :------------------------------------------------------------- | :------------------------------------------------------------------------------------------- |
-| `antithesis-sdk` | [Project](https://github.com/antithesishq/antithesis-sdk-cpp/) | [Antithesis](https://antithesis.com/docs/using_antithesis/sdk/cpp/overview.html) SDK for C++ |
-| `ed25519-donna`  | [Project](https://github.com/floodyberry/ed25519-donna)        | [Ed25519](http://ed25519.cr.yp.to/) digital signatures                                       |
-| `secp256k1`      | [Project](https://github.com/bitcoin-core/secp256k1)           | ECDSA digital signatures using the **secp256k1** curve                                       |
+| Folder          | Upstream                                     | Description |
+|:----------------|:---------------------------------------------|:------------|
+| `antithesis-sdk`| [Project](https://github.com/antithesishq/antithesis-sdk-cpp/) | [Antithesis](https://antithesis.com/docs/using_antithesis/sdk/cpp/overview.html) SDK for C++ |
+| `ed25519-donna` | [Project](https://github.com/floodyberry/ed25519-donna) | [Ed25519](http://ed25519.cr.yp.to/) digital signatures |
+| `rocksdb`       | [Recipe](https://github.com/conan-io/conan-center-index/tree/master/recipes/rocksdb) | Fast key/value database. (Supports rotational disks better than NuDB.) |
+| `secp256k1`     | [Project](https://github.com/bitcoin-core/secp256k1)    | ECDSA digital signatures using the **secp256k1** curve |
+| `snappy`        | [Recipe](https://github.com/conan-io/conan-center-index/tree/master/recipes/snappy)  | "Snappy" lossless compression algorithm. |
+| `soci`          | [Recipe](https://github.com/conan-io/conan-center-index/tree/master/recipes/soci)    | Abstraction layer for database access. |
--- a/external/antithesis-sdk/CMakeLists.txt
+++ b/external/antithesis-sdk/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.18)
+cmake_minimum_required(VERSION 3.25)

 # Note, version set explicitly by rippled project
 project(antithesis-sdk-cpp VERSION 0.4.4 LANGUAGES CXX)
--- a/external/antithesis-sdk/README.md
+++ b/external/antithesis-sdk/README.md
@@ -1,9 +1,8 @@
 # Antithesis C++ SDK

 This library provides methods for C++ programs to configure the [Antithesis](https://antithesis.com) platform. It contains three kinds of functionality:
-
- Assertion macros that allow you to define test properties about your software or workload.
- Randomness functions for requesting both structured and unstructured randomness from the Antithesis platform.
- Lifecycle functions that inform the Antithesis environment that particular test phases or milestones have been reached.
+* Assertion macros that allow you to define test properties about your software or workload.
+* Randomness functions for requesting both structured and unstructured randomness from the Antithesis platform.
+* Lifecycle functions that inform the Antithesis environment that particular test phases or milestones have been reached.

 For general usage guidance see the [Antithesis C++ SDK Documentation](https://antithesis.com/docs/using_antithesis/sdk/cpp/overview/)
--- a/external/blake3/CMakeLists.txt
+++ b/external/blake3/CMakeLists.txt
@@ -0,0 +1,383 @@
+cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+
+# respect C_EXTENSIONS OFF without explicitly setting C_STANDARD
+if (POLICY CMP0128)
+  cmake_policy(SET CMP0128 NEW)
+endif()
+# mark_as_advanced does not implicitly create UNINITIALIZED cache entries
+if (POLICY CMP0102)
+  cmake_policy(SET CMP0102 NEW)
+endif()
+
+project(libblake3
+  VERSION 1.8.2
+  DESCRIPTION "BLAKE3 C implementation"
+  LANGUAGES C CXX ASM
+)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+option(BLAKE3_USE_TBB "Enable oneTBB parallelism" OFF)
+option(BLAKE3_FETCH_TBB "Allow fetching oneTBB from GitHub if not found on system" OFF)
+
+include(CTest)
+include(FeatureSummary)
+include(GNUInstallDirs)
+
+add_subdirectory(dependencies)
+
+# architecture lists for which to enable assembly / SIMD sources
+set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
+set(BLAKE3_X86_NAMES i686 x86 X86)
+set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
+# default SIMD compiler flag configuration (can be overridden by toolchains or CLI)
+if(MSVC)
+  set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
+  # MSVC has no dedicated sse4.1 flag (see https://learn.microsoft.com/en-us/cpp/build/reference/arch-x86?view=msvc-170)
+  set(BLAKE3_CFLAGS_SSE4.1 "/arch:AVX" CACHE STRING "the compiler flags to enable SSE4.1")
+  set(BLAKE3_CFLAGS_AVX2 "/arch:AVX2" CACHE STRING "the compiler flags to enable AVX2")
+  set(BLAKE3_CFLAGS_AVX512 "/arch:AVX512" CACHE STRING "the compiler flags to enable AVX512")
+
+  set(BLAKE3_AMD64_ASM_SOURCES
+    blake3_avx2_x86-64_windows_msvc.asm
+    blake3_avx512_x86-64_windows_msvc.asm
+    blake3_sse2_x86-64_windows_msvc.asm
+    blake3_sse41_x86-64_windows_msvc.asm
+  )
+
+elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
+       OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
+       OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
+  set(BLAKE3_CFLAGS_SSE2 "-msse2" CACHE STRING "the compiler flags to enable SSE2")
+  set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
+  set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
+  set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")
+
+  if (WIN32 OR CYGWIN)
+    set(BLAKE3_AMD64_ASM_SOURCES
+      blake3_avx2_x86-64_windows_gnu.S
+      blake3_avx512_x86-64_windows_gnu.S
+      blake3_sse2_x86-64_windows_gnu.S
+      blake3_sse41_x86-64_windows_gnu.S
+    )
+
+  elseif(UNIX)
+    set(BLAKE3_AMD64_ASM_SOURCES
+      blake3_avx2_x86-64_unix.S
+      blake3_avx512_x86-64_unix.S
+      blake3_sse2_x86-64_unix.S
+      blake3_sse41_x86-64_unix.S
+    )
+  endif()
+
+  if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
+      AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    # 32-bit ARMv8 needs NEON to be enabled explicitly
+    set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
+  endif()
+endif()
+
+mark_as_advanced(BLAKE3_CFLAGS_SSE2 BLAKE3_CFLAGS_SSE4.1 BLAKE3_CFLAGS_AVX2 BLAKE3_CFLAGS_AVX512 BLAKE3_CFLAGS_NEON)
+mark_as_advanced(BLAKE3_AMD64_ASM_SOURCES)
+
+message(STATUS "BLAKE3 SIMD configuration: ${CMAKE_C_COMPILER_ARCHITECTURE_ID}")
+if(MSVC AND DEFINED CMAKE_C_COMPILER_ARCHITECTURE_ID)
+  if(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]86")
+    set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")
+
+  elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]64")
+    set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")
+
+  elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Aa][Rr][Mm]64")
+    set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")
+
+  else()
+    set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
+  endif()
+
+elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES)
+  set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")
+
+elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES
+       AND DEFINED BLAKE3_CFLAGS_SSE2
+       AND DEFINED BLAKE3_CFLAGS_SSE4.1
+       AND DEFINED BLAKE3_CFLAGS_AVX2
+       AND DEFINED BLAKE3_CFLAGS_AVX512)
+  set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")
+
+elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
+          OR ANDROID_ABI STREQUAL "armeabi-v7a"
+          OR BLAKE3_USE_NEON_INTRINSICS)
+        AND (DEFINED BLAKE3_CFLAGS_NEON
+          OR CMAKE_SIZEOF_VOID_P EQUAL 8))
+  set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")
+
+else()
+  set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
+endif()
+
+mark_as_advanced(BLAKE3_SIMD_TYPE)
+
+# library target
+add_library(blake3
+  blake3.c
+  blake3_dispatch.c
+  blake3_portable.c
+)
+add_library(BLAKE3::blake3 ALIAS blake3)
+
+# library configuration
+set(PKG_CONFIG_CFLAGS)
+if (BUILD_SHARED_LIBS)
+  target_compile_definitions(blake3
+    PUBLIC BLAKE3_DLL
+    PRIVATE BLAKE3_DLL_EXPORTS
+  )
+  list(APPEND PKG_CONFIG_CFLAGS -DBLAKE3_DLL)
+endif()
+target_include_directories(blake3 PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+)
+set_target_properties(blake3 PROPERTIES
+  VERSION ${PROJECT_VERSION}
+  SOVERSION 0
+  C_VISIBILITY_PRESET hidden
+  C_EXTENSIONS OFF
+)
+target_compile_features(blake3 PUBLIC c_std_99)
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12)
+  target_compile_features(blake3 PUBLIC cxx_std_20)
+  # else: add it further below through `BLAKE3_CMAKE_CXXFLAGS_*`
+endif()
+
+# ensure C_EXTENSIONS OFF is respected without overriding CMAKE_C_STANDARD
+# which may be set by the user or toolchain file
+if (NOT POLICY CMP0128 AND NOT DEFINED CMAKE_C_STANDARD)
+  set_target_properties(blake3 PROPERTIES C_STANDARD 99)
+endif()
+
+# optional SIMD sources
+if(BLAKE3_SIMD_TYPE STREQUAL "amd64-asm")
+  if (NOT DEFINED BLAKE3_AMD64_ASM_SOURCES)
+    message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'amd64-asm' but no assembly sources are available for the target architecture.")
+  endif()
+  set(BLAKE3_SIMD_AMD64_ASM ON)
+
+  if(MSVC)
+    enable_language(ASM_MASM)
+  endif()
+
+  target_sources(blake3 PRIVATE ${BLAKE3_AMD64_ASM_SOURCES})
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "x86-intrinsics")
+  if (NOT DEFINED BLAKE3_CFLAGS_SSE2
+      OR NOT DEFINED BLAKE3_CFLAGS_SSE4.1
+      OR NOT DEFINED BLAKE3_CFLAGS_AVX2
+      OR NOT DEFINED BLAKE3_CFLAGS_AVX512)
+    message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
+  endif()
+  set(BLAKE3_SIMD_X86_INTRINSICS ON)
+
+  target_sources(blake3 PRIVATE
+    blake3_avx2.c
+    blake3_avx512.c
+    blake3_sse2.c
+    blake3_sse41.c
+  )
+  set_source_files_properties(blake3_avx2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX2}")
+  set_source_files_properties(blake3_avx512.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX512}")
+  set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
+  set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
+  set(BLAKE3_SIMD_NEON_INTRINSICS ON)
+
+  target_sources(blake3 PRIVATE
+    blake3_neon.c
+  )
+  target_compile_definitions(blake3 PRIVATE
+    BLAKE3_USE_NEON=1
+  )
+
+  if (DEFINED BLAKE3_CFLAGS_NEON)
+    set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
+  endif()
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
+  target_compile_definitions(blake3 PRIVATE
+    BLAKE3_USE_NEON=0
+    BLAKE3_NO_SSE2
+    BLAKE3_NO_SSE41
+    BLAKE3_NO_AVX2
+    BLAKE3_NO_AVX512
+  )
+
+else()
+  message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to an unknown value: '${BLAKE3_SIMD_TYPE}'")
+endif()
+
+if(BLAKE3_USE_TBB)  # optional oneTBB support; disabled with a warning when TBB cannot be found
+  find_package(TBB 2021.11.0 QUIET)
+  if(NOT TBB_FOUND AND NOT TARGET TBB::tbb)
+    message(WARNING
+      "oneTBB not found; disabling BLAKE3_USE_TBB\n"
+      "Enable BLAKE3_FETCH_TBB to automatically fetch and build oneTBB"
+    )
+    set(BLAKE3_USE_TBB OFF)
+  else()
+    target_sources(blake3
+      PRIVATE
+        blake3_tbb.cpp)
+    target_link_libraries(blake3
+      PUBLIC
+        # Make shared TBB a transitive dependency. The consuming program is technically not required
+        # to link TBB in order for libblake3 to function but we do this in order to prevent the
+        # possibility of multiple separate TBB runtimes being linked into a final program in case
+        # the consuming program also happens to already use TBB.
+        TBB::tbb)
+    target_compile_definitions(blake3
+      PUBLIC
+        BLAKE3_USE_TBB)
+    list(APPEND PKG_CONFIG_REQUIRES "tbb >= ${TBB_VERSION}")  # advertise TBB in pkg-config only when it is actually linked
+    list(APPEND PKG_CONFIG_CFLAGS -DBLAKE3_USE_TBB)
+    include(CheckCXXSymbolExists)  # detect the C++ standard library in use (blake3_tbb.cpp is the only C++ source added here)
+    check_cxx_symbol_exists(_LIBCPP_VERSION "version" BLAKE3_HAVE_LIBCPP)
+    check_cxx_symbol_exists(__GLIBCXX__ "version" BLAKE3_HAVE_GLIBCXX)
+    if(BLAKE3_HAVE_GLIBCXX)
+      list(APPEND PKG_CONFIG_LIBS -lstdc++)
+    elseif(BLAKE3_HAVE_LIBCPP)
+      list(APPEND PKG_CONFIG_LIBS -lc++)
+    endif()
+  endif()
+endif()
+
+# Private C++ compile flags for the TBB translation unit: exceptions and RTTI
+# are disabled, and on old CMake versions the C++20 standard flag is added by
+# hand.
+if(BLAKE3_USE_TBB)
+  # Define some scratch variables for building appropriate flags per compiler
+  if(CMAKE_VERSION VERSION_LESS 3.12)
+    # list(APPEND <var> <value>) is required here; set(APPEND <var> <value>)
+    # would instead create a variable literally named "APPEND" and leave the
+    # standard flags empty.
+    list(APPEND BLAKE3_CXX_STANDARD_FLAGS_GNU -std=c++20)
+    list(APPEND BLAKE3_CXX_STANDARD_FLAGS_MSVC /std:c++20)
+  endif()
+  set(BLAKE3_CXXFLAGS_GNU "-fno-exceptions;-fno-rtti;${BLAKE3_CXX_STANDARD_FLAGS_GNU}" CACHE STRING "C++ flags used for compiling private BLAKE3 library components with GNU-like compiler frontends.")
+  set(BLAKE3_CXXFLAGS_MSVC "/EHs-c-;/GR-;${BLAKE3_CXX_STANDARD_FLAGS_MSVC}" CACHE STRING "C++ flags used for compiling private BLAKE3 library components with MSVC-like compiler frontends.")
+  # Get the C++ compiler name without extension
+  get_filename_component(BLAKE3_CMAKE_CXX_COMPILER_NAME "${CMAKE_CXX_COMPILER}" NAME_WE)
+  # Strip any trailing versioning from the C++ compiler name
+  # (the result is "clang++", "clang-cl", or empty when neither prefix matches)
+  string(REGEX MATCH "^(clang\\+\\+|clang-cl)" BLAKE3_CMAKE_CXX_COMPILER_NAME "${BLAKE3_CMAKE_CXX_COMPILER_NAME}")
+
+  # TODO: Simplify with CMAKE_CXX_COMPILER_FRONTEND_VARIANT once min CMake version is 3.14.
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+    target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    # Clang ships both a GNU-style and an MSVC-style driver; pick the flag
+    # dialect from the executable name.
+    if(BLAKE3_CMAKE_CXX_COMPILER_NAME STREQUAL "clang++")
+      target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+    elseif(BLAKE3_CMAKE_CXX_COMPILER_NAME STREQUAL "clang-cl")
+      target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_MSVC}>)
+    endif()
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_MSVC}>)
+  endif()
+
+  # Undefine scratch variables
+  unset(BLAKE3_CXX_STANDARD_FLAGS_GNU)
+  unset(BLAKE3_CXX_STANDARD_FLAGS_MSVC)
+  unset(BLAKE3_CMAKE_CXX_COMPILER_NAME)
+  unset(BLAKE3_CXXFLAGS_GNU)
+  unset(BLAKE3_CXXFLAGS_MSVC)
+endif()
+
+# cmake install support
+# Public header.
+install(FILES blake3.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+# The library itself, recorded in the "blake3-targets" export set.
+install(TARGETS blake3 EXPORT blake3-targets
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
+)
+# Install the export set so consumers see the target as BLAKE3::blake3.
+install(EXPORT blake3-targets
+  NAMESPACE BLAKE3::
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/blake3"
+)
+
+# Generate the package config and version files used by find_package(blake3)
+# in the build tree, then install them next to the exported targets.
+include(CMakePackageConfigHelpers)
+configure_package_config_file(blake3-config.cmake.in
+    "${CMAKE_CURRENT_BINARY_DIR}/blake3-config.cmake"
+
+    INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/blake3"
+)
+# SameMajorVersion: a requested version is satisfied only by an installed
+# version with the same major number.
+write_basic_package_version_file(
+    "${CMAKE_CURRENT_BINARY_DIR}/blake3-config-version.cmake"
+    VERSION ${libblake3_VERSION}
+    COMPATIBILITY SameMajorVersion
+)
+install(FILES
+        "${CMAKE_CURRENT_BINARY_DIR}/blake3-config.cmake"
+        "${CMAKE_CURRENT_BINARY_DIR}/blake3-config-version.cmake"
+    DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/blake3"
+)
+
+# Function for joining paths known from most languages
+#
+# SPDX-License-Identifier: (MIT OR CC0-1.0)
+# Copyright 2020 Jan Tojnar
+# https://github.com/jtojnar/cmake-snips
+#
+# Modelled after Python’s os.path.join
+# https://docs.python.org/3.7/library/os.path.html#os.path.join
+# Windows not supported
+# join_paths(<out-var> <first> [<segment>...])
+#
+# Folds the segments left to right: an empty segment is ignored, an absolute
+# segment replaces everything accumulated so far, and any other segment is
+# appended after a "/". The result is stored in <out-var> in the caller's scope.
+function(join_paths joined_path first_path_segment)
+    set(result "${first_path_segment}")
+    foreach(segment IN LISTS ARGN)
+        if("${segment}" STREQUAL "")
+            # Nothing to add for an empty segment.
+            continue()
+        endif()
+        if(IS_ABSOLUTE "${segment}")
+            # An absolute segment restarts the path.
+            set(result "${segment}")
+        else()
+            string(APPEND result "/${segment}")
+        endif()
+    endforeach()
+    set(${joined_path} "${result}" PARENT_SCOPE)
+endfunction()
+
+# In-place rewrite a list variable as a single string joined by `sep`.
+#
+# Arguments:
+#   sep      - separator inserted between consecutive list elements
+#   requires - NAME of a list variable in the caller's scope; it is overwritten
+#              with the joined string (empty string for an empty list)
+#
+# TODO: Replace function with list(JOIN) when updating to CMake 3.12
+function(join_pkg_config_field sep requires)
+  set(_requires "${${requires}}") # avoid shadowing issues, e.g. "${requires}"=len
+  # A function scope starts with a copy of the caller's variables, so the
+  # accumulator must be reset explicitly or a caller-side `acc` would be
+  # prepended to the result.
+  set(acc "")
+  list(LENGTH _requires len)
+  set(idx 1)
+  foreach(req IN LISTS _requires)
+    string(APPEND acc "${req}")
+    # Separator goes between elements only, not after the last one.
+    if(idx LESS len)
+      string(APPEND acc "${sep}")
+    endif()
+    math(EXPR idx "${idx} + 1")
+  endforeach()
+  set("${requires}" "${acc}" PARENT_SCOPE)
+endfunction()
+
+# pkg-config support
+# Flatten the accumulated lists into the string form expected by the .pc
+# template, and express the install dirs relative to pkg-config's ${prefix}.
+join_pkg_config_field(", " PKG_CONFIG_REQUIRES)
+join_pkg_config_field(" " PKG_CONFIG_LIBS)
+join_pkg_config_field(" " PKG_CONFIG_CFLAGS)
+join_paths(PKG_CONFIG_INSTALL_LIBDIR "\${prefix}" "${CMAKE_INSTALL_LIBDIR}")
+join_paths(PKG_CONFIG_INSTALL_INCLUDEDIR "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
+configure_file(libblake3.pc.in libblake3.pc @ONLY)
+# configure_file() resolves a relative output path against
+# CMAKE_CURRENT_BINARY_DIR, so install from there. CMAKE_BINARY_DIR is a
+# different directory when this project is added via add_subdirectory().
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libblake3.pc"
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+
+# print feature summary
+# add_feature_info cannot directly use the BLAKE3_SIMD_TYPE :(
+# Each entry is reported as enabled when the named variable is true.
+add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
+add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
+add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")
+add_feature_info("oneTBB parallelism" BLAKE3_USE_TBB "The library uses oneTBB parallelism.")
+feature_summary(WHAT ENABLED_FEATURES)
+
+# Optional sub-builds, pulled in as CMake modules only when requested.
+if(BLAKE3_EXAMPLES)
+  include(BLAKE3/Examples)
+endif()
+if(BLAKE3_TESTING)
+  include(BLAKE3/Testing)
+endif()
--- a/external/blake3/Makefile.testing
+++ b/external/blake3/Makefile.testing
@@ -0,0 +1,82 @@
+# This Makefile is only for testing. C callers should follow the instructions
+# in ./README.md to incorporate these C files into their existing build.
+
+# Output binary name, compiler, and hardening-oriented default flags.
+NAME=blake3
+CC=gcc
+CFLAGS=-O3 -Wall -Wextra -std=c11 -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2 -fPIE -fvisibility=hidden
+LDFLAGS=-pie -Wl,-z,relro,-z,now
+# TARGETS collects the intrinsics object files used by `all`;
+# ASM_TARGETS collects the assembly sources used by `asm` and `example`.
+TARGETS=
+ASM_TARGETS=
+EXTRAFLAGS=-Wa,--noexecstack
+
+# For each x86 instruction set: defining BLAKE3_NO_<ISA> passes the matching
+# -D flag and leaves that implementation out; otherwise both its intrinsics
+# object and its assembly source are added to the lists above.
+ifdef BLAKE3_NO_SSE2
+EXTRAFLAGS += -DBLAKE3_NO_SSE2
+else
+TARGETS += blake3_sse2.o
+ASM_TARGETS += blake3_sse2_x86-64_unix.S
+endif
+
+ifdef BLAKE3_NO_SSE41
+EXTRAFLAGS += -DBLAKE3_NO_SSE41
+else
+TARGETS += blake3_sse41.o
+ASM_TARGETS += blake3_sse41_x86-64_unix.S
+endif
+
+ifdef BLAKE3_NO_AVX2
+EXTRAFLAGS += -DBLAKE3_NO_AVX2
+else
+TARGETS += blake3_avx2.o
+ASM_TARGETS += blake3_avx2_x86-64_unix.S
+endif
+
+ifdef BLAKE3_NO_AVX512
+EXTRAFLAGS += -DBLAKE3_NO_AVX512
+else
+TARGETS += blake3_avx512.o
+ASM_TARGETS += blake3_avx512_x86-64_unix.S
+endif
+
+# NEON is opt-in here: BLAKE3_USE_NEON adds the NEON object and forces it on.
+ifdef BLAKE3_USE_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=1
+TARGETS += blake3_neon.o
+endif
+
+# BLAKE3_NO_NEON explicitly forces NEON off via -DBLAKE3_USE_NEON=0.
+ifdef BLAKE3_NO_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=0
+endif
+
+# These targets name actions, not files; declare them phony so a stray file
+# called e.g. `test` or `clean` cannot make them appear up to date.
+.PHONY: all test asm test_asm clean
+
+# Build the test binary from the portable sources plus the selected
+# intrinsics objects.
+all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
+
+# Each intrinsics file is compiled on its own with the instruction-set flag
+# it needs.
+blake3_sse2.o: blake3_sse2.c
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -msse2
+
+blake3_sse41.o: blake3_sse41.c
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -msse4.1
+
+blake3_avx2.o: blake3_avx2.c
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -mavx2
+
+blake3_avx512.o: blake3_avx512.c
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -mavx512f -mavx512vl
+
+blake3_neon.o: blake3_neon.c
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@
+
+# Run the test script against a sanitizer-instrumented intrinsics build.
+test: CFLAGS += -DBLAKE3_TESTING -fsanitize=address,undefined
+test: all
+	./test.py
+
+# Same binary as `all`, but built from the assembly implementations.
+asm: blake3.c blake3_dispatch.c blake3_portable.c main.c $(ASM_TARGETS)
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
+
+# Run the test script against a sanitizer-instrumented assembly build.
+test_asm: CFLAGS += -DBLAKE3_TESTING -fsanitize=address,undefined
+test_asm: asm
+	./test.py
+
+# Build the README example program (produces a real file named `example`).
+example: example.c blake3.c blake3_dispatch.c blake3_portable.c $(ASM_TARGETS)
+	$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $@ $(LDFLAGS)
+
+clean:
+	rm -f $(NAME) *.o
--- a/external/blake3/README.md
+++ b/external/blake3/README.md
@@ -0,0 +1,403 @@
+The official C implementation of BLAKE3.
+
+# Example
+
+An example program that hashes bytes from standard input and prints the
+result:
+
+```c
+#include "blake3.h"
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(void) {
+  // Initialize the hasher.
+  blake3_hasher hasher;
+  blake3_hasher_init(&hasher);
+
+  // Read input bytes from stdin.
+  unsigned char buf[65536];
+  while (1) {
+    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
+    if (n > 0) {
+      blake3_hasher_update(&hasher, buf, n);
+    } else if (n == 0) {
+      break; // end of file
+    } else {
+      fprintf(stderr, "read failed: %s\n", strerror(errno));
+      return 1;
+    }
+  }
+
+  // Finalize the hash. BLAKE3_OUT_LEN is the default output length, 32 bytes.
+  uint8_t output[BLAKE3_OUT_LEN];
+  blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
+
+  // Print the hash as hexadecimal.
+  for (size_t i = 0; i < BLAKE3_OUT_LEN; i++) {
+    printf("%02x", output[i]);
+  }
+  printf("\n");
+  return 0;
+}
+```
+
+The code above is included in this directory as `example.c`. If you're
+on x86\_64 with a Unix-like OS, you can compile a working binary like
+this:
+
+```bash
+gcc -O3 -o example example.c blake3.c blake3_dispatch.c blake3_portable.c \
+    blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
+    blake3_avx512_x86-64_unix.S
+```
+
+# API
+
+## The Struct
+
+```c
+typedef struct {
+  // private fields
+} blake3_hasher;
+```
+
+An incremental BLAKE3 hashing state, which can accept any number of
+updates. This implementation doesn't allocate any heap memory, but
+`sizeof(blake3_hasher)` itself is relatively large, currently 1912 bytes
+on x86-64. This size can be reduced by restricting the maximum input
+length, as described in Section 5.4 of [the BLAKE3
+spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
+but this implementation doesn't currently support that strategy.
+
+## Common API Functions
+
+```c
+void blake3_hasher_init(
+  blake3_hasher *self);
+```
+
+Initialize a `blake3_hasher` in the default hashing mode.
+
+---
+
+```c
+void blake3_hasher_update(
+  blake3_hasher *self,
+  const void *input,
+  size_t input_len);
+```
+
+Add input to the hasher. This can be called any number of times. This function
+is always single-threaded; for multithreading see `blake3_hasher_update_tbb`
+below.
+
+
+---
+
+```c
+void blake3_hasher_finalize(
+  const blake3_hasher *self,
+  uint8_t *out,
+  size_t out_len);
+```
+
+Finalize the hasher and return an output of any length, given in bytes.
+This doesn't modify the hasher itself, and it's possible to finalize
+again after adding more input. The constant `BLAKE3_OUT_LEN` provides
+the default output length, 32 bytes, which is recommended for most
+callers. See the [Security Notes](#security-notes) below.
+
+## Less Common API Functions
+
+```c
+void blake3_hasher_init_keyed(
+  blake3_hasher *self,
+  const uint8_t key[BLAKE3_KEY_LEN]);
+```
+
+Initialize a `blake3_hasher` in the keyed hashing mode. The key must be
+exactly 32 bytes.
+
+---
+
+```c
+void blake3_hasher_init_derive_key(
+  blake3_hasher *self,
+  const char *context);
+```
+
+Initialize a `blake3_hasher` in the key derivation mode. The context
+string is given as an initialization parameter, and afterwards input key
+material should be given with `blake3_hasher_update`. The context string
+is a null-terminated C string which should be **hardcoded, globally
+unique, and application-specific**. The context string should not
+include any dynamic input like salts, nonces, or identifiers read from a
+database at runtime. A good default format for the context string is
+`"[application] [commit timestamp] [purpose]"`, e.g., `"example.com
+2019-12-25 16:18:03 session tokens v1"`.
+
+This function is intended for application code written in C. For
+language bindings, see `blake3_hasher_init_derive_key_raw` below.
+
+---
+
+```c
+void blake3_hasher_init_derive_key_raw(
+  blake3_hasher *self,
+  const void *context,
+  size_t context_len);
+```
+
+As `blake3_hasher_init_derive_key` above, except that the context string
+is given as a pointer to an array of arbitrary bytes with a provided
+length. This is intended for writing language bindings, where C string
+conversion would add unnecessary overhead and new error cases. Unicode
+strings should be encoded as UTF-8.
+
+Application code in C should prefer `blake3_hasher_init_derive_key`,
+which takes the context as a C string. If you need to use arbitrary
+bytes as a context string in application code, consider whether you're
+violating the requirement that context strings should be hardcoded.
+
+---
+
+```c
+void blake3_hasher_update_tbb(
+  blake3_hasher *self,
+  const void *input,
+  size_t input_len);
+```
+
+Add input to the hasher, using [oneTBB] to process large inputs using multiple
+threads. This can be called any number of times. This gives the same result as
+`blake3_hasher_update` above.
+
+[oneTBB]: https://uxlfoundation.github.io/oneTBB/
+
+NOTE: This function is only enabled when the library is compiled with CMake option `BLAKE3_USE_TBB`
+and when the oneTBB library is detected on the host system. See the building instructions for
+further details.
+
+To get any performance benefit from multithreading, the input buffer needs to
+be large. As a rule of thumb on x86_64, `blake3_hasher_update_tbb` is _slower_
+than `blake3_hasher_update` for inputs under 128 KiB. That threshold varies
+quite a lot across different processors, and it's important to benchmark your
+specific use case.
+
+Hashing large files with this function usually requires
+[memory-mapping](https://en.wikipedia.org/wiki/Memory-mapped_file), since
+reading a file into memory in a single-threaded loop takes longer than hashing
+the resulting buffer. Note that hashing a memory-mapped file with this function
+produces a "random" pattern of disk reads, which can be slow on spinning disks.
+Again it's important to benchmark your specific use case.
+
+This implementation doesn't require configuration of thread resources and will
+use as many cores as possible by default. More fine-grained control of
+resources is possible using the [oneTBB] API.
+
+---
+
+```c
+void blake3_hasher_finalize_seek(
+  const blake3_hasher *self,
+  uint64_t seek,
+  uint8_t *out,
+  size_t out_len);
+```
+
+The same as `blake3_hasher_finalize`, but with an additional `seek`
+parameter for the starting byte position in the output stream. To
+efficiently stream a large output without allocating memory, call this
+function in a loop, incrementing `seek` by the output length each time.
+
+---
+
+```c
+void blake3_hasher_reset(
+  blake3_hasher *self);
+```
+
+Reset the hasher to its initial state, prior to any calls to
+`blake3_hasher_update`. Currently this is no different from calling
+`blake3_hasher_init` or similar again.
+
+# Security Notes
+
+Outputs shorter than the default length of 32 bytes (256 bits) provide less security. An N-bit
+BLAKE3 output is intended to provide N bits of first and second preimage resistance and N/2
+bits of collision resistance, for any N up to 256. Longer outputs don't provide any additional
+security.
+
+Avoid relying on the secrecy of the output offset, that is, the `seek` argument of
+`blake3_hasher_finalize_seek`. [_Block-Cipher-Based Tree Hashing_ by Aldo
+Gunsing](https://eprint.iacr.org/2022/283) shows that an attacker who knows both the message
+and the key (if any) can easily determine the offset of an extended output. For comparison,
+AES-CTR has a similar property: if you know the key, you can decrypt a block from an unknown
+position in the output stream to recover its block index. Callers with strong secret keys
+aren't affected in practice, but secret offsets are a [design
+smell](https://en.wikipedia.org/wiki/Design_smell) in any case.
+
+# Building
+
+The easiest and most complete method of compiling this library is with CMake.
+This is the method described in the next section. Toward the end of the
+building section there are more in-depth notes about compiling manually and
+things that are useful to understand if you need to integrate this library with
+another build system.
+
+## CMake
+
+The minimum version of CMake is 3.9. The following invocations will compile and
+install `libblake3`. With recent CMake:
+
+```bash
+cmake -S c -B c/build "-DCMAKE_INSTALL_PREFIX=/usr/local"
+cmake --build c/build --target install
+```
+
+With an older CMake:
+
+```bash
+cd c
+mkdir build
+cd build
+cmake .. "-DCMAKE_INSTALL_PREFIX=/usr/local"
+cmake --build . --target install
+```
+
+The following options are available when compiling with CMake:
+
+- `BLAKE3_USE_TBB`: Enable oneTBB parallelism (Requires a C++20 capable compiler)
+- `BLAKE3_FETCH_TBB`: Allow fetching oneTBB from GitHub (only if not found on system)
+- `BLAKE3_EXAMPLES`: Compile and install example programs
+
+Options can be enabled like this:
+
+```bash
+cmake -S c -B c/build "-DCMAKE_INSTALL_PREFIX=/usr/local" -DBLAKE3_USE_TBB=1 -DBLAKE3_FETCH_TBB=1
+```
+
+## Building manually
+
+We try to keep the build simple enough that you can compile this library "by
+hand", and it's expected that many callers will integrate it with their
+pre-existing build systems. See the `gcc` one-liner in the "Example" section
+above.
+
+### x86
+
+Dynamic dispatch is enabled by default on x86. The implementation will
+query the CPU at runtime to detect SIMD support, and it will use the
+widest instruction set available. By default, `blake3_dispatch.c`
+expects to be linked with code for five different instruction sets:
+portable C, SSE2, SSE4.1, AVX2, and AVX-512.
+
+For each of the x86 SIMD instruction sets, four versions are available:
+three flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one
+version using C intrinsics. The assembly versions are generally
+preferred. They perform better, they perform more consistently across
+different compilers, and they build more quickly. On the other hand, the
+assembly versions are x86\_64-only, and you need to select the right
+flavor for your target platform.
+
+Here's an example of building a shared library on x86\_64 Linux using
+the assembly implementations:
+
+```bash
+gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
+    blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
+    blake3_avx512_x86-64_unix.S
+```
+
+When building the intrinsics-based implementations, you need to build
+each implementation separately, with the corresponding instruction set
+explicitly enabled in the compiler. Here's the same shared library using
+the intrinsics-based implementations:
+
+```bash
+gcc -c -fPIC -O3 -msse2 blake3_sse2.c -o blake3_sse2.o
+gcc -c -fPIC -O3 -msse4.1 blake3_sse41.c -o blake3_sse41.o
+gcc -c -fPIC -O3 -mavx2 blake3_avx2.c -o blake3_avx2.o
+gcc -c -fPIC -O3 -mavx512f -mavx512vl blake3_avx512.c -o blake3_avx512.o
+gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
+    blake3_avx2.o blake3_avx512.o blake3_sse41.o blake3_sse2.o
+```
+
+Note above that building `blake3_avx512.c` requires both `-mavx512f` and
+`-mavx512vl` under GCC and Clang. Under MSVC, the single `/arch:AVX512`
+flag is sufficient. The MSVC equivalent of `-mavx2` is `/arch:AVX2`.
+MSVC enables SSE2 and SSE4.1 by default, and it doesn't have a
+corresponding flag.
+
+If you want to omit SIMD code entirely, you need to explicitly disable
+each instruction set. Here's an example of building a shared library on
+x86 with only portable code:
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 \
+    -DBLAKE3_NO_AVX512 blake3.c blake3_dispatch.c blake3_portable.c
+```
+
+### ARM NEON
+
+The NEON implementation is enabled by default on AArch64, but not on
+other ARM targets, since not all of them support it. To enable it, set
+`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
+ARM Linux with NEON support:
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
+    blake3_portable.c blake3_neon.c
+```
+
+To explicitly disable using NEON instructions on AArch64, set
+`BLAKE3_USE_NEON=0`.
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
+    blake3_portable.c 
+```
+
+Note that on some targets (ARMv7 in particular), extra flags may be
+required to activate NEON support in the compiler. If you see an error
+like...
+
+```
+/usr/lib/gcc/armv7l-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:635:1: error: inlining failed
+in call to always_inline ‘vaddq_u32’: target specific option mismatch
+```
+
+...then you may need to add something like `-mfpu=neon-vfpv4
+-mfloat-abi=hard`.
+
+### Other Platforms
+
+The portable implementation should work on most other architectures. For
+example:
+
+```bash
+gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c
+```
+
+### Multithreading
+
+Multithreading is available using [oneTBB], by compiling the optional C++
+support file [`blake3_tbb.cpp`](./blake3_tbb.cpp). For an example of using
+`mmap` (non-Windows) and `blake3_hasher_update_tbb` to get large-file
+performance on par with [`b3sum`](../b3sum), see
+[`example_tbb.c`](./example_tbb.c). You can build it like this:
+
+```bash
+g++ -c -O3 -fno-exceptions -fno-rtti -DBLAKE3_USE_TBB -o blake3_tbb.o blake3_tbb.cpp
+gcc -O3 -o example_tbb -lstdc++ -ltbb -DBLAKE3_USE_TBB blake3_tbb.o example_tbb.c blake3.c \
+    blake3_dispatch.c blake3_portable.c blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S \
+    blake3_avx2_x86-64_unix.S blake3_avx512_x86-64_unix.S
+```
+
+NOTE: `-fno-exceptions` or equivalent is required to compile `blake3_tbb.cpp`,
+and public API methods with external C linkage are marked `noexcept`. Compiling
+that file with exceptions enabled will fail. Compiling with RTTI disabled isn't
+required but is recommended for code size.
--- a/external/blake3/blake3-config.cmake.in
+++ b/external/blake3/blake3-config.cmake.in
@@ -0,0 +1,14 @@
+@PACKAGE_INIT@
+
+# Provides find_dependency(), used below.
+include(CMakeFindDependencyMacro)
+
+# Remember TBB option state
+set(BLAKE3_USE_TBB @BLAKE3_USE_TBB@)
+
+# A TBB-enabled build needs TBB to be discoverable by the consumer as well,
+# so it is located before the exported targets are loaded.
+if(BLAKE3_USE_TBB)
+    find_dependency(TBB @TBB_VERSION@)
+endif()
+
+# Load the exported targets installed alongside this file.
+include("${CMAKE_CURRENT_LIST_DIR}/blake3-targets.cmake")
+
+check_required_components(blake3)
--- a/external/blake3/blake3.c
+++ b/external/blake3/blake3.c
@@ -0,0 +1,650 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "blake3.h"
+#include "blake3_impl.h"
+
+// Return the library version string (BLAKE3_VERSION_STRING).
+const char *blake3_version(void) {
+  return BLAKE3_VERSION_STRING;
+}
+
+// Set up a chunk state for chunk number 0: remember the caller's flags, zero
+// the progress counters and the block buffer, and load `key` as the starting
+// chaining value.
+INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
+                             uint8_t flags) {
+  self->flags = flags;
+  self->chunk_counter = 0;
+  self->blocks_compressed = 0;
+  self->buf_len = 0;
+  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+  memcpy(self->cv, key, BLAKE3_KEY_LEN);
+}
+
+// Re-arm an existing chunk state for the chunk numbered `chunk_counter`:
+// reload the key as the chaining value and discard any buffered or counted
+// progress. The stored flags are left untouched.
+INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],
+                              uint64_t chunk_counter) {
+  self->chunk_counter = chunk_counter;
+  self->blocks_compressed = 0;
+  self->buf_len = 0;
+  memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+  memcpy(self->cv, key, BLAKE3_KEY_LEN);
+}
+
+// Number of input bytes absorbed into this chunk so far: whole blocks already
+// compressed plus whatever is waiting in the block buffer.
+INLINE size_t chunk_state_len(const blake3_chunk_state *self) {
+  const size_t compressed_bytes =
+      BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed;
+  const size_t buffered_bytes = (size_t)self->buf_len;
+  return compressed_bytes + buffered_bytes;
+}
+
+// Copy as much of `input` as fits into the free tail of the block buffer and
+// return how many bytes were taken (the smaller of the free space and
+// `input_len`).
+INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self,
+                                   const uint8_t *input, size_t input_len) {
+  const size_t used = (size_t)self->buf_len;
+  const size_t room = BLAKE3_BLOCK_LEN - used;
+  const size_t take = (input_len < room) ? input_len : room;
+  memcpy(self->buf + used, input, take);
+  self->buf_len += (uint8_t)take;
+  return take;
+}
+
+// CHUNK_START while no block of this chunk has been compressed yet, 0 after.
+INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {
+  return (self->blocks_compressed == 0) ? CHUNK_START : 0;
+}
+
+// A pending compression: everything needed to run the final compression of a
+// chunk or parent node later, either to get a chaining value
+// (output_chaining_value) or root output bytes (output_root_bytes).
+typedef struct {
+  // Chaining value fed into the compression.
+  uint32_t input_cv[8];
+  // Counter passed to the compression when producing a chaining value; root
+  // output uses the output block counter instead.
+  uint64_t counter;
+  // Block to compress, together with the length passed alongside it.
+  uint8_t block[BLAKE3_BLOCK_LEN];
+  uint8_t block_len;
+  // Flags for the compression (ROOT is OR-ed in for root output).
+  uint8_t flags;
+} output_t;
+
+// Bundle the inputs of a not-yet-performed compression into an output_t,
+// copying the chaining value and the block by value.
+INLINE output_t make_output(const uint32_t input_cv[8],
+                            const uint8_t block[BLAKE3_BLOCK_LEN],
+                            uint8_t block_len, uint64_t counter,
+                            uint8_t flags) {
+  output_t result;
+  result.flags = flags;
+  result.counter = counter;
+  result.block_len = block_len;
+  memcpy(result.block, block, BLAKE3_BLOCK_LEN);
+  memcpy(result.input_cv, input_cv, 32);
+  return result;
+}
+
+// Inside a chunk (the compress_in_place interface) chaining values are kept
+// as words, which spares the portable implementation a bytes<->words
+// round-trip. The hash_many interface, however, consumes both user input and
+// parent node blocks and therefore works on bytes, so chaining values stored
+// in the CV stack are bytes as well.
+//
+// Perform the pending compression described by `self` and write the resulting
+// chaining value to `cv` in byte form.
+INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
+  uint32_t words[8];
+  // Work on a copy: compress_in_place overwrites its chaining value argument
+  // and `self` is const.
+  memcpy(words, self->input_cv, 32);
+  blake3_compress_in_place(words, self->block, self->block_len, self->counter,
+                           self->flags);
+  store_cv_words(cv, words);
+}
+
+// Write `out_len` bytes of root output, starting at byte offset `seek` of the
+// output stream, into `out`. The stream is produced in 64-byte blocks, each
+// from one ROOT-flagged compression whose counter is the block index. The work
+// is split into an optional partial leading block, a run of whole blocks, and
+// an optional partial trailing block.
+INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
+                              size_t out_len) {
+  if (out_len == 0) {
+    return;
+  }
+
+  uint64_t block_counter = seek / 64;
+  const size_t block_offset = seek % 64;
+  uint8_t scratch[64];
+
+  // Leading partial block: `seek` lands in the middle of a 64-byte block.
+  if (block_offset != 0) {
+    blake3_compress_xof(self->input_cv, self->block, self->block_len,
+                        block_counter, self->flags | ROOT, scratch);
+    size_t head_len = 64 - block_offset;
+    if (head_len > out_len) {
+      head_len = out_len;
+    }
+    memcpy(out, scratch + block_offset, head_len);
+    out += head_len;
+    out_len -= head_len;
+    block_counter += 1;
+  }
+
+  // Whole blocks are written straight into the caller's buffer.
+  const size_t whole_blocks = out_len / 64;
+  if (whole_blocks != 0) {
+    blake3_xof_many(self->input_cv, self->block, self->block_len,
+                    block_counter, self->flags | ROOT, out, whole_blocks);
+  }
+  block_counter += whole_blocks;
+  out += whole_blocks * 64;
+  out_len -= whole_blocks * 64;
+
+  // Trailing partial block: fewer than 64 bytes remain.
+  if (out_len != 0) {
+    blake3_compress_xof(self->input_cv, self->block, self->block_len,
+                        block_counter, self->flags | ROOT, scratch);
+    memcpy(out, scratch, out_len);
+  }
+}
+
+// Absorb `input_len` bytes of `input` into the current chunk. Full blocks are
+// compressed into self->cv as they become available, but the last block of
+// the input (full or partial) always stays in self->buf so that
+// chunk_state_output can later compress it with CHUNK_END set.
+INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
+                               size_t input_len) {
+  // First top up a partially filled buffer left over from an earlier call.
+  if (self->buf_len > 0) {
+    size_t take = chunk_state_fill_buf(self, input, input_len);
+    input += take;
+    input_len -= take;
+    // Input remaining after the fill means the buffer is now full (fill_buf
+    // takes the smaller of free space and input length), and since more bytes
+    // follow it is not the final block, so it can be compressed now.
+    if (input_len > 0) {
+      blake3_compress_in_place(
+          self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
+          self->flags | chunk_state_maybe_start_flag(self));
+      self->blocks_compressed += 1;
+      self->buf_len = 0;
+      memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+    }
+  }
+
+  // Compress whole blocks directly from the input. The comparison is strict
+  // so that an input ending exactly on a block boundary still leaves its last
+  // block for the buffer.
+  while (input_len > BLAKE3_BLOCK_LEN) {
+    blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
+                             self->chunk_counter,
+                             self->flags | chunk_state_maybe_start_flag(self));
+    self->blocks_compressed += 1;
+    input += BLAKE3_BLOCK_LEN;
+    input_len -= BLAKE3_BLOCK_LEN;
+  }
+
+  // Whatever is left (at most one block) is buffered.
+  chunk_state_fill_buf(self, input, input_len);
+}
+
+// Describe the final compression of this chunk: the buffered block is
+// compressed with CHUNK_END, plus CHUNK_START if it is also the chunk's first
+// block.
+INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
+  const uint8_t start_flag = chunk_state_maybe_start_flag(self);
+  const uint8_t block_flags = self->flags | start_flag | CHUNK_END;
+  return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,
+                     block_flags);
+}
+
+// Describe the compression of a parent node: a full block, compressed under
+// the key with counter 0 and the PARENT flag added.
+INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
+                              const uint32_t key[8], uint8_t flags) {
+  const uint8_t parent_flags = flags | PARENT;
+  return make_output(key, block, BLAKE3_BLOCK_LEN, 0, parent_flags);
+}
+
+// For an input longer than one chunk, compute how many bytes belong to the
+// left subtree: the largest power-of-2 count of chunks that still leaves at
+// least one byte for the right subtree.
+INLINE size_t left_subtree_len(size_t input_len) {
+  // The caller guarantees input_len > BLAKE3_CHUNK_LEN. Holding back one byte
+  // before dividing keeps the right side from ending up empty.
+  const size_t chunks_before_last_byte = (input_len - 1) / BLAKE3_CHUNK_LEN;
+  return round_down_to_power_of_2(chunks_before_last_byte) * BLAKE3_CHUNK_LEN;
+}
+
+// Hash up to MAX_SIMD_DEGREE chunks at once on the current thread using SIMD
+// parallelism. The chaining value of every chunk is written to `out`, and the
+// number of chunks hashed is returned. The chunks handled here are never the
+// root and never empty; those cases take a different codepath.
+INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
+                                       const uint32_t key[8],
+                                       uint64_t chunk_counter, uint8_t flags,
+                                       uint8_t *out) {
+#if defined(BLAKE3_TESTING)
+  assert(0 < input_len);
+  assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);
+#endif
+
+  // Gather a pointer to each complete chunk in the input.
+  const uint8_t *whole_chunks[MAX_SIMD_DEGREE];
+  size_t num_whole = 0;
+  size_t consumed = 0;
+  for (; input_len - consumed >= BLAKE3_CHUNK_LEN;
+       consumed += BLAKE3_CHUNK_LEN) {
+    whole_chunks[num_whole] = input + consumed;
+    num_whole += 1;
+  }
+
+  // Hash all complete chunks together.
+  blake3_hash_many(whole_chunks, num_whole,
+                   BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
+                   true, flags, CHUNK_START, CHUNK_END, out);
+
+  // No partial chunk left over: done.
+  if (input_len <= consumed) {
+    return num_whole;
+  }
+
+  // Hash the trailing partial chunk through a chunk state. (The empty chunk,
+  // i.e. the empty message, is handled by a different codepath.)
+  blake3_chunk_state tail_state;
+  chunk_state_init(&tail_state, key, flags);
+  tail_state.chunk_counter = chunk_counter + (uint64_t)num_whole;
+  chunk_state_update(&tail_state, input + consumed, input_len - consumed);
+  output_t tail_output = chunk_state_output(&tail_state);
+  output_chaining_value(&tail_output, out + num_whole * BLAKE3_OUT_LEN);
+  return num_whole + 1;
+}
+
+// Hash up to MAX_SIMD_DEGREE parent nodes at once on the current thread using
+// SIMD parallelism. Each parent is formed from two adjacent child chaining
+// values; the parent chaining values are written to `out` and their count is
+// returned. If the number of children is odd, the unpaired last child is
+// copied through as one extra output. The parents handled here are never the
+// root and never empty; those cases take a different codepath.
+INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
+                                        size_t num_chaining_values,
+                                        const uint32_t key[8], uint8_t flags,
+                                        uint8_t *out) {
+#if defined(BLAKE3_TESTING)
+  assert(2 <= num_chaining_values);
+  assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);
+#endif
+
+  // Each parent block is a pair of consecutive child chaining values.
+  const uint8_t *parent_blocks[MAX_SIMD_DEGREE_OR_2];
+  const size_t num_parents = num_chaining_values / 2;
+  for (size_t i = 0; i < num_parents; i++) {
+    parent_blocks[i] = child_chaining_values + 2 * i * BLAKE3_OUT_LEN;
+  }
+
+  blake3_hash_many(parent_blocks, num_parents, 1, key,
+                   0, // Parents always use counter 0.
+                   false, flags | PARENT,
+                   0, // Parents have no start flags.
+                   0, // Parents have no end flags.
+                   out);
+
+  // Even number of children: every child was paired.
+  if (num_chaining_values <= 2 * num_parents) {
+    return num_parents;
+  }
+
+  // Odd child left over: it becomes an output unchanged.
+  memcpy(out + num_parents * BLAKE3_OUT_LEN,
+         child_chaining_values + 2 * num_parents * BLAKE3_OUT_LEN,
+         BLAKE3_OUT_LEN);
+  return num_parents + 1;
+}
+
+// The wide helper function returns (writes out) an array of chaining values
+// and returns the length of that array. The number of chaining values returned
+// is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
+// if the input is shorter than that many chunks. The reason for maintaining a
+// wide array of chaining values going back up the tree, is to allow the
+// implementation to hash as many parents in parallel as possible.
+//
+// As a special case when the SIMD degree is 1, this function will still return
+// at least 2 outputs. This guarantees that this function doesn't perform the
+// root compression. (If it did, it would use the wrong flags, and also we
+// wouldn't be able to implement extendable output.) Note that this function is
+// not used when the whole input is only 1 chunk long; that's a different
+// codepath.
+//
+// Why not just have the caller split the input on the first update(), instead
+// of implementing this special rule? Because we don't want to limit SIMD or
+// multi-threading parallelism for that update().
+// Parameters: `input`/`input_len` are the subtree's bytes, `key` and `flags`
+// are forwarded unchanged to every compression, `chunk_counter` is the index
+// of the subtree's first chunk, and `out` receives the chaining values.
+// `use_tbb` is passed down the recursion (and to the TBB join helper when
+// BLAKE3_USE_TBB is defined). Returns the number of chaining values written.
+size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
+                                    const uint32_t key[8],
+                                    uint64_t chunk_counter, uint8_t flags,
+                                    uint8_t *out, bool use_tbb) {
+  // Note that the single chunk case does *not* bump the SIMD degree up to 2
+  // when it is 1. If this implementation adds multi-threading in the future,
+  // this gives us the option of multi-threading even the 2-chunk case, which
+  // can help performance on smaller platforms.
+  if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
+    return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
+                                    out);
+  }
+
+  // With more than simd_degree chunks, we need to recurse. Start by dividing
+  // the input into left and right subtrees. (Note that this is only optimal
+  // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
+  // of 3 or something, we'll need a more complicated strategy.)
+  size_t left_input_len = left_subtree_len(input_len);
+  size_t right_input_len = input_len - left_input_len;
+  const uint8_t *right_input = &input[left_input_len];
+  // The right subtree's first chunk index follows the chunks on the left.
+  uint64_t right_chunk_counter =
+      chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
+
+  // Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 to
+  // account for the special case of returning 2 outputs when the SIMD degree
+  // is 1.
+  uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+  size_t degree = blake3_simd_degree();
+  if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
+    // The special case: We always use a degree of at least two, to make
+    // sure there are two outputs. Except, as noted above, at the chunk
+    // level, where we allow degree=1. (Note that the 1-chunk-input case is
+    // a different codepath.)
+    degree = 2;
+  }
+  // The right subtree writes its CVs `degree` slots into cv_array.
+  // NOTE(review): this presumably relies on the left subtree producing
+  // exactly `degree` CVs so both halves end up contiguous -- confirm against
+  // left_subtree_len().
+  uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
+
+  // Recurse!
+  // The -1 initialisers convert to SIZE_MAX; they are placeholders that both
+  // preprocessor branches below overwrite.
+  size_t left_n = -1;
+  size_t right_n = -1;
+
+#if defined(BLAKE3_USE_TBB)
+  blake3_compress_subtree_wide_join_tbb(
+      key, flags, use_tbb,
+      // left-hand side
+      input, left_input_len, chunk_counter, cv_array, &left_n,
+      // right-hand side
+      right_input, right_input_len, right_chunk_counter, right_cvs, &right_n);
+#else
+  left_n = blake3_compress_subtree_wide(
+      input, left_input_len, key, chunk_counter, flags, cv_array, use_tbb);
+  right_n = blake3_compress_subtree_wide(right_input, right_input_len, key,
+                                         right_chunk_counter, flags, right_cvs,
+                                         use_tbb);
+#endif // BLAKE3_USE_TBB
+
+  // The special case again. If simd_degree=1, then we'll have left_n=1 and
+  // right_n=1. Rather than compressing them into a single output, return
+  // them directly, to make sure we always have at least two outputs.
+  if (left_n == 1) {
+    memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+    return 2;
+  }
+
+  // Otherwise, do one layer of parent node compression.
+  size_t num_chaining_values = left_n + right_n;
+  return compress_parents_parallel(cv_array, num_chaining_values, key, flags,
+                                   out);
+}
+
+// Hash a subtree with compress_subtree_wide(), and then condense the resulting
+// list of chaining values down to a single parent node. Don't compress that
+// last parent node, however. Instead, return its message bytes (the
+// concatenated chaining values of its children). This is necessary when the
+// first call to update() supplies a complete subtree, because the topmost
+// parent node of that subtree could end up being the root. It's also necessary
+// for extended output in the general case.
+//
+// As with compress_subtree_wide(), this function is not used on inputs of 1
+// chunk or less. That's a different codepath.
+// Parameters mirror blake3_compress_subtree_wide(), except that `out` always
+// receives exactly 2 * BLAKE3_OUT_LEN bytes: the two chaining values that
+// form the message block of the subtree's topmost parent node.
+INLINE void
+compress_subtree_to_parent_node(const uint8_t *input, size_t input_len,
+                                const uint32_t key[8], uint64_t chunk_counter,
+                                uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN],
+                                bool use_tbb) {
+#if defined(BLAKE3_TESTING)
+  assert(input_len > BLAKE3_CHUNK_LEN);
+#endif
+
+  uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+  size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
+                                                chunk_counter, flags, cv_array, use_tbb);
+  // NOTE(review): unlike the assert above, this one is not gated on
+  // BLAKE3_TESTING, so it is active in any build without NDEBUG.
+  assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
+  // The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
+  // as we just asserted, num_cvs will always be <=2 in that case. But GCC
+  // (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
+  // set then it emits incorrect warnings here. We tried a few different
+  // hacks to silence these, but in the end our hacks just produced different
+  // warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
+  // desperation, we ifdef out this entire loop when we know it's not needed.
+#if MAX_SIMD_DEGREE_OR_2 > 2
+  // If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
+  // compress_subtree_wide() returns more than 2 chaining values. Condense
+  // them into 2 by forming parent nodes repeatedly.
+  uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
+  while (num_cvs > 2) {
+    // Each pass writes the parents into the scratch buffer and then copies
+    // them back, so cv_array always holds the current layer.
+    num_cvs =
+        compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
+    memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
+  }
+#endif
+  // Hand back the remaining pair of chaining values uncompressed.
+  memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+}
+
+// Common setup shared by all of the public init functions: empty the chaining
+// value stack, initialise the chunk state with `key` and `flags`, and store a
+// copy of the key words in the hasher.
+INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
+                             uint8_t flags) {
+  self->cv_stack_len = 0;
+  chunk_state_init(&self->chunk, key, flags);
+  memcpy(self->key, key, BLAKE3_KEY_LEN);
+}
+
+// Initialise `self` with IV as the key words and no mode flags set.
+void blake3_hasher_init(blake3_hasher *self) {
+  const uint8_t no_flags = 0;
+  hasher_init_base(self, IV, no_flags);
+}
+
+// Initialise `self` for keyed hashing: the BLAKE3_KEY_LEN-byte `key` is
+// converted to eight 32-bit words by load_key_words() and the hasher is set
+// up with the KEYED_HASH flag.
+void blake3_hasher_init_keyed(blake3_hasher *self,
+                              const uint8_t key[BLAKE3_KEY_LEN]) {
+  uint32_t words[8];
+  load_key_words(key, words);
+  hasher_init_base(self, words, KEYED_HASH);
+}
+
+// Initialise `self` for key derivation from a raw context buffer. The context
+// bytes are first hashed on their own with the DERIVE_KEY_CONTEXT flag; the
+// first BLAKE3_KEY_LEN bytes of that output then become the key of `self`,
+// which is set up with the DERIVE_KEY_MATERIAL flag.
+void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
+                                       size_t context_len) {
+  uint8_t derived_key[BLAKE3_KEY_LEN];
+  uint32_t derived_key_words[8];
+
+  // Stage 1: hash the context to obtain the context key bytes.
+  blake3_hasher ctx_hasher;
+  hasher_init_base(&ctx_hasher, IV, DERIVE_KEY_CONTEXT);
+  blake3_hasher_update(&ctx_hasher, context, context_len);
+  blake3_hasher_finalize(&ctx_hasher, derived_key, BLAKE3_KEY_LEN);
+
+  // Stage 2: key the caller's hasher with the words of that context key.
+  load_key_words(derived_key, derived_key_words);
+  hasher_init_base(self, derived_key_words, DERIVE_KEY_MATERIAL);
+}
+
+// Convenience wrapper around blake3_hasher_init_derive_key_raw() for a
+// NUL-terminated context string; the length is taken with strlen(), so the
+// terminator itself is not part of the context.
+void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
+  size_t context_len = strlen(context);
+  blake3_hasher_init_derive_key_raw(self, context, context_len);
+}
+
+// As described in hasher_push_cv() below, we do "lazy merging", delaying
+// merges until right before the next CV is about to be added. This is
+// different from the reference implementation. Another difference is that we
+// aren't always merging 1 chunk at a time. Instead, each CV might represent
+// any power-of-two number of chunks, as long as the smaller-above-larger stack
+// order is maintained. Instead of the "count the trailing 0-bits" algorithm
+// described in the spec, we use a "count the total number of 1-bits" variant
+// that doesn't require us to retain the subtree size of the CV on top of the
+// stack. The principle is the same: each CV that should remain in the stack is
+// represented by a 1-bit in the total number of chunks (or bytes) so far.
+INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
+  // One stack entry should survive for every 1-bit in total_len.
+  const size_t target_len = (size_t)popcnt(total_len);
+  for (; self->cv_stack_len > target_len; self->cv_stack_len -= 1) {
+    // The top two CVs sit next to each other in the stack, so together they
+    // already form the parent's message block. The parent's CV overwrites the
+    // lower of the two, and shrinking the length drops the upper one.
+    uint8_t *top_pair =
+        self->cv_stack + ((self->cv_stack_len - 2) * BLAKE3_OUT_LEN);
+    output_t merged = parent_output(top_pair, self->key, self->chunk.flags);
+    output_chaining_value(&merged, top_pair);
+  }
+}
+
+// In reference_impl.rs, we merge the new CV with existing CVs from the stack
+// before pushing it. We can do that because we know more input is coming, so
+// we know none of the merges are root.
+//
+// This setting is different. We want to feed as much input as possible to
+// compress_subtree_wide(), without setting aside anything for the chunk_state.
+// If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
+// as a single subtree, if at all possible.
+//
+// This leads to two problems:
+// 1) This 64 KiB input might be the only call that ever gets made to update.
+//    In this case, the root node of the 64 KiB subtree would be the root node
+//    of the whole tree, and it would need to be ROOT finalized. We can't
+//    compress it until we know.
+// 2) This 64 KiB input might complete a larger tree, whose root node is
+//    similarly going to be the root of the whole tree. For example, maybe
+//    we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the
+//    node at the root of the 256 KiB subtree until we know how to finalize it.
+//
+// The second problem is solved with "lazy merging". That is, when we're about
+// to add a CV to the stack, we don't merge it with anything first, as the
+// reference impl does. Instead we do merges using the *previous* CV that was
+// added, which is sitting on top of the stack, and we put the new CV
+// (unmerged) on top of the stack afterwards. This guarantees that we never
+// merge the root node until finalize().
+//
+// Solving the first problem requires an additional tool,
+// compress_subtree_to_parent_node(). That function always returns the top
+// *two* chaining values of the subtree it's compressing. We then do lazy
+// merging with each of them separately, so that the second CV will always
+// remain unmerged. (That also helps us support extendable output when we're
+// hashing an input all-at-once.)
+INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
+                           uint64_t chunk_counter) {
+  // Settle any merges owed by the CVs already on the stack before the new
+  // one goes on top; the new CV itself is stored unmerged.
+  hasher_merge_cv_stack(self, chunk_counter);
+  uint8_t *slot = self->cv_stack + (self->cv_stack_len * BLAKE3_OUT_LEN);
+  memcpy(slot, new_cv, BLAKE3_OUT_LEN);
+  self->cv_stack_len += 1;
+}
+
+// Absorb `input_len` bytes of `input` into the hasher. Works in three phases:
+//   1. Top up a partially filled chunk left over from a previous call.
+//   2. While more than one chunk of input remains, hash the largest
+//      power-of-2-sized subtree that keeps the tree aligned, and push the
+//      resulting chaining values onto the CV stack.
+//   3. Buffer whatever is left (at most one chunk) in the chunk state.
+// `use_tbb` is forwarded to compress_subtree_to_parent_node(). A zero-length
+// update returns immediately without touching `input`.
+INLINE void blake3_hasher_update_base(blake3_hasher *self, const void *input,
+                                      size_t input_len, bool use_tbb) {
+  // Explicitly checking for zero avoids causing UB by passing a null pointer
+  // to memcpy. This comes up in practice with things like:
+  //   std::vector<uint8_t> v;
+  //   blake3_hasher_update(&hasher, v.data(), v.size());
+  if (input_len == 0) {
+    return;
+  }
+
+  const uint8_t *input_bytes = (const uint8_t *)input;
+
+  // If we have some partial chunk bytes in the internal chunk_state, we need
+  // to finish that chunk first.
+  if (chunk_state_len(&self->chunk) > 0) {
+    size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);
+    if (take > input_len) {
+      take = input_len;
+    }
+    chunk_state_update(&self->chunk, input_bytes, take);
+    input_bytes += take;
+    input_len -= take;
+    // If we've filled the current chunk and there's more coming, finalize this
+    // chunk and proceed. In this case we know it's not the root.
+    if (input_len > 0) {
+      output_t output = chunk_state_output(&self->chunk);
+      uint8_t chunk_cv[BLAKE3_OUT_LEN];
+      output_chaining_value(&output, chunk_cv);
+      hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);
+      chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);
+    } else {
+      return;
+    }
+  }
+
+  // Now the chunk_state is clear, and we have more input. If there's more than
+  // a single chunk (so, definitely not the root chunk), hash the largest whole
+  // subtree we can, with the full benefits of SIMD (and maybe in the future,
+  // multi-threading) parallelism. Two restrictions:
+  // - The subtree has to be a power-of-2 number of chunks. Only subtrees along
+  //   the right edge can be incomplete, and we don't know where the right edge
+  //   is going to be until we get to finalize().
+  // - The subtree must evenly divide the total number of chunks up until this
+  //   point (if total is not 0). If the current incomplete subtree is only
+  //   waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have
+  //   to complete the current subtree first.
+  // Because we might need to break up the input to form powers of 2, or to
+  // evenly divide what we already have, this part runs in a loop.
+  while (input_len > BLAKE3_CHUNK_LEN) {
+    size_t subtree_len = round_down_to_power_of_2(input_len);
+    uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
+    // Shrink the subtree_len until it evenly divides the count so far. We know
+    // that subtree_len itself is a power of 2, so we can use a bitmasking
+    // trick instead of an actual remainder operation. (Note that if the caller
+    // consistently passes power-of-2 inputs of the same size, as is hopefully
+    // typical, this loop condition will always fail, and subtree_len will
+    // always be the full length of the input.)
+    //
+    // An aside: We don't have to shrink subtree_len quite this much. For
+    // example, if count_so_far is 1, we could pass 2 chunks to
+    // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still
+    // get the right answer in the end, and we might get to use 2-way SIMD
+    // parallelism. The problem with this optimization, is that it gets us
+    // stuck always hashing 2 chunks. The total number of chunks will remain
+    // odd, and we'll never graduate to higher degrees of parallelism. See
+    // https://github.com/BLAKE3-team/BLAKE3/issues/69.
+    while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
+      subtree_len /= 2;
+    }
+    // The shrunken subtree_len might now be 1 chunk long. If so, hash that one
+    // chunk by itself. Otherwise, compress the subtree into a pair of CVs.
+    uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
+    if (subtree_len <= BLAKE3_CHUNK_LEN) {
+      blake3_chunk_state chunk_state;
+      chunk_state_init(&chunk_state, self->key, self->chunk.flags);
+      chunk_state.chunk_counter = self->chunk.chunk_counter;
+      chunk_state_update(&chunk_state, input_bytes, subtree_len);
+      output_t output = chunk_state_output(&chunk_state);
+      uint8_t cv[BLAKE3_OUT_LEN];
+      output_chaining_value(&output, cv);
+      hasher_push_cv(self, cv, chunk_state.chunk_counter);
+    } else {
+      // This is the high-performance happy path, though getting here depends
+      // on the caller giving us a long enough input.
+      uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
+      compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
+                                      self->chunk.chunk_counter,
+                                      self->chunk.flags, cv_pair, use_tbb);
+      // Push the two halves separately so the second one stays unmerged; it
+      // covers the chunks starting halfway through the subtree.
+      hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
+      hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
+                     self->chunk.chunk_counter + (subtree_chunks / 2));
+    }
+    self->chunk.chunk_counter += subtree_chunks;
+    input_bytes += subtree_len;
+    input_len -= subtree_len;
+  }
+
+  // If there's any remaining input less than a full chunk, add it to the chunk
+  // state. In that case, also do a final merge loop to make sure the subtree
+  // stack doesn't contain any unmerged pairs. The remaining input means we
+  // know these merges are non-root. This merge loop isn't strictly necessary
+  // here, because hasher_push_cv already does its own merge loop, but it
+  // simplifies blake3_hasher_finalize below.
+  if (input_len > 0) {
+    chunk_state_update(&self->chunk, input_bytes, input_len);
+    hasher_merge_cv_stack(self, self->chunk.chunk_counter);
+  }
+}
+
+// Absorb `input_len` bytes of `input` into the hasher, with TBB disabled.
+void blake3_hasher_update(blake3_hasher *self, const void *input,
+                          size_t input_len) {
+  blake3_hasher_update_base(self, input, input_len, /*use_tbb=*/false);
+}
+
+#if defined(BLAKE3_USE_TBB)
+// Same as blake3_hasher_update(), but asks the update path to use TBB. Only
+// compiled when BLAKE3_USE_TBB is defined.
+void blake3_hasher_update_tbb(blake3_hasher *self, const void *input,
+                              size_t input_len) {
+  blake3_hasher_update_base(self, input, input_len, /*use_tbb=*/true);
+}
+#endif // BLAKE3_USE_TBB
+
+// Write `out_len` bytes of output to `out`, using a seek position of 0.
+void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
+                            size_t out_len) {
+  const uint64_t seek = 0;
+  blake3_hasher_finalize_seek(self, seek, out, out_len);
+}
+
+// Compute the root output for everything absorbed so far and write `out_len`
+// bytes of it to `out`; `seek` and `out_len` are handed through to
+// output_root_bytes(). The hasher is taken by const pointer and is not
+// modified. A zero `out_len` returns immediately without touching `out`.
+void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
+                                 uint8_t *out, size_t out_len) {
+  // Explicitly checking for zero avoids causing UB by passing a null pointer
+  // to memcpy. This comes up in practice with things like:
+  //   std::vector<uint8_t> v;
+  //   blake3_hasher_finalize(&hasher, v.data(), v.size());
+  if (out_len == 0) {
+    return;
+  }
+
+  // If the subtree stack is empty, then the current chunk is the root.
+  if (self->cv_stack_len == 0) {
+    output_t output = chunk_state_output(&self->chunk);
+    output_root_bytes(&output, seek, out, out_len);
+    return;
+  }
+  // If there are any bytes in the chunk state, finalize that chunk and do a
+  // roll-up merge between that chunk hash and every subtree in the stack. In
+  // this case, the extra merge loop at the end of blake3_hasher_update
+  // guarantees that none of the subtrees in the stack need to be merged with
+  // each other first. Otherwise, if there are no bytes in the chunk state,
+  // then the top of the stack is a chunk hash, and we start the merge from
+  // that.
+  output_t output;
+  size_t cvs_remaining;
+  if (chunk_state_len(&self->chunk) > 0) {
+    cvs_remaining = self->cv_stack_len;
+    output = chunk_state_output(&self->chunk);
+  } else {
+    // There are always at least 2 CVs in the stack in this case.
+    cvs_remaining = self->cv_stack_len - 2;
+    output = parent_output(&self->cv_stack[cvs_remaining * BLAKE3_OUT_LEN],
+                           self->key, self->chunk.flags);
+  }
+  while (cvs_remaining > 0) {
+    cvs_remaining -= 1;
+    // Build the next parent block: the stack CV is the left child and the
+    // chaining value of the output so far is the right child.
+    uint8_t parent_block[BLAKE3_BLOCK_LEN];
+    memcpy(parent_block, &self->cv_stack[cvs_remaining * BLAKE3_OUT_LEN],
+           BLAKE3_OUT_LEN);
+    output_chaining_value(&output, &parent_block[BLAKE3_OUT_LEN]);
+    output = parent_output(parent_block, self->key, self->chunk.flags);
+  }
+  output_root_bytes(&output, seek, out, out_len);
+}
+
+// Make the hasher reusable: empty the chaining value stack and reset the
+// chunk state to chunk counter 0 with the key already stored in the hasher.
+void blake3_hasher_reset(blake3_hasher *self) {
+  self->cv_stack_len = 0;
+  chunk_state_reset(&self->chunk, self->key, 0);
+}
--- a/external/blake3/blake3.h
+++ b/external/blake3/blake3.h
@@ -0,0 +1,86 @@
+#ifndef BLAKE3_H
+#define BLAKE3_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Symbol export/visibility annotations. If the build already defines
+// BLAKE3_API, nothing here is touched. Otherwise:
+//   - Windows/Cygwin with BLAKE3_DLL: BLAKE3_API is dllexport when
+//     BLAKE3_DLL_EXPORTS is defined and dllimport when it is not.
+//   - Windows/Cygwin without BLAKE3_DLL: both macros are empty.
+//   - __GNUC__ >= 4: BLAKE3_API is default visibility and BLAKE3_PRIVATE is
+//     hidden visibility.
+//   - Anything else: both macros are empty.
+// NOTE(review): BLAKE3_PRIVATE is only defined inside this block, so a build
+// that predefines BLAKE3_API has to supply BLAKE3_PRIVATE as well.
+#if !defined(BLAKE3_API)
+# if defined(_WIN32) || defined(__CYGWIN__)
+#   if defined(BLAKE3_DLL)
+#     if defined(BLAKE3_DLL_EXPORTS)
+#       define BLAKE3_API __declspec(dllexport)
+#     else
+#       define BLAKE3_API __declspec(dllimport)
+#     endif
+#     define BLAKE3_PRIVATE
+#   else
+#     define BLAKE3_API
+#     define BLAKE3_PRIVATE
+#   endif
+# elif __GNUC__ >= 4
+#   define BLAKE3_API __attribute__((visibility("default")))
+#   define BLAKE3_PRIVATE __attribute__((visibility("hidden")))
+# else
+#   define BLAKE3_API
+#   define BLAKE3_PRIVATE
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Version of the library, as a string literal.
+#define BLAKE3_VERSION_STRING "1.8.2"
+// Size in bytes of a key, as passed to blake3_hasher_init_keyed().
+#define BLAKE3_KEY_LEN 32
+// Size in bytes of one chaining value, i.e. one entry of the CV stack.
+#define BLAKE3_OUT_LEN 32
+// Size in bytes of one block; a parent block holds two chaining values.
+#define BLAKE3_BLOCK_LEN 64
+// Size in bytes of one chunk of input.
+#define BLAKE3_CHUNK_LEN 1024
+// Bound used to size the CV stack in blake3_hasher (MAX_DEPTH + 1 entries).
+#define BLAKE3_MAX_DEPTH 54
+
+// This struct is a private implementation detail. It has to be here because
+// it's part of blake3_hasher below.
+typedef struct {
+  // Eight 32-bit state words. NOTE(review): presumably the chaining value
+  // carried from block to block within the chunk -- confirm in blake3.c.
+  uint32_t cv[8];
+  // Index of this chunk within the whole input.
+  uint64_t chunk_counter;
+  // One block's worth of bytes. NOTE(review): presumably input buffered until
+  // a full block is available, with buf_len the number of valid bytes --
+  // confirm in blake3.c.
+  uint8_t buf[BLAKE3_BLOCK_LEN];
+  uint8_t buf_len;
+  // NOTE(review): presumably the number of blocks of this chunk already
+  // compressed -- confirm in blake3.c.
+  uint8_t blocks_compressed;
+  // Mode flags supplied when the chunk state was initialised.
+  uint8_t flags;
+} blake3_chunk_state;
+
+// Incremental hasher state. Callers should treat the fields as private and
+// go through the blake3_hasher_* functions declared below.
+typedef struct {
+  // Key words copied in at init time and reused for every parent node and
+  // chunk reset.
+  uint32_t key[8];
+  // State of the chunk currently being filled.
+  blake3_chunk_state chunk;
+  // Number of chaining values currently held in cv_stack.
+  uint8_t cv_stack_len;
+  // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
+  // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
+  // requires a 4th entry, rather than merging everything down to 1, because we
+  // don't know whether more input is coming. This is different from how the
+  // reference implementation does things.
+  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
+} blake3_hasher;
+
+// Returns a version string. NOTE(review): presumably BLAKE3_VERSION_STRING;
+// the definition lives in blake3.c.
+BLAKE3_API const char *blake3_version(void);
+// Initialise a hasher with no key and no mode flags.
+BLAKE3_API void blake3_hasher_init(blake3_hasher *self);
+// Initialise a hasher for keyed hashing with a BLAKE3_KEY_LEN-byte key.
+BLAKE3_API void blake3_hasher_init_keyed(blake3_hasher *self,
+                                         const uint8_t key[BLAKE3_KEY_LEN]);
+// Initialise a hasher for key derivation from a NUL-terminated context string.
+BLAKE3_API void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
+// As above, but the context is an arbitrary buffer of `context_len` bytes.
+BLAKE3_API void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
+                                                  size_t context_len);
+// Absorb `input_len` bytes of input. May be called repeatedly; a zero-length
+// update is a no-op.
+BLAKE3_API void blake3_hasher_update(blake3_hasher *self, const void *input,
+                                     size_t input_len);
+#if defined(BLAKE3_USE_TBB)
+// Same as blake3_hasher_update(), but requests the TBB-enabled update path.
+// Only declared when BLAKE3_USE_TBB is defined.
+BLAKE3_API void blake3_hasher_update_tbb(blake3_hasher *self, const void *input,
+                                         size_t input_len);
+#endif // BLAKE3_USE_TBB
+// Write `out_len` output bytes to `out` with a seek position of 0. The hasher
+// is not modified.
+BLAKE3_API void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
+                                       size_t out_len);
+// Write `out_len` output bytes to `out` using the given `seek` position. The
+// hasher is not modified.
+BLAKE3_API void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
+                                            uint8_t *out, size_t out_len);
+// Reset the chunk state and empty the CV stack so the hasher can be reused
+// with the key it already holds.
+BLAKE3_API void blake3_hasher_reset(blake3_hasher *self);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLAKE3_H */
--- a/external/blake3/blake3_avx2.c
+++ b/external/blake3/blake3_avx2.c
@@ -0,0 +1,326 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 8
+
+// Unaligned load of the 32 bytes at `src` into a 256-bit vector.
+INLINE __m256i loadu(const uint8_t src[32]) {
+  const __m256i *vec_ptr = (const __m256i *)src;
+  return _mm256_loadu_si256(vec_ptr);
+}
+
+// Unaligned store of the 256-bit vector `src` to the 32 bytes at `dest`. The
+// declared bound matches the 32 bytes actually written (and loadu()'s
+// src[32]).
+INLINE void storeu(__m256i src, uint8_t dest[32]) {
+  _mm256_storeu_si256((__m256i *)dest, src);
+}
+
+// Lane-wise 32-bit addition of two vectors.
+INLINE __m256i addv(__m256i a, __m256i b) {
+  return _mm256_add_epi32(a, b);
+}
+
+// Bitwise XOR of two vectors. (Named xorv rather than "xor" because
+// clang-format doesn't like the name "xor" for some reason.)
+INLINE __m256i xorv(__m256i a, __m256i b) {
+  return _mm256_xor_si256(a, b);
+}
+
+// Broadcast the 32-bit value `x` to every lane of a vector.
+INLINE __m256i set1(uint32_t x) {
+  const int32_t lane_value = (int32_t)x;
+  return _mm256_set1_epi32(lane_value);
+}
+
+// Rotate every 32-bit lane by 16 bits. Swapping the two 16-bit halves of each
+// lane does this with a single byte shuffle.
+INLINE __m256i rot16(__m256i x) {
+  const __m256i byte_order =
+      _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
+                      13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
+  return _mm256_shuffle_epi8(x, byte_order);
+}
+
+// Rotate every 32-bit lane right by 12 bits.
+INLINE __m256i rot12(__m256i x) {
+  const __m256i shifted_down = _mm256_srli_epi32(x, 12);
+  const __m256i wrapped_up = _mm256_slli_epi32(x, 32 - 12);
+  return _mm256_or_si256(shifted_down, wrapped_up);
+}
+
+// Rotate every 32-bit lane right by 8 bits. Moving each byte of a lane down
+// by one position (with wrap-around) does this with a single byte shuffle.
+INLINE __m256i rot8(__m256i x) {
+  const __m256i byte_order =
+      _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1,
+                      12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1);
+  return _mm256_shuffle_epi8(x, byte_order);
+}
+
+// Rotate every 32-bit lane right by 7 bits.
+INLINE __m256i rot7(__m256i x) {
+  const __m256i shifted_down = _mm256_srli_epi32(x, 7);
+  const __m256i wrapped_up = _mm256_slli_epi32(x, 32 - 7);
+  return _mm256_or_si256(shifted_down, wrapped_up);
+}
+
+// Apply round `r` to the 16 state vectors in `v`, using the message vectors in
+// `m` in the order given by MSG_SCHEDULE[r]. Every operation is lane-wise on
+// 32-bit lanes, so the eight lanes of each __m256i are processed
+// independently and identically. `v` is updated in place; `m` is only read.
+INLINE void round_fn(__m256i v[16], __m256i m[16], size_t r) {
+  // First half: mix the four columns (v[i], v[i+4], v[i+8], v[i+12]) for
+  // i = 0..3, using message words 0..7 of the schedule.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[15] = rot16(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot12(v[4]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[15] = rot8(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot7(v[4]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+
+  // Second half: the same mixing pattern applied along the diagonals
+  // (v[0],v[5],v[10],v[15]), (v[1],v[6],v[11],v[12]), (v[2],v[7],v[8],v[13])
+  // and (v[3],v[4],v[9],v[14]), using message words 8..15 of the schedule.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot16(v[15]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[4] = rot12(v[4]);
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot8(v[15]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+  v[4] = rot7(v[4]);
+}
+
+// Transpose, in place, the 8x8 matrix of 32-bit words held in the DEGREE (8)
+// vectors of `vecs`. The work is done in three interleaving passes at 32-,
+// 64- and 128-bit granularity. The temporaries are named after the source
+// rows (a..h) and the lane positions they hold.
+INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
+  // is 22/33/66/77.
+  __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
+  __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
+  __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
+  __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
+  __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
+  __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
+  __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
+  __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
+
+  // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
+  // 11/33.
+  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
+  __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
+  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
+  __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
+  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
+  __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
+  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
+  __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
+
+  // Interleave 128-bit lanes. Selector 0x20 combines the low 128-bit halves of
+  // the two operands and 0x31 combines their high halves.
+  vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20);
+  vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20);
+  vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20);
+  vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20);
+  vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31);
+  vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31);
+  vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31);
+  vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31);
+}
+
+// Load one 64-byte block, starting at `block_offset`, from each of the eight
+// inputs and transpose it into `out`. out[0..7] are built from the first 32
+// bytes of every block and out[8..15] from the second 32 bytes; each group of
+// eight is then transposed in place. Data 256 bytes past `block_offset` is
+// prefetched for every input.
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+                               size_t block_offset, __m256i out[16]) {
+  const size_t first_half = block_offset;
+  const size_t second_half = block_offset + sizeof(__m256i);
+
+  out[0] = loadu(inputs[0] + first_half);
+  out[1] = loadu(inputs[1] + first_half);
+  out[2] = loadu(inputs[2] + first_half);
+  out[3] = loadu(inputs[3] + first_half);
+  out[4] = loadu(inputs[4] + first_half);
+  out[5] = loadu(inputs[5] + first_half);
+  out[6] = loadu(inputs[6] + first_half);
+  out[7] = loadu(inputs[7] + first_half);
+
+  out[8] = loadu(inputs[0] + second_half);
+  out[9] = loadu(inputs[1] + second_half);
+  out[10] = loadu(inputs[2] + second_half);
+  out[11] = loadu(inputs[3] + second_half);
+  out[12] = loadu(inputs[4] + second_half);
+  out[13] = loadu(inputs[5] + second_half);
+  out[14] = loadu(inputs[6] + second_half);
+  out[15] = loadu(inputs[7] + second_half);
+
+  for (size_t lane = 0; lane < 8; ++lane) {
+    _mm_prefetch((const void *)&inputs[lane][block_offset + 256], _MM_HINT_T0);
+  }
+
+  transpose_vecs(out);
+  transpose_vecs(out + 8);
+}
+
+// Build the per-lane 64-bit counters as separate low-word and high-word
+// vectors. When `increment_counter` is true, lane i gets counter + i;
+// otherwise every lane gets `counter` unchanged.
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+                          __m256i *out_lo, __m256i *out_hi) {
+  // All-ones in every lane when incrementing, all-zeros otherwise.
+  const __m256i enable = _mm256_set1_epi32(-(int32_t)increment_counter);
+  const __m256i lane_index = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+  const __m256i offsets = _mm256_and_si256(enable, lane_index);
+  const __m256i low_words =
+      _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), offsets);
+
+  // The compare below is signed; flipping the sign bit of both operands makes
+  // it behave as an unsigned compare. A lane overflowed its low word exactly
+  // when its offset is (unsigned) greater than the wrapped sum.
+  const __m256i sign_bit = _mm256_set1_epi32(0x80000000);
+  const __m256i carried =
+      _mm256_cmpgt_epi32(_mm256_xor_si256(offsets, sign_bit),
+                         _mm256_xor_si256(low_words, sign_bit));
+
+  // A carried lane is all-ones (-1), so subtracting it adds 1 to the high
+  // word.
+  const __m256i high_words = _mm256_sub_epi32(
+      _mm256_set1_epi32((int32_t)(counter >> 32)), carried);
+
+  *out_lo = low_words;
+  *out_hi = high_words;
+}
+
+// Hashes `blocks` consecutive BLAKE3_BLOCK_LEN-byte blocks from each of the 8
+// buffers in `inputs`, processing the 8 inputs side by side, and writes
+// 8 * sizeof(__m256i) bytes of results to `out`.
+//
+//   key               - the 8 words every lane's state starts from
+//   counter           - base counter; see load_counters for how
+//                       increment_counter spreads it across lanes
+//   flags             - OR-ed into the flags word of every block
+//   flags_start       - additionally OR-ed in for the first block only
+//   flags_end         - additionally OR-ed in for the last block only
+static
+void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
+                       const uint32_t key[8], uint64_t counter,
+                       bool increment_counter, uint8_t flags,
+                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+  // h_vecs[i] holds word i of the running state for all 8 inputs.
+  __m256i h_vecs[8] = {
+      set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
+      set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
+  };
+  __m256i counter_low_vec, counter_high_vec;
+  load_counters(counter, increment_counter, &counter_low_vec,
+                &counter_high_vec);
+  // The first iteration carries flags_start; it is dropped again at the
+  // bottom of the loop.
+  uint8_t block_flags = flags | flags_start;
+
+  for (size_t block = 0; block < blocks; block++) {
+    if (block + 1 == blocks) {
+      // Last block (this is also the first block when blocks == 1).
+      block_flags |= flags_end;
+    }
+    __m256i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+    __m256i block_flags_vec = set1(block_flags);
+    __m256i msg_vecs[16];
+    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    // 16-word working state: current h_vecs, the first four IV words, then
+    // counter low/high, block length and flags.
+    __m256i v[16] = {
+        h_vecs[0],       h_vecs[1],        h_vecs[2],     h_vecs[3],
+        h_vecs[4],       h_vecs[5],        h_vecs[6],     h_vecs[7],
+        set1(IV[0]),     set1(IV[1]),      set1(IV[2]),   set1(IV[3]),
+        counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+    };
+    // Seven rounds, kept unrolled with a constant round index.
+    round_fn(v, msg_vecs, 0);
+    round_fn(v, msg_vecs, 1);
+    round_fn(v, msg_vecs, 2);
+    round_fn(v, msg_vecs, 3);
+    round_fn(v, msg_vecs, 4);
+    round_fn(v, msg_vecs, 5);
+    round_fn(v, msg_vecs, 6);
+    // Fold the two halves of the working state into the next h_vecs.
+    h_vecs[0] = xorv(v[0], v[8]);
+    h_vecs[1] = xorv(v[1], v[9]);
+    h_vecs[2] = xorv(v[2], v[10]);
+    h_vecs[3] = xorv(v[3], v[11]);
+    h_vecs[4] = xorv(v[4], v[12]);
+    h_vecs[5] = xorv(v[5], v[13]);
+    h_vecs[6] = xorv(v[6], v[14]);
+    h_vecs[7] = xorv(v[7], v[15]);
+
+    // Only the first block carries flags_start.
+    block_flags = flags;
+  }
+
+  // Transpose h_vecs before storing (presumably so that each input's 8 words
+  // end up contiguous in `out` -- confirm against transpose_vecs).
+  transpose_vecs(h_vecs);
+  storeu(h_vecs[0], &out[0 * sizeof(__m256i)]);
+  storeu(h_vecs[1], &out[1 * sizeof(__m256i)]);
+  storeu(h_vecs[2], &out[2 * sizeof(__m256i)]);
+  storeu(h_vecs[3], &out[3 * sizeof(__m256i)]);
+  storeu(h_vecs[4], &out[4 * sizeof(__m256i)]);
+  storeu(h_vecs[5], &out[5 * sizeof(__m256i)]);
+  storeu(h_vecs[6], &out[6 * sizeof(__m256i)]);
+  storeu(h_vecs[7], &out[7 * sizeof(__m256i)]);
+}
+
+// Prototype of the fallback that blake3_hash_many_avx2 calls for the inputs
+// left over after its 8-way loop: the SSE4.1 implementation by default, or
+// the portable one when BLAKE3_NO_SSE41 is defined.
+#if !defined(BLAKE3_NO_SSE41)
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out);
+#else
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out);
+#endif
+
+// Hashes `num_inputs` inputs of `blocks` blocks each, writing BLAKE3_OUT_LEN
+// bytes per input to `out`.
+//
+// Inputs are consumed DEGREE at a time by blake3_hash8_avx2. Whatever is left
+// (fewer than DEGREE inputs, possibly none) is forwarded to the SSE4.1
+// implementation, or to the portable one when BLAKE3_NO_SSE41 is defined.
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  for (; num_inputs >= DEGREE; num_inputs -= DEGREE) {
+    blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    // Each batch covers DEGREE consecutive counter values when incrementing.
+    counter += increment_counter ? DEGREE : 0;
+    inputs += DEGREE;
+    out += DEGREE * BLAKE3_OUT_LEN;
+  }
+#if !defined(BLAKE3_NO_SSE41)
+  blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+                         increment_counter, flags, flags_start, flags_end, out);
+#else
+  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+#endif
+}
--- a/external/blake3/blake3_avx2_x86-64_unix.S
+++ b/external/blake3/blake3_avx2_x86-64_unix.S
--- a/external/blake3/blake3_avx2_x86-64_windows_gnu.S
+++ b/external/blake3/blake3_avx2_x86-64_windows_gnu.S
--- a/external/blake3/blake3_avx2_x86-64_windows_msvc.asm
+++ b/external/blake3/blake3_avx2_x86-64_windows_msvc.asm
--- a/external/blake3/blake3_avx512.c
+++ b/external/blake3/blake3_avx512.c
--- a/external/blake3/blake3_avx512_x86-64_unix.S
+++ b/external/blake3/blake3_avx512_x86-64_unix.S
--- a/external/blake3/blake3_avx512_x86-64_windows_gnu.S
+++ b/external/blake3/blake3_avx512_x86-64_windows_gnu.S
--- a/external/blake3/blake3_avx512_x86-64_windows_msvc.asm
+++ b/external/blake3/blake3_avx512_x86-64_windows_msvc.asm
--- a/external/blake3/blake3_c_rust_bindings/Cargo.toml
+++ b/external/blake3/blake3_c_rust_bindings/Cargo.toml
@@ -0,0 +1,32 @@
+# These are Rust bindings for the C implementation of BLAKE3. As there is a
+# native (and faster) Rust implementation of BLAKE3 provided in this same repo,
+# these bindings are not expected to be used in production. They're intended
+# for testing and benchmarking.
+
+[package]
+name = "blake3_c_rust_bindings"
+version = "0.0.0"
+description = "TESTING ONLY Rust bindings for the BLAKE3 C implementation"
+edition = "2021"
+
+[features]
+# By default the x86-64 build uses assembly implementations. This feature makes
+# the build use the C intrinsics implementations instead.
+prefer_intrinsics = []
+# Activate NEON bindings. We don't currently do any CPU feature detection for
+# this. If this Cargo feature is on, the NEON implementation gets used.
+neon = []
+# Enable TBB-based multithreading.
+tbb = []
+
+[dev-dependencies]
+arrayref = "0.3.5"
+arrayvec = { version = "0.7.0", default-features = false }
+page_size = "0.6.0"
+rand = "0.9.0"
+rand_chacha = "0.9.0"
+reference_impl = { path = "../../reference_impl" }
+
+[build-dependencies]
+cc = "1.0.48"
+ignore = "0.4.23"
--- a/external/blake3/blake3_c_rust_bindings/README.md
+++ b/external/blake3/blake3_c_rust_bindings/README.md
@@ -0,0 +1,4 @@
+These are Rust bindings for the C implementation of BLAKE3. As there is
+a native Rust implementation of BLAKE3 provided in this same repo, these
+bindings are not expected to be used in production. They're intended for
+testing and benchmarking.
--- a/external/blake3/blake3_c_rust_bindings/benches/bench.rs
+++ b/external/blake3/blake3_c_rust_bindings/benches/bench.rs
@@ -0,0 +1,477 @@
+#![feature(test)]
+
+extern crate test;
+
+use arrayref::array_ref;
+use arrayvec::ArrayVec;
+use rand::prelude::*;
+use test::Bencher;
+
+const KIB: usize = 1024;
+const MAX_SIMD_DEGREE: usize = 16;
+
+const BLOCK_LEN: usize = 64;
+const CHUNK_LEN: usize = 1024;
+const OUT_LEN: usize = 32;
+
+// This struct randomizes two things:
+// 1. The actual bytes of input.
+// 2. The page offset the input starts at.
+pub struct RandomInput {
+    // Random bytes; `new` allocates `len + page_size` of them so that a
+    // `len`-byte window fits at every offset in `0..page_size`.
+    buf: Vec<u8>,
+    // Length of the slice handed out by `get`.
+    len: usize,
+    // Every offset in `0..page_size`, in shuffled order.
+    offsets: Vec<usize>,
+    // Index into `offsets` of the offset the next `get` call will use.
+    offset_index: usize,
+}
+
+impl RandomInput {
+    /// Creates `len` bytes worth of random benchmark input and adds `len` to
+    /// the benchmark's byte count.
+    ///
+    /// The backing buffer holds `len + page_size` random bytes, so a window
+    /// of `len` bytes can start at any offset in `0..page_size`. The offsets
+    /// are stored in shuffled order.
+    pub fn new(b: &mut Bencher, len: usize) -> Self {
+        b.bytes += len as u64;
+        let page = page_size::get();
+        let mut buf = vec![0u8; len + page];
+        let mut rng = rand::rng();
+        rng.fill_bytes(&mut buf);
+        let mut offsets = (0..page).collect::<Vec<usize>>();
+        offsets.shuffle(&mut rng);
+        Self {
+            buf,
+            len,
+            offsets,
+            offset_index: 0,
+        }
+    }
+
+    /// Returns the next `len`-byte window of the buffer, cycling through the
+    /// shuffled start offsets and wrapping around after the last one.
+    pub fn get(&mut self) -> &[u8] {
+        let start = self.offsets[self.offset_index];
+        self.offset_index = (self.offset_index + 1) % self.offsets.len();
+        &self.buf[start..start + self.len]
+    }
+}
+
+/// Signature shared by the `blake3_compress_in_place_*` FFI functions.
+type CompressInPlaceFn =
+    unsafe extern "C" fn(cv: *mut u32, block: *const u8, block_len: u8, counter: u64, flags: u8);
+
+/// Benchmarks `f` compressing a single random `BLOCK_LEN`-byte block in place,
+/// reusing the same state array and the same input block on every iteration.
+fn bench_single_compression_fn(b: &mut Bencher, f: CompressInPlaceFn) {
+    let mut cv = [1u32; 8];
+    let mut random_input = RandomInput::new(b, BLOCK_LEN);
+    let block = array_ref!(random_input.get(), 0, BLOCK_LEN);
+    b.iter(|| unsafe { f(cv.as_mut_ptr(), block.as_ptr(), BLOCK_LEN as u8, 0, 0) });
+}
+
+/// Single-block compression, portable implementation.
+#[bench]
+fn bench_single_compression_portable(b: &mut Bencher) {
+    let compress: CompressInPlaceFn =
+        blake3_c_rust_bindings::ffi::blake3_compress_in_place_portable;
+    bench_single_compression_fn(b, compress);
+}
+
+/// Single-block compression, SSE2 implementation. Does nothing when SSE2 is
+/// not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_single_compression_sse2(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse2_detected() {
+        bench_single_compression_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_compress_in_place_sse2,
+        );
+    }
+}
+
+/// Single-block compression, SSE4.1 implementation. Does nothing when SSE4.1
+/// is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_single_compression_sse41(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse41_detected() {
+        bench_single_compression_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_compress_in_place_sse41,
+        );
+    }
+}
+
+/// Single-block compression, AVX-512 implementation. Does nothing when
+/// AVX-512 is not detected.
+///
+/// Gated to x86/x86_64 like the other x86 benchmarks: `avx512_detected` is
+/// only defined for those architectures, so without the gate this file does
+/// not compile for other targets.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_single_compression_avx512(b: &mut Bencher) {
+    if !blake3_c_rust_bindings::avx512_detected() {
+        return;
+    }
+    bench_single_compression_fn(
+        b,
+        blake3_c_rust_bindings::ffi::x86::blake3_compress_in_place_avx512,
+    );
+}
+
+/// Signature shared by the `blake3_hash_many_*` FFI functions.
+type HashManyFn = unsafe extern "C" fn(
+    inputs: *const *const u8,
+    num_inputs: usize,
+    blocks: usize,
+    key: *const u32,
+    counter: u64,
+    increment_counter: bool,
+    flags: u8,
+    flags_start: u8,
+    flags_end: u8,
+    out: *mut u8,
+);
+
+/// Benchmarks `f` hashing `degree` random inputs of `CHUNK_LEN` bytes each in
+/// a single call, with an all-zero key, counter 0 (incrementing) and no flags.
+fn bench_many_chunks_fn(b: &mut Bencher, f: HashManyFn, degree: usize) {
+    let mut inputs = Vec::with_capacity(degree);
+    for _ in 0..degree {
+        inputs.push(RandomInput::new(b, CHUNK_LEN));
+    }
+    b.iter(|| {
+        // Collecting references to fixed-size arrays yields a contiguous
+        // array of data pointers, which is what `f` expects for `inputs`.
+        let chunks: ArrayVec<&[u8; CHUNK_LEN], MAX_SIMD_DEGREE> = inputs
+            .iter_mut()
+            .take(degree)
+            .map(|input| array_ref!(input.get(), 0, CHUNK_LEN))
+            .collect();
+        let key = [0u32; 8];
+        let mut out = [0u8; MAX_SIMD_DEGREE * OUT_LEN];
+        unsafe {
+            f(
+                chunks.as_ptr() as _,
+                chunks.len(),
+                CHUNK_LEN / BLOCK_LEN,
+                key.as_ptr(),
+                0,
+                true,
+                0,
+                0,
+                0,
+                out.as_mut_ptr(),
+            )
+        }
+    });
+}
+
+/// Many-chunks benchmark for the SSE2 implementation (4 inputs per call).
+/// Does nothing when SSE2 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_chunks_sse2(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse2_detected() {
+        bench_many_chunks_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse2,
+            4,
+        );
+    }
+}
+
+/// Many-chunks benchmark for the SSE4.1 implementation (4 inputs per call).
+/// Does nothing when SSE4.1 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_chunks_sse41(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse41_detected() {
+        bench_many_chunks_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse41,
+            4,
+        );
+    }
+}
+
+/// Many-chunks benchmark for the AVX2 implementation (8 inputs per call).
+/// Does nothing when AVX2 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_chunks_avx2(b: &mut Bencher) {
+    if blake3_c_rust_bindings::avx2_detected() {
+        bench_many_chunks_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx2,
+            8,
+        );
+    }
+}
+
+/// Many-chunks benchmark for the AVX-512 implementation (16 inputs per call).
+/// Does nothing when AVX-512 is not detected.
+///
+/// Gated to x86/x86_64 like the other x86 benchmarks: `avx512_detected` is
+/// only defined for those architectures, so without the gate this file does
+/// not compile for other targets.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_chunks_avx512(b: &mut Bencher) {
+    if !blake3_c_rust_bindings::avx512_detected() {
+        return;
+    }
+    bench_many_chunks_fn(
+        b,
+        blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx512,
+        16,
+    );
+}
+
+/// Many-chunks benchmark for the NEON implementation (4 inputs per call).
+/// There is no runtime check: enabling the "neon" feature asserts that NEON
+/// is available.
+#[bench]
+#[cfg(feature = "neon")]
+fn bench_many_chunks_neon(b: &mut Bencher) {
+    let hash_many: HashManyFn = blake3_c_rust_bindings::ffi::neon::blake3_hash_many_neon;
+    bench_many_chunks_fn(b, hash_many, 4);
+}
+
+// TODO: When we get const generics we can unify this with the chunks code.
+/// Benchmarks `f` hashing `degree` random inputs of one `BLOCK_LEN`-byte block
+/// each in a single call, with an all-zero key, counter 0 (not incrementing)
+/// and no flags.
+fn bench_many_parents_fn(b: &mut Bencher, f: HashManyFn, degree: usize) {
+    let mut inputs = Vec::with_capacity(degree);
+    for _ in 0..degree {
+        inputs.push(RandomInput::new(b, BLOCK_LEN));
+    }
+    b.iter(|| {
+        // Collecting references to fixed-size arrays yields a contiguous
+        // array of data pointers, which is what `f` expects for `inputs`.
+        let parents: ArrayVec<&[u8; BLOCK_LEN], MAX_SIMD_DEGREE> = inputs
+            .iter_mut()
+            .take(degree)
+            .map(|input| array_ref!(input.get(), 0, BLOCK_LEN))
+            .collect();
+        let key = [0u32; 8];
+        let mut out = [0u8; MAX_SIMD_DEGREE * OUT_LEN];
+        unsafe {
+            f(
+                parents.as_ptr() as _,
+                parents.len(),
+                1,
+                key.as_ptr(),
+                0,
+                false,
+                0,
+                0,
+                0,
+                out.as_mut_ptr(),
+            )
+        }
+    });
+}
+
+/// Many-parents benchmark for the SSE2 implementation (4 inputs per call).
+/// Does nothing when SSE2 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_parents_sse2(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse2_detected() {
+        bench_many_parents_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse2,
+            4,
+        );
+    }
+}
+
+/// Many-parents benchmark for the SSE4.1 implementation (4 inputs per call).
+/// Does nothing when SSE4.1 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_parents_sse41(b: &mut Bencher) {
+    if blake3_c_rust_bindings::sse41_detected() {
+        bench_many_parents_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse41,
+            4,
+        );
+    }
+}
+
+/// Many-parents benchmark for the AVX2 implementation (8 inputs per call).
+/// Does nothing when AVX2 is not detected.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_parents_avx2(b: &mut Bencher) {
+    if blake3_c_rust_bindings::avx2_detected() {
+        bench_many_parents_fn(
+            b,
+            blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx2,
+            8,
+        );
+    }
+}
+
+/// Many-parents benchmark for the AVX-512 implementation (16 inputs per call).
+/// Does nothing when AVX-512 is not detected.
+///
+/// Gated to x86/x86_64 like the other x86 benchmarks: `avx512_detected` is
+/// only defined for those architectures, so without the gate this file does
+/// not compile for other targets.
+#[bench]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn bench_many_parents_avx512(b: &mut Bencher) {
+    if !blake3_c_rust_bindings::avx512_detected() {
+        return;
+    }
+    bench_many_parents_fn(
+        b,
+        blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx512,
+        16,
+    );
+}
+
+/// Many-parents benchmark for the NEON implementation (4 inputs per call).
+/// There is no runtime check: enabling the "neon" feature asserts that NEON
+/// is available.
+#[bench]
+#[cfg(feature = "neon")]
+fn bench_many_parents_neon(b: &mut Bencher) {
+    let hash_many: HashManyFn = blake3_c_rust_bindings::ffi::neon::blake3_hash_many_neon;
+    bench_many_parents_fn(b, hash_many, 4);
+}
+
+/// Benchmarks hashing `len` random bytes with a fresh `Hasher` per iteration:
+/// one `update` call followed by `finalize` into a 32-byte output.
+fn bench_incremental(b: &mut Bencher, len: usize) {
+    let mut random_input = RandomInput::new(b, len);
+    b.iter(|| {
+        let mut digest = [0u8; OUT_LEN];
+        let mut hasher = blake3_c_rust_bindings::Hasher::new();
+        hasher.update(random_input.get());
+        hasher.finalize(&mut digest);
+        digest
+    });
+}
+
+/// Declares one `#[bench]` function per `name => input length` pair, each of
+/// which forwards to `bench_incremental`.
+macro_rules! incremental_benches {
+    ($($name:ident => $len:expr),+ $(,)?) => {
+        $(
+            #[bench]
+            fn $name(b: &mut Bencher) {
+                bench_incremental(b, $len);
+            }
+        )+
+    };
+}
+
+incremental_benches! {
+    bench_incremental_0001_block => BLOCK_LEN,
+    bench_incremental_0001_kib => KIB,
+    bench_incremental_0002_kib => 2 * KIB,
+    bench_incremental_0004_kib => 4 * KIB,
+    bench_incremental_0008_kib => 8 * KIB,
+    bench_incremental_0016_kib => 16 * KIB,
+    bench_incremental_0032_kib => 32 * KIB,
+    bench_incremental_0064_kib => 64 * KIB,
+    bench_incremental_0128_kib => 128 * KIB,
+    bench_incremental_0256_kib => 256 * KIB,
+    bench_incremental_0512_kib => 512 * KIB,
+    bench_incremental_1024_kib => 1024 * KIB,
+}
+
+/// Benchmarks hashing `len` random bytes with a fresh `Hasher` per iteration,
+/// feeding the input through `update_tbb` and finalizing into a 32-byte
+/// output. Only built with the "tbb" feature.
+#[cfg(feature = "tbb")]
+fn bench_tbb(b: &mut Bencher, len: usize) {
+    let mut random_input = RandomInput::new(b, len);
+    b.iter(|| {
+        let mut digest = [0u8; OUT_LEN];
+        let mut hasher = blake3_c_rust_bindings::Hasher::new();
+        hasher.update_tbb(random_input.get());
+        hasher.finalize(&mut digest);
+        digest
+    });
+}
+
+/// Declares one `#[bench]` function per `name => input length` pair, each of
+/// which forwards to `bench_tbb`. Only built with the "tbb" feature.
+#[cfg(feature = "tbb")]
+macro_rules! tbb_benches {
+    ($($name:ident => $len:expr),+ $(,)?) => {
+        $(
+            #[bench]
+            #[cfg(feature = "tbb")]
+            fn $name(b: &mut Bencher) {
+                bench_tbb(b, $len);
+            }
+        )+
+    };
+}
+
+#[cfg(feature = "tbb")]
+tbb_benches! {
+    bench_tbb_0001_block => BLOCK_LEN,
+    bench_tbb_0001_kib => KIB,
+    bench_tbb_0002_kib => 2 * KIB,
+    bench_tbb_0004_kib => 4 * KIB,
+    bench_tbb_0008_kib => 8 * KIB,
+    bench_tbb_0016_kib => 16 * KIB,
+    bench_tbb_0032_kib => 32 * KIB,
+    bench_tbb_0064_kib => 64 * KIB,
+    bench_tbb_0128_kib => 128 * KIB,
+    bench_tbb_0256_kib => 256 * KIB,
+    bench_tbb_0512_kib => 512 * KIB,
+    bench_tbb_1024_kib => 1024 * KIB,
+}
+
+// Hashes 64 KiB in two update() calls: the first byte alone, then everything
+// else. This checks that update() splits up its input in increasing powers
+// of 2, so that it can recover a high degree of parallelism when the number
+// of bytes hashed so far is uneven. The performance of this benchmark should
+// be reasonably close to bench_incremental_0064_kib, within 80% or so. When
+// we had a bug in this logic
+// (https://github.com/BLAKE3-team/BLAKE3/issues/69), performance was less
+// than half.
+#[bench]
+fn bench_two_updates(b: &mut Bencher) {
+    let mut random_input = RandomInput::new(b, 64 * KIB);
+    b.iter(|| {
+        let (first_byte, rest) = random_input.get().split_at(1);
+        let mut hasher = blake3_c_rust_bindings::Hasher::new();
+        hasher.update(first_byte);
+        hasher.update(rest);
+        let mut digest = [0u8; OUT_LEN];
+        hasher.finalize(&mut digest);
+        digest
+    });
+}
--- a/external/blake3/blake3_c_rust_bindings/build.rs
+++ b/external/blake3/blake3_c_rust_bindings/build.rs
@@ -0,0 +1,253 @@
+use std::env;
+
+/// Returns true if the environment variable `var` is set, whatever its value.
+fn defined(var: &str) -> bool {
+    matches!(env::var_os(var), Some(_))
+}
+
+/// Splits the `TARGET` triple on `-` into its components. Panics if `TARGET`
+/// is not set.
+fn target_components() -> Vec<String> {
+    env::var("TARGET")
+        .unwrap()
+        .split('-')
+        .map(String::from)
+        .collect()
+}
+
+/// True when the first component of the target triple is `x86_64`.
+fn is_x86_64() -> bool {
+    matches!(target_components()[0].as_str(), "x86_64")
+}
+
+/// True when `CARGO_CFG_TARGET_OS` is `windows`. Panics if it is not set.
+fn is_windows_target() -> bool {
+    let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap();
+    target_os == "windows"
+}
+
+/// Decides whether the MSVC-syntax `.asm` sources should be used.
+///
+/// Returns false unless the target is Windows with the MSVC environment. For
+/// such a target it returns true when the build is native (`HOST == TARGET`)
+/// and `CC` is unset, empty, `cl` or `cl.exe`. Otherwise the answer comes
+/// from `CC_<arch>_<vendor>_windows_msvc`, with the same set of accepted
+/// names. Comparisons are ASCII case-insensitive.
+fn use_msvc_asm() -> bool {
+    const MSVC_NAMES: &[&str] = &["", "cl", "cl.exe"];
+    // Unset (or non-Unicode) variables are treated as empty strings.
+    let env_or_empty = |name: &str| env::var(name).unwrap_or_default();
+
+    let targets_windows_msvc = env_or_empty("CARGO_CFG_TARGET_OS") == "windows"
+        && env_or_empty("CARGO_CFG_TARGET_ENV") == "msvc";
+    if !targets_windows_msvc {
+        // We are not building for Windows with the MSVC toolchain.
+        return false;
+    }
+
+    let native_build = env_or_empty("HOST") == env_or_empty("TARGET");
+    let generic_cc = env_or_empty("CC").to_ascii_lowercase();
+    if native_build && MSVC_NAMES.contains(&generic_cc.as_str()) {
+        // Native Windows build with the MSVC compiler.
+        return true;
+    }
+
+    // Cross-compiling, or a native build whose CC is not cl: consult the
+    // target-specific compiler variable instead.
+    let target_arch = env_or_empty("CARGO_CFG_TARGET_ARCH");
+    let target_vendor = env_or_empty("CARGO_CFG_TARGET_VENDOR");
+    let target_cc = env_or_empty(&format!("CC_{target_arch}_{target_vendor}_windows_msvc"))
+        .to_ascii_lowercase();
+    MSVC_NAMES.contains(&target_cc.as_str())
+}
+
+/// True when the first component of the target triple is `i386`, `i586` or
+/// `i686`.
+fn is_x86_32() -> bool {
+    matches!(
+        target_components()[0].as_str(),
+        "i386" | "i586" | "i686"
+    )
+}
+
+/// True when the first component of the target triple is `armv7`.
+fn is_armv7() -> bool {
+    matches!(target_components()[0].as_str(), "armv7")
+}
+
+/// True when the first component of the target triple is `aarch64`.
+fn is_aarch64() -> bool {
+    matches!(target_components()[0].as_str(), "aarch64")
+}
+
+// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
+// right compiler flags to use depend on the toolchain. (And we don't want to
+// use flag_if_supported, because we don't want features to be silently
+// disabled by old compilers.)
+//
+// Returns true only for target triples of the form `<arch>-pc-windows-msvc`.
+fn is_windows_msvc() -> bool {
+    let parts = target_components();
+    // Some targets are only two components long. `&&` short-circuits, so a
+    // later index is only read once the earlier component has matched.
+    parts[1] == "pc" && parts[2] == "windows" && parts[3] == "msvc"
+}
+
+/// Creates a C build; `-std=c11` is added for every toolchain except MSVC.
+fn new_build() -> cc::Build {
+    let mut build = cc::Build::new();
+    let msvc = is_windows_msvc();
+    if !msvc {
+        build.flag("-std=c11");
+    }
+    build
+}
+
+/// Creates a C++20 build with exceptions and RTTI disabled, using MSVC or
+/// GNU-style flag spellings as appropriate for the target.
+fn new_cpp_build() -> cc::Build {
+    let mut build = cc::Build::new();
+    build.cpp(true);
+    let flags = if is_windows_msvc() {
+        ["/std:c++20", "/EHs-c-", "/GR-"]
+    } else {
+        ["-std=c++20", "-fno-exceptions", "-fno-rtti"]
+    };
+    for flag in flags {
+        build.flag(flag);
+    }
+    build
+}
+
+/// Returns the path of `filename` inside the C source directory: `../` by
+/// default, or `$BLAKE3_C_DIR_OVERRIDE/` when that variable is set.
+fn c_dir_path(filename: &str) -> String {
+    // The `cross` tool doesn't support reading files in parent directories. As a hacky workaround
+    // in `cross_test.sh`, we move the c/ directory around and set BLAKE3_C_DIR_OVERRIDE. Regular
+    // building and testing doesn't require this.
+    match env::var("BLAKE3_C_DIR_OVERRIDE") {
+        Ok(c_dir_override) => format!("{c_dir_override}/{filename}"),
+        Err(_) => format!("../{filename}"),
+    }
+}
+
+/// Build script entry point. Compiles the BLAKE3 C sources into static
+/// libraries:
+///
+/// * `blake3_base`: the portable core, always built;
+/// * `blake3_tbb`: the TBB C++ part, only with the "tbb" feature;
+/// * `blake3_asm`: hand-written x86-64 assembly, or, on 32-bit x86 or with
+///   the "prefer_intrinsics" feature, one intrinsics library per instruction
+///   set (`blake3_sse2`, `blake3_sse41`, `blake3_avx2`, `blake3_avx512`);
+/// * `blake3_neon`: with the "neon" feature or on aarch64.
+///
+/// It then emits rerun-if directives and removes stray `.asm` files.
+///
+/// Returns an error if walking the parent directory fails.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut base_build = new_build();
+    for source in ["blake3.c", "blake3_dispatch.c", "blake3_portable.c"] {
+        base_build.file(c_dir_path(source));
+    }
+    if cfg!(feature = "tbb") {
+        base_build.define("BLAKE3_USE_TBB", "1");
+    }
+    base_build.compile("blake3_base");
+
+    if cfg!(feature = "tbb") {
+        let mut tbb_build = new_cpp_build();
+        tbb_build.define("BLAKE3_USE_TBB", "1");
+        tbb_build.file(c_dir_path("blake3_tbb.cpp"));
+        tbb_build.compile("blake3_tbb");
+        println!("cargo::rustc-link-lib=tbb");
+    }
+
+    if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
+        // On 64-bit, use the assembly implementations, unless the
+        // "prefer_intrinsics" feature is enabled. The three flavours differ
+        // only in the file name suffix.
+        let asm_suffix = if !is_windows_target() {
+            // All non-Windows implementations are assumed to support
+            // Linux-style assembly. These files do contain a small
+            // explicit workaround for macOS also.
+            "unix.S"
+        } else if use_msvc_asm() {
+            "windows_msvc.asm"
+        } else {
+            "windows_gnu.S"
+        };
+        let mut asm_build = new_build();
+        for isa in ["sse2", "sse41", "avx2", "avx512"] {
+            asm_build.file(c_dir_path(&format!("blake3_{isa}_x86-64_{asm_suffix}")));
+        }
+        asm_build.compile("blake3_asm");
+    } else if is_x86_64() || is_x86_32() {
+        // Assembly implementations are only for 64-bit. On 32-bit, or if
+        // the "prefer_intrinsics" feature is enabled, use the
+        // intrinsics-based C implementations. These each need to be
+        // compiled separately, with the corresponding instruction set
+        // extension explicitly enabled in the compiler.
+        let build_intrinsics = |isa: &str, gnu_flags: &[&str], msvc_flags: &[&str]| {
+            let mut build = new_build();
+            build.file(c_dir_path(&format!("blake3_{isa}.c")));
+            let flags = if is_windows_msvc() {
+                msvc_flags
+            } else {
+                gnu_flags
+            };
+            for flag in flags {
+                build.flag(*flag);
+            }
+            build.compile(&format!("blake3_{isa}"));
+        };
+
+        // MSVC gets no flag for SSE2/SSE4.1: /arch:SSE2 is the default on
+        // x86 and undefined on x86_64:
+        // https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
+        // It also includes SSE4.1 intrinsics:
+        // https://stackoverflow.com/a/32183222/823869
+        build_intrinsics("sse2", &["-msse2"], &[]);
+        build_intrinsics("sse41", &["-msse4.1"], &[]);
+        build_intrinsics("avx2", &["-mavx2"], &["/arch:AVX2"]);
+        // Note that a lot of versions of MSVC don't support /arch:AVX512,
+        // and they'll discard it with a warning, hopefully leading to a
+        // build error.
+        build_intrinsics(
+            "avx512",
+            &["-mavx512f", "-mavx512vl"],
+            &["/arch:AVX512"],
+        );
+    }
+
+    // We only build NEON code here if
+    // 1) it's requested
+    // and 2) the root crate is not already building it.
+    // The only time this will really happen is if you build this
+    // crate by hand with the "neon" feature for some reason.
+    //
+    // In addition, 3) if the target is aarch64, NEON is on by default.
+    if defined("CARGO_FEATURE_NEON") || is_aarch64() {
+        let mut neon_build = new_build();
+        neon_build.file(c_dir_path("blake3_neon.c"));
+        // ARMv7 platforms that support NEON generally need the following
+        // flags. AArch64 supports NEON by default and does not support -mfpu.
+        if is_armv7() {
+            neon_build.flag("-mfpu=neon-vfpv4");
+            neon_build.flag("-mfloat-abi=hard");
+        }
+        neon_build.compile("blake3_neon");
+    }
+
+    // The `cc` crate does not automatically emit rerun-if directives for the
+    // environment variables it supports, in particular for $CC. We expect to
+    // do a lot of benchmarking across different compilers, so we explicitly
+    // add the variables that we're likely to need.
+    println!("cargo:rerun-if-env-changed=CC");
+    println!("cargo:rerun-if-env-changed=CFLAGS");
+
+    // Ditto for source files, though these shouldn't change as often. `ignore::Walk` respects
+    // .gitignore, so this doesn't traverse target/.
+    for entry in ignore::Walk::new("..") {
+        let entry = entry?;
+        let path = entry.path();
+        if path.is_file() {
+            // `display()` instead of `to_str().unwrap()`: a file name that is
+            // not valid UTF-8 must not abort the whole build.
+            println!("cargo:rerun-if-changed={}", path.display());
+        }
+    }
+
+    // When compiling with clang-cl for windows, it adds .asm files to the root
+    // which we need to delete so cargo doesn't get angry. Failures are
+    // ignored because the files usually do not exist.
+    if is_windows_target() && !use_msvc_asm() {
+        for isa in ["avx2", "avx512", "sse2", "sse41"] {
+            let _ = std::fs::remove_file(format!("blake3_{isa}_x86-64_windows_gnu.asm"));
+        }
+    }
+
+    Ok(())
+}
--- a/external/blake3/blake3_c_rust_bindings/cross_test.sh
+++ b/external/blake3/blake3_c_rust_bindings/cross_test.sh
@@ -0,0 +1,31 @@
+#! /usr/bin/env bash
+
+# `cross test` does not support path dependencies: it exposes the project to
+# the guest through a docker shared folder, so parent directories are not
+# available. This script works around that by cloning the whole project into
+# a temp dir and moving "c" and "reference_impl" underneath
+# "blake3_c_rust_bindings", so that everything the build needs sits inside
+# the crate directory. The Cargo.toml path dependency is rewritten to match,
+# and BLAKE3_C_DIR_OVERRIDE tells build.rs where the C sources now live.
+#
+# All arguments are forwarded to `cross test`.
+
+set -euo pipefail
+
+script_dir="$(dirname "${BASH_SOURCE[0]}")"
+project_root="$(realpath "$script_dir/../..")"
+tmpdir="$(mktemp -d)"
+echo "Running cross tests in $tmpdir"
+cd "$tmpdir"
+git clone "$project_root" blake3
+
+# Pull the bindings crate out first, then move its two dependencies into it.
+mv blake3/c/blake3_c_rust_bindings .
+mv blake3/reference_impl blake3/c blake3_c_rust_bindings
+cd blake3_c_rust_bindings
+sed -i 's|reference_impl = { path = "../../reference_impl" }|reference_impl = { path = "reference_impl" }|' Cargo.toml
+
+# Pass the override through to the build inside the cross container.
+export BLAKE3_C_DIR_OVERRIDE="./c"
+printf '%s\n' \
+    '[build.env]' \
+    'passthrough = [' \
+    '    "BLAKE3_C_DIR_OVERRIDE",' \
+    ']' > Cross.toml
+cross test "$@"
--- a/external/blake3/blake3_c_rust_bindings/src/lib.rs
+++ b/external/blake3/blake3_c_rust_bindings/src/lib.rs
@@ -0,0 +1,333 @@
+//! These are Rust bindings for the C implementation of BLAKE3. As there is a
+//! native (and faster) Rust implementation of BLAKE3 provided in this same
+//! repo, these bindings are not expected to be used in production. They're
+//! intended for testing and benchmarking.
+
+use std::ffi::{c_void, CString};
+use std::mem::MaybeUninit;
+
+#[cfg(test)]
+mod test;
+
+pub const BLOCK_LEN: usize = 64;
+pub const CHUNK_LEN: usize = 1024;
+pub const OUT_LEN: usize = 32;
+
+// Feature detection functions for tests and benchmarks. Note that the C code
+// does its own feature detection in blake3_dispatch.c.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub fn sse2_detected() -> bool {
+    is_x86_feature_detected!("sse2")
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub fn sse41_detected() -> bool {
+    is_x86_feature_detected!("sse4.1")
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub fn avx2_detected() -> bool {
+    is_x86_feature_detected!("avx2")
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub fn avx512_detected() -> bool {
+    is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl")
+}
+
+#[derive(Clone)]
+pub struct Hasher(ffi::blake3_hasher);
+
+impl Hasher {
+    pub fn new() -> Self {
+        let mut c_state = MaybeUninit::uninit();
+        unsafe {
+            ffi::blake3_hasher_init(c_state.as_mut_ptr());
+            Self(c_state.assume_init())
+        }
+    }
+
+    pub fn new_keyed(key: &[u8; 32]) -> Self {
+        let mut c_state = MaybeUninit::uninit();
+        unsafe {
+            ffi::blake3_hasher_init_keyed(c_state.as_mut_ptr(), key.as_ptr());
+            Self(c_state.assume_init())
+        }
+    }
+
+    pub fn new_derive_key(context: &str) -> Self {
+        let mut c_state = MaybeUninit::uninit();
+        let context_c_string = CString::new(context).expect("valid C string, no null bytes");
+        unsafe {
+            ffi::blake3_hasher_init_derive_key(c_state.as_mut_ptr(), context_c_string.as_ptr());
+            Self(c_state.assume_init())
+        }
+    }
+
+    pub fn new_derive_key_raw(context: &[u8]) -> Self {
+        let mut c_state = MaybeUninit::uninit();
+        unsafe {
+            ffi::blake3_hasher_init_derive_key_raw(
+                c_state.as_mut_ptr(),
+                context.as_ptr() as *const _,
+                context.len(),
+            );
+            Self(c_state.assume_init())
+        }
+    }
+
+    pub fn update(&mut self, input: &[u8]) {
+        unsafe {
+            ffi::blake3_hasher_update(&mut self.0, input.as_ptr() as *const c_void, input.len());
+        }
+    }
+
+    #[cfg(feature = "tbb")]
+    pub fn update_tbb(&mut self, input: &[u8]) {
+        unsafe {
+            ffi::blake3_hasher_update_tbb(
+                &mut self.0,
+                input.as_ptr() as *const c_void,
+                input.len(),
+            );
+        }
+    }
+
+    pub fn finalize(&self, output: &mut [u8]) {
+        unsafe {
+            ffi::blake3_hasher_finalize(&self.0, output.as_mut_ptr(), output.len());
+        }
+    }
+
+    pub fn finalize_seek(&self, seek: u64, output: &mut [u8]) {
+        unsafe {
+            ffi::blake3_hasher_finalize_seek(&self.0, seek, output.as_mut_ptr(), output.len());
+        }
+    }
+
+    pub fn reset(&mut self) {
+        unsafe {
+            ffi::blake3_hasher_reset(&mut self.0);
+        }
+    }
+}
+
+pub mod ffi {
+    #[repr(C)]
+    #[derive(Copy, Clone)]
+    pub struct blake3_chunk_state {
+        pub cv: [u32; 8usize],
+        pub chunk_counter: u64,
+        pub buf: [u8; 64usize],
+        pub buf_len: u8,
+        pub blocks_compressed: u8,
+        pub flags: u8,
+    }
+
+    #[repr(C)]
+    #[derive(Copy, Clone)]
+    pub struct blake3_hasher {
+        pub key: [u32; 8usize],
+        pub chunk: blake3_chunk_state,
+        pub cv_stack_len: u8,
+        pub cv_stack: [u8; 1728usize], // NOTE(review): 1728 = 54 * 32; confirm it matches cv_stack in blake3.h
+    }
+
+    extern "C" {
+        // public interface
+        pub fn blake3_hasher_init(self_: *mut blake3_hasher);
+        pub fn blake3_hasher_init_keyed(self_: *mut blake3_hasher, key: *const u8);
+        pub fn blake3_hasher_init_derive_key(
+            self_: *mut blake3_hasher,
+            context: *const ::std::os::raw::c_char,
+        );
+        pub fn blake3_hasher_init_derive_key_raw(
+            self_: *mut blake3_hasher,
+            context: *const ::std::os::raw::c_void,
+            context_len: usize,
+        );
+        pub fn blake3_hasher_update(
+            self_: *mut blake3_hasher,
+            input: *const ::std::os::raw::c_void,
+            input_len: usize,
+        );
+        #[cfg(feature = "tbb")]
+        pub fn blake3_hasher_update_tbb(
+            self_: *mut blake3_hasher,
+            input: *const ::std::os::raw::c_void,
+            input_len: usize,
+        );
+        pub fn blake3_hasher_finalize(self_: *const blake3_hasher, out: *mut u8, out_len: usize);
+        pub fn blake3_hasher_finalize_seek(
+            self_: *const blake3_hasher,
+            seek: u64,
+            out: *mut u8,
+            out_len: usize,
+        );
+        pub fn blake3_hasher_reset(self_: *mut blake3_hasher);
+
+        // portable low-level functions
+        pub fn blake3_compress_in_place_portable(
+            cv: *mut u32,
+            block: *const u8,
+            block_len: u8,
+            counter: u64,
+            flags: u8,
+        );
+        pub fn blake3_compress_xof_portable(
+            cv: *const u32,
+            block: *const u8,
+            block_len: u8,
+            counter: u64,
+            flags: u8,
+            out: *mut u8,
+        );
+        pub fn blake3_hash_many_portable(
+            inputs: *const *const u8,
+            num_inputs: usize,
+            blocks: usize,
+            key: *const u32,
+            counter: u64,
+            increment_counter: bool,
+            flags: u8,
+            flags_start: u8,
+            flags_end: u8,
+            out: *mut u8,
+        );
+    }
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    pub mod x86 {
+        extern "C" {
+            // SSE2 low level functions
+            pub fn blake3_compress_in_place_sse2(
+                cv: *mut u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+            );
+            pub fn blake3_compress_xof_sse2(
+                cv: *const u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+                out: *mut u8,
+            );
+            pub fn blake3_hash_many_sse2(
+                inputs: *const *const u8,
+                num_inputs: usize,
+                blocks: usize,
+                key: *const u32,
+                counter: u64,
+                increment_counter: bool,
+                flags: u8,
+                flags_start: u8,
+                flags_end: u8,
+                out: *mut u8,
+            );
+
+            // SSE4.1 low level functions
+            pub fn blake3_compress_in_place_sse41(
+                cv: *mut u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+            );
+            pub fn blake3_compress_xof_sse41(
+                cv: *const u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+                out: *mut u8,
+            );
+            pub fn blake3_hash_many_sse41(
+                inputs: *const *const u8,
+                num_inputs: usize,
+                blocks: usize,
+                key: *const u32,
+                counter: u64,
+                increment_counter: bool,
+                flags: u8,
+                flags_start: u8,
+                flags_end: u8,
+                out: *mut u8,
+            );
+
+            // AVX2 low level functions
+            pub fn blake3_hash_many_avx2(
+                inputs: *const *const u8,
+                num_inputs: usize,
+                blocks: usize,
+                key: *const u32,
+                counter: u64,
+                increment_counter: bool,
+                flags: u8,
+                flags_start: u8,
+                flags_end: u8,
+                out: *mut u8,
+            );
+
+            // AVX-512 low level functions
+            pub fn blake3_compress_xof_avx512(
+                cv: *const u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+                out: *mut u8,
+            );
+            pub fn blake3_compress_in_place_avx512(
+                cv: *mut u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+            );
+            pub fn blake3_hash_many_avx512(
+                inputs: *const *const u8,
+                num_inputs: usize,
+                blocks: usize,
+                key: *const u32,
+                counter: u64,
+                increment_counter: bool,
+                flags: u8,
+                flags_start: u8,
+                flags_end: u8,
+                out: *mut u8,
+            );
+            #[cfg(unix)]
+            pub fn blake3_xof_many_avx512(
+                cv: *const u32,
+                block: *const u8,
+                block_len: u8,
+                counter: u64,
+                flags: u8,
+                out: *mut u8,
+                outblocks: usize,
+            );
+        }
+    }
+
+    #[cfg(feature = "neon")]
+    pub mod neon {
+        extern "C" {
+            // NEON low level functions
+            pub fn blake3_hash_many_neon(
+                inputs: *const *const u8,
+                num_inputs: usize,
+                blocks: usize,
+                key: *const u32,
+                counter: u64,
+                increment_counter: bool,
+                flags: u8,
+                flags_start: u8,
+                flags_end: u8,
+                out: *mut u8,
+            );
+        }
+    }
+}
--- a/external/blake3/blake3_c_rust_bindings/src/test.rs
+++ b/external/blake3/blake3_c_rust_bindings/src/test.rs
@@ -0,0 +1,696 @@
+// Most of this code is duplicated from the root `blake3` crate. Perhaps we
+// could share more of it in the future.
+
+use crate::{BLOCK_LEN, CHUNK_LEN, OUT_LEN};
+use arrayref::{array_mut_ref, array_ref};
+use arrayvec::ArrayVec;
+use core::usize;
+use rand::prelude::*;
+
+const CHUNK_START: u8 = 1 << 0;
+const CHUNK_END: u8 = 1 << 1;
+const PARENT: u8 = 1 << 2;
+const ROOT: u8 = 1 << 3;
+const KEYED_HASH: u8 = 1 << 4;
+// const DERIVE_KEY_CONTEXT: u8 = 1 << 5;
+// const DERIVE_KEY_MATERIAL: u8 = 1 << 6;
+
+// Interesting input lengths to run tests on.
+pub const TEST_CASES: &[usize] = &[
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    BLOCK_LEN - 1,
+    BLOCK_LEN,
+    BLOCK_LEN + 1,
+    2 * BLOCK_LEN - 1,
+    2 * BLOCK_LEN,
+    2 * BLOCK_LEN + 1,
+    CHUNK_LEN - 1,
+    CHUNK_LEN,
+    CHUNK_LEN + 1,
+    2 * CHUNK_LEN,
+    2 * CHUNK_LEN + 1,
+    3 * CHUNK_LEN,
+    3 * CHUNK_LEN + 1,
+    4 * CHUNK_LEN,
+    4 * CHUNK_LEN + 1,
+    5 * CHUNK_LEN,
+    5 * CHUNK_LEN + 1,
+    6 * CHUNK_LEN,
+    6 * CHUNK_LEN + 1,
+    7 * CHUNK_LEN,
+    7 * CHUNK_LEN + 1,
+    8 * CHUNK_LEN,
+    8 * CHUNK_LEN + 1,
+    16 * CHUNK_LEN,  // AVX512's bandwidth
+    31 * CHUNK_LEN,  // 16 + 8 + 4 + 2 + 1
+    100 * CHUNK_LEN, // subtrees larger than MAX_SIMD_DEGREE chunks
+];
+
+pub const TEST_CASES_MAX: usize = 100 * CHUNK_LEN;
+
+// There's a test to make sure these two are equal below.
+pub const TEST_KEY: [u8; 32] = *b"whats the Elvish word for friend";
+pub const TEST_KEY_WORDS: [u32; 8] = [
+    1952540791, 1752440947, 1816469605, 1752394102, 1919907616, 1868963940, 1919295602, 1684956521,
+];
+
+// Paint the input with a repeating byte pattern. We use a cycle length of 251,
+// because that's the largest prime number less than 256. This makes it
+// unlikely that swapping any two adjacent input blocks or chunks will give the
+// same answer.
+fn paint_test_input(buf: &mut [u8]) {
+    for (i, b) in buf.iter_mut().enumerate() {
+        *b = (i % 251) as u8;
+    }
+}
+
+#[inline(always)]
+fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
+    let mut out = [0; 32];
+    *array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes();
+    *array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes();
+    *array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes();
+    *array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes();
+    *array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes();
+    *array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes();
+    *array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes();
+    *array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes();
+    out
+}
+
+type CompressInPlaceFn =
+    unsafe extern "C" fn(cv: *mut u32, block: *const u8, block_len: u8, counter: u64, flags: u8);
+
+type CompressXofFn = unsafe extern "C" fn(
+    cv: *const u32,
+    block: *const u8,
+    block_len: u8,
+    counter: u64,
+    flags: u8,
+    out: *mut u8,
+);
+
+// A shared helper function for platform-specific tests.
+pub fn test_compress_fn(compress_in_place_fn: CompressInPlaceFn, compress_xof_fn: CompressXofFn) {
+    let initial_state = TEST_KEY_WORDS;
+    let block_len: u8 = 61;
+    let mut block = [0; BLOCK_LEN];
+    paint_test_input(&mut block[..block_len as usize]);
+    // Use a counter with set bits in both 32-bit words.
+    let counter = (5u64 << 32) + 6;
+    let flags = CHUNK_END | ROOT | KEYED_HASH;
+
+    let mut portable_out = [0; 64];
+    unsafe {
+        crate::ffi::blake3_compress_xof_portable(
+            initial_state.as_ptr(),
+            block.as_ptr(),
+            block_len,
+            counter,
+            flags,
+            portable_out.as_mut_ptr(),
+        );
+    }
+
+    let mut test_state = initial_state;
+    unsafe {
+        compress_in_place_fn(
+            test_state.as_mut_ptr(),
+            block.as_ptr(),
+            block_len,
+            counter,
+            flags,
+        )
+    };
+    let test_state_bytes = le_bytes_from_words_32(&test_state);
+    let mut test_xof = [0; 64];
+    unsafe {
+        compress_xof_fn(
+            initial_state.as_ptr(),
+            block.as_ptr(),
+            block_len,
+            counter,
+            flags,
+            test_xof.as_mut_ptr(),
+        )
+    };
+
+    assert_eq!(&portable_out[..32], &test_state_bytes[..]);
+    assert_eq!(&portable_out[..], &test_xof[..]);
+}
+
+// Testing the portable implementation against itself is circular, but why not.
+#[test]
+fn test_compress_portable() {
+    test_compress_fn(
+        crate::ffi::blake3_compress_in_place_portable,
+        crate::ffi::blake3_compress_xof_portable,
+    );
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_compress_sse2() {
+    if !crate::sse2_detected() {
+        return;
+    }
+    test_compress_fn(
+        crate::ffi::x86::blake3_compress_in_place_sse2,
+        crate::ffi::x86::blake3_compress_xof_sse2,
+    );
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_compress_sse41() {
+    if !crate::sse41_detected() {
+        return;
+    }
+    test_compress_fn(
+        crate::ffi::x86::blake3_compress_in_place_sse41,
+        crate::ffi::x86::blake3_compress_xof_sse41,
+    );
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_compress_avx512() {
+    if !crate::avx512_detected() {
+        return;
+    }
+    test_compress_fn(
+        crate::ffi::x86::blake3_compress_in_place_avx512,
+        crate::ffi::x86::blake3_compress_xof_avx512,
+    );
+}
+
+type HashManyFn = unsafe extern "C" fn(
+    inputs: *const *const u8,
+    num_inputs: usize,
+    blocks: usize,
+    key: *const u32,
+    counter: u64,
+    increment_counter: bool,
+    flags: u8,
+    flags_start: u8,
+    flags_end: u8,
+    out: *mut u8,
+);
+
+// A shared helper function for platform-specific tests.
+pub fn test_hash_many_fn(hash_many_fn: HashManyFn) {
+    // Test a few different initial counter values.
+    // - 0: The base case.
+    // - u32::MAX: The low word of the counter overflows for all inputs except the first.
+    // - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
+    //   when you're supposed to ANDNOT...
+    let initial_counters = [0, u32::MAX as u64, i32::MAX as u64];
+    for counter in initial_counters {
+        dbg!(counter);
+
+        // 31 (16 + 8 + 4 + 2 + 1) inputs
+        const NUM_INPUTS: usize = 31;
+        let mut input_buf = [0; CHUNK_LEN * NUM_INPUTS];
+        crate::test::paint_test_input(&mut input_buf);
+
+        // First hash chunks.
+        let mut chunks = ArrayVec::<&[u8; CHUNK_LEN], NUM_INPUTS>::new();
+        for i in 0..NUM_INPUTS {
+            chunks.push(array_ref!(input_buf, i * CHUNK_LEN, CHUNK_LEN));
+        }
+        let mut portable_chunks_out = [0; NUM_INPUTS * OUT_LEN];
+        unsafe {
+            crate::ffi::blake3_hash_many_portable(
+                chunks.as_ptr() as _,
+                chunks.len(),
+                CHUNK_LEN / BLOCK_LEN,
+                TEST_KEY_WORDS.as_ptr(),
+                counter,
+                true,
+                KEYED_HASH,
+                CHUNK_START,
+                CHUNK_END,
+                portable_chunks_out.as_mut_ptr(),
+            );
+        }
+
+        let mut test_chunks_out = [0; NUM_INPUTS * OUT_LEN];
+        unsafe {
+            hash_many_fn(
+                chunks.as_ptr() as _,
+                chunks.len(),
+                CHUNK_LEN / BLOCK_LEN,
+                TEST_KEY_WORDS.as_ptr(),
+                counter,
+                true,
+                KEYED_HASH,
+                CHUNK_START,
+                CHUNK_END,
+                test_chunks_out.as_mut_ptr(),
+            );
+        }
+        for n in 0..NUM_INPUTS {
+            dbg!(n);
+            assert_eq!(
+                &portable_chunks_out[n * OUT_LEN..][..OUT_LEN],
+                &test_chunks_out[n * OUT_LEN..][..OUT_LEN]
+            );
+        }
+
+        // Then hash parents.
+        let mut parents = ArrayVec::<&[u8; 2 * OUT_LEN], NUM_INPUTS>::new();
+        for i in 0..NUM_INPUTS {
+            parents.push(array_ref!(input_buf, i * 2 * OUT_LEN, 2 * OUT_LEN));
+        }
+        let mut portable_parents_out = [0; NUM_INPUTS * OUT_LEN];
+        unsafe {
+            crate::ffi::blake3_hash_many_portable(
+                parents.as_ptr() as _,
+                parents.len(),
+                1,
+                TEST_KEY_WORDS.as_ptr(),
+                counter,
+                false,
+                KEYED_HASH | PARENT,
+                0,
+                0,
+                portable_parents_out.as_mut_ptr(),
+            );
+        }
+
+        let mut test_parents_out = [0; NUM_INPUTS * OUT_LEN];
+        unsafe {
+            hash_many_fn(
+                parents.as_ptr() as _,
+                parents.len(),
+                1,
+                TEST_KEY_WORDS.as_ptr(),
+                counter,
+                false,
+                KEYED_HASH | PARENT,
+                0,
+                0,
+                test_parents_out.as_mut_ptr(),
+            );
+        }
+        for n in 0..NUM_INPUTS {
+            dbg!(n);
+            assert_eq!(
+                &portable_parents_out[n * OUT_LEN..][..OUT_LEN],
+                &test_parents_out[n * OUT_LEN..][..OUT_LEN]
+            );
+        }
+    }
+}
+
+// Testing the portable implementation against itself is circular, but why not.
+#[test]
+fn test_hash_many_portable() {
+    test_hash_many_fn(crate::ffi::blake3_hash_many_portable);
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_hash_many_sse2() {
+    if !crate::sse2_detected() {
+        return;
+    }
+    test_hash_many_fn(crate::ffi::x86::blake3_hash_many_sse2);
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_hash_many_sse41() {
+    if !crate::sse41_detected() {
+        return;
+    }
+    test_hash_many_fn(crate::ffi::x86::blake3_hash_many_sse41);
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_hash_many_avx2() {
+    if !crate::avx2_detected() {
+        return;
+    }
+    test_hash_many_fn(crate::ffi::x86::blake3_hash_many_avx2);
+}
+
+#[test]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_hash_many_avx512() {
+    if !crate::avx512_detected() {
+        return;
+    }
+    test_hash_many_fn(crate::ffi::x86::blake3_hash_many_avx512);
+}
+
+#[test]
+#[cfg(feature = "neon")]
+fn test_hash_many_neon() {
+    test_hash_many_fn(crate::ffi::neon::blake3_hash_many_neon);
+}
+
+#[allow(unused)]
+type XofManyFunction = unsafe extern "C" fn(
+    cv: *const u32,
+    block: *const u8,
+    block_len: u8,
+    counter: u64,
+    flags: u8,
+    out: *mut u8,
+    outblocks: usize,
+);
+
+// A shared helper function for platform-specific tests.
+#[allow(unused)]
+pub fn test_xof_many_fn(xof_many_function: XofManyFunction) {
+    let mut block = [0; BLOCK_LEN];
+    let block_len = 42;
+    crate::test::paint_test_input(&mut block[..block_len]);
+    let cv = [40, 41, 42, 43, 44, 45, 46, 47];
+    let flags = KEYED_HASH;
+
+    // Test a few different initial counter values.
+    // - 0: The base case.
+    // - u32::MAX: The low word of the counter overflows for all inputs except the first.
+    // - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
+    //   when you're supposed to ANDNOT...
+    let initial_counters = [0, u32::MAX as u64, i32::MAX as u64];
+    for counter in initial_counters {
+        dbg!(counter);
+
+        // 31 (16 + 8 + 4 + 2 + 1) outputs
+        const OUTPUT_SIZE: usize = 31 * BLOCK_LEN;
+
+        let mut portable_out = [0u8; OUTPUT_SIZE];
+        for (i, out_block) in portable_out.chunks_exact_mut(BLOCK_LEN).enumerate() {
+            unsafe {
+                crate::ffi::blake3_compress_xof_portable(
+                    cv.as_ptr(),
+                    block.as_ptr(),
+                    block_len as u8,
+                    counter + i as u64,
+                    flags,
+                    out_block.as_mut_ptr(),
+                );
+            }
+        }
+
+        let mut test_out = [0u8; OUTPUT_SIZE];
+        unsafe {
+            xof_many_function(
+                cv.as_ptr(),
+                block.as_ptr(),
+                block_len as u8,
+                counter,
+                flags,
+                test_out.as_mut_ptr(),
+                OUTPUT_SIZE / BLOCK_LEN,
+            );
+        }
+
+        assert_eq!(portable_out, test_out);
+    }
+
+    // Test that xof_many doesn't write more blocks than requested. Note that the current assembly
+    // implementation always outputs at least one block, so we don't test the zero case.
+    for block_count in 1..=32 {
+        let mut array = [0; BLOCK_LEN * 33];
+        let output_start = 17;
+        let output_len = block_count * BLOCK_LEN;
+        let output_end = output_start + output_len;
+        let output = &mut array[output_start..output_end];
+        unsafe {
+            xof_many_function(
+                cv.as_ptr(),
+                block.as_ptr(),
+                block_len as u8,
+                0,
+                flags,
+                output.as_mut_ptr(),
+                block_count,
+            );
+        }
+        for i in 0..array.len() {
+            if i < output_start || output_end <= i {
+                assert_eq!(0, array[i], "index {i}");
+            }
+        }
+    }
+}
+
+#[test]
+#[cfg(unix)]
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn test_xof_many_avx512() {
+    if !crate::avx512_detected() {
+        return;
+    }
+    test_xof_many_fn(crate::ffi::x86::blake3_xof_many_avx512);
+}
+
+#[test]
+fn test_compare_reference_impl() {
+    const OUT: usize = 303; // more than 64, not a multiple of 4
+    let mut input_buf = [0; TEST_CASES_MAX];
+    paint_test_input(&mut input_buf);
+    for &case in TEST_CASES {
+        let input = &input_buf[..case];
+        dbg!(case);
+
+        // regular
+        {
+            let mut reference_hasher = reference_impl::Hasher::new();
+            reference_hasher.update(input);
+            let mut expected_out = [0; OUT];
+            reference_hasher.finalize(&mut expected_out);
+
+            let mut test_hasher = crate::Hasher::new();
+            test_hasher.update(input);
+            let mut test_out = [0; OUT];
+            test_hasher.finalize(&mut test_out);
+            assert_eq!(test_out[..], expected_out[..]);
+
+            #[cfg(feature = "tbb")]
+            {
+                let mut tbb_hasher = crate::Hasher::new();
+                tbb_hasher.update_tbb(input);
+                let mut tbb_out = [0; OUT];
+                tbb_hasher.finalize(&mut tbb_out);
+                assert_eq!(tbb_out[..], expected_out[..]);
+            }
+        }
+
+        // keyed
+        {
+            let mut reference_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
+            reference_hasher.update(input);
+            let mut expected_out = [0; OUT];
+            reference_hasher.finalize(&mut expected_out);
+
+            let mut test_hasher = crate::Hasher::new_keyed(&TEST_KEY);
+            test_hasher.update(input);
+            let mut test_out = [0; OUT];
+            test_hasher.finalize(&mut test_out);
+            assert_eq!(test_out[..], expected_out[..]);
+
+            #[cfg(feature = "tbb")]
+            {
+                let mut tbb_hasher = crate::Hasher::new_keyed(&TEST_KEY);
+                tbb_hasher.update_tbb(input);
+                let mut tbb_out = [0; OUT];
+                tbb_hasher.finalize(&mut tbb_out);
+                assert_eq!(tbb_out[..], expected_out[..]);
+            }
+        }
+
+        // derive_key
+        {
+            let context = "BLAKE3 2019-12-27 16:13:59 example context (not the test vector one)";
+            let mut reference_hasher = reference_impl::Hasher::new_derive_key(context);
+            reference_hasher.update(input);
+            let mut expected_out = [0; OUT];
+            reference_hasher.finalize(&mut expected_out);
+
+            // the regular C string API
+            let mut test_hasher = crate::Hasher::new_derive_key(context);
+            test_hasher.update(input);
+            let mut test_out = [0; OUT];
+            test_hasher.finalize(&mut test_out);
+            assert_eq!(test_out[..], expected_out[..]);
+
+            // the raw bytes API
+            let mut test_hasher_raw = crate::Hasher::new_derive_key_raw(context.as_bytes());
+            test_hasher_raw.update(input);
+            let mut test_out_raw = [0; OUT];
+            test_hasher_raw.finalize(&mut test_out_raw);
+            assert_eq!(test_out_raw[..], expected_out[..]);
+
+            #[cfg(feature = "tbb")]
+            {
+                let mut tbb_hasher = crate::Hasher::new_derive_key(context);
+                tbb_hasher.update_tbb(input);
+                let mut tbb_out = [0; OUT];
+                tbb_hasher.finalize(&mut tbb_out);
+                assert_eq!(tbb_out[..], expected_out[..]);
+            }
+        }
+    }
+}
+
+fn reference_hash(input: &[u8]) -> [u8; OUT_LEN] {
+    let mut hasher = reference_impl::Hasher::new();
+    hasher.update(input);
+    let mut bytes = [0; OUT_LEN];
+    hasher.finalize(&mut bytes);
+    bytes // already `[u8; OUT_LEN]`; `.into()` here would be an identity conversion
+}
+
+#[test]
+fn test_compare_update_multiple() {
+    // Don't use all the long test cases here, since that's unnecessarily slow
+    // in debug mode.
+    let mut short_test_cases = TEST_CASES;
+    while *short_test_cases.last().unwrap() > 4 * CHUNK_LEN {
+        short_test_cases = &short_test_cases[..short_test_cases.len() - 1];
+    }
+    assert_eq!(*short_test_cases.last().unwrap(), 4 * CHUNK_LEN);
+
+    let mut input_buf = [0; 2 * TEST_CASES_MAX];
+    paint_test_input(&mut input_buf);
+
+    for &first_update in short_test_cases {
+        dbg!(first_update);
+        let first_input = &input_buf[..first_update];
+        let mut test_hasher = crate::Hasher::new();
+        test_hasher.update(first_input);
+
+        for &second_update in short_test_cases {
+            dbg!(second_update);
+            let second_input = &input_buf[first_update..][..second_update];
+            let total_input = &input_buf[..first_update + second_update];
+
+            // Clone the hasher with first_update bytes already written, so
+            // that the next iteration can reuse it.
+            let mut test_hasher = test_hasher.clone();
+            test_hasher.update(second_input);
+            let mut test_out = [0; OUT_LEN];
+            test_hasher.finalize(&mut test_out);
+
+            let expected = reference_hash(total_input);
+            assert_eq!(expected, test_out);
+        }
+    }
+}
+
+#[test]
+fn test_fuzz_hasher() {
+    const INPUT_MAX: usize = 4 * CHUNK_LEN;
+    let mut input_buf = [0; 3 * INPUT_MAX];
+    paint_test_input(&mut input_buf);
+
+    // Don't do too many iterations in debug mode, to keep the tests under a
+    // second or so. CI should run tests in release mode also, where 100x more
+    // iterations run. The counts are hard-coded; there is no runtime override.
+    let num_tests = if cfg!(debug_assertions) { 100 } else { 10_000 };
+
+    // Use a fixed RNG seed for reproducibility.
+    let mut rng = rand_chacha::ChaCha8Rng::from_seed([1; 32]);
+    for _num_test in 0..num_tests {
+        dbg!(_num_test);
+        let mut hasher = crate::Hasher::new();
+        let mut total_input = 0;
+        // For each test, write 3 inputs of random length.
+        for _ in 0..3 {
+            let input_len = rng.random_range(0..INPUT_MAX + 1);
+            dbg!(input_len);
+            let input = &input_buf[total_input..][..input_len];
+            hasher.update(input);
+            total_input += input_len;
+        }
+        let expected = reference_hash(&input_buf[..total_input]);
+        let mut test_out = [0; 32];
+        hasher.finalize(&mut test_out);
+        assert_eq!(expected, test_out);
+    }
+}
+
+#[test]
+fn test_finalize_seek() {
+    let mut expected = [0; 1000];
+    {
+        let mut reference_hasher = reference_impl::Hasher::new();
+        reference_hasher.update(b"foobarbaz");
+        reference_hasher.finalize(&mut expected);
+    }
+
+    let mut test_hasher = crate::Hasher::new();
+    test_hasher.update(b"foobarbaz");
+
+    let mut out = [0; 103];
+    for &seek in &[0, 1, 7, 59, 63, 64, 65, 501, expected.len() - out.len()] {
+        dbg!(seek);
+        test_hasher.finalize_seek(seek as u64, &mut out);
+        assert_eq!(&expected[seek..][..out.len()], &out[..]);
+    }
+}
+
+#[test]
+fn test_reset() {
+    {
+        let mut hasher = crate::Hasher::new();
+        hasher.update(&[42; 3 * CHUNK_LEN + 7]);
+        hasher.reset();
+        hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut output = [0; 32];
+        hasher.finalize(&mut output);
+
+        let mut reference_hasher = reference_impl::Hasher::new();
+        reference_hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut reference_hash = [0; 32];
+        reference_hasher.finalize(&mut reference_hash);
+
+        assert_eq!(reference_hash, output);
+    }
+    {
+        let key = &[99; 32];
+        let mut hasher = crate::Hasher::new_keyed(key);
+        hasher.update(&[42; 3 * CHUNK_LEN + 7]);
+        hasher.reset();
+        hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut output = [0; 32];
+        hasher.finalize(&mut output);
+
+        let mut reference_hasher = reference_impl::Hasher::new_keyed(key);
+        reference_hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut reference_hash = [0; 32];
+        reference_hasher.finalize(&mut reference_hash);
+
+        assert_eq!(reference_hash, output);
+    }
+    {
+        let context = "BLAKE3 2020-02-12 10:20:58 reset test";
+        let mut hasher = crate::Hasher::new_derive_key(context);
+        hasher.update(&[42; 3 * CHUNK_LEN + 7]);
+        hasher.reset();
+        hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut output = [0; 32];
+        hasher.finalize(&mut output);
+
+        let mut reference_hasher = reference_impl::Hasher::new_derive_key(context);
+        reference_hasher.update(&[42; CHUNK_LEN + 3]);
+        let mut reference_hash = [0; 32];
+        reference_hasher.finalize(&mut reference_hash);
+
+        assert_eq!(reference_hash, output);
+    }
+}
--- a/external/blake3/blake3_dispatch.c
+++ b/external/blake3/blake3_dispatch.c
@@ -0,0 +1,332 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "blake3_impl.h"
+
+#if defined(_MSC_VER)
+#include <Windows.h>
+#endif
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <immintrin.h>
+#else
+#undef IS_X86 /* Unimplemented! */
+#endif
+#endif
+
+#if !defined(BLAKE3_ATOMICS) /* A value supplied by the build (e.g. -DBLAKE3_ATOMICS=1) wins; otherwise auto-detect. */
+#if defined(__has_include)
+#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
+#define BLAKE3_ATOMICS 1 /* <stdatomic.h> is present and the compiler is not MSVC */
+#else
+#define BLAKE3_ATOMICS 0
+#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
+#else
+#define BLAKE3_ATOMICS 0 /* no __has_include, so the header cannot be probed: default to off */
+#endif /* defined(__has_include) */
+#endif /* BLAKE3_ATOMICS */
+
+#if BLAKE3_ATOMICS /* Selects the storage type and accessors used for the cached feature word g_cpu_features. */
+#define ATOMIC_INT _Atomic int
+#define ATOMIC_LOAD(x) x /* plain read of the _Atomic object; NOTE: x is not parenthesized, pass a simple lvalue */
+#define ATOMIC_STORE(x, y) x = y /* plain assignment; NOTE: expands to an unparenthesized assignment expression */
+#elif defined(_MSC_VER)
+#define ATOMIC_INT LONG
+#define ATOMIC_LOAD(x) InterlockedOr(&x, 0) /* OR-ing with 0 does not change the stored value */
+#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
+#else
+#define ATOMIC_INT int /* fallback: ordinary int, no atomicity (see the TSAN note in get_cpu_features) */
+#define ATOMIC_LOAD(x) x
+#define ATOMIC_STORE(x, y) x = y
+#endif
+
+#define MAYBE_UNUSED(x) (void)((x)) /* Evaluates x and discards the result; applied to `features` in the dispatchers below. */
+
+#if defined(IS_X86)
+static uint64_t xgetbv(void) { // Executes XGETBV with index 0 and returns the 64-bit value it reports.
+#if defined(_MSC_VER)
+  return _xgetbv(0);
+#else
+  uint32_t eax = 0, edx = 0;
+  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); // in: ECX = 0; out: EAX and EDX
+  return ((uint64_t)edx << 32) | eax; // EDX supplies the high 32 bits, EAX the low 32 bits
+#endif
+}
+
+static void cpuid(uint32_t out[4], uint32_t id) { // Runs CPUID with EAX = id; out[0..3] receive EAX, EBX, ECX, EDX.
+#if defined(_MSC_VER)
+  __cpuid((int *)out, id);
+#elif defined(__i386__) || defined(_M_IX86)
+  __asm__ __volatile__("movl %%ebx, %1\n" // 32-bit x86: stash EBX in the scratch register bound to out[1]
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n" // swap: out[1] receives CPUID's EBX, EBX gets its stashed value back
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) // EBX is never named as an operand (presumably it may be reserved, e.g. for PIC; confirm)
+                       : "a"(id));
+#else
+  __asm__ __volatile__("cpuid\n"
+                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id));
+#endif
+}
+
+static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { // Like cpuid(), but also passes sid in ECX (the sub-leaf).
+#if defined(_MSC_VER)
+  __cpuidex((int *)out, id, sid);
+#elif defined(__i386__) || defined(_M_IX86)
+  __asm__ __volatile__("movl %%ebx, %1\n" // same EBX save/restore sequence as in cpuid()
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n"
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id), "c"(sid)); // inputs: EAX = id, ECX = sid
+#else
+  __asm__ __volatile__("cpuid\n"
+                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                       : "a"(id), "c"(sid));
+#endif
+}
+
+#endif
+
+enum cpu_feature { /* One bit per x86 SIMD extension; get_cpu_features() returns an OR of these. */
+  SSE2 = 1 << 0,
+  SSSE3 = 1 << 1,
+  SSE41 = 1 << 2,
+  AVX = 1 << 3,
+  AVX2 = 1 << 4,
+  AVX512F = 1 << 5,
+  AVX512VL = 1 << 6,
+  /* ... */
+  UNDEFINED = 1 << 30 /* sentinel: initial value of g_cpu_features, meaning detection has not stored a result yet */
+};
+
+#if !defined(BLAKE3_TESTING)
+static /* Allow the variable to be controlled manually for testing */
+#endif
+    ATOMIC_INT g_cpu_features = UNDEFINED; /* cached detection result; stays UNDEFINED until get_cpu_features() stores one */
+
+#if !defined(BLAKE3_TESTING)
+static /* file-local unless BLAKE3_TESTING is defined */
+#endif
+    enum cpu_feature
+    get_cpu_features(void) { /* Returns the cpu_feature bit set, running CPUID-based detection when none is cached. */
+
+  /* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
+  enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
+  if (features != UNDEFINED) {
+    return features; /* fast path: an earlier call already stored the result */
+  } else {
+#if defined(IS_X86)
+    uint32_t regs[4] = {0};
+    uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
+    (void)edx; /* edx is only read in the non-x86-64 branch below */
+    features = 0;
+    cpuid(regs, 0);
+    const int max_id = *eax; /* EAX of leaf 0; checked against 7 before leaf 7 is queried */
+    cpuid(regs, 1);
+#if defined(__amd64__) || defined(_M_X64)
+    features |= SSE2; /* set unconditionally on x86-64 builds; no CPUID bit is consulted */
+#else
+    if (*edx & (1UL << 26))
+      features |= SSE2;
+#endif
+    if (*ecx & (1UL << 9))
+      features |= SSSE3;
+    if (*ecx & (1UL << 19))
+      features |= SSE41;
+
+    if (*ecx & (1UL << 27)) { // OSXSAVE
+      const uint64_t mask = xgetbv();
+      if ((mask & 6) == 6) { // SSE and AVX states
+        if (*ecx & (1UL << 28))
+          features |= AVX;
+        if (max_id >= 7) {
+          cpuidex(regs, 7, 0); /* overwrites regs: *ebx and *ecx now hold leaf 7, sub-leaf 0 */
+          if (*ebx & (1UL << 5))
+            features |= AVX2;
+          if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
+            if (*ebx & (1UL << 31))
+              features |= AVX512VL;
+            if (*ebx & (1UL << 16))
+              features |= AVX512F;
+          }
+        }
+      }
+    }
+    ATOMIC_STORE(g_cpu_features, features); /* cache so later calls return from the fast path above */
+    return features;
+#else
+    /* How to detect NEON? */
+    return 0; /* non-x86 builds report no features and leave g_cpu_features at UNDEFINED */
+#endif
+  }
+}
+
+void blake3_compress_in_place(uint32_t cv[8], // Dispatcher: forwards all arguments to the first enabled, CPU-supported backend.
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features); // features is unreferenced when every BLAKE3_NO_* macro below is defined
+#if !defined(BLAKE3_NO_AVX512)
+  if (features & AVX512VL) { // preference order: AVX-512, then SSE4.1, then SSE2
+    blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
+    return;
+  }
+#endif
+#endif
+  blake3_compress_in_place_portable(cv, block, block_len, counter, flags); // fallback when no SIMD branch returned
+}
+
+void blake3_compress_xof(const uint32_t cv[8], // Dispatcher: same backend selection as blake3_compress_in_place, with a separate out buffer.
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags,
+                         uint8_t out[64]) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features); // features is unreferenced when every BLAKE3_NO_* macro below is defined
+#if !defined(BLAKE3_NO_AVX512)
+  if (features & AVX512VL) { // preference order: AVX-512, then SSE4.1, then SSE2
+    blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
+    return;
+  }
+#endif
+#endif
+  blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); // fallback when no SIMD branch returned
+}
+
+
+void blake3_xof_many(const uint32_t cv[8], // Produces `outblocks` XOF blocks; a no-op when outblocks is 0.
+                     const uint8_t block[BLAKE3_BLOCK_LEN],
+                     uint8_t block_len, uint64_t counter, uint8_t flags,
+                     uint8_t out[64], size_t outblocks) { // NOTE: the fallback loop writes 64 * outblocks bytes to out, not just 64
+  if (outblocks == 0) {
+    // The current assembly implementation always outputs at least 1 block.
+    return;
+  }
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(_WIN32) && !defined(BLAKE3_NO_AVX512)
+  if (features & AVX512VL) { // the multi-block AVX-512 routine is only used on non-Windows builds
+    blake3_xof_many_avx512(cv, block, block_len, counter, flags, out, outblocks);
+    return;
+  }
+#endif
+#endif
+  for(size_t i = 0; i < outblocks; ++i) { // fallback: one blake3_compress_xof call per block, counter + i into out + 64*i
+    blake3_compress_xof(cv, block, block_len, counter + i, flags, out + 64*i);
+  }
+}
+
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, // Dispatcher: hands the whole batch, unchanged, to one backend.
+                      size_t blocks, const uint32_t key[8], uint64_t counter,
+                      bool increment_counter, uint8_t flags,
+                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { // requires both AVX-512 bits, unlike the single-block dispatchers
+    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+  if (features & AVX2) {
+    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+                          increment_counter, flags, flags_start, flags_end,
+                          out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+                           increment_counter, flags, flags_start, flags_end,
+                           out);
+    return;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+                          increment_counter, flags, flags_start, flags_end,
+                          out);
+    return;
+  }
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
+                        increment_counter, flags, flags_start, flags_end, out);
+  return; // with NEON compiled in, the portable call below is never reached
+#endif
+
+  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+                            increment_counter, flags, flags_start, flags_end,
+                            out);
+}
+
+// The dynamically detected SIMD degree of the current platform.
+size_t blake3_simd_degree(void) { // Returns 16 (AVX-512F+VL), 8 (AVX2), 4 (SSE4.1, SSE2 or NEON) or 1 (none of these).
+#if defined(IS_X86)
+  const enum cpu_feature features = get_cpu_features();
+  MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+  if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { // same condition blake3_hash_many uses for its AVX-512 path
+    return 16;
+  }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+  if (features & AVX2) {
+    return 8;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+  if (features & SSE41) {
+    return 4;
+  }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+  if (features & SSE2) {
+    return 4;
+  }
+#endif
+#endif
+#if BLAKE3_USE_NEON == 1
+  return 4;
+#endif
+  return 1;
+}
--- a/external/blake3/blake3_impl.h
+++ b/external/blake3/blake3_impl.h
@@ -0,0 +1,333 @@
+#ifndef BLAKE3_IMPL_H
+#define BLAKE3_IMPL_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "blake3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Internal flag bits; each enumerator is a distinct single bit (1 << 0 through 1 << 6).
+enum blake3_flags {
+  CHUNK_START         = 1 << 0,
+  CHUNK_END           = 1 << 1,
+  PARENT              = 1 << 2,
+  ROOT                = 1 << 3,
+  KEYED_HASH          = 1 << 4,
+  DERIVE_KEY_CONTEXT  = 1 << 5,
+  DERIVE_KEY_MATERIAL = 1 << 6,
+};
+
+// This C implementation tries to support recent versions of GCC, Clang, and
+// MSVC.
+#if defined(_MSC_VER)
+#define INLINE static __forceinline
+#else
+#define INLINE static inline __attribute__((always_inline))
+#endif
+
+#ifdef __cplusplus
+#define NOEXCEPT noexcept
+#else
+#define NOEXCEPT
+#endif
+
+#if (defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
+#define IS_X86
+#define IS_X86_64
+#endif
+
+#if defined(__i386__) || defined(_M_IX86)
+#define IS_X86
+#define IS_X86_32
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+#define IS_AARCH64
+#endif
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#endif
+
+#if !defined(BLAKE3_USE_NEON)
+  // If BLAKE3_USE_NEON was not set manually, autodetect: 1 only on little-endian AArch64, otherwise 0.
+  #if defined(IS_AARCH64)
+    #if defined(__ARM_BIG_ENDIAN)
+      #define BLAKE3_USE_NEON 0 // big-endian AArch64: blake3_neon.c refuses to build there (#error)
+    #else
+      #define BLAKE3_USE_NEON 1
+    #endif
+  #else
+    #define BLAKE3_USE_NEON 0
+  #endif
+#endif
+
+#if defined(IS_X86)
+#define MAX_SIMD_DEGREE 16 // compile-time upper bound; 16 is what blake3_simd_degree() returns for AVX-512
+#elif BLAKE3_USE_NEON == 1
+#define MAX_SIMD_DEGREE 4
+#else
+#define MAX_SIMD_DEGREE 1
+#endif
+
+// There are some places where we want a static size that's equal to the
+// MAX_SIMD_DEGREE, but also at least 2.
+#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
+
+static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, // Eight 32-bit initialization constants.
+                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
+                               0x1F83D9ABUL, 0x5BE0CD19UL}; // blake3_hash4_neon seeds state words 8..11 from IV[0..3]
+
+static const uint8_t MSG_SCHEDULE[7][16] = { // Row r lists the 16 message-word indices in the order round r consumes them.
+    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, // round 0 uses the words in natural order
+    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
+    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
+    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
+    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
+    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
+};
+
+/* Returns the zero-based index (0..63) of the most significant set bit of x. */
+/* x is assumed to be nonzero; the intrinsic branches are not guarded for 0.   */
+static unsigned int highest_one(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+  return 63 ^ (unsigned int)__builtin_clzll(x); // for a leading-zero count in 0..63, 63 ^ n equals 63 - n
+#elif defined(_MSC_VER) && defined(IS_X86_64)
+  unsigned long index;
+  _BitScanReverse64(&index, x);
+  return index;
+#elif defined(_MSC_VER) && defined(IS_X86_32)
+  if(x >> 32) { // 32-bit MSVC: scan the upper word first, fall back to the lower word
+    unsigned long index;
+    _BitScanReverse(&index, (unsigned long)(x >> 32));
+    return 32 + index;
+  } else {
+    unsigned long index;
+    _BitScanReverse(&index, (unsigned long)x);
+    return index;
+  }
+#else
+  unsigned int c = 0; // portable fallback: binary search, narrowing by 32, 16, 8, 4, 2, then 1 bit
+  if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
+  if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
+  if(x & 0x000000000000ff00ULL) { x >>=  8; c +=  8; }
+  if(x & 0x00000000000000f0ULL) { x >>=  4; c +=  4; }
+  if(x & 0x000000000000000cULL) { x >>=  2; c +=  2; }
+  if(x & 0x0000000000000002ULL) {           c +=  1; }
+  return c;
+#endif
+}
+
+// Count the number of 1 bits in x (0 for x == 0).
+INLINE unsigned int popcnt(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+  return (unsigned int)__builtin_popcountll(x);
+#else
+  unsigned int count = 0;
+  while (x != 0) { // one iteration per set bit
+    count += 1;
+    x &= x - 1; // clears the lowest set bit of x
+  }
+  return count;
+#endif
+}
+
+// Largest power of two less than or equal to x. As a special case, returns 1
+// when x is 0: OR-ing in bit 0 keeps highest_one's argument nonzero and leaves the top bit of any x >= 1 unchanged.
+INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
+  return 1ULL << highest_one(x | 1);
+}
+
+INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; } // low 32 bits of the 64-bit counter
+
+INLINE uint32_t counter_high(uint64_t counter) { // high 32 bits of the 64-bit counter
+  return (uint32_t)(counter >> 32);
+}
+
+INLINE uint32_t load32(const void *src) { // Reads 4 bytes from src as a little-endian 32-bit value.
+  const uint8_t *p = (const uint8_t *)src; // byte-wise access: result does not depend on host byte order
+  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
+         ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
+}
+
+INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], // Decodes the key bytes into eight words via load32.
+                           uint32_t key_words[8]) {
+  key_words[0] = load32(&key[0 * 4]); // word i comes from bytes key[4*i .. 4*i + 3]
+  key_words[1] = load32(&key[1 * 4]);
+  key_words[2] = load32(&key[2 * 4]);
+  key_words[3] = load32(&key[3 * 4]);
+  key_words[4] = load32(&key[4 * 4]);
+  key_words[5] = load32(&key[5 * 4]);
+  key_words[6] = load32(&key[6 * 4]);
+  key_words[7] = load32(&key[7 * 4]);
+}
+
+INLINE void load_block_words(const uint8_t block[BLAKE3_BLOCK_LEN], // Decodes the first 64 bytes of block into 16 words via load32.
+                             uint32_t block_words[16]) {
+  for (size_t i = 0; i < 16; i++) {
+      block_words[i] = load32(&block[i * 4]); // word i comes from bytes block[4*i .. 4*i + 3]
+  }
+}
+
+INLINE void store32(void *dst, uint32_t w) { // Writes w to dst as 4 bytes, least significant byte first.
+  uint8_t *p = (uint8_t *)dst; // byte-wise access: output does not depend on host byte order
+  p[0] = (uint8_t)(w >> 0);
+  p[1] = (uint8_t)(w >> 8);
+  p[2] = (uint8_t)(w >> 16);
+  p[3] = (uint8_t)(w >> 24);
+}
+
+INLINE void store_cv_words(uint8_t bytes_out[32], const uint32_t cv_words[8]) { // Serializes 8 words into 32 bytes via store32; cv_words is read-only.
+  store32(&bytes_out[0 * 4], cv_words[0]); // word i goes to bytes_out[4*i .. 4*i + 3]
+  store32(&bytes_out[1 * 4], cv_words[1]);
+  store32(&bytes_out[2 * 4], cv_words[2]);
+  store32(&bytes_out[3 * 4], cv_words[3]);
+  store32(&bytes_out[4 * 4], cv_words[4]);
+  store32(&bytes_out[5 * 4], cv_words[5]);
+  store32(&bytes_out[6 * 4], cv_words[6]);
+  store32(&bytes_out[7 * 4], cv_words[7]);
+}
+
+void blake3_compress_in_place(uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags);
+
+void blake3_compress_xof(const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags,
+                         uint8_t out[64]);
+
+void blake3_xof_many(const uint32_t cv[8],
+                     const uint8_t block[BLAKE3_BLOCK_LEN],
+                     uint8_t block_len, uint64_t counter, uint8_t flags,
+                     uint8_t out[64], size_t outblocks);
+
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
+                      size_t blocks, const uint32_t key[8], uint64_t counter,
+                      bool increment_counter, uint8_t flags,
+                      uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+size_t blake3_simd_degree(void);
+
+BLAKE3_PRIVATE size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
+                                                   const uint32_t key[8],
+                                                   uint64_t chunk_counter, uint8_t flags,
+                                                   uint8_t *out, bool use_tbb);
+
+#if defined(BLAKE3_USE_TBB)
+BLAKE3_PRIVATE void blake3_compress_subtree_wide_join_tbb(
+    // shared params
+    const uint32_t key[8], uint8_t flags, bool use_tbb,
+    // left-hand side params
+    const uint8_t *l_input, size_t l_input_len, uint64_t l_chunk_counter,
+    uint8_t *l_cvs, size_t *l_n,
+    // right-hand side params
+    const uint8_t *r_input, size_t r_input_len, uint64_t r_chunk_counter,
+    uint8_t *r_cvs, size_t *r_n) NOEXCEPT;
+#endif
+
+// Declarations for implementation-specific functions.
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags);
+
+void blake3_compress_xof_portable(const uint32_t cv[8],
+                                  const uint8_t block[BLAKE3_BLOCK_LEN],
+                                  uint8_t block_len, uint64_t counter,
+                                  uint8_t flags, uint8_t out[64]);
+
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out);
+
+#if defined(IS_X86)
+#if !defined(BLAKE3_NO_SSE2)
+void blake3_compress_in_place_sse2(uint32_t cv[8],
+                                   const uint8_t block[BLAKE3_BLOCK_LEN],
+                                   uint8_t block_len, uint64_t counter,
+                                   uint8_t flags);
+void blake3_compress_xof_sse2(const uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags, uint8_t out[64]);
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+                                    const uint8_t block[BLAKE3_BLOCK_LEN],
+                                    uint8_t block_len, uint64_t counter,
+                                    uint8_t flags);
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+                               const uint8_t block[BLAKE3_BLOCK_LEN],
+                               uint8_t block_len, uint64_t counter,
+                               uint8_t flags, uint8_t out[64]);
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX512)
+void blake3_compress_in_place_avx512(uint32_t cv[8],
+                                     const uint8_t block[BLAKE3_BLOCK_LEN],
+                                     uint8_t block_len, uint64_t counter,
+                                     uint8_t flags);
+
+void blake3_compress_xof_avx512(const uint32_t cv[8],
+                                const uint8_t block[BLAKE3_BLOCK_LEN],
+                                uint8_t block_len, uint64_t counter,
+                                uint8_t flags, uint8_t out[64]);
+
+void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
+                             size_t blocks, const uint32_t key[8],
+                             uint64_t counter, bool increment_counter,
+                             uint8_t flags, uint8_t flags_start,
+                             uint8_t flags_end, uint8_t *out);
+
+#if !defined(_WIN32)
+void blake3_xof_many_avx512(const uint32_t cv[8],
+                            const uint8_t block[BLAKE3_BLOCK_LEN],
+                            uint8_t block_len, uint64_t counter, uint8_t flags,
+                            uint8_t* out, size_t outblocks);
+#endif
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLAKE3_IMPL_H */
--- a/external/blake3/blake3_neon.c
+++ b/external/blake3/blake3_neon.c
@@ -0,0 +1,366 @@
+#include "blake3_impl.h"
+
+#include <arm_neon.h>
+
+#ifdef __ARM_BIG_ENDIAN
+#error "This implementation only supports little-endian ARM."
+// It might be that all we need for big-endian support here is to get the loads
+// and stores right, but step zero would be finding a way to test it in CI.
+#endif
+
+INLINE uint32x4_t loadu_128(const uint8_t src[16]) { // Loads 16 bytes from src and views them as four 32-bit lanes.
+  // vld1q_u32 has alignment requirements. Don't use it.
+  return vreinterpretq_u32_u8(vld1q_u8(src));
+}
+
+INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) { // Stores the four 32-bit lanes of src to dest as 16 bytes.
+  // vst1q_u32 has alignment requirements. Don't use it.
+  vst1q_u8(dest, vreinterpretq_u8_u32(src));
+}
+
+INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) { // Lane-wise 32-bit addition.
+  return vaddq_u32(a, b);
+}
+
+INLINE uint32x4_t xor_128(uint32x4_t a, uint32x4_t b) { // Bitwise XOR of the two vectors.
+  return veorq_u32(a, b);
+}
+
+INLINE uint32x4_t set1_128(uint32_t x) { return vld1q_dup_u32(&x); } // Vector with x replicated in all four lanes.
+
+INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { // Vector whose lanes 0..3 are a, b, c, d.
+  uint32_t array[4] = {a, b, c, d}; // staged in a local array so one vector load can pick it up
+  return vld1q_u32(array);
+}
+
+INLINE uint32x4_t rot16_128(uint32x4_t x) { // Rotates each 32-bit lane right by 16 bits.
+  // The straightforward implementation would be two shifts and an or, but that's
+  // slower on microarchitectures we've tested. See
+  // https://github.com/BLAKE3-team/BLAKE3/pull/319.
+  // return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16));
+  return vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x))); // swaps the two 16-bit halves of every 32-bit lane
+}
+
+INLINE uint32x4_t rot12_128(uint32x4_t x) { // Rotates each 32-bit lane right by 12 bits.
+  // See comment in rot16_128.
+  // return vorrq_u32(vshrq_n_u32(x, 12), vshlq_n_u32(x, 32 - 12));
+  return vsriq_n_u32(vshlq_n_u32(x, 32-12), x, 12); // x << 20 supplies the top 12 bits, the shift-right-insert fills in x >> 12
+}
+
+INLINE uint32x4_t rot8_128(uint32x4_t x) { // Rotates each 32-bit lane right by 8 bits.
+  // See comment in rot16_128.
+  // return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8));
+#if defined(__clang__)
+  return vreinterpretq_u32_u8(__builtin_shufflevector(vreinterpretq_u8_u32(x), vreinterpretq_u8_u32(x), 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12));
+#elif __GNUC__ * 10000 + __GNUC_MINOR__ * 100 >=40700
+  static const uint8x16_t r8 = {1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12}; // same byte permutation as the clang branch: output byte i of a lane is input byte (i + 1) % 4
+  return vreinterpretq_u32_u8(__builtin_shuffle(vreinterpretq_u8_u32(x), vreinterpretq_u8_u32(x), r8));
+#else // neither clang nor GCC >= 4.7: shift-and-insert form, as in rot12_128
+  return vsriq_n_u32(vshlq_n_u32(x, 32-8), x, 8);
+#endif
+}
+
+INLINE uint32x4_t rot7_128(uint32x4_t x) { // Rotates each 32-bit lane right by 7 bits.
+  // See comment in rot16_128.
+  // return vorrq_u32(vshrq_n_u32(x, 7), vshlq_n_u32(x, 32 - 7));
+  return vsriq_n_u32(vshlq_n_u32(x, 32-7), x, 7); // x << 25 supplies the top 7 bits, the shift-right-insert fills in x >> 7
+}
+
+// TODO: compress_neon
+
+// TODO: hash2_neon
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash4_neon
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void round_fn4(uint32x4_t v[16], uint32x4_t m[16], size_t r) { // One round: updates state v in place using message vectors m in MSG_SCHEDULE[r] order; m is only read.
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); // first half: mixes the four columns (v[i], v[i+4], v[i+8], v[i+12]), i = 0..3
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = add_128(v[0], v[4]);
+  v[1] = add_128(v[1], v[5]);
+  v[2] = add_128(v[2], v[6]);
+  v[3] = add_128(v[3], v[7]);
+  v[12] = xor_128(v[12], v[0]);
+  v[13] = xor_128(v[13], v[1]);
+  v[14] = xor_128(v[14], v[2]);
+  v[15] = xor_128(v[15], v[3]);
+  v[12] = rot16_128(v[12]);
+  v[13] = rot16_128(v[13]);
+  v[14] = rot16_128(v[14]);
+  v[15] = rot16_128(v[15]);
+  v[8] = add_128(v[8], v[12]);
+  v[9] = add_128(v[9], v[13]);
+  v[10] = add_128(v[10], v[14]);
+  v[11] = add_128(v[11], v[15]);
+  v[4] = xor_128(v[4], v[8]);
+  v[5] = xor_128(v[5], v[9]);
+  v[6] = xor_128(v[6], v[10]);
+  v[7] = xor_128(v[7], v[11]);
+  v[4] = rot12_128(v[4]);
+  v[5] = rot12_128(v[5]);
+  v[6] = rot12_128(v[6]);
+  v[7] = rot12_128(v[7]);
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); // columns again, now with the odd-numbered schedule entries 1, 3, 5, 7
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = add_128(v[0], v[4]);
+  v[1] = add_128(v[1], v[5]);
+  v[2] = add_128(v[2], v[6]);
+  v[3] = add_128(v[3], v[7]);
+  v[12] = xor_128(v[12], v[0]);
+  v[13] = xor_128(v[13], v[1]);
+  v[14] = xor_128(v[14], v[2]);
+  v[15] = xor_128(v[15], v[3]);
+  v[12] = rot8_128(v[12]);
+  v[13] = rot8_128(v[13]);
+  v[14] = rot8_128(v[14]);
+  v[15] = rot8_128(v[15]);
+  v[8] = add_128(v[8], v[12]);
+  v[9] = add_128(v[9], v[13]);
+  v[10] = add_128(v[10], v[14]);
+  v[11] = add_128(v[11], v[15]);
+  v[4] = xor_128(v[4], v[8]);
+  v[5] = xor_128(v[5], v[9]);
+  v[6] = xor_128(v[6], v[10]);
+  v[7] = xor_128(v[7], v[11]);
+  v[4] = rot7_128(v[4]);
+  v[5] = rot7_128(v[5]);
+  v[6] = rot7_128(v[6]);
+  v[7] = rot7_128(v[7]);
+
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); // second half: mixes the diagonals (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14)
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = add_128(v[0], v[5]);
+  v[1] = add_128(v[1], v[6]);
+  v[2] = add_128(v[2], v[7]);
+  v[3] = add_128(v[3], v[4]);
+  v[15] = xor_128(v[15], v[0]);
+  v[12] = xor_128(v[12], v[1]);
+  v[13] = xor_128(v[13], v[2]);
+  v[14] = xor_128(v[14], v[3]);
+  v[15] = rot16_128(v[15]);
+  v[12] = rot16_128(v[12]);
+  v[13] = rot16_128(v[13]);
+  v[14] = rot16_128(v[14]);
+  v[10] = add_128(v[10], v[15]);
+  v[11] = add_128(v[11], v[12]);
+  v[8] = add_128(v[8], v[13]);
+  v[9] = add_128(v[9], v[14]);
+  v[5] = xor_128(v[5], v[10]);
+  v[6] = xor_128(v[6], v[11]);
+  v[7] = xor_128(v[7], v[8]);
+  v[4] = xor_128(v[4], v[9]);
+  v[5] = rot12_128(v[5]);
+  v[6] = rot12_128(v[6]);
+  v[7] = rot12_128(v[7]);
+  v[4] = rot12_128(v[4]);
+  v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); // diagonals again, now with schedule entries 9, 11, 13, 15
+  v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = add_128(v[0], v[5]);
+  v[1] = add_128(v[1], v[6]);
+  v[2] = add_128(v[2], v[7]);
+  v[3] = add_128(v[3], v[4]);
+  v[15] = xor_128(v[15], v[0]);
+  v[12] = xor_128(v[12], v[1]);
+  v[13] = xor_128(v[13], v[2]);
+  v[14] = xor_128(v[14], v[3]);
+  v[15] = rot8_128(v[15]);
+  v[12] = rot8_128(v[12]);
+  v[13] = rot8_128(v[13]);
+  v[14] = rot8_128(v[14]);
+  v[10] = add_128(v[10], v[15]);
+  v[11] = add_128(v[11], v[12]);
+  v[8] = add_128(v[8], v[13]);
+  v[9] = add_128(v[9], v[14]);
+  v[5] = xor_128(v[5], v[10]);
+  v[6] = xor_128(v[6], v[11]);
+  v[7] = xor_128(v[7], v[8]);
+  v[4] = xor_128(v[4], v[9]);
+  v[5] = rot7_128(v[5]);
+  v[6] = rot7_128(v[6]);
+  v[7] = rot7_128(v[7]);
+  v[4] = rot7_128(v[4]);
+}
+
+INLINE void transpose_vecs_128(uint32x4_t vecs[4]) { // In-place transpose of the 4x4 matrix of 32-bit words held in vecs[0..3].
+  // Individually transpose the four 2x2 sub-matrices in each corner.
+  uint32x4x2_t rows01 = vtrnq_u32(vecs[0], vecs[1]);
+  uint32x4x2_t rows23 = vtrnq_u32(vecs[2], vecs[3]);
+
+  // Swap the top-right and bottom-left 2x2s (which just got transposed).
+  vecs[0] =
+      vcombine_u32(vget_low_u32(rows01.val[0]), vget_low_u32(rows23.val[0]));
+  vecs[1] =
+      vcombine_u32(vget_low_u32(rows01.val[1]), vget_low_u32(rows23.val[1]));
+  vecs[2] =
+      vcombine_u32(vget_high_u32(rows01.val[0]), vget_high_u32(rows23.val[0]));
+  vecs[3] =
+      vcombine_u32(vget_high_u32(rows01.val[1]), vget_high_u32(rows23.val[1]));
+}
+
+INLINE void transpose_msg_vecs4(const uint8_t *const *inputs, // Loads one block from each of inputs[0..3] and regroups it by message word.
+                                size_t block_offset, uint32x4_t out[16]) {
+  out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(uint32x4_t)]); // each group of four loads takes the same vector-sized slice from all four inputs
+  out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(uint32x4_t)]);
+  out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(uint32x4_t)]);
+  out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(uint32x4_t)]);
+  out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(uint32x4_t)]);
+  out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(uint32x4_t)]);
+  transpose_vecs_128(&out[0]); // after the four transposes, out[i] holds message word i of inputs 0..3 in lanes 0..3
+  transpose_vecs_128(&out[4]);
+  transpose_vecs_128(&out[8]);
+  transpose_vecs_128(&out[12]);
+}
+
+INLINE void load_counters4(uint64_t counter, bool increment_counter, // Builds the per-lane counter vectors, split into low and high 32-bit halves.
+                           uint32x4_t *out_low, uint32x4_t *out_high) {
+  uint64_t mask = (increment_counter ? ~0 : 0); // all ones when incrementing, so lane i gets counter + i; zero keeps every lane at counter
+  *out_low = set4(
+      counter_low(counter + (mask & 0)), counter_low(counter + (mask & 1)),
+      counter_low(counter + (mask & 2)), counter_low(counter + (mask & 3)));
+  *out_high = set4(
+      counter_high(counter + (mask & 0)), counter_high(counter + (mask & 1)),
+      counter_high(counter + (mask & 2)), counter_high(counter + (mask & 3)));
+}
+
+void blake3_hash4_neon(const uint8_t *const *inputs, size_t blocks, // Hashes `blocks` blocks from each of inputs[0..3] in parallel, one input per vector lane.
+                       const uint32_t key[8], uint64_t counter,
+                       bool increment_counter, uint8_t flags,
+                       uint8_t flags_start, uint8_t flags_end, uint8_t *out) { // out receives 8 vector stores: 32 bytes per input, inputs in order
+  uint32x4_t h_vecs[8] = { // chaining state: h_vecs[i] starts as key[i] in all four lanes
+      set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]),
+      set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]),
+  };
+  uint32x4_t counter_low_vec, counter_high_vec;
+  load_counters4(counter, increment_counter, &counter_low_vec,
+                 &counter_high_vec);
+  uint8_t block_flags = flags | flags_start; // flags_start applies to the first block only (reset at the bottom of the loop)
+
+  for (size_t block = 0; block < blocks; block++) {
+    if (block + 1 == blocks) {
+      block_flags |= flags_end; // last block additionally carries flags_end
+    }
+    uint32x4_t block_len_vec = set1_128(BLAKE3_BLOCK_LEN);
+    uint32x4_t block_flags_vec = set1_128(block_flags);
+    uint32x4_t msg_vecs[16];
+    transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    uint32x4_t v[16] = { // 16-word working state: chaining value, IV[0..3], counter halves, block length, flags
+        h_vecs[0],       h_vecs[1],        h_vecs[2],       h_vecs[3],
+        h_vecs[4],       h_vecs[5],        h_vecs[6],       h_vecs[7],
+        set1_128(IV[0]), set1_128(IV[1]),  set1_128(IV[2]), set1_128(IV[3]),
+        counter_low_vec, counter_high_vec, block_len_vec,   block_flags_vec,
+    };
+    round_fn4(v, msg_vecs, 0); // seven rounds, one per MSG_SCHEDULE row
+    round_fn4(v, msg_vecs, 1);
+    round_fn4(v, msg_vecs, 2);
+    round_fn4(v, msg_vecs, 3);
+    round_fn4(v, msg_vecs, 4);
+    round_fn4(v, msg_vecs, 5);
+    round_fn4(v, msg_vecs, 6);
+    h_vecs[0] = xor_128(v[0], v[8]); // new chaining value: XOR of the low and high halves of the state
+    h_vecs[1] = xor_128(v[1], v[9]);
+    h_vecs[2] = xor_128(v[2], v[10]);
+    h_vecs[3] = xor_128(v[3], v[11]);
+    h_vecs[4] = xor_128(v[4], v[12]);
+    h_vecs[5] = xor_128(v[5], v[13]);
+    h_vecs[6] = xor_128(v[6], v[14]);
+    h_vecs[7] = xor_128(v[7], v[15]);
+
+    block_flags = flags; // drop flags_start for every block after the first
+  }
+
+  transpose_vecs_128(&h_vecs[0]);
+  transpose_vecs_128(&h_vecs[4]);
+  // The first four vecs now contain the first half of each output, and the
+  // second four vecs contain the second half of each output.
+  storeu_128(h_vecs[0], &out[0 * sizeof(uint32x4_t)]); // interleave halves so each input's two halves land next to each other
+  storeu_128(h_vecs[4], &out[1 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[1], &out[2 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[5], &out[3 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[2], &out[4 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[6], &out[5 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[3], &out[6 * sizeof(uint32x4_t)]);
+  storeu_128(h_vecs[7], &out[7 * sizeof(uint32x4_t)]);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash_many_neon
+ * ----------------------------------------------------------------------------
+ */
+
+// Forward declaration of the scalar compression function, which is defined
+// in blake3_portable.c. hash_one_neon below calls it for inputs that are not
+// handled by the 4-way NEON path. It updates cv in place.
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags);
+
+// Hashes a single input of `blocks` full blocks, starting from `key` as the
+// chaining value. flags_start is applied to the first block only and
+// flags_end to the last block only. The final chaining value is copied to
+// out.
+INLINE void hash_one_neon(const uint8_t *input, size_t blocks,
+                          const uint32_t key[8], uint64_t counter,
+                          uint8_t flags, uint8_t flags_start, uint8_t flags_end,
+                          uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  for (; blocks != 0; blocks--) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    // TODO: Implement compress_neon. However note that according to
+    // https://github.com/BLAKE2/BLAKE2/commit/7965d3e6e1b4193438b8d3a656787587d2579227,
+    // compress_neon might not be any faster than compress_portable.
+    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                      block_flags);
+    input += BLAKE3_BLOCK_LEN;
+    // Every block after the first uses the plain flags.
+    block_flags = flags;
+  }
+  memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+// Hashes num_inputs inputs of `blocks` blocks each and writes one
+// BLAKE3_OUT_LEN-byte result per input to out, in input order. Inputs are
+// processed four at a time with blake3_hash4_neon; any remainder goes through
+// hash_one_neon. When increment_counter is set, each input uses the next
+// counter value.
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  // Groups of four inputs.
+  for (; num_inputs >= 4; num_inputs -= 4) {
+    blake3_hash4_neon(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += 4;
+    }
+    inputs += 4;
+    out += 4 * BLAKE3_OUT_LEN;
+  }
+  // Leftover inputs, one at a time.
+  for (; num_inputs > 0; num_inputs--) {
+    hash_one_neon(inputs[0], blocks, key, counter, flags, flags_start,
+                  flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    out += BLAKE3_OUT_LEN;
+  }
+}
--- a/external/blake3/blake3_portable.c
+++ b/external/blake3/blake3_portable.c
@@ -0,0 +1,160 @@
+#include "blake3_impl.h"
+#include <string.h>
+
+// Rotates the 32-bit word w right by c bits.
+//
+// Both shift counts are reduced modulo 32. Without the masks, c == 0 would
+// shift left by the full width of the type, which is undefined behavior in
+// C. For c in 1..31 the result is identical to the unmasked form.
+INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
+  return (w >> (c & 31)) | (w << ((32 - c) & 31));
+}
+
+// The quarter-round mixing function. It mixes the four state words at
+// indices a, b, c and d with the two message words x and y, using
+// wrapping 32-bit additions, XORs and right rotations by 16, 12, 8 and 7.
+INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
+              uint32_t x, uint32_t y) {
+  uint32_t *const wa = &state[a];
+  uint32_t *const wb = &state[b];
+  uint32_t *const wc = &state[c];
+  uint32_t *const wd = &state[d];
+
+  // First half: message word x, rotations 16 and 12.
+  *wa += *wb + x;
+  *wd = rotr32(*wd ^ *wa, 16);
+  *wc += *wd;
+  *wb = rotr32(*wb ^ *wc, 12);
+
+  // Second half: message word y, rotations 8 and 7.
+  *wa += *wb + y;
+  *wd = rotr32(*wd ^ *wa, 8);
+  *wc += *wd;
+  *wb = rotr32(*wb ^ *wc, 7);
+}
+
+// Applies one full round to the 16-word state: four column mixes followed by
+// four diagonal mixes. MSG_SCHEDULE[round] selects which message words feed
+// each mix.
+INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
+  const uint8_t *schedule = MSG_SCHEDULE[round];
+
+  // Columns: (i, 4 + i, 8 + i, 12 + i).
+  for (size_t i = 0; i < 4; i++) {
+    g(state, i, 4 + i, 8 + i, 12 + i, msg[schedule[2 * i]],
+      msg[schedule[2 * i + 1]]);
+  }
+
+  // Diagonals: each successive row is shifted one more position to the left,
+  // giving (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
+  for (size_t i = 0; i < 4; i++) {
+    g(state, i, 4 + (i + 1) % 4, 8 + (i + 2) % 4, 12 + (i + 3) % 4,
+      msg[schedule[8 + 2 * i]], msg[schedule[9 + 2 * i]]);
+  }
+}
+
+// Initializes the 16-word state from the chaining value, the IV, the counter,
+// the block length and the flags, then runs all seven rounds over the block.
+// The feed-forward XOR is left to the callers.
+INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags) {
+  // Decode the block into sixteen 32-bit message words via load32().
+  uint32_t block_words[16];
+  for (size_t i = 0; i < 16; i++) {
+    block_words[i] = load32(block + 4 * i);
+  }
+
+  for (size_t i = 0; i < 8; i++) {
+    state[i] = cv[i];
+  }
+  for (size_t i = 0; i < 4; i++) {
+    state[8 + i] = IV[i];
+  }
+  state[12] = counter_low(counter);
+  state[13] = counter_high(counter);
+  state[14] = (uint32_t)block_len;
+  state[15] = (uint32_t)flags;
+
+  for (size_t round = 0; round < 7; round++) {
+    round_fn(state, &block_words[0], round);
+  }
+}
+
+// Compresses one block and overwrites cv with the new chaining value, which
+// is the XOR of the lower and upper halves of the final state.
+void blake3_compress_in_place_portable(uint32_t cv[8],
+                                       const uint8_t block[BLAKE3_BLOCK_LEN],
+                                       uint8_t block_len, uint64_t counter,
+                                       uint8_t flags) {
+  uint32_t state[16];
+  compress_pre(state, cv, block, block_len, counter, flags);
+  for (size_t i = 0; i < 8; i++) {
+    cv[i] = state[i] ^ state[i + 8];
+  }
+}
+
+// Compresses one block and writes the full 64-byte extended output to out
+// via store32(). The first 32 bytes are state[i] ^ state[i + 8]; the last 32
+// bytes are state[i + 8] ^ cv[i]. cv itself is not modified.
+void blake3_compress_xof_portable(const uint32_t cv[8],
+                                  const uint8_t block[BLAKE3_BLOCK_LEN],
+                                  uint8_t block_len, uint64_t counter,
+                                  uint8_t flags, uint8_t out[64]) {
+  uint32_t state[16];
+  compress_pre(state, cv, block, block_len, counter, flags);
+
+  // Lower half of the output.
+  for (size_t i = 0; i < 8; i++) {
+    store32(&out[i * 4], state[i] ^ state[i + 8]);
+  }
+  // Upper half of the output.
+  for (size_t i = 0; i < 8; i++) {
+    store32(&out[(i + 8) * 4], state[i + 8] ^ cv[i]);
+  }
+}
+
+// Hashes a single input of `blocks` full blocks, starting from `key` as the
+// chaining value. flags_start is applied to the first block only and
+// flags_end to the last block only. The result is written with
+// store_cv_words().
+INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
+                              const uint32_t key[8], uint64_t counter,
+                              uint8_t flags, uint8_t flags_start,
+                              uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  for (; blocks != 0; blocks--) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                      block_flags);
+    input += BLAKE3_BLOCK_LEN;
+    // Every block after the first uses the plain flags.
+    block_flags = flags;
+  }
+  store_cv_words(out, cv);
+}
+
+// Hashes num_inputs inputs of `blocks` blocks each, one after another, and
+// writes one BLAKE3_OUT_LEN-byte result per input to out. When
+// increment_counter is set, each successive input uses the next counter
+// value.
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+                               size_t blocks, const uint32_t key[8],
+                               uint64_t counter, bool increment_counter,
+                               uint8_t flags, uint8_t flags_start,
+                               uint8_t flags_end, uint8_t *out) {
+  for (; num_inputs > 0; num_inputs--) {
+    hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
+                      flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    out += BLAKE3_OUT_LEN;
+  }
+}
--- a/external/blake3/blake3_sse2.c
+++ b/external/blake3/blake3_sse2.c
@@ -0,0 +1,566 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+// Applies _mm_shuffle_ps to two __m128i operands: both are cast to __m128,
+// shuffled with the immediate c, and the result is cast back to __m128i.
+#define _mm_shuffle_ps2(a, b, c)                                               \
+  (_mm_castps_si128(                                                           \
+      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
+
+// Loads 16 bytes from src with an unaligned load.
+INLINE __m128i loadu(const uint8_t src[16]) {
+  const __m128i *vec_src = (const __m128i *)src;
+  return _mm_loadu_si128(vec_src);
+}
+
+// Stores the 16 bytes of src to dest with an unaligned store.
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+  __m128i *vec_dest = (__m128i *)dest;
+  _mm_storeu_si128(vec_dest, src);
+}
+
+// Adds the 32-bit lanes of a and b.
+INLINE __m128i addv(__m128i a, __m128i b) {
+  return _mm_add_epi32(a, b);
+}
+
+// Bitwise XOR of a and b. The helper is called "xorv" rather than "xor"
+// because clang-format doesn't like the name "xor" for some reason.
+INLINE __m128i xorv(__m128i a, __m128i b) {
+  return _mm_xor_si128(a, b);
+}
+
+// Broadcasts x into all four 32-bit lanes.
+INLINE __m128i set1(uint32_t x) {
+  const int32_t lane = (int32_t)x;
+  return _mm_set1_epi32(lane);
+}
+
+// Builds a vector from four 32-bit words using _mm_setr_epi32, so a lands in
+// the lowest lane and d in the highest.
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  const int32_t lane0 = (int32_t)a;
+  const int32_t lane1 = (int32_t)b;
+  const int32_t lane2 = (int32_t)c;
+  const int32_t lane3 = (int32_t)d;
+  return _mm_setr_epi32(lane0, lane1, lane2, lane3);
+}
+
+// Rotates every 32-bit lane by 16 bits by swapping the two 16-bit halves of
+// each lane: first in the low 64 bits, then in the high 64 bits.
+INLINE __m128i rot16(__m128i x) {
+  const __m128i low_swapped = _mm_shufflelo_epi16(x, 0xB1);
+  return _mm_shufflehi_epi16(low_swapped, 0xB1);
+}
+
+// Rotates every 32-bit lane right by 12 bits.
+INLINE __m128i rot12(__m128i x) {
+  const __m128i shifted_down = _mm_srli_epi32(x, 12);
+  const __m128i wrapped_up = _mm_slli_epi32(x, 32 - 12);
+  return xorv(shifted_down, wrapped_up);
+}
+
+// Rotates every 32-bit lane right by 8 bits.
+INLINE __m128i rot8(__m128i x) {
+  const __m128i shifted_down = _mm_srli_epi32(x, 8);
+  const __m128i wrapped_up = _mm_slli_epi32(x, 32 - 8);
+  return xorv(shifted_down, wrapped_up);
+}
+
+// Rotates every 32-bit lane right by 7 bits.
+INLINE __m128i rot7(__m128i x) {
+  const __m128i shifted_down = _mm_srli_epi32(x, 7);
+  const __m128i wrapped_up = _mm_slli_epi32(x, 32 - 7);
+  return xorv(shifted_down, wrapped_up);
+}
+
+// First half of the mixing function applied to four lanes at once: adds the
+// message vector m, then uses rotations by 16 and 12.
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  const __m128i with_msg = addv(*row0, m);
+  *row0 = addv(with_msg, *row1);
+  *row3 = rot16(xorv(*row3, *row0));
+  *row2 = addv(*row2, *row3);
+  *row1 = rot12(xorv(*row1, *row2));
+}
+
+// Second half of the mixing function applied to four lanes at once: adds the
+// message vector m, then uses rotations by 8 and 7.
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  const __m128i with_msg = addv(*row0, m);
+  *row0 = addv(with_msg, *row1);
+  *row3 = rot8(xorv(*row3, *row0));
+  *row2 = addv(*row2, *row3);
+  *row1 = rot7(xorv(*row1, *row2));
+}
+
+// Rotates the lanes of rows 0, 2 and 3 so that the diagonals of the state
+// line up as columns. As an optimization, row1 is the row left unrotated
+// (rather than row0); the message loads in compress_pre are adjusted to
+// compensate. See the discussion at
+// https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  const __m128i r0 = *row0;
+  const __m128i r2 = *row2;
+  const __m128i r3 = *row3;
+  *row0 = _mm_shuffle_epi32(r0, _MM_SHUFFLE(2, 1, 0, 3));
+  *row2 = _mm_shuffle_epi32(r2, _MM_SHUFFLE(0, 3, 2, 1));
+  *row3 = _mm_shuffle_epi32(r3, _MM_SHUFFLE(1, 0, 3, 2));
+}
+
+// Inverse of diagonalize: rotates the lanes of rows 0, 2 and 3 back to their
+// column positions.
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  const __m128i r0 = *row0;
+  const __m128i r2 = *row2;
+  const __m128i r3 = *row3;
+  *row0 = _mm_shuffle_epi32(r0, _MM_SHUFFLE(0, 3, 2, 1));
+  *row2 = _mm_shuffle_epi32(r2, _MM_SHUFFLE(2, 1, 0, 3));
+  *row3 = _mm_shuffle_epi32(r3, _MM_SHUFFLE(1, 0, 3, 2));
+}
+
+// Selects 16-bit lanes from a or b under control of the low eight bits of
+// imm8: where bit i is set the lane comes from b, otherwise from a. Built
+// from and/andnot/or, presumably because the SSE4.1 blend instruction is not
+// available in this SSE2-only file.
+INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
+  // One distinct bit per 16-bit lane.
+  const __m128i lane_bits =
+      _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+  const __m128i selected = _mm_and_si128(_mm_set1_epi16(imm8), lane_bits);
+  // All-ones in every lane whose bit is set in imm8.
+  const __m128i take_b = _mm_cmpeq_epi16(selected, lane_bits);
+  const __m128i from_b = _mm_and_si128(take_b, b);
+  const __m128i from_a = _mm_andnot_si128(take_b, a);
+  return _mm_or_si128(from_b, from_a);
+}
+
+// Loads the state into four row vectors (chaining value in rows 0-1, IV in
+// row 2, counter/block_len/flags in row 3) and runs all seven rounds over
+// the block. The feed-forward XOR is left to the callers.
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags) {
+  rows[0] = loadu((uint8_t *)&cv[0]);
+  rows[1] = loadu((uint8_t *)&cv[4]);
+  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+  rows[3] = set4(counter_low(counter), counter_high(counter),
+                 (uint32_t)block_len, (uint32_t)flags);
+
+  __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
+  __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
+  __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
+  __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
+
+  __m128i t0, t1, t2, t3, tt;
+
+  // Round 1. The first round permutes the message words from the original
+  // input order, into the groups that get mixed in parallel.
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
+  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
+  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Rounds 2 through 7. Each of these rounds applies the same fixed
+  // permutation to the message words left by the round before, so the six
+  // rounds share one loop body.
+  for (int round = 2; round <= 7; round++) {
+    // Column step.
+    t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+    t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+    g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+    t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+    tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+    t1 = blend_epi16(tt, t1, 0xCC);
+    g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+
+    // Diagonal step.
+    diagonalize(&rows[0], &rows[2], &rows[3]);
+    t2 = _mm_unpacklo_epi64(m3, m1);
+    tt = blend_epi16(t2, m2, 0xC0);
+    t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+    g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+    t3 = _mm_unpackhi_epi32(m1, m3);
+    tt = _mm_unpacklo_epi32(m2, t3);
+    t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+    g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+    undiagonalize(&rows[0], &rows[2], &rows[3]);
+
+    // The permuted words become the input of the next round. After the last
+    // round these values are simply unused.
+    m0 = t0;
+    m1 = t1;
+    m2 = t2;
+    m3 = t3;
+  }
+}
+
+// Compresses one block and overwrites cv with the new chaining value:
+// rows[0] ^ rows[2] goes to cv[0..3] and rows[1] ^ rows[3] to cv[4..7].
+void blake3_compress_in_place_sse2(uint32_t cv[8],
+                                   const uint8_t block[BLAKE3_BLOCK_LEN],
+                                   uint8_t block_len, uint64_t counter,
+                                   uint8_t flags) {
+  __m128i rows[4];
+  compress_pre(rows, cv, block, block_len, counter, flags);
+  const __m128i cv_lo = xorv(rows[0], rows[2]);
+  const __m128i cv_hi = xorv(rows[1], rows[3]);
+  storeu(cv_lo, (uint8_t *)&cv[0]);
+  storeu(cv_hi, (uint8_t *)&cv[4]);
+}
+
+// Compresses one block and writes the full 64-byte extended output to out.
+// The first 32 bytes are the feed-forward of the state halves; the last 32
+// bytes are the upper state rows XORed with the input chaining value.
+void blake3_compress_xof_sse2(const uint32_t cv[8],
+                              const uint8_t block[BLAKE3_BLOCK_LEN],
+                              uint8_t block_len, uint64_t counter,
+                              uint8_t flags, uint8_t out[64]) {
+  __m128i rows[4];
+  compress_pre(rows, cv, block, block_len, counter, flags);
+
+  // Bytes 0..31.
+  storeu(xorv(rows[0], rows[2]), &out[0]);
+  storeu(xorv(rows[1], rows[3]), &out[16]);
+
+  // Bytes 32..63. Each half of cv is loaded immediately before its store.
+  const __m128i cv_lo = loadu((uint8_t *)&cv[0]);
+  storeu(xorv(rows[2], cv_lo), &out[32]);
+  const __m128i cv_hi = loadu((uint8_t *)&cv[4]);
+  storeu(xorv(rows[3], cv_hi), &out[48]);
+}
+
+// One full mixing step on the state vectors at indices a, b, c and d, with
+// message vectors mx and my. Each vector holds the same state word for four
+// independent inputs, so this performs four mixes at once.
+INLINE void g_lanes(__m128i v[16], size_t a, size_t b, size_t c, size_t d,
+                    __m128i mx, __m128i my) {
+  // First half: message vector mx, rotations 16 and 12.
+  v[a] = addv(v[a], mx);
+  v[a] = addv(v[a], v[b]);
+  v[d] = rot16(xorv(v[d], v[a]));
+  v[c] = addv(v[c], v[d]);
+  v[b] = rot12(xorv(v[b], v[c]));
+
+  // Second half: message vector my, rotations 8 and 7.
+  v[a] = addv(v[a], my);
+  v[a] = addv(v[a], v[b]);
+  v[d] = rot8(xorv(v[d], v[a]));
+  v[c] = addv(v[c], v[d]);
+  v[b] = rot7(xorv(v[b], v[c]));
+}
+
+// Applies round r to the 16 state vectors: four column mixes followed by
+// four diagonal mixes. MSG_SCHEDULE[r] selects the message vectors. The four
+// mixes within each group touch disjoint state indices, so running them one
+// after another gives the same result as interleaving their steps.
+INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
+  // Columns: (i, 4 + i, 8 + i, 12 + i).
+  for (size_t i = 0; i < 4; i++) {
+    g_lanes(v, i, 4 + i, 8 + i, 12 + i, m[(size_t)MSG_SCHEDULE[r][2 * i]],
+            m[(size_t)MSG_SCHEDULE[r][2 * i + 1]]);
+  }
+
+  // Diagonals: (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
+  for (size_t i = 0; i < 4; i++) {
+    g_lanes(v, i, 4 + (i + 1) % 4, 8 + (i + 2) % 4, 12 + (i + 3) % 4,
+            m[(size_t)MSG_SCHEDULE[r][8 + 2 * i]],
+            m[(size_t)MSG_SCHEDULE[r][9 + 2 * i]]);
+  }
+}
+
+// Transposes, in place, the 4x4 matrix of 32-bit words held in vecs[0..3].
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+  // Interleave 32-bit lanes pairwise: the "lo" results hold words 0 and 1 of
+  // each source vector, the "hi" results hold words 2 and 3.
+  const __m128i lo_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  const __m128i hi_01 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  const __m128i lo_23 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  const __m128i hi_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+  // Interleave 64-bit halves to finish the transpose.
+  vecs[0] = _mm_unpacklo_epi64(lo_01, lo_23);
+  vecs[1] = _mm_unpackhi_epi64(lo_01, lo_23);
+  vecs[2] = _mm_unpacklo_epi64(hi_01, hi_23);
+  vecs[3] = _mm_unpackhi_epi64(hi_01, hi_23);
+}
+
+// Loads the 64-byte block at block_offset from each of the four inputs and
+// transposes the words so that out[w] holds message word w of all four
+// inputs, one per lane. A prefetch is also issued 256 bytes past the current
+// block offset of each input.
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+                               size_t block_offset, __m128i out[16]) {
+  // out[4 * chunk + lane] holds 16-byte chunk `chunk` of input `lane`.
+  for (size_t chunk = 0; chunk < 4; chunk++) {
+    for (size_t lane = 0; lane < 4; lane++) {
+      out[4 * chunk + lane] =
+          loadu(&inputs[lane][block_offset + chunk * sizeof(__m128i)]);
+    }
+  }
+  for (size_t i = 0; i < 4; ++i) {
+    _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+  }
+  // Transpose each group of four vectors independently.
+  for (size_t chunk = 0; chunk < 4; chunk++) {
+    transpose_vecs(&out[4 * chunk]);
+  }
+}
+
+// Builds the per-lane block counters for a 4-way hash. Lane i receives
+// counter + i when increment_counter is set, and plain counter otherwise.
+// The low 32 bits go to *out_lo and the high 32 bits to *out_hi.
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+                          __m128i *out_lo, __m128i *out_hi) {
+  // All-ones when incrementing, all-zeros otherwise.
+  const __m128i enable = _mm_set1_epi32(-(int32_t)increment_counter);
+  const __m128i lane_ids = _mm_set_epi32(3, 2, 1, 0);
+  const __m128i offsets = _mm_and_si128(enable, lane_ids);
+  const __m128i low = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), offsets);
+
+  // The 32-bit add wrapped exactly when offset > low as unsigned values.
+  // The compare is signed, so flip the sign bit of both operands first to
+  // get unsigned ordering.
+  const __m128i sign_bit = _mm_set1_epi32((int32_t)0x80000000);
+  const __m128i wrapped = _mm_cmpgt_epi32(_mm_xor_si128(offsets, sign_bit),
+                                          _mm_xor_si128(low, sign_bit));
+
+  // A true compare result is all-ones (-1), so subtracting it adds the carry.
+  const __m128i high =
+      _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), wrapped);
+  *out_lo = low;
+  *out_hi = high;
+}
+
+// Hashes four inputs in parallel, one input per 32-bit vector lane. Every
+// input is `blocks` blocks of BLAKE3_BLOCK_LEN bytes. flags_start is OR-ed
+// into the flags of the first block and flags_end into those of the last
+// block. The four resulting chaining values are written to out back to back.
+static void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
+                              const uint32_t key[8], uint64_t counter,
+                              bool increment_counter, uint8_t flags,
+                              uint8_t flags_start, uint8_t flags_end,
+                              uint8_t *out) {
+  // Broadcast each key word across the four lanes.
+  __m128i h_vecs[8];
+  for (size_t i = 0; i < 8; i++) {
+    h_vecs[i] = set1(key[i]);
+  }
+
+  __m128i counter_low_vec, counter_high_vec;
+  load_counters(counter, increment_counter, &counter_low_vec,
+                &counter_high_vec);
+
+  uint8_t block_flags = flags | flags_start;
+  for (size_t block = 0; block < blocks; block++) {
+    const bool is_last_block = (block + 1 == blocks);
+    if (is_last_block) {
+      block_flags |= flags_end;
+    }
+    __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+    __m128i block_flags_vec = set1(block_flags);
+    __m128i msg_vecs[16];
+    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    // State layout: chaining value, IV words, counter, block length, flags.
+    __m128i v[16];
+    for (size_t i = 0; i < 8; i++) {
+      v[i] = h_vecs[i];
+    }
+    for (size_t i = 0; i < 4; i++) {
+      v[8 + i] = set1(IV[i]);
+    }
+    v[12] = counter_low_vec;
+    v[13] = counter_high_vec;
+    v[14] = block_len_vec;
+    v[15] = block_flags_vec;
+
+    for (size_t round = 0; round < 7; round++) {
+      round_fn(v, msg_vecs, round);
+    }
+
+    // Feed-forward: fold the upper half of the state into the lower half.
+    for (size_t i = 0; i < 8; i++) {
+      h_vecs[i] = xorv(v[i], v[i + 8]);
+    }
+
+    // Only the first block carries flags_start.
+    block_flags = flags;
+  }
+
+  transpose_vecs(&h_vecs[0]);
+  transpose_vecs(&h_vecs[4]);
+  // After the transposes, h_vecs[i] is the first half of output i and
+  // h_vecs[i + 4] is its second half, so store them interleaved.
+  for (size_t i = 0; i < 4; i++) {
+    storeu(h_vecs[i], &out[(2 * i) * sizeof(__m128i)]);
+    storeu(h_vecs[i + 4], &out[(2 * i + 1) * sizeof(__m128i)]);
+  }
+}
+
+// Hashes a single input of `blocks` full blocks, starting from `key` as the
+// chaining value. flags_start is applied to the first block only and
+// flags_end to the last block only. The final chaining value is copied to
+// out.
+INLINE void hash_one_sse2(const uint8_t *input, size_t blocks,
+                          const uint32_t key[8], uint64_t counter,
+                          uint8_t flags, uint8_t flags_start,
+                          uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t cv[8];
+  memcpy(cv, key, BLAKE3_KEY_LEN);
+  uint8_t block_flags = flags | flags_start;
+  for (; blocks != 0; blocks--) {
+    if (blocks == 1) {
+      block_flags |= flags_end;
+    }
+    blake3_compress_in_place_sse2(cv, input, BLAKE3_BLOCK_LEN, counter,
+                                  block_flags);
+    input += BLAKE3_BLOCK_LEN;
+    // Every block after the first uses the plain flags.
+    block_flags = flags;
+  }
+  memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+// Hashes num_inputs inputs of `blocks` blocks each and writes one
+// BLAKE3_OUT_LEN-byte result per input to out, in input order. Inputs are
+// processed DEGREE at a time with blake3_hash4_sse2; any remainder goes
+// through hash_one_sse2. When increment_counter is set, each input uses the
+// next counter value.
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+                           size_t blocks, const uint32_t key[8],
+                           uint64_t counter, bool increment_counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t *out) {
+  // Groups of DEGREE inputs.
+  for (; num_inputs >= DEGREE; num_inputs -= DEGREE) {
+    blake3_hash4_sse2(inputs, blocks, key, counter, increment_counter, flags,
+                      flags_start, flags_end, out);
+    if (increment_counter) {
+      counter += DEGREE;
+    }
+    inputs += DEGREE;
+    out += DEGREE * BLAKE3_OUT_LEN;
+  }
+  // Leftover inputs, one at a time.
+  for (; num_inputs > 0; num_inputs--) {
+    hash_one_sse2(inputs[0], blocks, key, counter, flags, flags_start,
+                  flags_end, out);
+    if (increment_counter) {
+      counter += 1;
+    }
+    inputs += 1;
+    out += BLAKE3_OUT_LEN;
+  }
+}
--- a/external/blake3/blake3_sse2_x86-64_unix.S
+++ b/external/blake3/blake3_sse2_x86-64_unix.S
--- a/external/blake3/blake3_sse2_x86-64_windows_gnu.S
+++ b/external/blake3/blake3_sse2_x86-64_windows_gnu.S
--- a/external/blake3/blake3_sse2_x86-64_windows_msvc.asm
+++ b/external/blake3/blake3_sse2_x86-64_windows_msvc.asm
--- a/external/blake3/blake3_sse41.c
+++ b/external/blake3/blake3_sse41.c
@@ -0,0 +1,560 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+#define _mm_shuffle_ps2(a, b, c)                                               \
+  (_mm_castps_si128(                                                           \
+      _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
+
+// Load 16 bytes from `src` (no alignment requirement) into a vector.
+INLINE __m128i loadu(const uint8_t src[16]) {
+  const __m128i *vec_ptr = (const __m128i *)src;
+  return _mm_loadu_si128(vec_ptr);
+}
+
+// Store the 16 bytes of `src` to `dest` (no alignment requirement).
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+  __m128i *vec_ptr = (__m128i *)dest;
+  _mm_storeu_si128(vec_ptr, src);
+}
+
+// Lane-wise 32-bit addition.
+INLINE __m128i addv(__m128i a, __m128i b) {
+  const __m128i sum = _mm_add_epi32(a, b);
+  return sum;
+}
+
+// Bitwise XOR of two vectors.
+// Note that clang-format doesn't like the name "xor" for some reason.
+INLINE __m128i xorv(__m128i a, __m128i b) {
+  const __m128i mixed = _mm_xor_si128(a, b);
+  return mixed;
+}
+
+// Broadcast `x` into all four 32-bit lanes.
+INLINE __m128i set1(uint32_t x) {
+  const int32_t lane_value = (int32_t)x;
+  return _mm_set1_epi32(lane_value);
+}
+
+// Build a vector whose 32-bit lanes 0..3 are a, b, c, d (in that order).
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+  const int32_t lane0 = (int32_t)a;
+  const int32_t lane1 = (int32_t)b;
+  const int32_t lane2 = (int32_t)c;
+  const int32_t lane3 = (int32_t)d;
+  return _mm_setr_epi32(lane0, lane1, lane2, lane3);
+}
+
+// Rotate every 32-bit lane by 16 bits. Implemented as a byte shuffle that
+// swaps the two 16-bit halves of each lane.
+INLINE __m128i rot16(__m128i x) {
+  const __m128i swap_halves =
+      _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
+  return _mm_shuffle_epi8(x, swap_halves);
+}
+
+// Rotate every 32-bit lane right by 12 bits. The two shifted halves occupy
+// disjoint bit positions, so XOR combines them like OR would.
+INLINE __m128i rot12(__m128i x) {
+  const __m128i shifted_down = _mm_srli_epi32(x, 12);
+  const __m128i wrapped_bits = _mm_slli_epi32(x, 32 - 12);
+  return xorv(shifted_down, wrapped_bits);
+}
+
+// Rotate every 32-bit lane right by 8 bits. Implemented as a byte shuffle:
+// within each lane, result byte i takes source byte (i + 1) mod 4.
+INLINE __m128i rot8(__m128i x) {
+  const __m128i rotate_bytes =
+      _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1);
+  return _mm_shuffle_epi8(x, rotate_bytes);
+}
+
+// Rotate every 32-bit lane right by 7 bits. The two shifted halves occupy
+// disjoint bit positions, so XOR combines them like OR would.
+INLINE __m128i rot7(__m128i x) {
+  const __m128i shifted_down = _mm_srli_epi32(x, 7);
+  const __m128i wrapped_bits = _mm_slli_epi32(x, 32 - 7);
+  return xorv(shifted_down, wrapped_bits);
+}
+
+// First half of the mixing function, applied to four columns (or diagonals)
+// at once: adds the message words `m` into row0, then updates row3, row2 and
+// row1 in turn, using the 16-bit and 12-bit rotations.
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  *row0 = addv(addv(*row0, m), *row1);
+  *row3 = rot16(xorv(*row3, *row0));
+  *row2 = addv(*row2, *row3);
+  *row1 = rot12(xorv(*row1, *row2));
+}
+
+// Second half of the mixing function: same data flow as g1, but with the
+// 8-bit and 7-bit rotations.
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+               __m128i m) {
+  *row0 = addv(addv(*row0, m), *row1);
+  *row3 = rot8(xorv(*row3, *row0));
+  *row2 = addv(*row2, *row3);
+  *row1 = rot7(xorv(*row1, *row2));
+}
+
+// Rotate the lanes of row0, row3 and row2 so that the next g1/g2 pair mixes
+// the state's diagonals instead of its columns.
+//
+// Note the optimization here of leaving row1 as the unrotated row, rather than
+// row0. All the message loads below are adjusted to compensate for this. See
+// discussion at https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  // Source lane i moves to lane i + 1 (mod 4).
+  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
+  // Source lane i moves to lane i + 2 (mod 4).
+  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+  // Source lane i moves to lane i + 3 (mod 4).
+  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
+}
+
+// Inverse of diagonalize(): rotates the lanes of row0, row3 and row2 back so
+// the state is in column order again. row1 is never rotated.
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+  // Source lane i moves to lane i - 1 (mod 4), undoing the +1 rotation.
+  *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
+  // A rotation by two lanes is its own inverse.
+  *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+  // Source lane i moves to lane i + 1 (mod 4), undoing the +3 rotation.
+  *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
+}
+
+// Run the seven rounds of the compression function on a single block and
+// leave the resulting 4x4 state in `rows` (one __m128i per state row). The
+// final feed-forward XOR is NOT done here; the callers combine the rows.
+//
+//   rows[0], rows[1]  <- cv[0..3], cv[4..7]
+//   rows[2]           <- IV[0..3]
+//   rows[3]           <- counter low, counter high, block_len, flags
+//
+// m0..m3 hold the 16 message words. After each round they are overwritten
+// with the vectors that round consumed (t0..t3), so the identical shuffle
+// sequence in rounds 2-7 applies the per-round permutation again.
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+                         const uint8_t block[BLAKE3_BLOCK_LEN],
+                         uint8_t block_len, uint64_t counter, uint8_t flags) {
+  rows[0] = loadu((uint8_t *)&cv[0]);
+  rows[1] = loadu((uint8_t *)&cv[4]);
+  rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+  rows[3] = set4(counter_low(counter), counter_high(counter),
+                 (uint32_t)block_len, (uint32_t)flags);
+
+  __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
+  __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
+  __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
+  __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
+
+  // t0/t1 feed the column step (g1, g2); t2/t3 feed the diagonal step.
+  // tt is scratch space for building them.
+  __m128i t0, t1, t2, t3, tt;
+
+  // Round 1. The first round permutes the message words from the original
+  // input order, into the groups that get mixed in parallel.
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); //  6  4  2  0
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); //  7  5  3  1
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10  8
+  t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3));   // 12 10  8 14
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11  9
+  t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3));   // 13 11  9 15
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 2. This round and all following rounds apply a fixed permutation
+  // to the message words from the round before.
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 3
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 4
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 5
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 6
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+  m0 = t0;
+  m1 = t1;
+  m2 = t2;
+  m3 = t3;
+
+  // Round 7. This is the last round, so the message vectors are not saved
+  // afterwards.
+  t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+  t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+  t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+  tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+  t1 = _mm_blend_epi16(tt, t1, 0xCC);
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+  diagonalize(&rows[0], &rows[2], &rows[3]);
+  t2 = _mm_unpacklo_epi64(m3, m1);
+  tt = _mm_blend_epi16(t2, m2, 0xC0);
+  t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+  g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+  t3 = _mm_unpackhi_epi32(m1, m3);
+  tt = _mm_unpacklo_epi32(m2, t3);
+  t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+  g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+  undiagonalize(&rows[0], &rows[2], &rows[3]);
+}
+
+// Compress one block and overwrite `cv` with the new 8-word chaining value:
+// the first two state rows XOR-ed with the last two.
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+                                    const uint8_t block[BLAKE3_BLOCK_LEN],
+                                    uint8_t block_len, uint64_t counter,
+                                    uint8_t flags) {
+  __m128i state[4];
+  compress_pre(state, cv, block, block_len, counter, flags);
+
+  const __m128i new_cv_lo = xorv(state[0], state[2]);
+  const __m128i new_cv_hi = xorv(state[1], state[3]);
+  storeu(new_cv_lo, (uint8_t *)&cv[0]);
+  storeu(new_cv_hi, (uint8_t *)&cv[4]);
+}
+
+// Compress one block and write the full 64-byte output to `out`.
+// Bytes 0..31 are the first two state rows XOR-ed with the last two; bytes
+// 32..63 are the last two state rows XOR-ed with the input `cv`.
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+                               const uint8_t block[BLAKE3_BLOCK_LEN],
+                               uint8_t block_len, uint64_t counter,
+                               uint8_t flags, uint8_t out[64]) {
+  __m128i state[4];
+  compress_pre(state, cv, block, block_len, counter, flags);
+
+  // First half of the output.
+  for (size_t half = 0; half < 2; half++) {
+    storeu(xorv(state[half], state[half + 2]), &out[half * sizeof(__m128i)]);
+  }
+  // Second half of the output; `cv` is read only after the first half has
+  // been stored, same as the straight-line sequence this replaces.
+  for (size_t half = 0; half < 2; half++) {
+    const __m128i cv_part = loadu((uint8_t *)&cv[4 * half]);
+    storeu(xorv(state[half + 2], cv_part),
+           &out[32 + half * sizeof(__m128i)]);
+  }
+}
+
+// One full round over the 16-vector state `v`, using message vectors `m`
+// selected by row `r` of MSG_SCHEDULE.
+//
+// In blake3_hash4_sse41 each v[i] / m[i] holds word i of four independent
+// inputs (one per 32-bit lane), so the same scalar operations are applied to
+// all four inputs at once.
+//
+// The first half mixes the columns (v[i], v[i+4], v[i+8], v[i+12]) using
+// schedule entries 0..7; the second half mixes the diagonals
+// (v[0],v[5],v[10],v[15]), (v[1],v[6],v[11],v[12]), ... using entries 8..15.
+// Diagonals are addressed by index, so no lane shuffling is needed.
+INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
+  // Column step, first half: add message word, then rotate by 16 and 12.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[15] = rot16(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot12(v[4]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  // Column step, second half: add message word, then rotate by 8 and 7.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+  v[0] = addv(v[0], v[4]);
+  v[1] = addv(v[1], v[5]);
+  v[2] = addv(v[2], v[6]);
+  v[3] = addv(v[3], v[7]);
+  v[12] = xorv(v[12], v[0]);
+  v[13] = xorv(v[13], v[1]);
+  v[14] = xorv(v[14], v[2]);
+  v[15] = xorv(v[15], v[3]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[15] = rot8(v[15]);
+  v[8] = addv(v[8], v[12]);
+  v[9] = addv(v[9], v[13]);
+  v[10] = addv(v[10], v[14]);
+  v[11] = addv(v[11], v[15]);
+  v[4] = xorv(v[4], v[8]);
+  v[5] = xorv(v[5], v[9]);
+  v[6] = xorv(v[6], v[10]);
+  v[7] = xorv(v[7], v[11]);
+  v[4] = rot7(v[4]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+
+  // Diagonal step, first half: add message word, then rotate by 16 and 12.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot16(v[15]);
+  v[12] = rot16(v[12]);
+  v[13] = rot16(v[13]);
+  v[14] = rot16(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot12(v[5]);
+  v[6] = rot12(v[6]);
+  v[7] = rot12(v[7]);
+  v[4] = rot12(v[4]);
+  // Diagonal step, second half: add message word, then rotate by 8 and 7.
+  v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+  v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+  v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+  v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+  v[0] = addv(v[0], v[5]);
+  v[1] = addv(v[1], v[6]);
+  v[2] = addv(v[2], v[7]);
+  v[3] = addv(v[3], v[4]);
+  v[15] = xorv(v[15], v[0]);
+  v[12] = xorv(v[12], v[1]);
+  v[13] = xorv(v[13], v[2]);
+  v[14] = xorv(v[14], v[3]);
+  v[15] = rot8(v[15]);
+  v[12] = rot8(v[12]);
+  v[13] = rot8(v[13]);
+  v[14] = rot8(v[14]);
+  v[10] = addv(v[10], v[15]);
+  v[11] = addv(v[11], v[12]);
+  v[8] = addv(v[8], v[13]);
+  v[9] = addv(v[9], v[14]);
+  v[5] = xorv(v[5], v[10]);
+  v[6] = xorv(v[6], v[11]);
+  v[7] = xorv(v[7], v[8]);
+  v[4] = xorv(v[4], v[9]);
+  v[5] = rot7(v[5]);
+  v[6] = rot7(v[6]);
+  v[7] = rot7(v[7]);
+  v[4] = rot7(v[4]);
+}
+
+// Transpose a 4x4 matrix of 32-bit words held in four vectors, in place.
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+  // Step 1: interleave 32-bit lanes of neighbouring rows. The low unpack
+  // pairs up lanes 0 and 1 of both rows, the high unpack lanes 2 and 3.
+  // Unlike the AVX2 counterparts, nothing is split into 128-bit halves here.
+  const __m128i rows01_lo = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  const __m128i rows01_hi = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  const __m128i rows23_lo = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  const __m128i rows23_hi = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+  // Step 2: interleave 64-bit halves to complete each output row.
+  vecs[0] = _mm_unpacklo_epi64(rows01_lo, rows23_lo);
+  vecs[1] = _mm_unpackhi_epi64(rows01_lo, rows23_lo);
+  vecs[2] = _mm_unpacklo_epi64(rows01_hi, rows23_hi);
+  vecs[3] = _mm_unpackhi_epi64(rows01_hi, rows23_hi);
+}
+
+// Load the 64-byte block at `block_offset` from each of the four inputs and
+// transpose it so that out[w] holds message word w of all four inputs.
+// Also issues a prefetch 256 bytes past `block_offset` for every input.
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+                               size_t block_offset, __m128i out[16]) {
+  // out[4 * quarter + lane] <- quarter-th 16-byte piece of input `lane`.
+  for (size_t quarter = 0; quarter < 4; ++quarter) {
+    const size_t piece_offset = block_offset + quarter * sizeof(__m128i);
+    for (size_t lane = 0; lane < 4; ++lane) {
+      out[4 * quarter + lane] = loadu(&inputs[lane][piece_offset]);
+    }
+  }
+  for (size_t lane = 0; lane < 4; ++lane) {
+    _mm_prefetch((const void *)&inputs[lane][block_offset + 256], _MM_HINT_T0);
+  }
+  // Each group of four vectors is an independent 4x4 transpose.
+  for (size_t group = 0; group < 16; group += 4) {
+    transpose_vecs(&out[group]);
+  }
+}
+
+// Produce the per-lane 64-bit counters for four inputs, split into vectors of
+// low and high 32-bit halves. With `increment_counter` set, lane i gets
+// counter + i; otherwise every lane gets `counter`.
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+                          __m128i *out_lo, __m128i *out_hi) {
+  // All-ones when incrementing, all-zeros otherwise.
+  const __m128i enable = _mm_set1_epi32(-(int32_t)increment_counter);
+  const __m128i lane_offsets =
+      _mm_and_si128(enable, _mm_set_epi32(3, 2, 1, 0));
+  const __m128i low =
+      _mm_add_epi32(_mm_set1_epi32((int32_t)counter), lane_offsets);
+
+  // The compare below is signed, so flip the sign bit of both operands to
+  // get an unsigned comparison. offset > sum means the 32-bit add wrapped.
+  const __m128i sign_bit = _mm_set1_epi32(0x80000000);
+  const __m128i wrapped = _mm_cmpgt_epi32(_mm_xor_si128(lane_offsets, sign_bit),
+                                          _mm_xor_si128(low, sign_bit));
+
+  // Wrapped lanes are all-ones (-1), so subtracting them adds the carry.
+  const __m128i high =
+      _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), wrapped);
+
+  *out_lo = low;
+  *out_hi = high;
+}
+
+// Hash four inputs of `blocks` blocks each in parallel, one input per 32-bit
+// lane, and write the four BLAKE3_OUT_LEN-byte chaining values back to back
+// into `out`. `flags_start` applies to the first block only and `flags_end`
+// to the last block only.
+static void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
+                               const uint32_t key[8], uint64_t counter,
+                               bool increment_counter, uint8_t flags,
+                               uint8_t flags_start, uint8_t flags_end,
+                               uint8_t *out) {
+  // cv_vecs[w] holds chaining-value word w for all four inputs.
+  __m128i cv_vecs[8];
+  for (size_t word = 0; word < 8; word++) {
+    cv_vecs[word] = set1(key[word]);
+  }
+
+  __m128i counter_lo, counter_hi;
+  load_counters(counter, increment_counter, &counter_lo, &counter_hi);
+
+  // Every block is a full block, so the length word never changes.
+  const __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+  uint8_t block_flags = flags | flags_start;
+
+  for (size_t block = 0; block < blocks; block++) {
+    if (block + 1 == blocks) {
+      block_flags |= flags_end;
+    }
+    const __m128i block_flags_vec = set1(block_flags);
+
+    __m128i msg_vecs[16];
+    transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+    __m128i v[16] = {
+        cv_vecs[0],  cv_vecs[1],  cv_vecs[2],    cv_vecs[3],
+        cv_vecs[4],  cv_vecs[5],  cv_vecs[6],    cv_vecs[7],
+        set1(IV[0]), set1(IV[1]), set1(IV[2]),   set1(IV[3]),
+        counter_lo,  counter_hi,  block_len_vec, block_flags_vec,
+    };
+    round_fn(v, msg_vecs, 0);
+    round_fn(v, msg_vecs, 1);
+    round_fn(v, msg_vecs, 2);
+    round_fn(v, msg_vecs, 3);
+    round_fn(v, msg_vecs, 4);
+    round_fn(v, msg_vecs, 5);
+    round_fn(v, msg_vecs, 6);
+
+    // Feed-forward: new CV word w is state word w XOR state word w + 8.
+    for (size_t word = 0; word < 8; word++) {
+      cv_vecs[word] = xorv(v[word], v[word + 8]);
+    }
+
+    // Blocks after the first no longer carry flags_start.
+    block_flags = flags;
+  }
+
+  transpose_vecs(&cv_vecs[0]);
+  transpose_vecs(&cv_vecs[4]);
+  // After the transposes, cv_vecs[i] is the first half of output i and
+  // cv_vecs[i + 4] is its second half; store them next to each other.
+  for (size_t input = 0; input < 4; input++) {
+    storeu(cv_vecs[input], &out[(2 * input) * sizeof(__m128i)]);
+    storeu(cv_vecs[input + 4], &out[(2 * input + 1) * sizeof(__m128i)]);
+  }
+}
+
+// Hash one input made of `blocks` consecutive BLAKE3_BLOCK_LEN-byte blocks.
+// The chaining value starts as `key`, is updated in place by every block
+// compression, and is finally copied to `out`. `flags_start` is OR-ed into
+// the first block's flags only and `flags_end` into the last block's only.
+INLINE void hash_one_sse41(const uint8_t *input, size_t blocks,
+                           const uint32_t key[8], uint64_t counter,
+                           uint8_t flags, uint8_t flags_start,
+                           uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+  uint32_t chaining_value[8];
+  memcpy(chaining_value, key, BLAKE3_KEY_LEN);
+  uint8_t current_flags = flags | flags_start;
+  for (size_t remaining = blocks; remaining > 0; remaining--) {
+    if (remaining == 1) {
+      current_flags |= flags_end;
+    }
+    blake3_compress_in_place_sse41(chaining_value, input, BLAKE3_BLOCK_LEN,
+                                   counter, current_flags);
+    input += BLAKE3_BLOCK_LEN;
+    // Every block after the first starts again from the plain flags.
+    current_flags = flags;
+  }
+  memcpy(out, chaining_value, BLAKE3_OUT_LEN);
+}
+
+// Hash `num_inputs` inputs of `blocks` blocks each. Groups of DEGREE inputs
+// are handed to blake3_hash4_sse41; whatever is left over is hashed one input
+// at a time. Every input yields BLAKE3_OUT_LEN bytes, written back to back
+// into `out`. When `increment_counter` is set the counter advances by one per
+// input; otherwise every input sees the same counter.
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+                            size_t blocks, const uint32_t key[8],
+                            uint64_t counter, bool increment_counter,
+                            uint8_t flags, uint8_t flags_start,
+                            uint8_t flags_end, uint8_t *out) {
+  // Counter advance per hashed input: 1 when incrementing, 0 otherwise.
+  const uint64_t counter_step = increment_counter ? 1 : 0;
+
+  for (; num_inputs >= DEGREE; num_inputs -= DEGREE) {
+    blake3_hash4_sse41(inputs, blocks, key, counter, increment_counter, flags,
+                       flags_start, flags_end, out);
+    counter += counter_step * DEGREE;
+    inputs += DEGREE;
+    out += DEGREE * BLAKE3_OUT_LEN;
+  }
+
+  for (; num_inputs > 0; num_inputs -= 1) {
+    hash_one_sse41(inputs[0], blocks, key, counter, flags, flags_start,
+                   flags_end, out);
+    counter += counter_step;
+    inputs += 1;
+    out += BLAKE3_OUT_LEN;
+  }
+}
--- a/external/blake3/blake3_sse41_x86-64_unix.S
+++ b/external/blake3/blake3_sse41_x86-64_unix.S
--- a/external/blake3/blake3_sse41_x86-64_windows_gnu.S
+++ b/external/blake3/blake3_sse41_x86-64_windows_gnu.S
--- a/external/blake3/blake3_sse41_x86-64_windows_msvc.asm
+++ b/external/blake3/blake3_sse41_x86-64_windows_msvc.asm
--- a/external/blake3/blake3_tbb.cpp
+++ b/external/blake3/blake3_tbb.cpp
@@ -0,0 +1,37 @@
+#include <cstddef>
+#include <cstdint>
+
+#include <oneapi/tbb/parallel_invoke.h>
+
+#include "blake3_impl.h"
+
+static_assert(TBB_USE_EXCEPTIONS == 0,
+              "This file should be compiled with C++ exceptions disabled.");
+
+// Compress the left and right subtrees and store the number of chaining
+// values each produced in *l_n / *r_n. With `use_tbb` set the two halves are
+// handed to oneTBB's parallel_invoke; otherwise they run one after the other
+// on the calling thread, left first.
+extern "C" void blake3_compress_subtree_wide_join_tbb(
+    // shared params
+    const uint32_t key[8], uint8_t flags, bool use_tbb,
+    // left-hand side params
+    const uint8_t *l_input, size_t l_input_len, uint64_t l_chunk_counter,
+    uint8_t *l_cvs, size_t *l_n,
+    // right-hand side params
+    const uint8_t *r_input, size_t r_input_len, uint64_t r_chunk_counter,
+    uint8_t *r_cvs, size_t *r_n) noexcept {
+  // Both tasks capture their arguments by value; they write through
+  // separate output pointers (l_cvs/l_n vs. r_cvs/r_n).
+  const auto compress_left = [=]() {
+    *l_n = blake3_compress_subtree_wide(l_input, l_input_len, key,
+                                        l_chunk_counter, flags, l_cvs, use_tbb);
+  };
+  const auto compress_right = [=]() {
+    *r_n = blake3_compress_subtree_wide(r_input, r_input_len, key,
+                                        r_chunk_counter, flags, r_cvs, use_tbb);
+  };
+
+  if (use_tbb) {
+    oneapi::tbb::parallel_invoke(compress_left, compress_right);
+  } else {
+    compress_left();
+    compress_right();
+  }
+}
--- a/external/blake3/cmake/BLAKE3/ContinuousIntegration.cmake
+++ b/external/blake3/cmake/BLAKE3/ContinuousIntegration.cmake
@@ -0,0 +1,235 @@
+# CI-only build logic: defines the `blake3-testing` library and the
+# `blake3-asm-test` executable, and registers them with CTest.
+cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+
+# The CI configuration is rejected outright for shared-library builds.
+if(BUILD_SHARED_LIBS)
+  message(FATAL_ERROR "BUILD_SHARED_LIBS is incompatible with BLAKE3_TESTING_CI")
+endif()
+
+include(CTest)
+
+# Declare a testing specific variant of the `blake3` library target.
+#
+# We use a separate library target in order to be able to perform compilation with various
+# combinations of features which are too noisy to specify in the main CMake config as options for
+# the normal `blake3` target.
+#
+# Initially this target has no properties but eventually we will populate them by copying all of the
+# relevant properties from the normal `blake3` target.
+add_library(blake3-testing
+  blake3.c
+  blake3_dispatch.c
+  blake3_portable.c
+)
+
+# The TBB glue source is only compiled when TBB was requested and found.
+if(BLAKE3_USE_TBB AND TBB_FOUND)
+  target_sources(blake3-testing
+    PRIVATE
+      blake3_tbb.cpp)
+endif()
+
+# Select the SIMD implementation sources for `blake3-testing` according to
+# BLAKE3_SIMD_TYPE ("amd64-asm", "x86-intrinsics", "neon-intrinsics", "none").
+# Individual x86 instruction sets can be left out with BLAKE3_NO_<SET>.
+if(BLAKE3_SIMD_TYPE STREQUAL "amd64-asm")
+  # Conditionally add amd64 asm files to `blake3-testing` sources
+  if(MSVC)
+    if(NOT BLAKE3_NO_AVX2)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_AVX512)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_SSE2)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_SSE41)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_windows_msvc.asm)
+    endif()
+  elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
+        OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
+        OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
+    if (WIN32)
+      if(NOT BLAKE3_NO_AVX2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_AVX512)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE41)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_windows_gnu.S)
+      endif()
+    elseif(UNIX)
+      if(NOT BLAKE3_NO_AVX2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_AVX512)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE41)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_unix.S)
+      endif()
+    endif()
+  endif()
+  # Use the list assembled above, so the BLAKE3_NO_* switches actually decide
+  # which asm files end up in the testing target.
+  target_sources(blake3-testing PRIVATE ${BLAKE3_TESTING_AMD64_ASM_SOURCES})
+elseif(BLAKE3_SIMD_TYPE STREQUAL "x86-intrinsics")
+  # Conditionally add amd64 C files to `blake3-testing` sources
+  if (NOT DEFINED BLAKE3_CFLAGS_SSE2
+      OR NOT DEFINED BLAKE3_CFLAGS_SSE4.1
+      OR NOT DEFINED BLAKE3_CFLAGS_AVX2
+      OR NOT DEFINED BLAKE3_CFLAGS_AVX512)
+    message(WARNING "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
+  else()
+    set(BLAKE3_SIMD_X86_INTRINSICS ON)
+  endif()
+
+  # Each intrinsics file is compiled with the flags for its own instruction set.
+  if(NOT BLAKE3_NO_AVX2)
+    target_sources(blake3-testing PRIVATE blake3_avx2.c)
+    set_source_files_properties(blake3_avx2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX2}")
+  endif()
+  if(NOT BLAKE3_NO_AVX512)
+    target_sources(blake3-testing PRIVATE blake3_avx512.c)
+    set_source_files_properties(blake3_avx512.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX512}")
+  endif()
+  if(NOT BLAKE3_NO_SSE2)
+    target_sources(blake3-testing PRIVATE blake3_sse2.c)
+    set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
+  endif()
+  if(NOT BLAKE3_NO_SSE41)
+    target_sources(blake3-testing PRIVATE blake3_sse41.c)
+    set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")
+  endif()
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
+  # Conditionally add neon C files to `blake3-testing` sources
+
+  target_sources(blake3-testing PRIVATE
+    blake3_neon.c
+  )
+  target_compile_definitions(blake3-testing PRIVATE
+    BLAKE3_USE_NEON=1
+  )
+
+  if (DEFINED BLAKE3_CFLAGS_NEON)
+    set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
+  endif()
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
+  # Disable neon if simd type is "none". We check for individual amd64 features further below.
+
+  target_compile_definitions(blake3-testing PRIVATE
+    BLAKE3_USE_NEON=0
+  )
+
+endif()
+
+# Forward each BLAKE3_NO_<SET> switch to the C sources as a preprocessor
+# definition of the same name.
+if(BLAKE3_NO_AVX2)
+  target_compile_definitions(blake3-testing PRIVATE BLAKE3_NO_AVX2)
+endif()
+if(BLAKE3_NO_AVX512)
+  target_compile_definitions(blake3-testing PRIVATE BLAKE3_NO_AVX512)
+endif()
+if(BLAKE3_NO_SSE2)
+  target_compile_definitions(blake3-testing PRIVATE BLAKE3_NO_SSE2)
+endif()
+if(BLAKE3_NO_SSE41)
+  target_compile_definitions(blake3-testing PRIVATE BLAKE3_NO_SSE41)
+endif()
+
+# PUBLIC, so targets linking against blake3-testing see BLAKE3_TESTING too.
+target_compile_definitions(blake3-testing PUBLIC BLAKE3_TESTING)
+
+# Copy the relevant properties of the regular `blake3` target. Each property
+# is only acted on when `blake3` actually has a value for it.
+get_target_property(BLAKE3_COMPILE_DEFINITIONS blake3 COMPILE_DEFINITIONS)
+if(BLAKE3_COMPILE_DEFINITIONS)
+  target_compile_definitions(blake3-testing PUBLIC
+    ${BLAKE3_COMPILE_DEFINITIONS})
+endif()
+
+# NOTE(review): the warning, hardening and sanitizer flags below are added
+# only if `blake3` has COMPILE_OPTIONS set -- confirm that is intended.
+get_target_property(BLAKE3_COMPILE_OPTIONS blake3 COMPILE_OPTIONS)
+if(BLAKE3_COMPILE_OPTIONS)
+  target_compile_options(blake3-testing PRIVATE
+    ${BLAKE3_COMPILE_OPTIONS}
+    -O3
+    -Wall
+    -Wextra
+    -pedantic
+    -fstack-protector-strong
+    -D_FORTIFY_SOURCE=2
+    -fPIE
+    -fvisibility=hidden
+    -fsanitize=address,undefined
+  )
+endif()
+
+# NOTE(review): the fetched directory list is only used as a condition; the
+# include paths themselves are spelled out here rather than copied.
+get_target_property(BLAKE3_INCLUDE_DIRECTORIES blake3 INCLUDE_DIRECTORIES)
+if(BLAKE3_INCLUDE_DIRECTORIES)
+  target_include_directories(blake3-testing PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+  )
+endif()
+
+get_target_property(BLAKE3_LINK_LIBRARIES blake3 LINK_LIBRARIES)
+if(BLAKE3_LINK_LIBRARIES)
+  target_link_libraries(blake3-testing PRIVATE ${BLAKE3_LINK_LIBRARIES})
+endif()
+
+# The sanitizer must also be passed at link time, matching the compile flags.
+get_target_property(BLAKE3_LINK_OPTIONS blake3 LINK_OPTIONS)
+if(BLAKE3_LINK_OPTIONS)
+  target_link_options(blake3-testing PRIVATE
+    ${BLAKE3_LINK_OPTIONS}
+    -fsanitize=address,undefined
+    -pie
+    -Wl,-z,relro,-z,now
+  )
+endif()
+
+# test asm target
+#
+# Builds main.c against `blake3-testing`. The executable is named `blake3`
+# and placed in the top-level source directory.
+add_executable(blake3-asm-test
+  main.c
+)
+set_target_properties(blake3-asm-test PROPERTIES
+  OUTPUT_NAME blake3
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
+target_link_libraries(blake3-asm-test PRIVATE blake3-testing)
+target_compile_definitions(blake3-asm-test PRIVATE BLAKE3_TESTING)
+# Same warning, hardening and sanitizer flags as listed for blake3-testing.
+target_compile_options(blake3-asm-test PRIVATE
+  -O3
+  -Wall
+  -Wextra
+  -pedantic
+  -fstack-protector-strong
+  -D_FORTIFY_SOURCE=2
+  -fPIE
+  -fvisibility=hidden
+  -fsanitize=address,undefined
+)
+target_link_options(blake3-asm-test PRIVATE
+  -fsanitize=address,undefined
+  -pie
+  -Wl,-z,relro,-z,now
+)
+
+# The test re-runs CTest in build-and-test mode: it configures this project
+# from scratch (--fresh) with the current option values, builds
+# `blake3-asm-test`, and then runs test.py from the source directory.
+add_test(NAME blake3-testing
+  COMMAND "${CMAKE_CTEST_COMMAND}"
+    --verbose
+    --extra-verbose
+    --build-and-test "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}"
+    --build-generator "${CMAKE_GENERATOR}"
+    --build-makeprogram "${CMAKE_MAKE_PROGRAM}"
+    --build-project libblake3
+    --build-target blake3-asm-test
+    --build-options
+      --fresh
+      "-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}"
+      "-DBLAKE3_TESTING=${BLAKE3_TESTING}"
+      "-DBLAKE3_TESTING_CI=${BLAKE3_TESTING_CI}"
+      "-DBLAKE3_USE_TBB=${BLAKE3_USE_TBB}"
+      "-DBLAKE3_SIMD_TYPE=${BLAKE3_SIMD_TYPE}"
+      "-DBLAKE3_NO_SSE2=${BLAKE3_NO_SSE2}"
+      "-DBLAKE3_NO_SSE41=${BLAKE3_NO_SSE41}"
+      "-DBLAKE3_NO_AVX2=${BLAKE3_NO_AVX2}"
+      "-DBLAKE3_NO_AVX512=${BLAKE3_NO_AVX512}"
+    --test-command
+      "${CMAKE_SOURCE_DIR}/test.py"
+  )
--- a/external/blake3/cmake/BLAKE3/Examples.cmake
+++ b/external/blake3/cmake/BLAKE3/Examples.cmake
@@ -0,0 +1,13 @@
+# Example programs. They are skipped on Windows (example.c includes
+# <unistd.h>). The TBB example is only built when BLAKE3_USE_TBB is set.
+if(NOT WIN32)
+  add_executable(blake3-example
+    example.c)
+  target_link_libraries(blake3-example PRIVATE blake3)
+  install(TARGETS blake3-example)
+
+  if(BLAKE3_USE_TBB)
+    add_executable(blake3-example-tbb
+      example_tbb.c)
+    target_link_libraries(blake3-example-tbb PRIVATE blake3)
+    install(TARGETS blake3-example-tbb)
+  endif()
+endif()
--- a/external/blake3/cmake/BLAKE3/Testing.cmake
+++ b/external/blake3/cmake/BLAKE3/Testing.cmake
@@ -0,0 +1,3 @@
+# Pull in the CI-specific targets and tests only when BLAKE3_TESTING_CI is set.
+if(BLAKE3_TESTING_CI)
+  include(BLAKE3/ContinuousIntegration)
+endif()
--- a/external/blake3/dependencies/CMakeLists.txt
+++ b/external/blake3/dependencies/CMakeLists.txt
@@ -0,0 +1,3 @@
+# The tbb subdirectory is only processed when BLAKE3_USE_TBB is enabled.
+if(NOT BLAKE3_USE_TBB)
+    return()
+endif()
+
+add_subdirectory(tbb)
--- a/external/blake3/dependencies/tbb/CMakeLists.txt
+++ b/external/blake3/dependencies/tbb/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Prefer a TBB installation (version 2021.11.0 or compatible) that is
+# already available on the system.
+find_package(TBB 2021.11.0 QUIET)
+
+# Fall back to fetching oneTBB when it was not found and BLAKE3_FETCH_TBB is
+# enabled. FetchContent_MakeAvailable() was introduced in CMake 3.14, so the
+# fetch path is gated on that version rather than on 3.11 (where only the
+# FetchContent module itself first appeared).
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.14)
+  include(FetchContent)
+
+  if(NOT TBB_FOUND AND BLAKE3_FETCH_TBB)
+    # Language standards applied to the fetched oneTBB sources.
+    set(CMAKE_C_STANDARD 99)
+    set(CMAKE_C_EXTENSIONS OFF)
+
+    set(CMAKE_CXX_STANDARD 20)
+    set(CMAKE_CXX_EXTENSIONS ON)
+
+    # option(<variable> "<help_text>" [value]): the help text comes second
+    # and the initial value third. Both options default to OFF.
+    option(TBB_TEST "Enable testing" OFF)
+    option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF)
+
+    mark_as_advanced(TBB_TEST)
+    mark_as_advanced(TBBMALLOC_BUILD)
+
+    FetchContent_Declare(
+      TBB
+      GIT_REPOSITORY https://github.com/uxlfoundation/oneTBB
+      GIT_TAG 0c0ff192a2304e114bc9e6557582dfba101360ff # v2022.0.0
+      GIT_SHALLOW TRUE
+    )
+
+    FetchContent_MakeAvailable(TBB)
+  endif()
+elseif(NOT TBB_FOUND AND BLAKE3_FETCH_TBB)
+  # Make the reason for the missing dependency visible instead of failing
+  # later on an unknown command.
+  message(WARNING
+    "BLAKE3_FETCH_TBB requires CMake 3.14 or newer; TBB was not found and will not be fetched.")
+endif()
--- a/external/blake3/example.c
+++ b/external/blake3/example.c
@@ -0,0 +1,36 @@
+#include "blake3.h"
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Hash everything read from standard input with BLAKE3 and print the
+ * default-length digest as lowercase hex followed by a newline.
+ * Returns 0 on success, 1 if read() fails.
+ */
+int main(void) {
+  blake3_hasher hasher;
+  unsigned char chunk[65536];
+
+  blake3_hasher_init(&hasher);
+
+  // Feed stdin to the hasher chunk by chunk until end of file.
+  for (;;) {
+    ssize_t got = read(STDIN_FILENO, chunk, sizeof(chunk));
+    if (got < 0) {
+      fprintf(stderr, "read failed: %s\n", strerror(errno));
+      return 1;
+    }
+    if (got == 0) {
+      break; // end of file
+    }
+    blake3_hasher_update(&hasher, chunk, got);
+  }
+
+  // Produce the digest; BLAKE3_OUT_LEN is the default output length.
+  uint8_t digest[BLAKE3_OUT_LEN];
+  blake3_hasher_finalize(&hasher, digest, BLAKE3_OUT_LEN);
+
+  // Emit two hex digits per digest byte.
+  size_t pos = 0;
+  while (pos < BLAKE3_OUT_LEN) {
+    printf("%02x", digest[pos]);
+    pos++;
+  }
+  printf("\n");
+  return 0;
+}
--- a/external/blake3/example_tbb.c
+++ b/external/blake3/example_tbb.c
@@ -0,0 +1,57 @@
+#include "blake3.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Hash everything readable from fd using plain read() calls.
+ * Returns 0 at end of file, -1 if read() fails (errno is left as set by
+ * read()).
+ */
+static int hash_by_reading(blake3_hasher *hasher, int fd) {
+  unsigned char buf[65536];
+  for (;;) {
+    ssize_t n = read(fd, buf, sizeof(buf));
+    if (n < 0) {
+      return -1;
+    }
+    if (n == 0) {
+      return 0;
+    }
+    blake3_hasher_update(hasher, buf, (size_t)n);
+  }
+}
+
+/*
+ * For each file path argument, hash the file with BLAKE3 and print the
+ * default-length digest as lowercase hex, one line per file.
+ *
+ * Files with a non-zero size are memory mapped and hashed with
+ * blake3_hasher_update_tbb(). Files that report a size of zero cannot be
+ * mapped (mmap() rejects a zero length), so they are read with read()
+ * instead; an empty file therefore yields the hash of the empty input.
+ *
+ * Returns 0 on success, 1 on the first failing system call.
+ */
+int main(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    // Open the file and look up its size.
+    int fd = open(argv[i], O_RDONLY);
+    if (fd == -1) {
+      fprintf(stderr, "open failed: %s\n", strerror(errno));
+      return 1;
+    }
+    struct stat statbuf;
+    if (fstat(fd, &statbuf) == -1) {
+      fprintf(stderr, "stat failed: %s\n", strerror(errno));
+      close(fd);
+      return 1;
+    }
+
+    // Initialize the hasher.
+    blake3_hasher hasher;
+    blake3_hasher_init(&hasher);
+
+    if (statbuf.st_size > 0) {
+      // Memory map the file and hash the mapping using multiple threads.
+      size_t len = (size_t)statbuf.st_size;
+      void *mapped = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
+      if (mapped == MAP_FAILED) {
+        fprintf(stderr, "mmap failed: %s\n", strerror(errno));
+        close(fd);
+        return 1;
+      }
+
+      blake3_hasher_update_tbb(&hasher, mapped, len);
+
+      if (munmap(mapped, len) == -1) {
+        fprintf(stderr, "munmap failed: %s\n", strerror(errno));
+        close(fd);
+        return 1;
+      }
+    } else if (hash_by_reading(&hasher, fd) == -1) {
+      // Zero reported size: nothing to map, so the content was streamed.
+      fprintf(stderr, "read failed: %s\n", strerror(errno));
+      close(fd);
+      return 1;
+    }
+
+    if (close(fd) == -1) {
+      fprintf(stderr, "close failed: %s\n", strerror(errno));
+      return 1;
+    }
+
+    // Finalize the hash. BLAKE3_OUT_LEN is the default output length, 32 bytes.
+    uint8_t output[BLAKE3_OUT_LEN];
+    blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
+
+    // Print the hash as hexadecimal.
+    for (size_t j = 0; j < BLAKE3_OUT_LEN; j++) {
+      printf("%02x", output[j]);
+    }
+    printf("\n");
+  }
+  return 0;
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
JCW	af315c0c0a	Test blake3	2025-06-26 13:27:55 +01:00
JCW	ecfbe28837	Add blake 3 and skip some unit tests	2025-06-25 15:01:44 +01:00
JCW	033b8cc9e5	Fix a PR comment	2025-06-25 11:30:02 +01:00
JCW	5319edffb0	Fix comments	2025-06-25 11:28:04 +01:00
JCW	4cd5273b44	Add XRPL_ABANDON	2025-06-25 10:38:28 +01:00