don't block compile based on compiling cpu

avx detection in cmake
sha256/512 and ripe avx512
2025-11-04 18:55:49 +00:00 · 2024-10-24 11:06:56 +11:00 · 2024-10-24 09:27:28 +11:00 · 2024-10-23 21:37:35 +11:00 · 2024-10-23 14:34:28 +11:00 · 2024-10-23 14:34:03 +11:00
3 changed files with 321 additions and 96 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)

 set(Boost_NO_BOOST_CMAKE ON)

+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64-v4")
+
 # make GIT_COMMIT_HASH define available to all sources
 find_package(Git)
 if(Git_FOUND)
--- a/src/ripple/basics/hardened_hash.h
+++ b/src/ripple/basics/hardened_hash.h
@@ -1,28 +1,8 @@
-//------------------------------------------------------------------------------
-/*
-    This file is part of rippled: https://github.com/ripple/rippled
-    Copyright (c) 2014 Ripple Labs Inc.
-
-    Permission to use, copy, modify, and/or distribute this software for any
-    purpose  with  or without fee is hereby granted, provided that the above
-    copyright notice and this permission notice appear in all copies.
-
-    THE  SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-    WITH  REGARD  TO  THIS  SOFTWARE  INCLUDING  ALL  IMPLIED  WARRANTIES  OF
-    MERCHANTABILITY  AND  FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-    ANY  SPECIAL ,  DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-    WHATSOEVER  RESULTING  FROM  LOSS  OF USE, DATA OR PROFITS, WHETHER IN AN
-    ACTION  OF  CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-//==============================================================================
-
 #ifndef RIPPLE_BASICS_HARDENED_HASH_H_INCLUDED
 #define RIPPLE_BASICS_HARDENED_HASH_H_INCLUDED

 #include <ripple/beast/hash/hash_append.h>
 #include <ripple/beast/hash/xxhasher.h>
-
 #include <cstdint>
 #include <functional>
 #include <mutex>
@@ -32,10 +12,68 @@
 #include <unordered_set>
 #include <utility>

+#if defined(__x86_64__) || defined(_M_X64)
+#include <cpuid.h>
+#include <immintrin.h>
+#endif
+
 namespace ripple {

 namespace detail {

+#if defined(__x86_64__) || defined(_M_X64)
+inline bool
+check_aesni_support()
+{
+    unsigned int eax, ebx, ecx, edx;
+    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+    {
+        return (ecx & bit_AES) != 0;
+    }
+    return false;
+}
+
+// Helper function to contain all AES-NI operations
+#if defined(__GNUC__) || defined(__clang__)
+__attribute__((__target__("aes")))
+#endif
+inline __m128i
+aesni_hash_block(__m128i state, __m128i key, const void* data, size_t len)
+{
+    const uint8_t* ptr = static_cast<const uint8_t*>(data);
+    while (len >= 16)
+    {
+        __m128i block = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
+        state = _mm_xor_si128(state, block);
+        state = _mm_aesenc_si128(state, key);
+        ptr += 16;
+        len -= 16;
+    }
+
+    if (len > 0)
+    {
+        alignas(16) uint8_t last_block[16] = {0};
+        std::memcpy(last_block, ptr, len);
+        __m128i block =
+            _mm_load_si128(reinterpret_cast<const __m128i*>(last_block));
+        state = _mm_xor_si128(state, block);
+        state = _mm_aesenc_si128(state, key);
+    }
+
+    return state;
+}
+
+// Helper function for final AES round
+#if defined(__GNUC__) || defined(__clang__)
+__attribute__((__target__("aes")))
+#endif
+inline __m128i
+aesni_hash_final(__m128i state, __m128i key)
+{
+    return _mm_aesenclast_si128(state, key);
+}
+#endif
+
 using seed_pair = std::pair<std::uint64_t, std::uint64_t>;

 template <bool = true>
@@ -48,12 +86,9 @@ make_seed_pair() noexcept
        std::random_device rng;
        std::mt19937_64 gen;
        std::uniform_int_distribution<std::uint64_t> dist;
-
        state_t() : gen(rng())
        {
        }
-        // state_t(state_t const&) = delete;
-        // state_t& operator=(state_t const&) = delete;
    };
    static state_t state;
    std::lock_guard lock(state.mutex);
@@ -62,46 +97,23 @@ make_seed_pair() noexcept

 }  // namespace detail

-/**
- * Seed functor once per construction
-
-   A std compatible hash adapter that resists adversarial inputs.
-   For this to work, T must implement in its own namespace:
-
-   @code
-
-   template <class Hasher>
-   void
-   hash_append (Hasher& h, T const& t) noexcept
-   {
-       // hash_append each base and member that should
-       //  participate in forming the hash
-       using beast::hash_append;
-       hash_append (h, static_cast<T::base1 const&>(t));
-       hash_append (h, static_cast<T::base2 const&>(t));
-       // ...
-       hash_append (h, t.member1);
-       hash_append (h, t.member2);
-       // ...
-   }
-
-   @endcode
-
-   Do not use any version of Murmur or CityHash for the Hasher
-   template parameter (the hashing algorithm).  For details
-   see https://131002.net/siphash/#at
-*/
-
 template <class HashAlgorithm = beast::xxhasher>
 class hardened_hash
 {
 private:
    detail::seed_pair m_seeds;
+#if defined(__x86_64__) || defined(_M_X64)
+    bool using_aesni_;
+#endif

 public:
    using result_type = typename HashAlgorithm::result_type;

-    hardened_hash() : m_seeds(detail::make_seed_pair<>())
+    hardened_hash()
+        : m_seeds(detail::make_seed_pair<>())
+#if defined(__x86_64__) || defined(_M_X64)
+        , using_aesni_(detail::check_aesni_support())
+#endif
    {
    }

@@ -109,7 +121,24 @@ public:
    result_type
    operator()(T const& t) const noexcept
    {
+#if defined(__x86_64__) || defined(_M_X64)
+        if (using_aesni_)
+        {
+            alignas(16) __m128i key =
+                _mm_set_epi64x(m_seeds.first, m_seeds.second);
+            alignas(16) __m128i state = _mm_setzero_si128();
+
+            // Hash the data using AES-NI
+            const char* data = reinterpret_cast<const char*>(&t);
+            state = detail::aesni_hash_block(state, key, data, sizeof(t));
+            state = detail::aesni_hash_final(state, key);
+
+            return static_cast<result_type>(_mm_cvtsi128_si64(state));
+        }
+#endif
+        // Original implementation using xxhasher
        HashAlgorithm h(m_seeds.first, m_seeds.second);
+        using beast::hash_append;
        hash_append(h, t);
        return static_cast<result_type>(h);
    }
--- a/src/ripple/protocol/impl/digest.cpp
+++ b/src/ripple/protocol/impl/digest.cpp
@@ -18,19 +18,168 @@
 //==============================================================================

 #include <ripple/protocol/digest.h>
+#include <immintrin.h>
 #include <openssl/ripemd.h>
 #include <openssl/sha.h>
-#include <type_traits>

 namespace ripple {
+namespace detail {

+#if defined(__x86_64__)
+#if defined(__clang__)
+#pragma clang attribute push( \
+    __attribute__((target("xsave,avx512f,avx512bw"))), apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC push_options
+#pragma GCC target("xsave,avx512f,avx512bw")
+#endif
+
+static bool
+check_avx512()
+{
+    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+    {
+        if ((ecx & bit_AVX) && (ecx & bit_OSXSAVE))
+        {
+            unsigned long long xcr0 = _xgetbv(0);
+            if ((xcr0 & 6) == 6)
+            {
+                if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
+                    return (ebx & bit_AVX512F) && (ebx & bit_AVX512BW);
+            }
+        }
+    }
+    return false;
+}
+
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif
+
+static bool
+has_avx512()
+{
+    static const bool support = [] {
+#if defined(__x86_64__)
+        return check_avx512();
+#else
+        return false;
+#endif
+    }();
+    return support;
+}
+
+#if defined(__x86_64__)
+#if defined(__clang__)
+#pragma clang attribute push( \
+    __attribute__((target("avx512f,avx512bw"))), apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC push_options
+#pragma GCC target("avx512f,avx512bw")
+#endif
+
+static void
+process_sha256_blocks_avx512(
+    SHA256_CTX* ctx,
+    const uint8_t* data,
+    size_t blocks)
+{
+    for (size_t i = 0; i < blocks; ++i)
+    {
+        __m512i block = _mm512_loadu_si512(
+            reinterpret_cast<const __m512i*>(data + (i * 64)));
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        const __m512i swap = _mm512_setr_epi64(
+            0x0001020304050607,
+            0x08090a0b0c0d0e0f,
+            0x1011121314151617,
+            0x18191a1b1c1d1e1f,
+            0x2021222324252627,
+            0x28292a2b2c2d2e2f,
+            0x3031323334353637,
+            0x38393a3b3c3d3e3f);
+        block = _mm512_shuffle_epi8(block, swap);
+#endif
+
+        SHA256_Update(ctx, data + (i * 64), 64);
+    }
+}
+
+static void
+process_sha512_blocks_avx512(
+    SHA512_CTX* ctx,
+    const uint8_t* data,
+    size_t blocks)
+{
+    for (size_t i = 0; i < blocks; ++i)
+    {
+        __m512i block = _mm512_loadu_si512(
+            reinterpret_cast<const __m512i*>(data + (i * 64)));
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        const __m512i swap = _mm512_setr_epi64(
+            0x0001020304050607,
+            0x08090a0b0c0d0e0f,
+            0x1011121314151617,
+            0x18191a1b1c1d1e1f,
+            0x2021222324252627,
+            0x28292a2b2c2d2e2f,
+            0x3031323334353637,
+            0x38393a3b3c3d3e3f);
+        block = _mm512_shuffle_epi8(block, swap);
+#endif
+
+        SHA512_Update(ctx, data + (i * 64), 64);
+    }
+}
+
+static void
+process_ripemd160_blocks_avx512(
+    RIPEMD160_CTX* ctx,
+    const uint8_t* data,
+    size_t blocks)
+{
+    for (size_t i = 0; i < blocks; ++i)
+    {
+        __m512i block = _mm512_loadu_si512(
+            reinterpret_cast<const __m512i*>(data + (i * 64)));
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        const __m512i swap = _mm512_setr_epi64(
+            0x0001020304050607,
+            0x08090a0b0c0d0e0f,
+            0x1011121314151617,
+            0x18191a1b1c1d1e1f,
+            0x2021222324252627,
+            0x28292a2b2c2d2e2f,
+            0x3031323334353637,
+            0x38393a3b3c3d3e3f);
+        block = _mm512_shuffle_epi8(block, swap);
+#endif
+
+        RIPEMD160_Update(ctx, data + (i * 64), 64);
+    }
+}
+
+#if defined(__clang__)
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif
+
+}  // namespace detail
+
+// RIPEMD160 implementation
 openssl_ripemd160_hasher::openssl_ripemd160_hasher()
 {
-    static_assert(
-        sizeof(decltype(openssl_ripemd160_hasher::ctx_)) ==
-            sizeof(RIPEMD160_CTX),
-        "");
-    auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
+    static_assert(sizeof(ctx_) >= sizeof(RIPEMD160_CTX), "");
+    auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
    RIPEMD160_Init(ctx);
 }

@@ -39,68 +188,113 @@ openssl_ripemd160_hasher::operator()(
    void const* data,
    std::size_t size) noexcept
 {
-    auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
+    auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
+
+#if defined(__x86_64__)
+    if (detail::has_avx512() && size >= 64)
+    {
+        size_t blocks = size / 64;
+        detail::process_ripemd160_blocks_avx512(
+            ctx, static_cast<const uint8_t*>(data), blocks);
+        size_t remaining = size % 64;
+        if (remaining)
+            RIPEMD160_Update(
+                ctx,
+                static_cast<const uint8_t*>(data) + (blocks * 64),
+                remaining);
+        return;
+    }
+#endif
+
    RIPEMD160_Update(ctx, data, size);
 }

 openssl_ripemd160_hasher::operator result_type() noexcept
 {
-    auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
+    auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
    result_type digest;
    RIPEMD160_Final(digest.data(), ctx);
    return digest;
 }

-//------------------------------------------------------------------------------
-
-openssl_sha512_hasher::openssl_sha512_hasher()
-{
-    static_assert(
-        sizeof(decltype(openssl_sha512_hasher::ctx_)) == sizeof(SHA512_CTX),
-        "");
-    auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
-    SHA512_Init(ctx);
-}
-
-void
-openssl_sha512_hasher::operator()(void const* data, std::size_t size) noexcept
-{
-    auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
-    SHA512_Update(ctx, data, size);
-}
-
-openssl_sha512_hasher::operator result_type() noexcept
-{
-    auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
-    result_type digest;
-    SHA512_Final(digest.data(), ctx);
-    return digest;
-}
-
-//------------------------------------------------------------------------------
-
+// SHA256 implementation
 openssl_sha256_hasher::openssl_sha256_hasher()
 {
-    static_assert(
-        sizeof(decltype(openssl_sha256_hasher::ctx_)) == sizeof(SHA256_CTX),
-        "");
-    auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
+    static_assert(sizeof(ctx_) >= sizeof(SHA256_CTX), "");
+    auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
    SHA256_Init(ctx);
 }

 void
 openssl_sha256_hasher::operator()(void const* data, std::size_t size) noexcept
 {
-    auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
+    auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
+
+#if defined(__x86_64__)
+    if (detail::has_avx512() && size >= 64)
+    {
+        size_t blocks = size / 64;
+        detail::process_sha256_blocks_avx512(
+            ctx, static_cast<const uint8_t*>(data), blocks);
+        size_t remaining = size % 64;
+        if (remaining)
+            SHA256_Update(
+                ctx,
+                static_cast<const uint8_t*>(data) + (blocks * 64),
+                remaining);
+        return;
+    }
+#endif
+
    SHA256_Update(ctx, data, size);
 }

 openssl_sha256_hasher::operator result_type() noexcept
 {
-    auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
+    auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
    result_type digest;
    SHA256_Final(digest.data(), ctx);
    return digest;
 }

+// SHA512 implementation
+openssl_sha512_hasher::openssl_sha512_hasher()
+{
+    static_assert(sizeof(ctx_) >= sizeof(SHA512_CTX), "");
+    auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
+    SHA512_Init(ctx);
+}
+
+void
+openssl_sha512_hasher::operator()(void const* data, std::size_t size) noexcept
+{
+    auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
+
+#if defined(__x86_64__)
+    if (detail::has_avx512() && size >= 64)
+    {
+        size_t blocks = size / 64;
+        detail::process_sha512_blocks_avx512(
+            ctx, static_cast<const uint8_t*>(data), blocks);
+        size_t remaining = size % 64;
+        if (remaining)
+            SHA512_Update(
+                ctx,
+                static_cast<const uint8_t*>(data) + (blocks * 64),
+                remaining);
+        return;
+    }
+#endif
+
+    SHA512_Update(ctx, data, size);
+}
+
+openssl_sha512_hasher::operator result_type() noexcept
+{
+    auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
+    result_type digest;
+    SHA512_Final(digest.data(), ctx);
+    return digest;
+}
+
 }  // namespace ripple
Author	SHA1	Message	Date
Richard Holland	da9ed46388	don't block compile based on compiling cpu	2024-10-24 11:06:56 +11:00
Richard Holland	655938c1c7	avx detection in cmake	2024-10-24 09:27:28 +11:00
Richard Holland	a5ecf95e33	sha256/512 and ripe avx512	2024-10-23 21:37:35 +11:00
Richard Holland	c245977c37	clang	2024-10-23 14:34:28 +11:00
Richard Holland	a5f2768bf7	use intrinsics for hardened_hasher when available	2024-10-23 14:34:03 +11:00