Compare commits

..

3 Commits

Author SHA1 Message Date
Denis Angell
a516266e5d fix apple build
Co-Authored-By: Denis Angell <dangell@transia.co>
2024-10-21 15:54:13 +02:00
Denis Angell
f0cabd162c Merge branch 'dev' into sha-optimization 2024-10-21 15:36:52 +02:00
Denis Angell
a6f596bc8b avx512
Co-Authored-By: Denis Angell <dangell@transia.co>
2024-10-21 15:32:54 +02:00
4 changed files with 182 additions and 343 deletions

View File

@@ -2,34 +2,92 @@
NIH dep: openssl
#]===================================================================]
#[===============================================[
OPENSSL_ROOT_DIR is the only variable that
FindOpenSSL honors for locating, so convert any
OPENSSL_ROOT vars to this
#]===============================================]
if (NOT DEFINED OPENSSL_ROOT_DIR)
if (DEFINED ENV{OPENSSL_ROOT})
set (OPENSSL_ROOT_DIR $ENV{OPENSSL_ROOT})
elseif (HOMEBREW)
execute_process (COMMAND ${HOMEBREW} --prefix openssl
OUTPUT_VARIABLE OPENSSL_ROOT_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif ()
file (TO_CMAKE_PATH "${OPENSSL_ROOT_DIR}" OPENSSL_ROOT_DIR)
endif ()
# Include modules
include(ExternalProject)
include(CheckCCompilerFlag)
check_c_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512)
option(ENABLE_AVX512 "Enable AVX512 instructions" ${COMPILER_SUPPORTS_AVX512})
set(OPENSSL_NISTP "")
set(OPENSSL_AVX512 "")
if(APPLE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
set(OPENSSL_PLATFORM "darwin64-arm64-cc")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
set(OPENSSL_PLATFORM "darwin64-x86_64-cc")
else()
message(FATAL_ERROR "Unsupported macOS architecture: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
elseif(UNIX AND NOT APPLE)
set(OPENSSL_PLATFORM "linux-x86_64")
set(OPENSSL_NISTP "enable-ec_nistp_64_gcc_128")
set(OPENSSL_AVX512 "-march=skylake-avx512")
else()
message(FATAL_ERROR "Unsupported platform")
endif()
ExternalProject_Add(openssl_src
PREFIX ${nih_cache_path}
GIT_REPOSITORY https://github.com/openssl/openssl.git
GIT_TAG OpenSSL_1_1_1u
CONFIGURE_COMMAND
${nih_cache_path}/src/openssl_src/Configure
-DBUILD_SHARED_LIBS=OFF
-DBUILD_STATIC_LIBS=ON
${OPENSSL_PLATFORM}
${OPENSSL_NISTP}
${OPENSSL_AVX512}
enable-rmd160
no-ssl2
LOG_BUILD ON
LOG_CONFIGURE ON
LOG_DOWNLOAD ON
BUILD_IN_SOURCE 1
BUILD_COMMAND $(MAKE)
TEST_COMMAND ""
INSTALL_COMMAND ""
BUILD_BYPRODUCTS
<BINARY_DIR>/libssl.a
<BINARY_DIR>/libcrypto.a
)
ExternalProject_Get_Property (openssl_src BINARY_DIR)
set (openssl_src_BINARY_DIR "${BINARY_DIR}")
add_library (OpenSSL::SSL STATIC IMPORTED GLOBAL)
add_library (OpenSSL::Crypto STATIC IMPORTED GLOBAL)
add_dependencies(OpenSSL::SSL openssl_src)
add_dependencies(OpenSSL::Crypto openssl_src)
execute_process(
COMMAND
mkdir -p "${openssl_src_BINARY_DIR}/include"
)
set_target_properties(OpenSSL::SSL PROPERTIES
IMPORTED_LOCATION_DEBUG
"${openssl_src_BINARY_DIR}/libssl.a"
IMPORTED_LOCATION_RELEASE
"${openssl_src_BINARY_DIR}/libssl.a"
INTERFACE_INCLUDE_DIRECTORIES
"${openssl_src_BINARY_DIR}/include"
)
set_target_properties(OpenSSL::Crypto PROPERTIES
IMPORTED_LOCATION_DEBUG
"${openssl_src_BINARY_DIR}/libcrypto.a"
IMPORTED_LOCATION_RELEASE
"${openssl_src_BINARY_DIR}/libcrypto.a"
INTERFACE_INCLUDE_DIRECTORIES
"${openssl_src_BINARY_DIR}/include"
)
if (static)
set (OPENSSL_USE_STATIC_LIBS ON)
endif ()
set (OPENSSL_MSVC_STATIC_RT ON)
find_package (OpenSSL 1.1.1 REQUIRED)
target_link_libraries (ripple_libs
INTERFACE
OpenSSL::SSL
OpenSSL::Crypto)
# disable SSLv2...this can also be done when building/configuring OpenSSL
set_target_properties(OpenSSL::SSL PROPERTIES
INTERFACE_COMPILE_DEFINITIONS OPENSSL_NO_SSL2)
#[=========================================================[
https://gitlab.kitware.com/cmake/cmake/issues/16885
depending on how openssl is built, it might depend
@@ -46,3 +104,9 @@ if (TARGET ZLIB::ZLIB)
INTERFACE_LINK_LIBRARIES ZLIB::ZLIB)
set (has_zlib TRUE)
endif ()
if(ENABLE_AVX512 AND COMPILER_SUPPORTS_AVX512 AND NOT APPLE)
add_compile_options(-mavx512f)
add_compile_options(-march=skylake-avx512)
endif()
add_link_options(-lssl -lcrypto)

View File

@@ -11,8 +11,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(Boost_NO_BOOST_CMAKE ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=x86-64-v4")
# make GIT_COMMIT_HASH define available to all sources
find_package(Git)
if(Git_FOUND)

View File

@@ -1,8 +1,28 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2014 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef RIPPLE_BASICS_HARDENED_HASH_H_INCLUDED
#define RIPPLE_BASICS_HARDENED_HASH_H_INCLUDED
#include <ripple/beast/hash/hash_append.h>
#include <ripple/beast/hash/xxhasher.h>
#include <cstdint>
#include <functional>
#include <mutex>
@@ -12,68 +32,10 @@
#include <unordered_set>
#include <utility>
#if defined(__x86_64__) || defined(_M_X64)
#include <cpuid.h>
#include <immintrin.h>
#endif
namespace ripple {
namespace detail {
#if defined(__x86_64__) || defined(_M_X64)
inline bool
check_aesni_support()
{
unsigned int eax, ebx, ecx, edx;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
{
return (ecx & bit_AES) != 0;
}
return false;
}
// Helper function to contain all AES-NI operations
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__target__("aes")))
#endif
inline __m128i
aesni_hash_block(__m128i state, __m128i key, const void* data, size_t len)
{
const uint8_t* ptr = static_cast<const uint8_t*>(data);
while (len >= 16)
{
__m128i block = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
state = _mm_xor_si128(state, block);
state = _mm_aesenc_si128(state, key);
ptr += 16;
len -= 16;
}
if (len > 0)
{
alignas(16) uint8_t last_block[16] = {0};
std::memcpy(last_block, ptr, len);
__m128i block =
_mm_load_si128(reinterpret_cast<const __m128i*>(last_block));
state = _mm_xor_si128(state, block);
state = _mm_aesenc_si128(state, key);
}
return state;
}
// Helper function for final AES round
#if defined(__GNUC__) || defined(__clang__)
__attribute__((__target__("aes")))
#endif
inline __m128i
aesni_hash_final(__m128i state, __m128i key)
{
return _mm_aesenclast_si128(state, key);
}
#endif
using seed_pair = std::pair<std::uint64_t, std::uint64_t>;
template <bool = true>
@@ -86,9 +48,12 @@ make_seed_pair() noexcept
std::random_device rng;
std::mt19937_64 gen;
std::uniform_int_distribution<std::uint64_t> dist;
state_t() : gen(rng())
{
}
// state_t(state_t const&) = delete;
// state_t& operator=(state_t const&) = delete;
};
static state_t state;
std::lock_guard lock(state.mutex);
@@ -97,23 +62,46 @@ make_seed_pair() noexcept
} // namespace detail
/**
* Seed functor once per construction
A std compatible hash adapter that resists adversarial inputs.
For this to work, T must implement in its own namespace:
@code
template <class Hasher>
void
hash_append (Hasher& h, T const& t) noexcept
{
// hash_append each base and member that should
// participate in forming the hash
using beast::hash_append;
hash_append (h, static_cast<T::base1 const&>(t));
hash_append (h, static_cast<T::base2 const&>(t));
// ...
hash_append (h, t.member1);
hash_append (h, t.member2);
// ...
}
@endcode
Do not use any version of Murmur or CityHash for the Hasher
template parameter (the hashing algorithm). For details
see https://131002.net/siphash/#at
*/
template <class HashAlgorithm = beast::xxhasher>
class hardened_hash
{
private:
detail::seed_pair m_seeds;
#if defined(__x86_64__) || defined(_M_X64)
bool using_aesni_;
#endif
public:
using result_type = typename HashAlgorithm::result_type;
hardened_hash()
: m_seeds(detail::make_seed_pair<>())
#if defined(__x86_64__) || defined(_M_X64)
, using_aesni_(detail::check_aesni_support())
#endif
hardened_hash() : m_seeds(detail::make_seed_pair<>())
{
}
@@ -121,24 +109,7 @@ public:
result_type
operator()(T const& t) const noexcept
{
#if defined(__x86_64__) || defined(_M_X64)
if (using_aesni_)
{
alignas(16) __m128i key =
_mm_set_epi64x(m_seeds.first, m_seeds.second);
alignas(16) __m128i state = _mm_setzero_si128();
// Hash the data using AES-NI
const char* data = reinterpret_cast<const char*>(&t);
state = detail::aesni_hash_block(state, key, data, sizeof(t));
state = detail::aesni_hash_final(state, key);
return static_cast<result_type>(_mm_cvtsi128_si64(state));
}
#endif
// Original implementation using xxhasher
HashAlgorithm h(m_seeds.first, m_seeds.second);
using beast::hash_append;
hash_append(h, t);
return static_cast<result_type>(h);
}

View File

@@ -18,168 +18,19 @@
//==============================================================================
#include <ripple/protocol/digest.h>
#include <immintrin.h>
#include <openssl/ripemd.h>
#include <openssl/sha.h>
#include <type_traits>
namespace ripple {
namespace detail {
#if defined(__x86_64__)
#if defined(__clang__)
#pragma clang attribute push( \
__attribute__((target("xsave,avx512f,avx512bw"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC push_options
#pragma GCC target("xsave,avx512f,avx512bw")
#endif
static bool
check_avx512()
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
{
if ((ecx & bit_AVX) && (ecx & bit_OSXSAVE))
{
unsigned long long xcr0 = _xgetbv(0);
if ((xcr0 & 6) == 6)
{
if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
return (ebx & bit_AVX512F) && (ebx & bit_AVX512BW);
}
}
}
return false;
}
#if defined(__clang__)
#pragma clang attribute pop
#elif defined(__GNUC__)
#pragma GCC pop_options
#endif
#endif
static bool
has_avx512()
{
static const bool support = [] {
#if defined(__x86_64__)
return check_avx512();
#else
return false;
#endif
}();
return support;
}
#if defined(__x86_64__)
#if defined(__clang__)
#pragma clang attribute push( \
__attribute__((target("avx512f,avx512bw"))), apply_to = function)
#elif defined(__GNUC__)
#pragma GCC push_options
#pragma GCC target("avx512f,avx512bw")
#endif
static void
process_sha256_blocks_avx512(
SHA256_CTX* ctx,
const uint8_t* data,
size_t blocks)
{
for (size_t i = 0; i < blocks; ++i)
{
__m512i block = _mm512_loadu_si512(
reinterpret_cast<const __m512i*>(data + (i * 64)));
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const __m512i swap = _mm512_setr_epi64(
0x0001020304050607,
0x08090a0b0c0d0e0f,
0x1011121314151617,
0x18191a1b1c1d1e1f,
0x2021222324252627,
0x28292a2b2c2d2e2f,
0x3031323334353637,
0x38393a3b3c3d3e3f);
block = _mm512_shuffle_epi8(block, swap);
#endif
SHA256_Update(ctx, data + (i * 64), 64);
}
}
static void
process_sha512_blocks_avx512(
SHA512_CTX* ctx,
const uint8_t* data,
size_t blocks)
{
for (size_t i = 0; i < blocks; ++i)
{
__m512i block = _mm512_loadu_si512(
reinterpret_cast<const __m512i*>(data + (i * 64)));
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const __m512i swap = _mm512_setr_epi64(
0x0001020304050607,
0x08090a0b0c0d0e0f,
0x1011121314151617,
0x18191a1b1c1d1e1f,
0x2021222324252627,
0x28292a2b2c2d2e2f,
0x3031323334353637,
0x38393a3b3c3d3e3f);
block = _mm512_shuffle_epi8(block, swap);
#endif
SHA512_Update(ctx, data + (i * 64), 64);
}
}
static void
process_ripemd160_blocks_avx512(
RIPEMD160_CTX* ctx,
const uint8_t* data,
size_t blocks)
{
for (size_t i = 0; i < blocks; ++i)
{
__m512i block = _mm512_loadu_si512(
reinterpret_cast<const __m512i*>(data + (i * 64)));
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const __m512i swap = _mm512_setr_epi64(
0x0001020304050607,
0x08090a0b0c0d0e0f,
0x1011121314151617,
0x18191a1b1c1d1e1f,
0x2021222324252627,
0x28292a2b2c2d2e2f,
0x3031323334353637,
0x38393a3b3c3d3e3f);
block = _mm512_shuffle_epi8(block, swap);
#endif
RIPEMD160_Update(ctx, data + (i * 64), 64);
}
}
#if defined(__clang__)
#pragma clang attribute pop
#elif defined(__GNUC__)
#pragma GCC pop_options
#endif
#endif
} // namespace detail
// RIPEMD160 implementation
openssl_ripemd160_hasher::openssl_ripemd160_hasher()
{
static_assert(sizeof(ctx_) >= sizeof(RIPEMD160_CTX), "");
auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
static_assert(
sizeof(decltype(openssl_ripemd160_hasher::ctx_)) ==
sizeof(RIPEMD160_CTX),
"");
auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
RIPEMD160_Init(ctx);
}
@@ -188,113 +39,68 @@ openssl_ripemd160_hasher::operator()(
void const* data,
std::size_t size) noexcept
{
auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
#if defined(__x86_64__)
if (detail::has_avx512() && size >= 64)
{
size_t blocks = size / 64;
detail::process_ripemd160_blocks_avx512(
ctx, static_cast<const uint8_t*>(data), blocks);
size_t remaining = size % 64;
if (remaining)
RIPEMD160_Update(
ctx,
static_cast<const uint8_t*>(data) + (blocks * 64),
remaining);
return;
}
#endif
auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
RIPEMD160_Update(ctx, data, size);
}
openssl_ripemd160_hasher::operator result_type() noexcept
{
auto ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
auto const ctx = reinterpret_cast<RIPEMD160_CTX*>(ctx_);
result_type digest;
RIPEMD160_Final(digest.data(), ctx);
return digest;
}
// SHA256 implementation
openssl_sha256_hasher::openssl_sha256_hasher()
{
static_assert(sizeof(ctx_) >= sizeof(SHA256_CTX), "");
auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
SHA256_Init(ctx);
}
//------------------------------------------------------------------------------
void
openssl_sha256_hasher::operator()(void const* data, std::size_t size) noexcept
{
auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
#if defined(__x86_64__)
if (detail::has_avx512() && size >= 64)
{
size_t blocks = size / 64;
detail::process_sha256_blocks_avx512(
ctx, static_cast<const uint8_t*>(data), blocks);
size_t remaining = size % 64;
if (remaining)
SHA256_Update(
ctx,
static_cast<const uint8_t*>(data) + (blocks * 64),
remaining);
return;
}
#endif
SHA256_Update(ctx, data, size);
}
openssl_sha256_hasher::operator result_type() noexcept
{
auto ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
result_type digest;
SHA256_Final(digest.data(), ctx);
return digest;
}
// SHA512 implementation
openssl_sha512_hasher::openssl_sha512_hasher()
{
static_assert(sizeof(ctx_) >= sizeof(SHA512_CTX), "");
auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
static_assert(
sizeof(decltype(openssl_sha512_hasher::ctx_)) == sizeof(SHA512_CTX),
"");
auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
SHA512_Init(ctx);
}
void
openssl_sha512_hasher::operator()(void const* data, std::size_t size) noexcept
{
auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
#if defined(__x86_64__)
if (detail::has_avx512() && size >= 64)
{
size_t blocks = size / 64;
detail::process_sha512_blocks_avx512(
ctx, static_cast<const uint8_t*>(data), blocks);
size_t remaining = size % 64;
if (remaining)
SHA512_Update(
ctx,
static_cast<const uint8_t*>(data) + (blocks * 64),
remaining);
return;
}
#endif
auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
SHA512_Update(ctx, data, size);
}
openssl_sha512_hasher::operator result_type() noexcept
{
auto ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
auto const ctx = reinterpret_cast<SHA512_CTX*>(ctx_);
result_type digest;
SHA512_Final(digest.data(), ctx);
return digest;
}
//------------------------------------------------------------------------------
openssl_sha256_hasher::openssl_sha256_hasher()
{
static_assert(
sizeof(decltype(openssl_sha256_hasher::ctx_)) == sizeof(SHA256_CTX),
"");
auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
SHA256_Init(ctx);
}
void
openssl_sha256_hasher::operator()(void const* data, std::size_t size) noexcept
{
auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
SHA256_Update(ctx, data, size);
}
openssl_sha256_hasher::operator result_type() noexcept
{
auto const ctx = reinterpret_cast<SHA256_CTX*>(ctx_);
result_type digest;
SHA256_Final(digest.data(), ctx);
return digest;
}
} // namespace ripple