From 81a8fe1419f0ce08e6e726602c8aac6c38bf8316 Mon Sep 17 00:00:00 2001 From: Jingchen Date: Mon, 11 Aug 2025 11:21:26 +0100 Subject: [PATCH] perf: Optimize hash performance by avoiding allocating hash state object (#5469) We're currently calling `XXH3_createState` and `XXH3_freeState` when hashing an object. However, it may be slow because they call `malloc` and `free`, which may affect the performance. This change avoids the use of the streaming API as much as possible by using an internal buffer. --- include/xrpl/beast/hash/xxhasher.h | 120 ++++++++++++--- src/test/beast/xxhasher_test.cpp | 232 +++++++++++++++++++++++++++++ 2 files changed, 331 insertions(+), 21 deletions(-) create mode 100644 src/test/beast/xxhasher_test.cpp diff --git a/include/xrpl/beast/hash/xxhasher.h b/include/xrpl/beast/hash/xxhasher.h index 381980902..9cd343f54 100644 --- a/include/xrpl/beast/hash/xxhasher.h +++ b/include/xrpl/beast/hash/xxhasher.h @@ -24,32 +24,110 @@ #include +#include #include -#include -#include +#include +#include +#include namespace beast { class xxhasher { -private: - // requires 64-bit std::size_t - static_assert(sizeof(std::size_t) == 8, ""); +public: + using result_type = std::size_t; - XXH3_state_t* state_; +private: + static_assert(sizeof(std::size_t) == 8, "requires 64-bit std::size_t"); + // Have an internal buffer to avoid the streaming API + // A 64-byte buffer should be big enough for us + static constexpr std::size_t INTERNAL_BUFFER_SIZE = 64; + + alignas(64) std::array buffer_; + std::span readBuffer_; + std::span writeBuffer_; + + std::optional seed_; + XXH3_state_t* state_ = nullptr; + + void + resetBuffers() + { + writeBuffer_ = std::span{buffer_}; + readBuffer_ = {}; + } + + void + updateHash(void const* data, std::size_t len) + { + if (writeBuffer_.size() < len) + { + flushToState(data, len); + } + else + { + std::memcpy(writeBuffer_.data(), data, len); + writeBuffer_ = writeBuffer_.subspan(len); + readBuffer_ = std::span{ + std::begin(buffer_), 
buffer_.size() - writeBuffer_.size()}; + } + } static XXH3_state_t* allocState() { auto ret = XXH3_createState(); if (ret == nullptr) - throw std::bad_alloc(); + throw std::bad_alloc(); // LCOV_EXCL_LINE return ret; } -public: - using result_type = std::size_t; + void + flushToState(void const* data, std::size_t len) + { + if (!state_) + { + state_ = allocState(); + if (seed_.has_value()) + { + XXH3_64bits_reset_withSeed(state_, *seed_); + } + else + { + XXH3_64bits_reset(state_); + } + } + XXH3_64bits_update(state_, readBuffer_.data(), readBuffer_.size()); + resetBuffers(); + if (data && len) + { + XXH3_64bits_update(state_, data, len); + } + } + result_type + retrieveHash() + { + if (state_) + { + flushToState(nullptr, 0); + return XXH3_64bits_digest(state_); + } + else + { + if (seed_.has_value()) + { + return XXH3_64bits_withSeed( + readBuffer_.data(), readBuffer_.size(), *seed_); + } + else + { + return XXH3_64bits(readBuffer_.data(), readBuffer_.size()); + } + } + } + +public: static constexpr auto const endian = boost::endian::order::native; xxhasher(xxhasher const&) = delete; @@ -58,43 +136,43 @@ public: xxhasher() { - state_ = allocState(); - XXH3_64bits_reset(state_); + resetBuffers(); } ~xxhasher() noexcept { - XXH3_freeState(state_); + if (state_) + { + XXH3_freeState(state_); + } } template < class Seed, std::enable_if_t::value>* = nullptr> - explicit xxhasher(Seed seed) + explicit xxhasher(Seed seed) : seed_(seed) { - state_ = allocState(); - XXH3_64bits_reset_withSeed(state_, seed); + resetBuffers(); } template < class Seed, std::enable_if_t::value>* = nullptr> - xxhasher(Seed seed, Seed) + xxhasher(Seed seed, Seed) : seed_(seed) { - state_ = allocState(); - XXH3_64bits_reset_withSeed(state_, seed); + resetBuffers(); } void operator()(void const* key, std::size_t len) noexcept { - XXH3_64bits_update(state_, key, len); + updateHash(key, len); } explicit - operator std::size_t() noexcept + operator result_type() noexcept { - return 
XXH3_64bits_digest(state_); + return retrieveHash(); } }; diff --git a/src/test/beast/xxhasher_test.cpp b/src/test/beast/xxhasher_test.cpp new file mode 100644 index 000000000..6c65fea60 --- /dev/null +++ b/src/test/beast/xxhasher_test.cpp @@ -0,0 +1,232 @@ +//------------------------------------------------------------------------------ +/* +This file is part of rippled: https://github.com/ripple/rippled +Copyright (c) 2025 Ripple Labs Inc. + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#include +#include + +namespace beast { + +class XXHasher_test : public unit_test::suite +{ +public: + void + testWithoutSeed() + { + testcase("Without seed"); + + xxhasher hasher{}; + + std::string objectToHash{"Hello, xxHash!"}; + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 16042857369214894119ULL); + } + + void + testWithSeed() + { + testcase("With seed"); + + xxhasher hasher{static_cast(102)}; + + std::string objectToHash{"Hello, xxHash!"}; + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 14440132435660934800ULL); + } + + void + testWithTwoSeeds() + { + testcase("With two seeds"); + xxhasher hasher{ + static_cast(102), static_cast(103)}; + + std::string objectToHash{"Hello, xxHash!"}; + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 14440132435660934800ULL); + } + + void + testBigObjectWithMultiupleSmallUpdatesWithoutSeed() + { + testcase("Big object with multiple small updates without seed"); + xxhasher hasher{}; + + std::string objectToHash{"Hello, xxHash!"}; + for (int i = 0; i < 100; i++) + { + hasher(objectToHash.data(), objectToHash.size()); + } + + BEAST_EXPECT( + static_cast(hasher) == + 15296278154063476002ULL); + } + + void + testBigObjectWithMultiupleSmallUpdatesWithSeed() + { + testcase("Big object with multiple small updates with seed"); + xxhasher hasher{static_cast(103)}; + + std::string objectToHash{"Hello, xxHash!"}; + for (int i = 0; i < 100; i++) + { + hasher(objectToHash.data(), objectToHash.size()); + } + + BEAST_EXPECT( + static_cast(hasher) == + 17285302196561698791ULL); + } + + void + testBigObjectWithSmallAndBigUpdatesWithoutSeed() + { + testcase("Big object with small and big updates without seed"); + xxhasher hasher{}; + + std::string objectToHash{"Hello, xxHash!"}; + std::string bigObject; + for (int i 
= 0; i < 20; i++) + { + bigObject += "Hello, xxHash!"; + } + hasher(objectToHash.data(), objectToHash.size()); + hasher(bigObject.data(), bigObject.size()); + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 1865045178324729219ULL); + } + + void + testBigObjectWithSmallAndBigUpdatesWithSeed() + { + testcase("Big object with small and big updates with seed"); + xxhasher hasher{static_cast(103)}; + + std::string objectToHash{"Hello, xxHash!"}; + std::string bigObject; + for (int i = 0; i < 20; i++) + { + bigObject += "Hello, xxHash!"; + } + hasher(objectToHash.data(), objectToHash.size()); + hasher(bigObject.data(), bigObject.size()); + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 16189862915636005281ULL); + } + + void + testBigObjectWithOneUpdateWithoutSeed() + { + testcase("Big object with one update without seed"); + xxhasher hasher{}; + + std::string objectToHash; + for (int i = 0; i < 100; i++) + { + objectToHash += "Hello, xxHash!"; + } + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 15296278154063476002ULL); + } + + void + testBigObjectWithOneUpdateWithSeed() + { + testcase("Big object with one update with seed"); + xxhasher hasher{static_cast(103)}; + + std::string objectToHash; + for (int i = 0; i < 100; i++) + { + objectToHash += "Hello, xxHash!"; + } + hasher(objectToHash.data(), objectToHash.size()); + + BEAST_EXPECT( + static_cast(hasher) == + 17285302196561698791ULL); + } + + void + testOperatorResultTypeDoesNotChangeInternalState() + { + testcase("Operator result type doesn't change the internal state"); + { + xxhasher hasher; + + std::string object{"Hello xxhash"}; + hasher(object.data(), object.size()); + auto xxhashResult1 = static_cast(hasher); + auto xxhashResult2 = static_cast(hasher); + + BEAST_EXPECT(xxhashResult1 == xxhashResult2); + } + { + xxhasher hasher; + + std::string object; + for (int i = 0; i 
< 100; i++) + { + object += "Hello, xxHash!"; + } + hasher(object.data(), object.size()); + auto xxhashResult1 = hasher.operator xxhasher::result_type(); + auto xxhashResult2 = hasher.operator xxhasher::result_type(); + + BEAST_EXPECT(xxhashResult1 == xxhashResult2); + } + } + + void + run() override + { + testWithoutSeed(); + testWithSeed(); + testWithTwoSeeds(); + testBigObjectWithMultiupleSmallUpdatesWithoutSeed(); + testBigObjectWithMultiupleSmallUpdatesWithSeed(); + testBigObjectWithSmallAndBigUpdatesWithoutSeed(); + testBigObjectWithSmallAndBigUpdatesWithSeed(); + testBigObjectWithOneUpdateWithoutSeed(); + testBigObjectWithOneUpdateWithSeed(); + testOperatorResultTypeDoesNotChangeInternalState(); + } +}; + +BEAST_DEFINE_TESTSUITE(XXHasher, beast_core, beast); +} // namespace beast