perf: Optimize hash performance by avoiding allocating hash state object (#5469)

We currently call `XXH3_createState` and `XXH3_freeState` every time an object is hashed. Both go through `malloc` and `free`, which makes hashing slower than necessary. This change collects input in a fixed-size internal buffer and hashes it with the one-shot API, falling back to the streaming API (and its heap-allocated state) only when the input does not fit in that buffer.
This commit is contained in:
Jingchen
2025-08-11 11:21:26 +01:00
committed by GitHub
parent 86ef16dbeb
commit b40a3684ae
2 changed files with 331 additions and 21 deletions

View File

@@ -24,32 +24,110 @@
#include <xxhash.h> #include <xxhash.h>
#include <array>
#include <cstddef> #include <cstddef>
#include <new> #include <cstdint>
#include <type_traits> #include <optional>
#include <span>
namespace beast { namespace beast {
class xxhasher class xxhasher
{ {
private: public:
// requires 64-bit std::size_t using result_type = std::size_t;
static_assert(sizeof(std::size_t) == 8, "");
XXH3_state_t* state_; private:
static_assert(sizeof(std::size_t) == 8, "requires 64-bit std::size_t");
// Have an internal buffer to avoid the streaming API
// A 64-byte buffer should be big enough for us
static constexpr std::size_t INTERNAL_BUFFER_SIZE = 64;
alignas(64) std::array<std::uint8_t, INTERNAL_BUFFER_SIZE> buffer_;
std::span<std::uint8_t> readBuffer_;
std::span<std::uint8_t> writeBuffer_;
std::optional<XXH64_hash_t> seed_;
XXH3_state_t* state_ = nullptr;
// Marks the internal buffer as empty: nothing is left to read, and the whole
// of buffer_ is available for writing again.
void
resetBuffers()
{
    readBuffer_ = {};
    writeBuffer_ = std::span{buffer_};
}
// Adds `len` bytes starting at `data` to the input being hashed.
//
// Input that fits in the unused part of the internal buffer is copied there,
// and readBuffer_ is updated to cover all bytes buffered so far. Input that
// does not fit is passed to flushToState(), which is called with the new
// bytes so that they go to the streaming state instead of the buffer.
//
// @param data Pointer to the bytes to add; not dereferenced when len is 0.
// @param len  Number of bytes to add.
void
updateHash(void const* data, std::size_t len)
{
    // An empty update contributes nothing to the hash. Returning here also
    // keeps a null `data` away from std::memcpy, whose behaviour is undefined
    // for a null source pointer even when the byte count is zero.
    if (len == 0)
        return;

    // Not enough room left in the buffer: switch to the streaming state.
    if (writeBuffer_.size() < len)
    {
        flushToState(data, len);
        return;
    }

    std::memcpy(writeBuffer_.data(), data, len);
    writeBuffer_ = writeBuffer_.subspan(len);

    // The readable region is everything in buffer_ that is no longer part of
    // the writable tail.
    readBuffer_ =
        std::span{std::begin(buffer_), buffer_.size() - writeBuffer_.size()};
}
static XXH3_state_t* static XXH3_state_t*
allocState() allocState()
{ {
auto ret = XXH3_createState(); auto ret = XXH3_createState();
if (ret == nullptr) if (ret == nullptr)
throw std::bad_alloc(); throw std::bad_alloc(); // LCOV_EXCL_LINE
return ret; return ret;
} }
public: void
using result_type = std::size_t; flushToState(void const* data, std::size_t len)
{
if (!state_)
{
state_ = allocState();
if (seed_.has_value())
{
XXH3_64bits_reset_withSeed(state_, *seed_);
}
else
{
XXH3_64bits_reset(state_);
}
}
XXH3_64bits_update(state_, readBuffer_.data(), readBuffer_.size());
resetBuffers();
if (data && len)
{
XXH3_64bits_update(state_, data, len);
}
}
// Produces the hash of everything fed to the hasher so far.
//
// When no streaming state has been created, the buffered bytes are hashed in
// a single call, using the seed if one was supplied. Otherwise the buffered
// bytes are first flushed into the streaming state and its digest is
// returned.
result_type
retrieveHash()
{
    if (!state_)
    {
        // One-shot path: all input is still sitting in the internal buffer.
        return seed_.has_value()
            ? XXH3_64bits_withSeed(
                  readBuffer_.data(), readBuffer_.size(), *seed_)
            : XXH3_64bits(readBuffer_.data(), readBuffer_.size());
    }

    // Streaming path: push any pending buffered bytes before taking the digest.
    flushToState(nullptr, 0);
    return XXH3_64bits_digest(state_);
}
public:
static constexpr auto const endian = boost::endian::order::native; static constexpr auto const endian = boost::endian::order::native;
xxhasher(xxhasher const&) = delete; xxhasher(xxhasher const&) = delete;
@@ -58,43 +136,43 @@ public:
xxhasher() xxhasher()
{ {
state_ = allocState(); resetBuffers();
XXH3_64bits_reset(state_);
} }
~xxhasher() noexcept ~xxhasher() noexcept
{ {
XXH3_freeState(state_); if (state_)
{
XXH3_freeState(state_);
}
} }
template < template <
class Seed, class Seed,
std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr> std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr>
explicit xxhasher(Seed seed) explicit xxhasher(Seed seed) : seed_(seed)
{ {
state_ = allocState(); resetBuffers();
XXH3_64bits_reset_withSeed(state_, seed);
} }
template < template <
class Seed, class Seed,
std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr> std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr>
xxhasher(Seed seed, Seed) xxhasher(Seed seed, Seed) : seed_(seed)
{ {
state_ = allocState(); resetBuffers();
XXH3_64bits_reset_withSeed(state_, seed);
} }
void void
operator()(void const* key, std::size_t len) noexcept operator()(void const* key, std::size_t len) noexcept
{ {
XXH3_64bits_update(state_, key, len); updateHash(key, len);
} }
explicit explicit
operator std::size_t() noexcept operator result_type() noexcept
{ {
return XXH3_64bits_digest(state_); return retrieveHash();
} }
}; };

View File

@@ -0,0 +1,232 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2025 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <xrpl/beast/hash/xxhasher.h>
#include <xrpl/beast/unit_test.h>
namespace beast {
class XXHasher_test : public unit_test::suite
{
public:
void
testWithoutSeed()
{
testcase("Without seed");
xxhasher hasher{};
std::string objectToHash{"Hello, xxHash!"};
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
16042857369214894119ULL);
}
void
testWithSeed()
{
testcase("With seed");
xxhasher hasher{static_cast<std::uint32_t>(102)};
std::string objectToHash{"Hello, xxHash!"};
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
14440132435660934800ULL);
}
void
testWithTwoSeeds()
{
testcase("With two seeds");
xxhasher hasher{
static_cast<std::uint32_t>(102), static_cast<std::uint32_t>(103)};
std::string objectToHash{"Hello, xxHash!"};
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
14440132435660934800ULL);
}
void
testBigObjectWithMultiupleSmallUpdatesWithoutSeed()
{
testcase("Big object with multiple small updates without seed");
xxhasher hasher{};
std::string objectToHash{"Hello, xxHash!"};
for (int i = 0; i < 100; i++)
{
hasher(objectToHash.data(), objectToHash.size());
}
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
15296278154063476002ULL);
}
void
testBigObjectWithMultiupleSmallUpdatesWithSeed()
{
testcase("Big object with multiple small updates with seed");
xxhasher hasher{static_cast<std::uint32_t>(103)};
std::string objectToHash{"Hello, xxHash!"};
for (int i = 0; i < 100; i++)
{
hasher(objectToHash.data(), objectToHash.size());
}
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
17285302196561698791ULL);
}
void
testBigObjectWithSmallAndBigUpdatesWithoutSeed()
{
testcase("Big object with small and big updates without seed");
xxhasher hasher{};
std::string objectToHash{"Hello, xxHash!"};
std::string bigObject;
for (int i = 0; i < 20; i++)
{
bigObject += "Hello, xxHash!";
}
hasher(objectToHash.data(), objectToHash.size());
hasher(bigObject.data(), bigObject.size());
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
1865045178324729219ULL);
}
void
testBigObjectWithSmallAndBigUpdatesWithSeed()
{
testcase("Big object with small and big updates with seed");
xxhasher hasher{static_cast<std::uint32_t>(103)};
std::string objectToHash{"Hello, xxHash!"};
std::string bigObject;
for (int i = 0; i < 20; i++)
{
bigObject += "Hello, xxHash!";
}
hasher(objectToHash.data(), objectToHash.size());
hasher(bigObject.data(), bigObject.size());
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
16189862915636005281ULL);
}
void
testBigObjectWithOneUpdateWithoutSeed()
{
testcase("Big object with one update without seed");
xxhasher hasher{};
std::string objectToHash;
for (int i = 0; i < 100; i++)
{
objectToHash += "Hello, xxHash!";
}
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
15296278154063476002ULL);
}
void
testBigObjectWithOneUpdateWithSeed()
{
testcase("Big object with one update with seed");
xxhasher hasher{static_cast<std::uint32_t>(103)};
std::string objectToHash;
for (int i = 0; i < 100; i++)
{
objectToHash += "Hello, xxHash!";
}
hasher(objectToHash.data(), objectToHash.size());
BEAST_EXPECT(
static_cast<xxhasher::result_type>(hasher) ==
17285302196561698791ULL);
}
void
testOperatorResultTypeDoesNotChangeInternalState()
{
testcase("Operator result type doesn't change the internal state");
{
xxhasher hasher;
std::string object{"Hello xxhash"};
hasher(object.data(), object.size());
auto xxhashResult1 = static_cast<xxhasher::result_type>(hasher);
auto xxhashResult2 = static_cast<xxhasher::result_type>(hasher);
BEAST_EXPECT(xxhashResult1 == xxhashResult2);
}
{
xxhasher hasher;
std::string object;
for (int i = 0; i < 100; i++)
{
object += "Hello, xxHash!";
}
hasher(object.data(), object.size());
auto xxhashResult1 = hasher.operator xxhasher::result_type();
auto xxhashResult2 = hasher.operator xxhasher::result_type();
BEAST_EXPECT(xxhashResult1 == xxhashResult2);
}
}
void
run() override
{
testWithoutSeed();
testWithSeed();
testWithTwoSeeds();
testBigObjectWithMultiupleSmallUpdatesWithoutSeed();
testBigObjectWithMultiupleSmallUpdatesWithSeed();
testBigObjectWithSmallAndBigUpdatesWithoutSeed();
testBigObjectWithSmallAndBigUpdatesWithSeed();
testBigObjectWithOneUpdateWithoutSeed();
testBigObjectWithOneUpdateWithSeed();
testOperatorResultTypeDoesNotChangeInternalState();
}
};
BEAST_DEFINE_TESTSUITE(XXHasher, beast_core, beast);
} // namespace beast