Mirror of https://github.com/XRPLF/rippled.git, synced 2025-12-06 17:27:55 +00:00
refactor: optimize NodeStore object conversion (#4353)
When writing objects to the NodeStore, they must be converted from the in-memory format to the binary format used by the node store. The conversion is handled by the `EncodedBlob` class, which is only ever instantiated on the stack. Coupled with the fact that most objects are under 1024 bytes in size, this presents an opportunity to elide a memory allocation in a critical path.

This commit also simplifies the interface of `EncodedBlob` and eliminates a subtle corner case that could result in dangling pointers.

While these changes are not expected to reduce memory usage significantly, they avoid the use of `std::shared_ptr` where it is unnecessary and prefer stack-based allocation over the heap whenever possible. This is a net gain both in memory usage (lower fragmentation) and performance (less work at runtime).
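
To illustrate the core idea, here is a minimal standalone sketch of the pattern, not the rippled implementation itself (the real one is in the `EncodedBlob` header further down): the serialized bytes land in a fixed-size buffer that lives inside the object, and only payloads larger than that buffer trigger a heap allocation. The `SmallBlob` name and the flat `type + payload` inputs are illustrative assumptions.

    // Sketch only: a blob that serializes into an in-object buffer when the
    // payload fits (the common case), falling back to the heap otherwise.
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    class SmallBlob
    {
        // 9-byte prefix plus room for the typical payload (under 1024 bytes).
        std::array<std::uint8_t, 9 + 1024> stack_;
        std::uint8_t* ptr_;
        std::size_t size_;

    public:
        SmallBlob(std::uint8_t type, std::vector<std::uint8_t> const& payload)
            : size_(payload.size() + 9)
        {
            // Use the in-object buffer when possible; allocate only if needed.
            ptr_ = (size_ <= stack_.size()) ? stack_.data()
                                            : new std::uint8_t[size_];
            std::memset(ptr_, 0, 8);  // 8 unused prefix bytes
            ptr_[8] = type;           // object type
            if (!payload.empty())
                std::memcpy(ptr_ + 9, payload.data(), payload.size());
        }

        ~SmallBlob()
        {
            if (ptr_ != stack_.data())
                delete[] ptr_;  // heap fallback only for oversized payloads
        }

        // Non-copyable: ptr_ may point into this object's own buffer.
        SmallBlob(SmallBlob const&) = delete;
        SmallBlob& operator=(SmallBlob const&) = delete;

        std::uint8_t const* data() const noexcept { return ptr_; }
        std::size_t size() const noexcept { return size_; }
    };

Making the sketch non-copyable is a deliberate safety choice: a copied blob's pointer could otherwise alias the original's internal buffer.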
@@ -532,7 +532,6 @@ target_sources (rippled PRIVATE
   src/ripple/nodestore/impl/DeterministicShard.cpp
   src/ripple/nodestore/impl/DecodedBlob.cpp
   src/ripple/nodestore/impl/DummyScheduler.cpp
-  src/ripple/nodestore/impl/EncodedBlob.cpp
   src/ripple/nodestore/impl/ManagerImp.cpp
   src/ripple/nodestore/impl/NodeObject.cpp
   src/ripple/nodestore/impl/Shard.cpp
@@ -670,7 +670,7 @@ public:
         // confirmed persisted. Otherwise, it can become deleted
         // prematurely if other copies are removed from caches.
         std::shared_ptr<NodeObject> no;
-        NodeStore::EncodedBlob e;
+        std::optional<NodeStore::EncodedBlob> e;
         std::pair<void const*, std::size_t> compressed;
         std::chrono::steady_clock::time_point begin;
         // The data is stored in this buffer. The void* in the above member
@@ -686,10 +686,10 @@ public:
             std::atomic<std::uint64_t>& retries)
             : backend(f), no(nobj), totalWriteRetries(retries)
         {
-            e.prepare(no);
+            e.emplace(no);

            compressed =
-                NodeStore::nodeobject_compress(e.getData(), e.getSize(), bf);
+                NodeStore::nodeobject_compress(e->getData(), e->getSize(), bf);
         }
     };

@@ -722,7 +722,7 @@ public:
         CassError rc = cass_statement_bind_bytes(
             statement,
             0,
-            static_cast<cass_byte_t const*>(data.e.getKey()),
+            static_cast<cass_byte_t const*>(data.e->getKey()),
             keyBytes_);
         if (rc != CASS_OK)
         {
@@ -250,8 +250,7 @@ public:
     void
     do_insert(std::shared_ptr<NodeObject> const& no)
     {
-        EncodedBlob e;
-        e.prepare(no);
+        EncodedBlob e(no);
         nudb::error_code ec;
         nudb::detail::buffer bf;
         auto const result = nodeobject_compress(e.getData(), e.getSize(), bf);
@@ -352,11 +352,9 @@ public:
         assert(m_db);
         rocksdb::WriteBatch wb;

-        EncodedBlob encoded;
-
         for (auto const& e : batch)
         {
-            encoded.prepare(e);
+            EncodedBlob encoded(e);

             wb.Put(
                 rocksdb::Slice(
@@ -1,42 +0,0 @@
-//------------------------------------------------------------------------------
-/*
-    This file is part of rippled: https://github.com/ripple/rippled
-    Copyright (c) 2012, 2013 Ripple Labs Inc.
-
-    Permission to use, copy, modify, and/or distribute this software for any
-    purpose with or without fee is hereby granted, provided that the above
-    copyright notice and this permission notice appear in all copies.
-
-    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-//==============================================================================
-
-#include <ripple/nodestore/impl/EncodedBlob.h>
-#include <cstring>
-
-namespace ripple {
-namespace NodeStore {
-
-void
-EncodedBlob::prepare(std::shared_ptr<NodeObject> const& object)
-{
-    m_key = object->getHash().begin();
-
-    auto ret = m_data.alloc(object->getData().size() + 9);
-
-    // the first 8 bytes are unused
-    std::memset(ret, 0, 8);
-
-    ret[8] = static_cast<std::uint8_t>(object->getType());
-
-    std::memcpy(ret + 9, object->getData().data(), object->getData().size());
-}
-
-} // namespace NodeStore
-} // namespace ripple
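
The commit message mentions a subtle corner case that could result in dangling pointers. One plausible reading, judging from the deleted `prepare()` above, which stored `object->getHash().begin()` rather than copying the hash: if the caller's `NodeObject` was destroyed before the blob was consumed, the cached key pointer would dangle. The standalone analogue below (hypothetical `Object`, `OldStyleBlob`, and `NewStyleBlob` types, not rippled code) contrasts borrowing the key with owning a copy of it, as the new `key_` array in the rewritten header does.

    // Standalone analogue: caching a raw pointer into a caller-owned object
    // can dangle; copying the bytes into an owned array cannot.
    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <memory>

    struct Object
    {
        std::array<std::uint8_t, 32> hash{};
    };

    struct OldStyleBlob
    {
        void const* key = nullptr;

        void prepare(std::shared_ptr<Object> const& obj)
        {
            key = obj->hash.data();  // borrows storage owned by *obj
        }
    };

    struct NewStyleBlob
    {
        std::array<std::uint8_t, 32> key{};

        explicit NewStyleBlob(std::shared_ptr<Object> const& obj)
        {
            std::memcpy(key.data(), obj->hash.data(), key.size());  // owns a copy
        }
    };

    int main()
    {
        OldStyleBlob blob;
        {
            auto obj = std::make_shared<Object>();
            blob.prepare(obj);
        }  // *obj is destroyed here; blob.key now dangles

        // NewStyleBlob has no such hazard: the key bytes live inside the blob.
        return 0;
    }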
@@ -22,42 +22,109 @@
 #include <ripple/basics/Buffer.h>
 #include <ripple/nodestore/NodeObject.h>
-#include <cstddef>
+#include <boost/align/align_up.hpp>
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstdint>

 namespace ripple {
 namespace NodeStore {

-/** Utility for producing flattened node objects.
-    @note This defines the database format of a NodeObject!
-*/
-// VFALCO TODO Make allocator aware and use short_alloc
-struct EncodedBlob
-{
-public:
-    void
-    prepare(std::shared_ptr<NodeObject> const& object);
-
-    void const*
+/** Convert a NodeObject from in-memory to database format.
+
+    The (suboptimal) database format consists of:
+
+    - 8 prefix bytes which will typically be 0, but don't assume that's the
+      case; earlier versions of the code would use these bytes to store the
+      ledger index either once or twice.
+    - A single byte denoting the type of the object.
+    - The payload.
+
+    @note This class is typically instantiated on the stack, so the size of
+          the object does not matter as much as it normally would since the
+          allocation is, effectively, free.
+
+          We leverage that fact to preallocate enough memory to handle most
+          payloads as part of this object, eliminating the need for dynamic
+          allocation. As of this writing ~94% of objects require fewer than
+          1024 payload bytes.
+ */
+class EncodedBlob
+{
+    /** The 32-byte key of the serialized object. */
+    std::array<std::uint8_t, 32> key_;
+
+    /** A pre-allocated buffer for the serialized object.
+
+        The buffer is large enough for the 9 byte prefix and at least
+        1024 more bytes. The precise size is calculated automatically
+        at compile time so as to avoid wasting space on padding bytes.
+     */
+    std::array<
+        std::uint8_t,
+        boost::alignment::align_up(9 + 1024, alignof(std::uint32_t))>
+        payload_;
+
+    /** The size of the serialized data. */
+    std::uint32_t size_;
+
+    /** A pointer to the serialized data.
+
+        This may point to the pre-allocated buffer (if it is sufficiently
+        large) or to a dynamically allocated buffer.
+     */
+    std::uint8_t* const ptr_;
+
+public:
+    explicit EncodedBlob(std::shared_ptr<NodeObject> const& obj)
+        : size_([&obj]() {
+            assert(obj);
+
+            if (!obj)
+                throw std::runtime_error(
+                    "EncodedBlob: unseated std::shared_ptr used.");
+
+            return obj->getData().size() + 9;
+        }())
+        , ptr_(
+              (size_ <= payload_.size()) ? payload_.data()
+                                         : new std::uint8_t[size_])
+    {
+        std::fill_n(ptr_, 8, std::uint8_t{0});
+        ptr_[8] = static_cast<std::uint8_t>(obj->getType());
+        std::copy_n(obj->getData().data(), obj->getData().size(), ptr_ + 9);
+        std::copy_n(obj->getHash().data(), obj->getHash().size(), key_.data());
+    }
+
+    ~EncodedBlob()
+    {
+        assert(
+            ((ptr_ == payload_.data()) && (size_ <= payload_.size())) ||
+            ((ptr_ != payload_.data()) && (size_ > payload_.size())));
+
+        if (ptr_ != payload_.data())
+            delete[] ptr_;
+    }
+
+    [[nodiscard]] void const*
     getKey() const noexcept
     {
-        return m_key;
+        return static_cast<void const*>(key_.data());
     }

-    std::size_t
+    [[nodiscard]] std::size_t
     getSize() const noexcept
     {
-        return m_data.size();
+        return size_;
     }

-    void const*
+    [[nodiscard]] void const*
     getData() const noexcept
     {
-        return reinterpret_cast<void const*>(m_data.data());
+        return static_cast<void const*>(ptr_);
     }

-private:
-    void const* m_key;
-    Buffer m_data;
 };

 } // namespace NodeStore
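
For reference, a small sketch of how the flattened format documented above reads back: skip the 8 unused prefix bytes, take the type from byte 8, and treat the rest as the payload. In the codebase this job belongs to the existing `DecodedBlob` class; the `Decoded` struct and `decodeFlattened` name below are illustrative only.

    // Sketch of parsing the flattened format: 8 ignored prefix bytes, one
    // type byte, then the payload.
    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    struct Decoded
    {
        std::uint8_t type;
        std::vector<std::uint8_t> payload;
    };

    inline Decoded
    decodeFlattened(void const* data, std::size_t size)
    {
        if (size < 9)
            throw std::runtime_error("blob too small for prefix and type byte");

        auto const* p = static_cast<std::uint8_t const*>(data);

        // Bytes 0-7 are unused (older code stored a ledger index there);
        // byte 8 is the object type; the payload follows.
        return {p[8], std::vector<std::uint8_t>(p + 9, p + size)};
    }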
@@ -56,10 +56,9 @@ public:

         auto batch = createPredictableBatch(numObjectsToTest, seedValue);

-        EncodedBlob encoded;
         for (int i = 0; i < batch.size(); ++i)
         {
-            encoded.prepare(batch[i]);
+            EncodedBlob encoded(batch[i]);

             DecodedBlob decoded(
                 encoded.getKey(), encoded.getData(), encoded.getSize());