refactor: optimize NodeStore object conversion (#4353)

When writing objects to the NodeStore, we need to convert them from
the in-memory format to the binary format used by the node store.

The conversion is handled by the `EncodedBlob` class, which is only
instantiated on the stack. Coupled with the fact that most objects
are under 1024 bytes in size, this presents an opportunity to elide
a memory allocation in a critical path.

This commit also simplifies the interface of `EncodedBlob` and
eliminates a subtle corner case that could result in dangling
pointers.

These changes are not expected to produce a dramatic reduction in
memory usage. They avoid the use of a `std::shared_ptr` where it is
unnecessary and use stack-based memory allocation instead of the
heap whenever possible.

This is a net gain both in terms of memory usage (lower
fragmentation) and performance (less work to do at runtime).
This commit is contained in:
Nik Bougalis
2023-03-16 15:00:07 -07:00
committed by GitHub
parent 1c9df69b33
commit 150d4a47e4
7 changed files with 94 additions and 74 deletions

View File

@@ -532,7 +532,6 @@ target_sources (rippled PRIVATE
src/ripple/nodestore/impl/DeterministicShard.cpp
src/ripple/nodestore/impl/DecodedBlob.cpp
src/ripple/nodestore/impl/DummyScheduler.cpp
src/ripple/nodestore/impl/EncodedBlob.cpp
src/ripple/nodestore/impl/ManagerImp.cpp
src/ripple/nodestore/impl/NodeObject.cpp
src/ripple/nodestore/impl/Shard.cpp

View File

@@ -670,7 +670,7 @@ public:
// confirmed persisted. Otherwise, it can become deleted
// prematurely if other copies are removed from caches.
std::shared_ptr<NodeObject> no;
NodeStore::EncodedBlob e;
std::optional<NodeStore::EncodedBlob> e;
std::pair<void const*, std::size_t> compressed;
std::chrono::steady_clock::time_point begin;
// The data is stored in this buffer. The void* in the above member
@@ -686,10 +686,10 @@ public:
std::atomic<std::uint64_t>& retries)
: backend(f), no(nobj), totalWriteRetries(retries)
{
e.prepare(no);
e.emplace(no);
compressed =
NodeStore::nodeobject_compress(e.getData(), e.getSize(), bf);
NodeStore::nodeobject_compress(e->getData(), e->getSize(), bf);
}
};
@@ -722,7 +722,7 @@ public:
CassError rc = cass_statement_bind_bytes(
statement,
0,
static_cast<cass_byte_t const*>(data.e.getKey()),
static_cast<cass_byte_t const*>(data.e->getKey()),
keyBytes_);
if (rc != CASS_OK)
{

View File

@@ -250,8 +250,7 @@ public:
void
do_insert(std::shared_ptr<NodeObject> const& no)
{
EncodedBlob e;
e.prepare(no);
EncodedBlob e(no);
nudb::error_code ec;
nudb::detail::buffer bf;
auto const result = nodeobject_compress(e.getData(), e.getSize(), bf);

View File

@@ -352,11 +352,9 @@ public:
assert(m_db);
rocksdb::WriteBatch wb;
EncodedBlob encoded;
for (auto const& e : batch)
{
encoded.prepare(e);
EncodedBlob encoded(e);
wb.Put(
rocksdb::Slice(

View File

@@ -1,42 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2012, 2013 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <ripple/nodestore/impl/EncodedBlob.h>
#include <cstring>
namespace ripple {
namespace NodeStore {
void
EncodedBlob::prepare(std::shared_ptr<NodeObject> const& object)
{
m_key = object->getHash().begin();
auto ret = m_data.alloc(object->getData().size() + 9);
// the first 8 bytes are unused
std::memset(ret, 0, 8);
ret[8] = static_cast<std::uint8_t>(object->getType());
std::memcpy(ret + 9, object->getData().data(), object->getData().size());
}
} // namespace NodeStore
} // namespace ripple

View File

@@ -22,42 +22,109 @@
#include <ripple/basics/Buffer.h>
#include <ripple/nodestore/NodeObject.h>
#include <cstddef>
#include <boost/align/align_up.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
namespace ripple {
namespace NodeStore {
/** Utility for producing flattened node objects.
@note This defines the database format of a NodeObject!
*/
// VFALCO TODO Make allocator aware and use short_alloc
struct EncodedBlob
{
public:
void
prepare(std::shared_ptr<NodeObject> const& object);
/** Convert a NodeObject from in-memory to database format.
void const*
The (suboptimal) database format consists of:
- 8 prefix bytes which will typically be 0, but don't assume that's the
case; earlier versions of the code would use these bytes to store the
ledger index either once or twice.
- A single byte denoting the type of the object.
- The payload.
@note This class is typically instantiated on the stack, so the size of
the object does not matter as much as it normally would since the
allocation is, effectively, free.
We leverage that fact to preallocate enough memory to handle most
payloads as part of this object, eliminating the need for dynamic
allocation. As of this writing ~94% of objects require fewer than
1024 payload bytes.
*/
class EncodedBlob
{
/** The 32-byte key of the serialized object. */
std::array<std::uint8_t, 32> key_;
/** A pre-allocated buffer for the serialized object.
The buffer is large enough for the 9 byte prefix and at least
1024 more bytes. The precise size is calculated automatically
at compile time so as to avoid wasting space on padding bytes.
*/
std::array<
std::uint8_t,
boost::alignment::align_up(9 + 1024, alignof(std::uint32_t))>
payload_;
/** The size of the serialized data. */
std::uint32_t size_;
/** A pointer to the serialized data.
This may point to the pre-allocated buffer (if it is sufficiently
large) or to a dynamically allocated buffer.
*/
std::uint8_t* const ptr_;
public:
explicit EncodedBlob(std::shared_ptr<NodeObject> const& obj)
: size_([&obj]() {
assert(obj);
if (!obj)
throw std::runtime_error(
"EncodedBlob: unseated std::shared_ptr used.");
return obj->getData().size() + 9;
}())
, ptr_(
(size_ <= payload_.size()) ? payload_.data()
: new std::uint8_t[size_])
{
std::fill_n(ptr_, 8, std::uint8_t{0});
ptr_[8] = static_cast<std::uint8_t>(obj->getType());
std::copy_n(obj->getData().data(), obj->getData().size(), ptr_ + 9);
std::copy_n(obj->getHash().data(), obj->getHash().size(), key_.data());
}
~EncodedBlob()
{
assert(
((ptr_ == payload_.data()) && (size_ <= payload_.size())) ||
((ptr_ != payload_.data()) && (size_ > payload_.size())));
if (ptr_ != payload_.data())
delete[] ptr_;
}
[[nodiscard]] void const*
getKey() const noexcept
{
return m_key;
return static_cast<void const*>(key_.data());
}
std::size_t
[[nodiscard]] std::size_t
getSize() const noexcept
{
return m_data.size();
return size_;
}
void const*
[[nodiscard]] void const*
getData() const noexcept
{
return reinterpret_cast<void const*>(m_data.data());
return static_cast<void const*>(ptr_);
}
private:
void const* m_key;
Buffer m_data;
};
} // namespace NodeStore

View File

@@ -56,10 +56,9 @@ public:
auto batch = createPredictableBatch(numObjectsToTest, seedValue);
EncodedBlob encoded;
for (int i = 0; i < batch.size(); ++i)
{
encoded.prepare(batch[i]);
EncodedBlob encoded(batch[i]);
DecodedBlob decoded(
encoded.getKey(), encoded.getData(), encoded.getSize());