Allow server to download cache from another clio server (#246)

* Allow server to download cache from another clio server

* Config takes an array of clio peers. If any of these peers has a
  full cache, clio picks one of those peers at random and downloads the
  cache from it. Otherwise, clio falls back to loading the cache from
  the database.
This commit is contained in:
CJ Cobb
2022-10-04 12:29:29 -04:00
committed by GitHub
parent 4c9c606202
commit 97ef66d130
4 changed files with 250 additions and 11 deletions

View File

@@ -22,6 +22,7 @@
"grpc_port":"50051"
}
],
"peers": [],
"dos_guard":
{
"whitelist":["127.0.0.1"]

View File

@@ -913,6 +913,172 @@ ReportingETL::monitor()
}
}
}
/// Populate the ledger state cache by streaming it from another clio server
/// over a websocket connection, instead of paging it out of the database.
///
/// Repeatedly issues "ledger_data" requests (binary, out_of_order) against
/// the peer and feeds each page of state objects into the local cache. The
/// peer must itself have a full cache; if it reports otherwise the download
/// is aborted.
///
/// @param ledgerIndex sequence of the ledger whose state is downloaded
/// @param ip          peer hostname or address
/// @param port        peer websocket port (as a string, for the resolver)
/// @param yield       coroutine context used for all async operations
/// @return true if the complete cache was downloaded and marked full;
///         false on any resolution, connection, I/O, or protocol error
bool
ReportingETL::loadCacheFromClioPeer(
    uint32_t ledgerIndex,
    std::string const& ip,
    std::string const& port,
    boost::asio::yield_context& yield)
{
    BOOST_LOG_TRIVIAL(info)
        << "Loading cache from peer. ip = " << ip << " . port = " << port;
    namespace beast = boost::beast;          // from <boost/beast.hpp>
    namespace http = beast::http;            // from <boost/beast/http.hpp>
    namespace websocket = beast::websocket;  // from
    namespace net = boost::asio;             // from
    using tcp = boost::asio::ip::tcp;        // from
    try
    {
        boost::beast::error_code ec;

        // These objects perform our I/O
        tcp::resolver resolver{ioContext_};

        BOOST_LOG_TRIVIAL(trace) << __func__ << " Creating websocket";
        auto ws =
            std::make_unique<websocket::stream<beast::tcp_stream>>(ioContext_);

        // Look up the domain name
        auto const results = resolver.async_resolve(ip, port, yield[ec]);
        if (ec)
            return false;  // was `return {};` — spell the bool out for
                           // consistency with every other error path

        BOOST_LOG_TRIVIAL(trace) << __func__ << " Connecting websocket";
        // Make the connection on the IP address we get from a lookup
        ws->next_layer().async_connect(results, yield[ec]);
        if (ec)
            return false;

        BOOST_LOG_TRIVIAL(trace)
            << __func__ << " Performing websocket handshake";
        // Perform the websocket handshake
        ws->async_handshake(ip, "/", yield[ec]);
        if (ec)
            return false;

        std::optional<boost::json::value> marker;
        BOOST_LOG_TRIVIAL(trace) << __func__ << " Sending request";

        // Build one page request; the marker (if any) resumes the iteration
        // where the previous page left off.
        auto getRequest = [&](auto const& marker) {
            boost::json::object request = {
                {"command", "ledger_data"},
                {"ledger_index", ledgerIndex},
                {"binary", true},
                {"out_of_order", true},
                {"limit", 2048}};
            if (marker)
                request["marker"] = *marker;
            return request;
        };

        do
        {
            // Send the message
            ws->async_write(
                net::buffer(boost::json::serialize(getRequest(marker))),
                yield[ec]);
            if (ec)
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " error writing = " << ec.message();
                return false;
            }

            beast::flat_buffer buffer;
            ws->async_read(buffer, yield[ec]);
            if (ec)
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " error reading = " << ec.message();
                return false;
            }

            auto raw = beast::buffers_to_string(buffer.data());
            auto parsed = boost::json::parse(raw);
            if (!parsed.is_object())
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " Error parsing response: " << raw;
                return false;
            }
            BOOST_LOG_TRIVIAL(trace)
                << __func__ << " Successfully parsed response " << parsed;

            if (auto const& response = parsed.as_object();
                response.contains("error"))
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " Response contains error: " << response;
                auto const& err = response.at("error");
                if (err.is_string() && err.as_string() == "lgrNotFound")
                {
                    // Peer hasn't seen this ledger yet; retry the same page.
                    // TODO(review): this retries immediately with no backoff
                    // — consider an async timer wait to avoid hammering the
                    // peer if the ledger is slow to arrive.
                    BOOST_LOG_TRIVIAL(warning)
                        << __func__
                        << " ledger not found. ledger = " << ledgerIndex
                        << ". trying again";
                    continue;
                }
                return false;
            }

            auto const& response = parsed.as_object()["result"].as_object();
            // The peer's cache must be full, or the pages it serves could be
            // missing objects; bail out and try another source.
            if (!response.contains("cache_full") ||
                !response.at("cache_full").as_bool())
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " cache not full for clio node. ip = " << ip;
                return false;
            }

            // Absent marker means this was the last page.
            if (response.contains("marker"))
                marker = response.at("marker");
            else
                marker = {};

            // Decode this page of {index, data} hex pairs into cache objects.
            auto const& state = response.at("state").as_array();
            std::vector<Backend::LedgerObject> objects;
            objects.reserve(state.size());
            for (auto const& ledgerObject : state)
            {
                auto const& obj = ledgerObject.as_object();
                Backend::LedgerObject stateObject = {};

                if (!stateObject.key.parseHex(
                        obj.at("index").as_string().c_str()))
                {
                    BOOST_LOG_TRIVIAL(error)
                        << __func__ << " failed to parse object id";
                    return false;
                }
                boost::algorithm::unhex(
                    obj.at("data").as_string().c_str(),
                    std::back_inserter(stateObject.blob));
                objects.push_back(std::move(stateObject));
            }
            backend_->cache().update(objects, ledgerIndex, true);

            if (marker)
                BOOST_LOG_TRIVIAL(debug)
                    << __func__ << " - At marker " << *marker;
        } while (marker);

        BOOST_LOG_TRIVIAL(info)
            << __func__
            << " Finished downloading ledger from clio node. ip = " << ip;

        backend_->cache().setFull();
        return true;
    }
    catch (std::exception const& e)
    {
        BOOST_LOG_TRIVIAL(error)
            << __func__ << " Encountered exception : " << e.what()
            << " - ip = " << ip;
        return false;
    }
}
void
ReportingETL::loadCache(uint32_t seq)
@@ -936,6 +1102,51 @@ ReportingETL::loadCache(uint32_t seq)
assert(false);
return;
}
if (clioPeers.size() > 0)
{
boost::asio::spawn(
ioContext_, [this, seq](boost::asio::yield_context yield) {
for (auto const& peer : clioPeers)
{
// returns true on success
if (loadCacheFromClioPeer(
seq, peer.ip, std::to_string(peer.port), yield))
return;
}
// if we couldn't successfully load from any peers, load from db
loadCacheFromDb(seq);
});
return;
}
else
{
loadCacheFromDb(seq);
}
// If loading synchronously, poll cache until full
while (cacheLoadStyle_ == CacheLoadStyle::SYNC &&
!backend_->cache().isFull())
{
BOOST_LOG_TRIVIAL(debug)
<< "Cache not full. Cache size = " << backend_->cache().size()
<< ". Sleeping ...";
std::this_thread::sleep_for(std::chrono::seconds(10));
BOOST_LOG_TRIVIAL(info)
<< "Cache is full. Cache size = " << backend_->cache().size();
}
}
void
ReportingETL::loadCacheFromDb(uint32_t seq)
{
// sanity check to make sure we are not calling this multiple times
static std::atomic_bool loading = false;
if (loading)
{
assert(false);
return;
}
loading = true;
std::vector<Backend::LedgerObject> diff;
auto append = [](auto&& a, auto&& b) {
a.insert(std::end(a), std::begin(b), std::end(b));
@@ -1044,17 +1255,6 @@ ReportingETL::loadCache(uint32_t seq)
});
}
}};
// If loading synchronously, poll cache until full
while (cacheLoadStyle_ == CacheLoadStyle::SYNC &&
!backend_->cache().isFull())
{
BOOST_LOG_TRIVIAL(debug)
<< "Cache not full. Cache size = " << backend_->cache().size()
<< ". Sleeping ...";
std::this_thread::sleep_for(std::chrono::seconds(10));
BOOST_LOG_TRIVIAL(info)
<< "Cache is full. Cache size = " << backend_->cache().size();
}
}
void
@@ -1160,5 +1360,23 @@ ReportingETL::ReportingETL(
if (cache.contains("page_fetch_size") &&
cache.at("page_fetch_size").is_int64())
cachePageFetchSize_ = cache.at("page_fetch_size").as_int64();
if (cache.contains("peers") && cache.at("peers").is_array())
{
auto const& peers = cache.at("peers").as_array();
for (auto const& peer : peers)
{
auto const& clio = peer.as_object();
auto ip = clio.at("ip").as_string().c_str();
auto port = clio.at("port").as_int64();
clioPeers.emplace_back(ip, port);
}
unsigned seed =
std::chrono::system_clock::now().time_since_epoch().count();
std::shuffle(
clioPeers.begin(),
clioPeers.end(),
std::default_random_engine(seed));
}
}
}

View File

@@ -77,6 +77,14 @@ private:
// thread responsible for syncing the cache on startup
std::thread cacheDownloader_;
// Address of another clio server that this node may download its ledger
// state cache from on startup (tried in order; see loadCacheFromClioPeer).
// NOTE(review): populated via emplace_back(ip, port), which for an aggregate
// without constructors requires C++20 parenthesized aggregate init — confirm
// the project's language standard.
struct ClioPeer
{
std::string ip;
int port;
};
std::vector<ClioPeer> clioPeers;
std::thread worker_;
boost::asio::io_context& ioContext_;
@@ -177,6 +185,16 @@ private:
void
loadCache(uint32_t seq);
void
loadCacheFromDb(uint32_t seq);
bool
loadCacheFromClioPeer(
uint32_t ledgerSequence,
std::string const& ip,
std::string const& port,
boost::asio::yield_context& yield);
/// Run ETL. Extracts ledgers and writes them to the database, until a
/// write conflict occurs (or the server shuts down).
/// @note database must already be populated when this function is

View File

@@ -184,6 +184,8 @@ doLedgerData(Context const& context)
objects.push_back(toJson(sle));
}
response[JS(state)] = std::move(objects);
if (outOfOrder)
response["cache_full"] = context.backend->cache().isFull();
auto end2 = std::chrono::system_clock::now();
time = std::chrono::duration_cast<std::chrono::microseconds>(end2 - end)