//------------------------------------------------------------------------------
/*
    This file is part of rippled: https://github.com/ripple/rippled
    Copyright (c) 2012, 2013 Ripple Labs Inc.

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#include <ripple/app/ledger/LedgerMaster.h>
#include <ripple/app/ledger/OpenLedger.h>
#include <ripple/app/main/Application.h>
#include <ripple/app/misc/Transaction.h>
#include <ripple/app/misc/TxQ.h>
#include <ripple/app/rdb/backend/SQLiteDatabase.h>
#include <ripple/basics/Log.h>
#include <ripple/basics/contract.h>
#include <ripple/basics/safe_cast.h>
#include <ripple/core/DatabaseCon.h>
#include <ripple/ledger/OpenView.h>
#include <ripple/protocol/ErrorCodes.h>
#include <ripple/protocol/jss.h>
#include <ripple/rpc/CTID.h>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <variant>
#include <vector>

#define ENABLE_PERFORMANCE_TRACKING 0

namespace ripple {

#if ENABLE_PERFORMANCE_TRACKING

// Performance monitoring statistics
namespace {

// Design: Uses thread-local storage for most stats to avoid contention.
// Only global concurrency tracking uses atomics, as it requires cross-thread
// visibility. Statistics are aggregated using dirty reads for minimal
// performance impact.

// Thread-local statistics - no synchronization needed!
struct ThreadLocalStats
{
    uint64_t executionCount = 0;
    uint64_t totalTimeNanos = 0;
    uint64_t totalKeys = 0;
    uint32_t currentlyExecuting = 0;  // 0 or 1 for this thread
    std::thread::id threadId = std::this_thread::get_id();

    // For global registry
    ThreadLocalStats* next = nullptr;

    ThreadLocalStats();
    ~ThreadLocalStats();
};

// Global registry of thread-local stats (only modified during thread
// creation/destruction)
struct GlobalRegistry
{
    std::atomic<ThreadLocalStats*> head{nullptr};
    std::atomic<uint64_t> globalExecutions{0};
    std::atomic<uint32_t> globalConcurrent{
        0};  // Current global concurrent executions
    std::atomic<uint32_t> maxGlobalConcurrent{0};  // Max observed

    // For tracking concurrency samples
    std::vector<uint32_t> concurrencySamples;
    std::mutex sampleMutex;  // Only used during printing

    std::chrono::steady_clock::time_point startTime =
        std::chrono::steady_clock::now();
    std::chrono::steady_clock::time_point lastPrintTime =
        std::chrono::steady_clock::now();

    static constexpr auto PRINT_INTERVAL = std::chrono::seconds(10);
    static constexpr uint64_t PRINT_EVERY_N_CALLS = 1000;

    void
    registerThread(ThreadLocalStats* stats)
    {
        // Add to linked list atomically (lock-free push onto the head)
        ThreadLocalStats* oldHead = head.load();
        do
        {
            stats->next = oldHead;
        } while (!head.compare_exchange_weak(oldHead, stats));
    }

    void
    unregisterThread(ThreadLocalStats* stats)
    {
        // In production, you'd want proper removal logic.
        // For this example, we'll just leave it in the list
        // (threads typically live for the process lifetime anyway).
    }
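    // Caveat: a ThreadLocalStats object is destroyed when its thread exits,
    // but because unregisterThread() leaves it linked, its node stays
    // reachable from `head`, so printStats() would then walk a dangling
    // pointer. This is only safe under the assumption noted above that
    // registered threads live for the process lifetime.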
    void
    checkAndPrint(uint64_t localCount)
    {
        // Update approximate global count
        uint64_t approxGlobal =
            globalExecutions.fetch_add(localCount) + localCount;

        auto now = std::chrono::steady_clock::now();
        if (approxGlobal % PRINT_EVERY_N_CALLS < localCount ||
            (now - lastPrintTime) >= PRINT_INTERVAL)
        {
            // Only one thread prints at a time
            static std::atomic<bool> printing{false};
            bool expected = false;
            if (printing.compare_exchange_strong(expected, true))
            {
                // Double-check timing
                now = std::chrono::steady_clock::now();
                if ((now - lastPrintTime) >= PRINT_INTERVAL)
                {
                    printStats();
                    lastPrintTime = now;
                }
                printing = false;
            }
        }
    }

    void
    printStats()
    {
        // Dirty read of all thread-local stats
        uint64_t totalExecs = 0;
        uint64_t totalNanos = 0;
        uint64_t totalKeyCount = 0;
        uint32_t currentConcurrent = globalConcurrent.load();
        uint32_t maxConcurrent = maxGlobalConcurrent.load();

        std::unordered_map<
            std::thread::id,
            std::tuple<uint64_t, uint64_t, uint64_t>>
            threadData;

        // Walk the linked list of thread-local stats
        ThreadLocalStats* current = head.load();
        while (current)
        {
            // Dirty reads - no synchronization!
            uint64_t execs = current->executionCount;
            if (execs > 0)
            {
                uint64_t nanos = current->totalTimeNanos;
                uint64_t keys = current->totalKeys;

                totalExecs += execs;
                totalNanos += nanos;
                totalKeyCount += keys;
                threadData[current->threadId] = {execs, nanos, keys};
            }
            current = current->next;
        }

        if (totalExecs == 0)
            return;

        double avgTimeUs =
            static_cast<double>(totalNanos) / totalExecs / 1000.0;
        double avgKeys = static_cast<double>(totalKeyCount) / totalExecs;
        double totalTimeMs = static_cast<double>(totalNanos) / 1000000.0;

        // Calculate wall clock time elapsed
        auto now = std::chrono::steady_clock::now();
        auto wallTimeMs =
            std::chrono::duration_cast<std::chrono::milliseconds>(
                now - startTime)
                .count();
        double effectiveParallelism = wallTimeMs > 0
            ? totalTimeMs / static_cast<double>(wallTimeMs)
            : 0.0;

        std::cout
            << "\n=== Transaction::tryDirectApply Performance Stats ===\n";
        std::cout << "Total executions: ~" << totalExecs << " (dirty read)\n";
        std::cout << "Wall clock time: " << wallTimeMs << " ms\n";
        std::cout << "Total CPU time: " << std::fixed << std::setprecision(2)
                  << totalTimeMs << " ms\n";
        std::cout << "Effective parallelism: " << std::fixed
                  << std::setprecision(2) << effectiveParallelism << "x\n";
        std::cout << "Average time: " << std::fixed << std::setprecision(2)
                  << avgTimeUs << " μs\n";
        std::cout << "Average keys touched: " << std::fixed
                  << std::setprecision(2) << avgKeys << "\n";
        std::cout << "Current concurrent executions: " << currentConcurrent
                  << "\n";
        std::cout << "Max concurrent observed: " << maxConcurrent << "\n";
        std::cout << "Active threads: " << threadData.size() << "\n";
        std::cout << "Thread distribution:\n";

        // Sort threads by total time spent (descending)
        std::vector<std::pair<
            std::thread::id,
            std::tuple<uint64_t, uint64_t, uint64_t>>>
            sortedThreads(threadData.begin(), threadData.end());
        std::sort(
            sortedThreads.begin(),
            sortedThreads.end(),
            [](const auto& a, const auto& b) {
                return std::get<1>(a.second) >
                    std::get<1>(b.second);  // Sort by time
            });

        for (const auto& [tid, data] : sortedThreads)
        {
            auto [count, time, keys] = data;
            double percentage =
                (static_cast<double>(count) / totalExecs) * 100.0;
            double avgThreadTimeUs =
                static_cast<double>(time) / count / 1000.0;
            double totalThreadTimeMs = static_cast<double>(time) / 1000000.0;
            double timePercentage =
                (static_cast<double>(time) / totalNanos) * 100.0;

            std::cout << "  Thread " << tid << ": " << count
                      << " executions (" << std::fixed << std::setprecision(1)
                      << percentage << "%), total " << std::setprecision(2)
                      << totalThreadTimeMs << " ms (" << std::setprecision(1)
                      << timePercentage << "% of time), avg "
                      << std::setprecision(2) << avgThreadTimeUs << " μs\n";
        }

        std::cout << "Hardware concurrency: "
                  << std::thread::hardware_concurrency() << "\n";
        std::cout << "===================================================\n\n";
        std::cout.flush();
    }
};

static GlobalRegistry globalRegistry;

// Thread-local instance
thread_local ThreadLocalStats tlStats;

// Constructor/destructor for thread registration
ThreadLocalStats::ThreadLocalStats()
{
    globalRegistry.registerThread(this);
}

ThreadLocalStats::~ThreadLocalStats()
{
    globalRegistry.unregisterThread(this);
}

// RAII class to track concurrent executions (global)
class ConcurrentExecutionTracker
{
    // Note: This introduces minimal atomic contention to track true global
    // concurrency. The alternative would miss concurrent executions between
    // print intervals.
public:
    ConcurrentExecutionTracker()
    {
        tlStats.currentlyExecuting = 1;

        // Update global concurrent count
        uint32_t current = globalRegistry.globalConcurrent.fetch_add(1) + 1;

        // Update max if needed (only contends when setting a new maximum)
        uint32_t currentMax = globalRegistry.maxGlobalConcurrent.load();
        while (current > currentMax &&
               !globalRegistry.maxGlobalConcurrent.compare_exchange_weak(
                   currentMax, current))
        {
            // Loop until we successfully update or current is no longer
            // greater than currentMax.
        }
    }

    ~ConcurrentExecutionTracker()
    {
        tlStats.currentlyExecuting = 0;
        globalRegistry.globalConcurrent.fetch_sub(1);
    }
};

}  // namespace

#endif  // ENABLE_PERFORMANCE_TRACKING
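// The instrumentation above is compiled out by default. Setting
// ENABLE_PERFORMANCE_TRACKING to 1 re-enables the timing blocks in the
// constructor below, which print aggregated statistics roughly once per
// GlobalRegistry::PRINT_EVERY_N_CALLS executions or once per PRINT_INTERVAL,
// whichever the checkAndPrint() heuristic hits first.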
Transaction::Transaction(
    std::shared_ptr<STTx const> const& stx,
    std::string& reason,
    Application& app) noexcept
    : mTransaction(stx), mApp(app), j_(app.journal("Ledger"))
{
    try
    {
        mTransactionID = mTransaction->getTransactionID();

        OpenView sandbox(*app.openLedger().current());
        sandbox.getAndResetKeysTouched();
        ApplyFlags flags{0};

#if ENABLE_PERFORMANCE_TRACKING
        ConcurrentExecutionTracker concurrentTracker;
        auto startTime = std::chrono::steady_clock::now();
#endif

        if (auto directApplied =
                app.getTxQ().tryDirectApply(app, sandbox, stx, flags, j_))
            keysTouched = sandbox.getAndResetKeysTouched();

#if ENABLE_PERFORMANCE_TRACKING
        auto endTime = std::chrono::steady_clock::now();
        auto elapsedNanos =
            std::chrono::duration_cast<std::chrono::nanoseconds>(
                endTime - startTime)
                .count();

        tlStats.executionCount++;
        tlStats.totalTimeNanos += elapsedNanos;
        tlStats.totalKeys += keysTouched.size();

        if (tlStats.executionCount % 100 == 0)
        {
            globalRegistry.checkAndPrint(100);
        }
#endif
    }
    catch (std::exception& e)
    {
        reason = e.what();
        return;
    }

    mStatus = NEW;
}

//
// Misc.
//

void
Transaction::setStatus(
    TransStatus ts,
    std::uint32_t lseq,
    std::optional<uint32_t> tseq,
    std::optional<uint32_t> netID)
{
    mStatus = ts;
    mInLedger = lseq;
    if (tseq)
        mTxnSeq = tseq;
    if (netID)
        mNetworkID = netID;
}
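// The transaction database stores a transaction's status as a short string;
// sqlTransactionStatus() maps its first character (the txnSql* codes matched
// below) back to the in-memory TransStatus, treating a missing or
// unrecognized value as INVALID.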
TransStatus
Transaction::sqlTransactionStatus(boost::optional<std::string> const& status)
{
    char const c = (status) ? (*status)[0] : safe_cast<char>(txnSqlUnknown);

    switch (c)
    {
        case txnSqlNew:
            return NEW;
        case txnSqlConflict:
            return CONFLICTED;
        case txnSqlHeld:
            return HELD;
        case txnSqlValidated:
            return COMMITTED;
        case txnSqlIncluded:
            return INCLUDED;
    }

    assert(c == txnSqlUnknown);
    return INVALID;
}

Transaction::pointer
Transaction::transactionFromSQL(
    boost::optional<std::uint64_t> const& ledgerSeq,
    boost::optional<std::string> const& status,
    Blob const& rawTxn,
    Application& app)
{
    std::uint32_t const inLedger =
        rangeCheckedCast<std::uint32_t>(ledgerSeq.value_or(0));

    SerialIter it(makeSlice(rawTxn));
    auto txn = std::make_shared<STTx const>(it);
    std::string reason;
    auto tr = std::make_shared<Transaction>(txn, reason, app);

    tr->setStatus(sqlTransactionStatus(status));
    tr->setLedger(inLedger);
    return tr;
}

std::variant<
    std::pair<std::shared_ptr<Transaction>, std::shared_ptr<STTx const>>,
    TxSearched>
Transaction::load(uint256 const& id, Application& app, error_code_i& ec)
{
    return load(id, app, std::nullopt, ec);
}

std::variant<
    std::pair<std::shared_ptr<Transaction>, std::shared_ptr<STTx const>>,
    TxSearched>
Transaction::load(
    uint256 const& id,
    Application& app,
    ClosedInterval<uint32_t> const& range,
    error_code_i& ec)
{
    using op = std::optional<ClosedInterval<uint32_t>>;

    return load(id, app, op{range}, ec);
}

Transaction::Locator
Transaction::locate(uint256 const& id, Application& app)
{
    auto const db =
        dynamic_cast<SQLiteDatabase*>(&app.getRelationalDatabase());

    if (!db)
    {
        Throw<std::runtime_error>("Failed to get relational database");
    }

    return db->locateTransaction(id);
}

std::variant<
    std::pair<std::shared_ptr<Transaction>, std::shared_ptr<STTx const>>,
    TxSearched>
Transaction::load(
    uint256 const& id,
    Application& app,
    std::optional<ClosedInterval<uint32_t>> const& range,
    error_code_i& ec)
{
    auto const db =
        dynamic_cast<SQLiteDatabase*>(&app.getRelationalDatabase());

    if (!db)
    {
        Throw<std::runtime_error>("Failed to get relational database");
    }

    return db->getTransaction(id, range, ec);
}

// Pass JsonOptions::include_date to add the ledger close date of the
// transaction.
Json::Value
Transaction::getJson(JsonOptions options, bool binary) const
{
    Json::Value ret(mTransaction->getJson(JsonOptions::none, binary));

    if (mInLedger)
    {
        ret[jss::inLedger] = mInLedger;  // Deprecated.
        ret[jss::ledger_index] = mInLedger;

        if (options == JsonOptions::include_date)
        {
            auto ct = mApp.getLedgerMaster().getCloseTimeBySeq(mInLedger);
            if (ct)
                ret[jss::date] = ct->time_since_epoch().count();
        }

        // compute outgoing CTID;
        // override local network id if it's explicitly in the txn
        std::optional<uint32_t> netID = mNetworkID;
        if (mTransaction->isFieldPresent(sfNetworkID))
            netID = mTransaction->getFieldU32(sfNetworkID);

        // A CTID packs a 28-bit ledger sequence, a 16-bit transaction
        // sequence, and a 16-bit network id, hence the range checks below.
        if (mTxnSeq && netID && *mTxnSeq <= 0xFFFFU && *netID < 0xFFFFU &&
            mInLedger < 0xFFFFFFFUL)
        {
            std::optional<std::string> ctid =
                RPC::encodeCTID(mInLedger, *mTxnSeq, *netID);
            if (ctid)
                ret[jss::ctid] = *ctid;
        }
    }

    return ret;
}

}  // namespace ripple