diff --git a/include/xrpl/basics/MallocTrim.h b/include/xrpl/basics/MallocTrim.h
new file mode 100644
index 0000000000..2d0cf989ba
--- /dev/null
+++ b/include/xrpl/basics/MallocTrim.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <xrpl/beast/utility/Journal.h>
+
+#include <chrono>
+#include <cstdint>
+#include <string_view>
+
+namespace xrpl {
+
+// cSpell:ignore ptmalloc
+
+// -----------------------------------------------------------------------------
+// Allocator interaction note:
+// - This facility invokes glibc's malloc_trim(0) on Linux/glibc to request that
+//   ptmalloc return free heap pages to the OS.
+// - If an alternative allocator (e.g. jemalloc or tcmalloc) is linked or
+//   preloaded (LD_PRELOAD), calling glibc's malloc_trim typically has no effect
+//   on the *active* heap. The call is harmless but may not reclaim memory
+//   because those allocators manage their own arenas.
+// - Only glibc sbrk/arena space is eligible for trimming; large mmap-backed
+//   allocations are usually returned to the OS on free regardless of trimming.
+// - Call at known reclamation points (e.g., after cache sweeps / online delete)
+//   and consider rate limiting to avoid churn.
+// -----------------------------------------------------------------------------
+
+/**
+ * @brief Result of a mallocTrim() call.
+ *
+ * Numeric fields keep their -1 default when the value was not measured. The
+ * RSS, duration and page-fault fields are filled in only when mallocTrim()
+ * runs with debug logging enabled.
+ */
+struct MallocTrimReport
+{
+    /// True when built for Linux/glibc, where malloc_trim is available.
+    bool supported{false};
+    /// Value returned by ::malloc_trim.
+    int trimResult{-1};
+    /// Resident set size read from /proc/self/statm before the trim, in kB.
+    std::int64_t rssBeforeKB{-1};
+    /// Resident set size read from /proc/self/statm after the trim, in kB.
+    std::int64_t rssAfterKB{-1};
+    /// Elapsed steady_clock time spent inside the trim call.
+    std::chrono::microseconds durationUs{-1};
+    /// Change in the calling thread's minor page-fault count across the trim.
+    std::int64_t minfltDelta{-1};
+    /// Change in the calling thread's major page-fault count across the trim.
+    std::int64_t majfltDelta{-1};
+
+    /**
+     * @brief RSS change across the trim, in kB (negative when RSS shrank).
+     * @return rssAfterKB - rssBeforeKB, or 0 if either sample is negative.
+     */
+    [[nodiscard]] std::int64_t
+    deltaKB() const noexcept
+    {
+        if (rssBeforeKB < 0 || rssAfterKB < 0)
+            return 0;
+        return rssAfterKB - rssBeforeKB;
+    }
+};
+
+/**
+ * @brief Attempt to return freed memory to the operating system.
+ *
+ * On Linux with glibc malloc, this issues ::malloc_trim(0), which may release
+ * free space from ptmalloc arenas back to the kernel. On other platforms, or if
+ * a different allocator is in use, this function is a no-op and the report will
+ * indicate that trimming is unsupported or had no effect.
+ *
+ * @param tag Identifier for logging/debugging purposes.
+ * @param journal Journal for diagnostic logging.
+ * @return Report with the trim result. RSS, duration and page-fault metrics
+ *         are only collected when debug logging is enabled on @p journal.
+ *
+ * @note If an alternative allocator (jemalloc/tcmalloc) is linked or preloaded,
+ *       glibc's malloc_trim may not affect the active heap: the call is
+ *       harmless but typically reclaims nothing under those allocators.
+ *
+ * @note Only memory served from glibc's sbrk/arena heaps is eligible for trim.
+ *       Large allocations satisfied via mmap are usually returned on free
+ *       independently of trimming.
+ *
+ * @note Intended for use after operations that free significant memory (e.g.,
+ *       cache sweeps, ledger cleanup, online delete). Consider rate limiting.
+ */
+MallocTrimReport
+mallocTrim(std::string_view tag, beast::Journal journal);
+
+} // namespace xrpl
diff --git a/src/libxrpl/basics/MallocTrim.cpp b/src/libxrpl/basics/MallocTrim.cpp
new file mode 100644
index 0000000000..1b0932b39d
--- /dev/null
+++ b/src/libxrpl/basics/MallocTrim.cpp
@@ -0,0 +1,155 @@
+#include <xrpl/basics/Log.h>
+#include <xrpl/basics/MallocTrim.h>
+
+#include <boost/predef.h>
+
+#include <chrono>
+#include <cstdint>
+#include <fstream>
+#include <sstream>
+#include <string>
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+#include <sys/resource.h>
+
+#include <malloc.h>
+#include <unistd.h>
+
+// Require RUSAGE_THREAD for thread-scoped page fault tracking
+#ifndef RUSAGE_THREAD
+#error "MallocTrim rusage instrumentation requires RUSAGE_THREAD on Linux/glibc"
+#endif
+
+namespace {
+
+// Snapshot the calling thread's resource usage; returns false if getrusage fails.
+bool
+getRusageThread(struct rusage& ru)
+{
+    return ::getrusage(RUSAGE_THREAD, &ru) == 0; // LCOV_EXCL_LINE
+}
+
+} // namespace
+#endif
+
+namespace xrpl {
+
+namespace detail {
+
+// cSpell:ignore statm
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+
+// Thin wrapper over glibc's ::malloc_trim; forwards padBytes and returns its result unchanged.
+inline int
+mallocTrimWithPad(std::size_t padBytes)
+{
+    return ::malloc_trim(padBytes);
+}
+
+// Extract the resident set size, in kB, from the text of /proc/self/statm.
+// Returns -1 if the first two fields cannot be parsed, the resident count is
+// negative, or the system page size is unavailable.
+long
+parseStatmRSSkB(std::string const& statm)
+{
+    // /proc/self/statm format: size resident shared text lib data dt
+    // We want the second field (resident) which is in pages
+    std::istringstream iss(statm);
+    long size = 0;
+    long resident = 0;
+    if (!(iss >> size >> resident) || resident < 0)
+        return -1;
+
+    // Convert pages to KB
+    long const pageSize = ::sysconf(_SC_PAGESIZE);
+    if (pageSize <= 0)
+        return -1;
+
+    // Multiply in 64 bits so pages * pageSize cannot overflow a 32-bit long.
+    return static_cast<long>(static_cast<std::int64_t>(resident) * pageSize / 1024);
+}
+
+#endif // __GLIBC__ && BOOST_OS_LINUX
+
+} // namespace detail
+
+MallocTrimReport
+mallocTrim(std::string_view tag, beast::Journal journal)
+{
+    // LCOV_EXCL_START
+
+    MallocTrimReport report;
+
+#if !(defined(__GLIBC__) && BOOST_OS_LINUX)
+    JLOG(journal.debug()) << "malloc_trim not supported on this platform (tag=" << tag << ")";
+#else
+    // Keep glibc malloc_trim padding at 0 (default): 12h Mainnet tests across 0/256KB/1MB/16MB
+    // showed no clear, consistent benefit from custom padding—0 provided the best overall balance
+    // of RSS reduction and trim-latency stability without adding a tuning surface.
+    constexpr std::size_t TRIM_PAD = 0;
+
+    report.supported = true;
+
+    if (journal.debug())
+    {
+        auto readFile = [](std::string const& path) -> std::string {
+            std::ifstream ifs(path, std::ios::in | std::ios::binary);
+            if (!ifs.is_open())
+                return {};
+
+            // /proc files are often not seekable; read as a stream.
+            std::ostringstream oss;
+            oss << ifs.rdbuf();
+            return oss.str();
+        };
+
+        std::string const statmPath = "/proc/self/statm";
+
+        // Sample RSS and the per-thread fault counters on both sides of the trim.
+        report.rssBeforeKB = detail::parseStatmRSSkB(readFile(statmPath));
+
+        struct rusage ru0{};
+        bool const have_ru0 = getRusageThread(ru0);
+
+        auto const t0 = std::chrono::steady_clock::now();
+
+        report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
+
+        auto const t1 = std::chrono::steady_clock::now();
+
+        struct rusage ru1{};
+        bool const have_ru1 = getRusageThread(ru1);
+
+        report.rssAfterKB = detail::parseStatmRSSkB(readFile(statmPath));
+        report.durationUs = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0);
+
+        // Leave the fault deltas at -1 unless both snapshots succeeded.
+        if (have_ru0 && have_ru1)
+        {
+            report.minfltDelta = ru1.ru_minflt - ru0.ru_minflt;
+            report.majfltDelta = ru1.ru_majflt - ru0.ru_majflt;
+        }
+
+        JLOG(journal.debug()) << "malloc_trim tag=" << tag << " result=" << report.trimResult
+                              << " pad=" << TRIM_PAD << " bytes"
+                              << " rss_before=" << report.rssBeforeKB << "kB"
+                              << " rss_after=" << report.rssAfterKB << "kB"
+                              << " delta=" << report.deltaKB() << "kB"
+                              << " duration_us=" << report.durationUs.count()
+                              << " minflt_delta=" << report.minfltDelta
+                              << " majflt_delta=" << report.majfltDelta;
+    }
+    else
+    {
+        report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
+    }
+
+#endif
+
+    return report;
+
+    // LCOV_EXCL_STOP
+}
+
+} // namespace xrpl
diff --git a/src/tests/libxrpl/basics/MallocTrim.cpp b/src/tests/libxrpl/basics/MallocTrim.cpp
new file mode 100644
index 0000000000..f01bd91bbf
--- /dev/null
+++ b/src/tests/libxrpl/basics/MallocTrim.cpp
@@ -0,0 +1,216 @@
+#include <xrpl/basics/MallocTrim.h>
+
+#include <boost/predef.h>
+
+#include <gtest/gtest.h>
+
+using namespace xrpl;
+
+// cSpell:ignore statm
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+namespace xrpl::detail {
+long
+parseStatmRSSkB(std::string const& statm);
+} // namespace xrpl::detail
+#endif
+
+TEST(MallocTrimReport, structure)
+{
+    // Test default construction
+    MallocTrimReport report;
+    EXPECT_EQ(report.supported, false);
+    EXPECT_EQ(report.trimResult, -1);
+    EXPECT_EQ(report.rssBeforeKB, -1);
+    EXPECT_EQ(report.rssAfterKB, -1);
+    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
+    EXPECT_EQ(report.minfltDelta, -1);
+    EXPECT_EQ(report.majfltDelta, -1);
+    EXPECT_EQ(report.deltaKB(), 0);
+
+    // Test deltaKB calculation - memory freed
+    report.rssBeforeKB = 1000;
+    report.rssAfterKB = 800;
+    EXPECT_EQ(report.deltaKB(), -200);
+
+    // Test deltaKB calculation - memory increased
+    report.rssBeforeKB = 500;
+    report.rssAfterKB = 600;
+    EXPECT_EQ(report.deltaKB(), 100);
+
+    // Test deltaKB calculation - no change
+    report.rssBeforeKB = 1234;
+    report.rssAfterKB = 1234;
+    EXPECT_EQ(report.deltaKB(), 0);
+}
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+TEST(parseStatmRSSkB, standard_format)
+{
+    using xrpl::detail::parseStatmRSSkB;
+
+    // Test standard format: size resident shared text lib data dt
+    // Assuming 4KB page size: resident=1000 pages = 4000 KB
+    {
+        std::string statm = "25365 1000 2377 0 0 5623 0";
+        long result = parseStatmRSSkB(statm);
+        // Note: actual result depends on system page size
+        // On most systems it's 4KB, so 1000 pages = 4000 KB
+        EXPECT_GT(result, 0);
+    }
+
+    // Test with newline
+    {
+        std::string statm = "12345 2000 1234 0 0 3456 0\n";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_GT(result, 0);
+    }
+
+    // Test with tabs
+    {
+        std::string statm = "12345\t2000\t1234\t0\t0\t3456\t0";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_GT(result, 0);
+    }
+
+    // Test zero resident pages
+    {
+        std::string statm = "25365 0 2377 0 0 5623 0";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, 0);
+    }
+
+    // Test with extra whitespace
+    {
+        std::string statm = " 25365 1000 2377 ";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_GT(result, 0);
+    }
+
+    // Test empty string
+    {
+        std::string statm = "";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, -1);
+    }
+
+    // Test malformed data (only one field)
+    {
+        std::string statm = "25365";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, -1);
+    }
+
+    // Test malformed data (non-numeric)
+    {
+        std::string statm = "abc def ghi";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, -1);
+    }
+
+    // Test malformed data (second field non-numeric)
+    {
+        std::string statm = "25365 abc 2377";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, -1);
+    }
+
+    // Test malformed data (negative resident count)
+    {
+        std::string statm = "25365 -5 2377";
+        long result = parseStatmRSSkB(statm);
+        EXPECT_EQ(result, -1);
+    }
+}
+#endif
+
+TEST(mallocTrim, without_debug_logging)
+{
+    beast::Journal journal{beast::Journal::getNullSink()};
+
+    MallocTrimReport report = mallocTrim("without_debug", journal);
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+    EXPECT_EQ(report.supported, true);
+    EXPECT_GE(report.trimResult, 0);
+    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
+    EXPECT_EQ(report.minfltDelta, -1);
+    EXPECT_EQ(report.majfltDelta, -1);
+#else
+    EXPECT_EQ(report.supported, false);
+    EXPECT_EQ(report.trimResult, -1);
+    EXPECT_EQ(report.rssBeforeKB, -1);
+    EXPECT_EQ(report.rssAfterKB, -1);
+    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
+    EXPECT_EQ(report.minfltDelta, -1);
+    EXPECT_EQ(report.majfltDelta, -1);
+#endif
+}
+
+TEST(mallocTrim, empty_tag)
+{
+    beast::Journal journal{beast::Journal::getNullSink()};
+    MallocTrimReport report = mallocTrim("", journal);
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+    EXPECT_EQ(report.supported, true);
+    EXPECT_GE(report.trimResult, 0);
+#else
+    EXPECT_EQ(report.supported, false);
+#endif
+}
+
+TEST(mallocTrim, with_debug_logging)
+{
+    struct DebugSink : public beast::Journal::Sink
+    {
+        DebugSink() : Sink(beast::severities::kDebug, false)
+        {
+        }
+        void
+        write(beast::severities::Severity, std::string const&) override
+        {
+        }
+        void
+        writeAlways(beast::severities::Severity, std::string const&) override
+        {
+        }
+    };
+
+    DebugSink sink;
+    beast::Journal journal{sink};
+
+    MallocTrimReport report = mallocTrim("debug_test", journal);
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+    EXPECT_EQ(report.supported, true);
+    EXPECT_GE(report.trimResult, 0);
+    EXPECT_GE(report.durationUs.count(), 0);
+    EXPECT_GE(report.minfltDelta, 0);
+    EXPECT_GE(report.majfltDelta, 0);
+#else
+    EXPECT_EQ(report.supported, false);
+    EXPECT_EQ(report.trimResult, -1);
+    EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
+    EXPECT_EQ(report.minfltDelta, -1);
+    EXPECT_EQ(report.majfltDelta, -1);
+#endif
+}
+
+TEST(mallocTrim, repeated_calls)
+{
+    beast::Journal journal{beast::Journal::getNullSink()};
+
+    // Call malloc_trim multiple times to ensure it's safe
+    for (int i = 0; i < 5; ++i)
+    {
+        MallocTrimReport report = mallocTrim("iteration_" + std::to_string(i), journal);
+
+#if defined(__GLIBC__) && BOOST_OS_LINUX
+        EXPECT_EQ(report.supported, true);
+        EXPECT_GE(report.trimResult, 0);
+#else
+        EXPECT_EQ(report.supported, false);
+#endif
+    }
+}
diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp
index 91cc387d54..1162bc497a 100644
--- a/src/xrpld/app/main/Application.cpp
+++ b/src/xrpld/app/main/Application.cpp
@@ -31,6 +31,7 @@
 #include
 
 #include
+#include <xrpl/basics/MallocTrim.h>
 #include
 #include
 #include
@@ -1053,6 +1054,8 @@ public:
                 << "; size after: " << cachedSLEs_.size();
         }
 
+        mallocTrim("doSweep", m_journal);
+
         // Set timer to do another sweep later.
         setSweepTimer();
     }