//------------------------------------------------------------------------------
/*
    This file is part of rippled: https://github.com/ripple/rippled
    Copyright (c) 2012, 2013 Ripple Labs Inc.

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#ifndef RIPPLE_NODESTORE_DATABASEIMP_H_INCLUDED
#define RIPPLE_NODESTORE_DATABASEIMP_H_INCLUDED

// NOTE: the original include list was not preserved; the headers below are
// reconstructed from what this file uses, with project paths following the
// rippled source layout.
#include <ripple/basics/KeyCache.h>
#include <ripple/basics/TaggedCache.h>
#include <ripple/basics/Log.h>
#include <ripple/basics/seconds_clock.h>
#include <ripple/nodestore/Database.h>
#include <ripple/nodestore/Scheduler.h>
#include <ripple/nodestore/impl/Tuning.h>
#include <beast/threads/Thread.h>
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <set>
#include <thread>
#include <vector>

namespace ripple {
namespace NodeStore {

class DatabaseImp
    : public Database
{
public:
    beast::Journal m_journal;
    Scheduler& m_scheduler;

    // Persistent key/value storage.
    std::unique_ptr <Backend> m_backend;

    // Larger key/value storage, but not necessarily persistent.
    std::unique_ptr <Backend> m_fastBackend;

    // Positive cache
    TaggedCache <uint256, NodeObject> m_cache;

    // Negative cache
    KeyCache <uint256> m_negCache;

    std::mutex m_readLock;
    std::condition_variable m_readCondVar;
    std::condition_variable m_readGenCondVar;
    std::set <uint256> m_readSet;           // set of reads to do
    uint256 m_readLast;                     // last hash read
    std::vector <std::thread> m_readThreads;
    bool m_readShut;
    uint64_t m_readGen;                     // current read generation

    DatabaseImp (std::string const& name,
                 Scheduler& scheduler,
                 int readThreads,
                 std::unique_ptr <Backend> backend,
                 std::unique_ptr <Backend> fastBackend,
                 beast::Journal journal)
        : m_journal (journal)
        , m_scheduler (scheduler)
        , m_backend (std::move (backend))
        , m_fastBackend (std::move (fastBackend))
        , m_cache ("NodeStore", cacheTargetSize, cacheTargetSeconds,
            get_seconds_clock (), deprecatedLogs().journal("TaggedCache"))
        , m_negCache ("NodeStore", get_seconds_clock (),
            cacheTargetSize, cacheTargetSeconds)
        , m_readShut (false)
        , m_readGen (0)
        , m_storeCount (0)
        , m_fetchTotalCount (0)
        , m_fetchHitCount (0)
        , m_storeSize (0)
        , m_fetchSize (0)
    {
        for (int i = 0; i < readThreads; ++i)
            m_readThreads.push_back (std::thread (
                &DatabaseImp::threadEntry, this));
    }

    ~DatabaseImp ()
    {
        // Wake the read threads so they can observe the shutdown flag,
        // then wait for them to exit.
        {
            std::unique_lock <std::mutex> lock (m_readLock);
            m_readShut = true;
            m_readCondVar.notify_all ();
            m_readGenCondVar.notify_all ();
        }

        for (auto& e : m_readThreads)
            e.join();
    }

    std::string getName () const override
    {
        return m_backend->getName ();
    }

    void close() override
    {
        if (m_backend)
        {
            m_backend->close();
            m_backend = nullptr;
        }

        if (m_fastBackend)
        {
            m_fastBackend->close();
            m_fastBackend = nullptr;
        }
    }

    //------------------------------------------------------------------------------
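    // How the asynchronous read path below fits together: asyncFetch() first
    // consults the positive and negative caches; on a miss it queues the hash
    // in m_readSet and wakes a read thread.  threadEntry() drains m_readSet
    // in key order, bumping m_readGen and signalling m_readGenCondVar each
    // time the set wraps around, and waitReads() blocks until the pending set
    // is drained or two such generations have passed, so every read queued
    // before the call has been attempted.  A minimal caller sketch
    // (illustrative only; `db`, `hashes` and `results` are hypothetical names,
    // not part of this interface):
    //
    //     std::vector <NodeObject::pointer> results (hashes.size ());
    //     for (std::size_t i = 0; i < hashes.size (); ++i)
    //         db.asyncFetch (hashes[i], results[i]);   // true if answered from a cache
    //     db.waitReads ();                             // prefetch threads have tried each hash
    //     for (std::size_t i = 0; i < hashes.size (); ++i)
    //         if (! results[i])
    //             results[i] = db.fetch (hashes[i]);   // likely a cache hit by now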
    bool asyncFetch (uint256 const& hash, NodeObject::pointer& object)
    {
        // See if the object is in cache
        object = m_cache.fetch (hash);
        if (object || m_negCache.touch_if_exists (hash))
            return true;

        {
            // No. Post a read
            std::unique_lock <std::mutex> lock (m_readLock);
            if (m_readSet.insert (hash).second)
                m_readCondVar.notify_one ();
        }

        return false;
    }

    void waitReads() override
    {
        {
            std::unique_lock <std::mutex> lock (m_readLock);

            // Wake in two generations
            std::uint64_t const wakeGeneration = m_readGen + 2;
            while (!m_readShut && !m_readSet.empty () && (m_readGen < wakeGeneration))
                m_readGenCondVar.wait (lock);
        }
    }

    int getDesiredAsyncReadCount ()
    {
        // We prefer that a client not fill our cache: we don't want to push
        // data out of the cache before it's retrieved.
        return m_cache.getTargetSize() / asyncDivider;
    }

    NodeObject::Ptr fetch (uint256 const& hash) override
    {
        ScopedMetrics::incrementThreadFetches ();

        return doTimedFetch (hash, false);
    }

    /** Perform a fetch and report the time it took */
    NodeObject::Ptr doTimedFetch (uint256 const& hash, bool isAsync)
    {
        FetchReport report;
        report.isAsync = isAsync;
        report.wentToDisk = false;

        auto const before = std::chrono::steady_clock::now();
        NodeObject::Ptr ret = doFetch (hash, report);
        report.elapsed = std::chrono::duration_cast <std::chrono::milliseconds>
            (std::chrono::steady_clock::now() - before);

        report.wasFound = (ret != nullptr);
        m_scheduler.onFetch (report);
        return ret;
    }

    NodeObject::Ptr doFetch (uint256 const& hash, FetchReport& report)
    {
        // See if the object already exists in the cache
        //
        NodeObject::Ptr obj = m_cache.fetch (hash);

        if (obj != nullptr)
            return obj;

        if (m_negCache.touch_if_exists (hash))
            return obj;

        // Check the database(s).
        bool foundInFastBackend = false;
        report.wentToDisk = true;

        // Check the fast backend database if we have one
        //
        if (m_fastBackend != nullptr)
        {
            obj = fetchInternal (*m_fastBackend, hash);

            // If we found the object, avoid storing it again later.
            if (obj != nullptr)
                foundInFastBackend = true;
        }

        // Are we still without an object?
        //
        if (obj == nullptr)
        {
            // Yes, so at last we will try the main database.
            //
            obj = fetchFrom (hash);
            ++m_fetchTotalCount;
        }

        if (obj == nullptr)
        {
            // Just in case a write occurred
            obj = m_cache.fetch (hash);

            if (obj == nullptr)
            {
                // We give up
                m_negCache.insert (hash);
            }
        }
        else
        {
            // Ensure all threads get the same object
            //
            m_cache.canonicalize (hash, obj);

            if (! foundInFastBackend)
            {
                // If we have a fast back end, store it there for later.
                //
                if (m_fastBackend != nullptr)
                {
                    m_fastBackend->store (obj);
                    ++m_storeCount;
                    if (obj)
                        m_storeSize += obj->getData().size();
                }

                // Since this was a 'hard' fetch, we will log it.
                //
                if (m_journal.trace)
                    m_journal.trace <<
                        "HOS: " << hash << " fetch: in db";
            }
        }

        return obj;
    }

    virtual NodeObject::Ptr fetchFrom (uint256 const& hash)
    {
        return fetchInternal (*m_backend, hash);
    }
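    // fetchInternal() maps the backend's Status codes onto this class's
    // counters: `ok` bumps the hit count and fetched-byte total, `notFound`
    // returns null so the caller can fall back or populate the negative
    // cache, and `dataCorrupt` / unknown statuses are only logged.  A sketch
    // of a backend honouring that contract (hypothetical class; the fetch
    // signature is inferred from the call below, not quoted from Backend.h):
    //
    //     class NullBackend : public Backend
    //     {
    //         Status fetch (void const* key, NodeObject::Ptr* pObject)
    //         {
    //             pObject->reset ();   // nothing is stored here
    //             return notFound;     // fetchInternal() will try elsewhere
    //         }
    //         // ... remaining Backend members elided ...
    //     };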
    NodeObject::Ptr fetchInternal (Backend& backend, uint256 const& hash)
    {
        NodeObject::Ptr object;

        Status const status = backend.fetch (hash.begin (), &object);

        switch (status)
        {
        case ok:
            ++m_fetchHitCount;
            if (object)
                m_fetchSize += object->getData().size();
            // fall through
        case notFound:
            break;

        case dataCorrupt:
            // VFALCO TODO Deal with encountering corrupt data!
            //
            if (m_journal.fatal)
                m_journal.fatal <<
                    "Corrupt NodeObject #" << hash;
            break;

        default:
            if (m_journal.warning)
                m_journal.warning <<
                    "Unknown status=" << status;
            break;
        }

        return object;
    }

    //------------------------------------------------------------------------------

    void store (NodeObjectType type,
                Blob&& data,
                uint256 const& hash) override
    {
        storeInternal (type, std::move(data), hash, *m_backend.get());
    }

    void storeInternal (NodeObjectType type,
                        Blob&& data,
                        uint256 const& hash,
                        Backend& backend)
    {
    #if RIPPLE_VERIFY_NODEOBJECT_KEYS
        // Verify the key before `data` is moved into the object.
        assert (hash == getSHA512Half (data));
    #endif

        NodeObject::Ptr object = NodeObject::createObject(
            type, std::move(data), hash);

        m_cache.canonicalize (hash, object, true);

        backend.store (object);
        ++m_storeCount;
        if (object)
            m_storeSize += object->getData().size();

        m_negCache.erase (hash);

        if (m_fastBackend)
        {
            m_fastBackend->store (object);
            ++m_storeCount;
            if (object)
                m_storeSize += object->getData().size();
        }
    }

    //------------------------------------------------------------------------------

    float getCacheHitRate ()
    {
        return m_cache.getHitRate ();
    }

    void tune (int size, int age)
    {
        m_cache.setTargetSize (size);
        m_cache.setTargetAge (age);
        m_negCache.setTargetSize (size);
        m_negCache.setTargetAge (age);
    }

    void sweep ()
    {
        m_cache.sweep ();
        m_negCache.sweep ();
    }

    std::int32_t getWriteLoad() const override
    {
        return m_backend->getWriteLoad();
    }

    //------------------------------------------------------------------------------

    // Entry point for async read threads
    void threadEntry ()
    {
        beast::Thread::setCurrentThreadName ("prefetch");

        while (1)
        {
            uint256 hash;

            {
                std::unique_lock <std::mutex> lock (m_readLock);

                while (!m_readShut && m_readSet.empty ())
                {
                    // all work is done
                    m_readGenCondVar.notify_all ();
                    m_readCondVar.wait (lock);
                }

                if (m_readShut)
                    break;

                // Read in key order to make the back end more efficient
                std::set <uint256>::iterator it = m_readSet.lower_bound (m_readLast);
                if (it == m_readSet.end ())
                {
                    it = m_readSet.begin ();

                    // A generation has completed
                    ++m_readGen;
                    m_readGenCondVar.notify_all ();
                }

                hash = *it;
                m_readSet.erase (it);
                m_readLast = hash;
            }

            // Perform the read
            doTimedFetch (hash, true);
        }
    }

    //------------------------------------------------------------------------------

    void for_each (std::function <void(NodeObject::Ptr)> f) override
    {
        m_backend->for_each (f);
    }

    void import (Database& source)
    {
        importInternal (source, *m_backend.get());
    }

    void importInternal (Database& source, Backend& dest)
    {
        Batch b;
        b.reserve (batchWritePreallocationSize);

        source.for_each ([&](NodeObject::Ptr object)
        {
            if (b.size() >= batchWritePreallocationSize)
            {
                dest.storeBatch (b);
                b.clear();
                b.reserve (batchWritePreallocationSize);
            }

            b.push_back (object);
            ++m_storeCount;
            if (object)
                m_storeSize += object->getData().size();
        });

        if (! b.empty())
            dest.storeBatch (b);
    }

    std::uint32_t getStoreCount () const override { return m_storeCount; }
    std::uint32_t getFetchTotalCount () const override { return m_fetchTotalCount; }
    std::uint32_t getFetchHitCount () const override { return m_fetchHitCount; }
    std::uint32_t getStoreSize () const override { return m_storeSize; }
    std::uint32_t getFetchSize () const override { return m_fetchSize; }

private:
    std::atomic <std::uint32_t> m_storeCount;
    std::atomic <std::uint32_t> m_fetchTotalCount;
    std::atomic <std::uint32_t> m_fetchHitCount;
    std::atomic <std::uint32_t> m_storeSize;
    std::atomic <std::uint32_t> m_fetchSize;
};

}
}

#endif