From fb338a98607d840177a50c35abd4d7702c39b46f Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 15 Jul 2013 16:40:10 -0700 Subject: [PATCH 01/50] Assert on type size invariant --- Subtrees/beast/TODO.txt | 2 ++ .../beast/modules/beast_crypto/math/beast_UnsignedInteger.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Subtrees/beast/TODO.txt b/Subtrees/beast/TODO.txt index a5faea1456..1402c9af32 100644 --- a/Subtrees/beast/TODO.txt +++ b/Subtrees/beast/TODO.txt @@ -2,6 +2,8 @@ BEAST TODO -------------------------------------------------------------------------------- +- Rename HeapBlock routines to not conflict with _CRTDBG_MAP_ALLOC macros + - Design a WeakPtr / SharedPtr / SharedObject intrusive system - Implement beast::Bimap? diff --git a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h index 9d4f950ea3..a7460ddb0c 100644 --- a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h +++ b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h @@ -76,10 +76,10 @@ public: template UnsignedInteger & operator= (IntegerType value) { - static_bassert (sizeof (Bytes) >= sizeof (IntegerType)); + static_bassert (Bytes >= sizeof (IntegerType)); clear (); value = ByteOrder::swapIfLittleEndian (value); - memcpy (end () - sizeof (value), &value, sizeof (value)); + memcpy (end () - sizeof (value), &value, bmin (Bytes, sizeof (value))); return *this; } From ff7367629413d98b9a77dd43e21e33251e103c0a Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 15 Jul 2013 16:44:02 -0700 Subject: [PATCH 02/50] Add KeyvaDB, Backend, and unit test --- .gitignore | 4 + Builds/VisualStudio2012/RippleD.vcxproj | 16 +- .../VisualStudio2012/RippleD.vcxproj.filters | 12 + TODO.txt | 13 +- modules/ripple_app/node/ripple_KeyvaDB.cpp | 616 ++++++++++++++++++ modules/ripple_app/node/ripple_KeyvaDB.h | 35 + .../node/ripple_KeyvaDBBackendFactory.cpp | 149 +++++ .../node/ripple_KeyvaDBBackendFactory.h | 27 + modules/ripple_app/ripple_app.cpp | 4 + src/cpp/ripple/ripple_Main.cpp | 3 +- 10 files changed, 875 insertions(+), 4 deletions(-) create mode 100644 modules/ripple_app/node/ripple_KeyvaDB.cpp create mode 100644 modules/ripple_app/node/ripple_KeyvaDB.h create mode 100644 modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp create mode 100644 modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h diff --git a/.gitignore b/.gitignore index 6157c1787e..37b1e0a575 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,7 @@ DerivedData # Intel Parallel Studio 2013 XE My Amplifier XE Results - RippleD + +# KeyvaDB files +*.key +*.val diff --git a/Builds/VisualStudio2012/RippleD.vcxproj b/Builds/VisualStudio2012/RippleD.vcxproj index e7e4b5088f..49b7b01210 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj +++ b/Builds/VisualStudio2012/RippleD.vcxproj @@ -157,6 +157,18 @@ true true + + true + true + true + true + + + true + true + true + true + true true @@ -1402,6 +1414,8 @@ + + @@ -1732,7 +1746,7 @@ Disabled - _CRTDBG_MAP_ALLOC;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) ProgramDatabase false MultiThreadedDebug diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index 7d7ff164cf..0d116ab957 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -897,6 +897,12 @@ [1] Ripple\ripple_app\node + + [1] Ripple\ripple_app\node + 
+ + [1] Ripple\ripple_app\node + @@ -1674,6 +1680,12 @@ [1] Ripple\ripple_app\node + + [1] Ripple\ripple_app\node + + + [1] Ripple\ripple_app\node + diff --git a/TODO.txt b/TODO.txt index 908475b5f3..70c3119383 100644 --- a/TODO.txt +++ b/TODO.txt @@ -2,6 +2,17 @@ RIPPLE TODO -------------------------------------------------------------------------------- +Vinnie's Short List (Changes day to day) +- Convert some Ripple boost unit tests to Beast. +- Eliminate new technical in NodeStore::Backend +- Improve NodeObject to construct with just a size. +- Work on KeyvaDB +- Finish unit tests and code for Validators + +-------------------------------------------------------------------------------- + +- Rewrite boost program_options in Beast + - Examples for different backend key/value config settings - Unit Test attention @@ -10,8 +21,6 @@ RIPPLE TODO - Validations unit test --------------------------------------------------------------------------------- - - Replace endian conversion calls with beast calls: htobe32, be32toh, ntohl, etc... Start by removing the system headers which provide these routines, if possible diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp new file mode 100644 index 0000000000..276bb93b8c --- /dev/null +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -0,0 +1,616 @@ +//------------------------------------------------------------------------------ +/* + Copyright (c) 2011-2013, OpenCoin, Inc. +*/ +//============================================================================== + +class KeyvaDBImp : public KeyvaDB +{ +private: + // These are stored in big endian format in the file. + + // A file offset. + typedef int64 FileOffset; + + // Index of a key. + typedef int32 KeyIndex; + + // Size of a value. + typedef int32 ByteSize; + +private: + enum + { + // The size of the fixed area at the beginning of the key file. + // This is used to store some housekeeping information like the + // key size and version number. + // + keyFileHeaderBytes = 1024 + }; + + // Accessed by multiple threads + struct State + { + ScopedPointer keyIn; + ScopedPointer keyOut; + KeyIndex newKeyIndex; + + ScopedPointer valIn; + ScopedPointer valOut; + FileOffset valFileSize; + + bool hasKeys () const noexcept + { + return newKeyIndex > 1; + } + }; + + typedef SharedData SharedState; + + // Key records are indexed starting at one. + struct KeyRecord + { + explicit KeyRecord (void* const keyStorage) + : key (keyStorage) + { + } + + // Absolute byte FileOffset in the value file. + FileOffset valFileOffset; + + // Size of the corresponding value, in bytes. + ByteSize valSize; + + // Key record index of left node, or 0. + KeyIndex leftIndex; + + // Key record index of right node, or 0. + KeyIndex rightIndex; + + // Points to keyBytes storage of the key. + void* const key; + }; + +public: + KeyvaDBImp (int keyBytes, + File keyPath, + File valPath, + bool filesAreTemporary) + : m_keyBytes (keyBytes) + , m_keyRecordBytes (getKeyRecordBytes ()) + , m_filesAreTemporary (filesAreTemporary) + , m_keyStorage (keyBytes) + { + SharedState::WriteAccess state (m_state); + + // Output must be opened first, in case it has + // to created, or else opening for input will fail. 
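+        // (A worked example of the index arithmetic below: getKeyRecordBytes()
+        //  returns 8 + 4 + 4 + 4 + keyBytes, so the 32 byte keys used by the
+        //  NodeStore backend give 52 byte key records. A key file holding three
+        //  records is 1024 + 3 * 52 = 1180 bytes long, and the next record
+        //  appended receives newKeyIndex = 1 + (1180 - 1024) / 52 = 4.)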
+ state->keyOut = openForWrite (keyPath); + state->keyIn = openForRead (keyPath); + + int64 const fileSize = state->keyIn->getFile ().getSize (); + + if (fileSize == 0) + { + // initialize the key file + state->keyOut->setPosition (keyFileHeaderBytes - 1); + state->keyOut->writeByte (0); + state->keyOut->flush (); + } + + state->newKeyIndex = 1 + (state->keyIn->getFile ().getSize () - keyFileHeaderBytes) / m_keyRecordBytes; + + state->valOut = openForWrite (valPath); + state->valIn = openForRead (valPath); + state->valFileSize = state->valIn->getFile ().getSize (); + } + + ~KeyvaDBImp () + { + SharedState::WriteAccess state (m_state); + + flushInternal (state); + + state->keyOut = nullptr; + state->valOut = nullptr; + + // Delete the database files if requested. + // + if (m_filesAreTemporary) + { + { + File const path = state->keyIn->getFile (); + state->keyIn = nullptr; + path.deleteFile (); + } + + { + File const path = state->valIn->getFile (); + state->valIn = nullptr; + path.deleteFile (); + } + } + } + + //-------------------------------------------------------------------------- + + // Returns the number of physical bytes in a key record. + // This is specific to the format of the data. + // + int getKeyRecordBytes () const noexcept + { + int bytes = 0; + + bytes += sizeof (FileOffset); // valFileOffset + bytes += sizeof (ByteSize); // valSize + bytes += sizeof (KeyIndex); // leftIndex + bytes += sizeof (KeyIndex); // rightIndex + + bytes += m_keyBytes; + + return bytes; + } + + FileOffset calcKeyRecordOffset (KeyIndex keyIndex) + { + bassert (keyIndex > 0); + + FileOffset const byteOffset = keyFileHeaderBytes + (keyIndex - 1) * m_keyRecordBytes; + + return byteOffset; + } + + // Read a key record into memory. + void readKeyRecord (KeyRecord* const keyRecord, + KeyIndex const keyIndex, + SharedState::WriteAccess& state) + { + FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); + + bool const success = state->keyIn->setPosition (byteOffset); + + if (success) + { + // This defines the file format! + keyRecord->valFileOffset = state->keyIn->readInt64BigEndian (); + keyRecord->valSize = state->keyIn->readIntBigEndian (); + keyRecord->leftIndex = state->keyIn->readIntBigEndian (); + keyRecord->rightIndex = state->keyIn->readIntBigEndian (); + + // Grab the key + state->keyIn->read (keyRecord->key, m_keyBytes); + } + else + { + String s; + s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + } + + // Write a key record from memory + void writeKeyRecord (KeyRecord const& keyRecord, + KeyIndex const keyIndex, + SharedState::WriteAccess& state, + bool includingKey) + { + FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); + + bool const success = state->keyOut->setPosition (byteOffset); + + if (success) + { + // This defines the file format! + // VFALCO TODO Make OutputStream return the bool errors here + // + state->keyOut->writeInt64BigEndian (keyRecord.valFileOffset); + state->keyOut->writeIntBigEndian (keyRecord.valSize); + state->keyOut->writeIntBigEndian (keyRecord.leftIndex); + state->keyOut->writeIntBigEndian (keyRecord.rightIndex); + + // Write the key + if (includingKey) + { + bool const success = state->keyOut->write (keyRecord.key, m_keyBytes); + + if (! 
success) + { + String s; + s << "KeyvaDB: Write failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + } + + state->keyOut->flush (); + } + else + { + String s; + s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + } + + // Append a value to the value file. + void writeValue (void const* const value, ByteSize valueBytes, SharedState::WriteAccess& state) + { + bool const success = state->valOut->setPosition (state->valFileSize); + + if (success) + { + bool const success = state->valOut->write (value, static_cast (valueBytes)); + + if (! success) + { + String s; + s << "KeyvaDB: Write failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + + state->valFileSize += valueBytes; + + state->valOut->flush (); + } + else + { + String s; + s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + } + + //-------------------------------------------------------------------------- + + struct FindResult + { + FindResult (void* const keyStorage) + : keyRecord (keyStorage) + { + } + + int compare; // result of the last comparison + KeyIndex keyIndex; // index we looked at last + KeyRecord keyRecord; // KeyRecord we looked at last + }; + + // Find a key. If the key doesn't exist, enough information + // is left behind in the result to perform an insertion. + // + // Returns true if the key was found. + // + bool find (FindResult* findResult, void const* key, SharedState::WriteAccess& state) + { + // Not okay to call this with an empty key file! + bassert (state->hasKeys ()); + + // This performs a standard binary search + + findResult->keyIndex = 1; + + do + { + readKeyRecord (&findResult->keyRecord, findResult->keyIndex, state); + + findResult->compare = memcmp (key, findResult->keyRecord.key, m_keyBytes); + + if (findResult->compare < 0) + { + if (findResult->keyRecord.leftIndex != 0) + { + // Go left + findResult->keyIndex = findResult->keyRecord.leftIndex; + } + else + { + // Insert position is to the left + break; + } + } + else if (findResult->compare > 0) + { + if (findResult->keyRecord.rightIndex != 0) + { + // Go right + findResult->keyIndex = findResult->keyRecord.rightIndex; + } + else + { + // Insert position is to the right + break; + } + } + } + while (findResult->compare != 0); + + return findResult->compare == 0; + } + + //-------------------------------------------------------------------------- + + bool get (void const* key, GetCallback* callback) + { + FindResult findResult (m_keyStorage.getData ()); + + SharedState::WriteAccess state (m_state); + + bool found = false; + + if (state->hasKeys ()) + { + found = find (&findResult, key, state); + + if (found) + { + void* const destStorage = callback->createStorageForValue (findResult.keyRecord.valSize); + + bool const success = state->valIn->setPosition (findResult.keyRecord.valFileOffset); + + if (! 
success) + { + String s; + s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + + int const bytesRead = state->valIn->read (destStorage, findResult.keyRecord.valSize); + + if (bytesRead != findResult.keyRecord.valSize) + { + String s; + s << "KeyvaDB: Couldn't read a value from " << state->valIn->getFile ().getFileName (); + Throw (std::runtime_error (s.toStdString ())); + } + } + } + + return found; + } + + //-------------------------------------------------------------------------- + + void put (void const* key, void const* value, int valueBytes) + { + bassert (valueBytes > 0); + + SharedState::WriteAccess state (m_state); + + if (state->hasKeys ()) + { + // Search for the key + + FindResult findResult (m_keyStorage.getData ()); + + bool const found = find (&findResult, key, state); + + if (! found ) + { + bassert (findResult.compare != 0); + + // Binary tree insertion. + // Link the last key record to the new key + { + if (findResult.compare == -1) + { + findResult.keyRecord.leftIndex = state->newKeyIndex; + } + else + { + findResult.keyRecord.rightIndex = state->newKeyIndex; + } + + writeKeyRecord (findResult.keyRecord, findResult.keyIndex, state, false); + } + + // Write the new key + { + findResult.keyRecord.valFileOffset = state->valFileSize; + findResult.keyRecord.valSize = valueBytes; + findResult.keyRecord.leftIndex = 0; + findResult.keyRecord.rightIndex = 0; + + memcpy (findResult.keyRecord.key, key, m_keyBytes); + + writeKeyRecord (findResult.keyRecord, state->newKeyIndex, state, true); + } + + // Key file has grown by one. + ++state->newKeyIndex; + + // Write the value + writeValue (value, valueBytes, state); + } + else + { + String s; + s << "KeyvaDB: Attempt to write a duplicate key!"; + Throw (std::runtime_error (s.toStdString ())); + } + } + else + { + // + // Write first key + // + + KeyRecord keyRecord (m_keyStorage.getData ()); + + keyRecord.valFileOffset = state->valFileSize; + keyRecord.valSize = valueBytes; + keyRecord.leftIndex = 0; + keyRecord.rightIndex = 0; + + memcpy (keyRecord.key, key, m_keyBytes); + + writeKeyRecord (keyRecord, state->newKeyIndex, state, true); + + // Key file has grown by one. + ++state->newKeyIndex; + + // + // Write value + // + + bassert (state->valFileSize == 0); + + writeValue (value, valueBytes, state); + } + } + + //-------------------------------------------------------------------------- + + void flush () + { + SharedState::WriteAccess state (m_state); + + flushInternal (state); + } + + void flushInternal (SharedState::WriteAccess& state) + { + state->keyOut->flush (); + state->valOut->flush (); + } + + //-------------------------------------------------------------------------- + +private: + // Open a file for reading. + static FileInputStream* openForRead (File path) + { + FileInputStream* stream = path.createInputStream (); + + if (stream == nullptr) + { + String s; + s << "KeyvaDB: Couldn't open " << path.getFileName () << " for reading."; + Throw (std::runtime_error (s.toStdString ())); + } + + return stream; + } + + // Open a file for writing. 
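+    // The underlying file is created if it does not already exist. As with
+    // openForRead, failure is reported by throwing std::runtime_error rather
+    // than by returning a null stream.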
+ static FileOutputStream* openForWrite (File path) + { + FileOutputStream* stream = path.createOutputStream (); + + if (stream == nullptr) + { + String s; + s << "KeyvaDB: Couldn't open " << path.getFileName () << " for writing."; + Throw (std::runtime_error (s.toStdString ())); + } + + return stream; + } + +private: + int const m_keyBytes; + int const m_keyRecordBytes; + bool const m_filesAreTemporary; + SharedState m_state; + HeapBlock m_keyStorage; +}; + +KeyvaDB* KeyvaDB::New (int keyBytes, File keyPath, File valPath, bool filesAreTemporary) +{ + return new KeyvaDBImp (keyBytes, keyPath, valPath, filesAreTemporary); +} + +//------------------------------------------------------------------------------ + +class KeyvaDBTests : public UnitTest +{ +public: + KeyvaDBTests () : UnitTest ("KevyaDB") + { + } + + template + void repeatableShuffle (int const numberOfItems, HeapBlock & items) + { + Random r (69); + + for (int i = numberOfItems - 1; i > 0; --i) + { + int const choice = r.nextInt (i + 1); + + std::swap (items [i], items [choice]); + } + } + + template + void testSize (unsigned int const maxItems) + { + typedef UnsignedInteger KeyType; + + String s; + s << "keyBytes=" << String (KeyBytes); + beginTest (s); + + // Set up the key and value files and open the db. + File const keyPath = File::createTempFile ("").withFileExtension (".key"); + File const valPath = File::createTempFile ("").withFileExtension (".val"); + ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, true)); + + { + // Create an array of ascending integers. + HeapBlock items (maxItems); + for (unsigned int i = 0; i < maxItems; ++i) + items [i] = i; + + // Now shuffle it deterministically. + repeatableShuffle (maxItems, items); + + // Write all the keys of integers. + for (unsigned int i = 0; i < maxItems; ++i) + { + unsigned int const num = items [i]; + KeyType const v = KeyType::createFromInteger (num); + + // The value is the same as the key, for ease of comparison. + db->put (v.cbegin (), v.cbegin (), KeyBytes); + } + } + + { + // This callback creates storage for the value. + struct MyGetCallback : KeyvaDB::GetCallback + { + KeyType v; + + void* createStorageForValue (int valueBytes) + { + bassert (valueBytes == KeyBytes); + + return v.begin (); + } + }; + + // Go through all of our keys and try to retrieve them. + // since this is done in ascending order, we should get + // random seeks at this point. + // + for (unsigned int i = 0; i < maxItems; ++i) + { + KeyType const v = KeyType::createFromInteger (i); + + MyGetCallback cb; + + bool const found = db->get (v.cbegin (), &cb); + + expect (found, "Should be found"); + + expect (v == cb.v, "Should be equal"); + } + } + } + + void runTest () + { + testSize <4> (512); + testSize <32> (4096); + } +}; + +static KeyvaDBTests keyvaDBTests; diff --git a/modules/ripple_app/node/ripple_KeyvaDB.h b/modules/ripple_app/node/ripple_KeyvaDB.h new file mode 100644 index 0000000000..9ff1b2ec22 --- /dev/null +++ b/modules/ripple_app/node/ripple_KeyvaDB.h @@ -0,0 +1,35 @@ +//------------------------------------------------------------------------------ +/* + Copyright (c) 2011-2013, OpenCoin, Inc. +*/ +//============================================================================== + +#ifndef RIPPLE_KEYVADB_H_INCLUDED +#define RIPPLE_KEYVADB_H_INCLUDED + +/** Key/value database optimized for Ripple usage. 
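+
+    Keys are fixed length and live in a ".key" file laid out as an
+    unbalanced binary tree of fixed size records; values live in a
+    companion append-only ".val" file. A lookup walks the tree with one
+    seek per visited record. There is no delete and no rebalancing, so
+    the tree stays shallow only while keys arrive in roughly random
+    order, which is the expected case when keys are cryptographic hashes.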
+*/ +class KeyvaDB : LeakChecked +{ +public: + class GetCallback + { + public: + virtual void* createStorageForValue (int valueBytes) = 0; + }; + + static KeyvaDB* New (int keyBytes, + File keyPath, + File valPath, + bool filesAreTemporary); + + virtual ~KeyvaDB () { } + + virtual bool get (void const* key, GetCallback* callback) = 0; + + virtual void put (void const* key, void const* value, int valueBytes) = 0; + + virtual void flush () = 0; +}; + +#endif diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp new file mode 100644 index 0000000000..865bafaada --- /dev/null +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp @@ -0,0 +1,149 @@ +//------------------------------------------------------------------------------ +/* + Copyright (c) 2011-2013, OpenCoin, Inc. +*/ +//============================================================================== + +class KeyvaDBBackendFactory::Backend : public NodeStore::Backend +{ +public: + typedef UnsignedInteger <32> Key; + + enum + { + keyBytes = Key::sizeInBytes + }; + + explicit Backend (StringPairArray const& keyValues) + : m_path (keyValues ["path"]) + , m_db (KeyvaDB::New ( + keyBytes, + File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("key"), + File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("val"), + false)) + { + } + + ~Backend () + { + } + + std::string getDataBaseName () + { + return m_path.toStdString (); + } + + void writeObject (NodeObject const& object) + { + m_db->put ( + object.getHash ().begin (), + &object.getData () [0], + object.getData ().size ()); + } + + bool bulkStore (std::vector const& objs) + { + for (size_t i = 0; i < objs.size (); ++i) + { + writeObject (*objs [i]); + } + + return true; + } + + struct MyGetCallback : KeyvaDB::GetCallback + { + int valueBytes; + HeapBlock data; + + void* createStorageForValue (int valueBytes_) + { + valueBytes = valueBytes_; + + data.malloc (valueBytes); + + return data.getData (); + } + }; + + NodeObject::pointer retrieve (uint256 const& hash) + { + NodeObject::pointer result; + + MyGetCallback cb; + + bool const found = m_db->get (hash.begin (), &cb); + + if (found) + { + result = fromBinary (hash, cb.data.getData (), cb.valueBytes); + } + + return result; + } + + void visitAll (FUNCTION_TYPE func) + { + bassertfalse; + } + + Blob toBlob (NodeObject::ref obj) + { + Blob rawData (9 + obj->getData ().size ()); + unsigned char* bufPtr = &rawData.front(); + + *reinterpret_cast (bufPtr + 0) = ntohl (obj->getIndex ()); + *reinterpret_cast (bufPtr + 4) = ntohl (obj->getIndex ()); + * (bufPtr + 8) = static_cast (obj->getType ()); + memcpy (bufPtr + 9, &obj->getData ().front (), obj->getData ().size ()); + + return rawData; + } + + NodeObject::pointer fromBinary (uint256 const& hash, char const* data, int size) + { + if (size < 9) + throw std::runtime_error ("undersized object"); + + uint32 index = htonl (*reinterpret_cast (data)); + + int htype = data[8]; + + return boost::make_shared (static_cast (htype), index, + data + 9, size - 9, hash); + } + +private: + String m_path; + ScopedPointer m_db; +}; + +//------------------------------------------------------------------------------ + +KeyvaDBBackendFactory::KeyvaDBBackendFactory () +{ +} + +KeyvaDBBackendFactory::~KeyvaDBBackendFactory () +{ +} + +KeyvaDBBackendFactory& KeyvaDBBackendFactory::getInstance () +{ + static KeyvaDBBackendFactory instance; + + return instance; +} + +String 
KeyvaDBBackendFactory::getName () const +{ + return "KeyvaDB"; +} + +NodeStore::Backend* KeyvaDBBackendFactory::createInstance (StringPairArray const& keyValues) +{ + return new KeyvaDBBackendFactory::Backend (keyValues); +} + +//------------------------------------------------------------------------------ + diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h new file mode 100644 index 0000000000..2587315d86 --- /dev/null +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h @@ -0,0 +1,27 @@ +//------------------------------------------------------------------------------ +/* + Copyright (c) 2011-2013, OpenCoin, Inc. +*/ +//============================================================================== + +#ifndef RIPPLE_KEYVABACKENDFACTORY_H_INCLUDED +#define RIPPLE_KEYVABACKENDFACTORY_H_INCLUDED + +/** Factory to produce KeyvaDB backends for the NodeStore. +*/ +class KeyvaDBBackendFactory : public NodeStore::BackendFactory +{ +private: + class Backend; + + KeyvaDBBackendFactory (); + ~KeyvaDBBackendFactory (); + +public: + static KeyvaDBBackendFactory& getInstance (); + + String getName () const; + NodeStore::Backend* createInstance (StringPairArray const& keyValues); +}; + +#endif diff --git a/modules/ripple_app/ripple_app.cpp b/modules/ripple_app/ripple_app.cpp index afd567e708..43f9e5b4a0 100644 --- a/modules/ripple_app/ripple_app.cpp +++ b/modules/ripple_app/ripple_app.cpp @@ -104,6 +104,7 @@ namespace ripple #include "node/ripple_NodeStore.h" #include "node/ripple_LevelDBBackendFactory.h" #include "node/ripple_HyperLevelDBBackendFactory.h" +#include "node/ripple_KeyvaDBBackendFactory.h" #include "node/ripple_MdbBackendFactory.h" #include "node/ripple_NullBackendFactory.h" #include "node/ripple_SqliteBackendFactory.h" @@ -249,6 +250,9 @@ static const uint64 tenTo17m1 = tenTo17 - 1; #include "node/ripple_MdbBackendFactory.cpp" #include "node/ripple_NullBackendFactory.cpp" #include "node/ripple_SqliteBackendFactory.cpp" +#include "node/ripple_KeyvaDB.h" // private +#include "node/ripple_KeyvaDB.cpp" +#include "node/ripple_KeyvaDBBackendFactory.cpp" #include "ledger/Ledger.cpp" #include "src/cpp/ripple/ripple_SHAMapDelta.cpp" diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index 606453a4fd..2384ca4126 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -155,7 +155,7 @@ static void runBeastUnitTests () { UnitTests::TestResult const& r (*tr.getResult (i)); - for (int j = 0; j < r.messages.size (); ++i) + for (int j = 0; j < r.messages.size (); ++j) Log::out () << r.messages [j].toStdString (); } } @@ -252,6 +252,7 @@ int rippleMain (int argc, char** argv) // These must be added before the Application object is created NodeStore::addBackendFactory (SqliteBackendFactory::getInstance ()); NodeStore::addBackendFactory (LevelDBBackendFactory::getInstance ()); + NodeStore::addBackendFactory (KeyvaDBBackendFactory::getInstance ()); #if RIPPLE_HYPERLEVELDB_AVAILABLE NodeStore::addBackendFactory (HyperLevelDBBackendFactory::getInstance ()); #endif From 3906c7e1611281b901286f373c651a7635dd1438 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 15 Jul 2013 19:42:01 -0700 Subject: [PATCH 03/50] Refactor some NodeStore, TaggedCache --- modules/ripple_app/node/ripple_NodeStore.cpp | 234 +++++++++++------- modules/ripple_app/node/ripple_NodeStore.h | 101 ++++++-- .../containers/ripple_TaggedCache.h | 87 ++++--- 3 files changed, 268 insertions(+), 154 
deletions(-) diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 960e0d805f..b575a9a8cc 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -4,15 +4,109 @@ */ //============================================================================== +// +// NodeStore::Backend +// + +NodeStore::Backend::Backend () + : mWriteGeneration(0) + , mWriteLoad(0) + , mWritePending(false) +{ + mWriteSet.reserve (bulkWriteBatchSize); +} + +bool NodeStore::Backend::store (NodeObject::ref object) +{ + boost::mutex::scoped_lock sl (mWriteMutex); + mWriteSet.push_back (object); + + if (!mWritePending) + { + mWritePending = true; + + // VFALCO TODO Eliminate this dependency on the Application object. + getApp().getJobQueue ().addJob ( + jtWRITE, + "NodeObject::store", + BIND_TYPE (&NodeStore::Backend::bulkWrite, this, P_1)); + } + return true; +} + +void NodeStore::Backend::bulkWrite (Job &) +{ + int setSize = 0; + + // VFALCO NOTE Use the canonical for(;;) instead. + // Or better, provide a proper terminating condition. + while (1) + { + std::vector< boost::shared_ptr > set; + set.reserve (bulkWriteBatchSize); + + { + boost::mutex::scoped_lock sl (mWriteMutex); + + mWriteSet.swap (set); + assert (mWriteSet.empty ()); + ++mWriteGeneration; + mWriteCondition.notify_all (); + + if (set.empty ()) + { + mWritePending = false; + mWriteLoad = 0; + + // VFALCO NOTE Fix this function to not return from the middle + return; + } + + mWriteLoad = std::max (setSize, static_cast (mWriteSet.size ())); + setSize = set.size (); + } + + bulkStore (set); + } +} + +// VFALCO TODO This function should not be needed. Instead, the +// destructor should handle flushing of the bulk write buffer. +// +void NodeStore::Backend::waitWrite () +{ + boost::mutex::scoped_lock sl (mWriteMutex); + int gen = mWriteGeneration; + + while (mWritePending && (mWriteGeneration == gen)) + mWriteCondition.wait (sl); +} + +int NodeStore::Backend::getWriteLoad () +{ + boost::mutex::scoped_lock sl (mWriteMutex); + + return std::max (mWriteLoad, static_cast (mWriteSet.size ())); +} + +//------------------------------------------------------------------------------ + +// +// NodeStore +// + Array NodeStore::s_factories; -NodeStore::NodeStore (String backendParameters, String fastBackendParameters, int cacheSize, int cacheAge) +NodeStore::NodeStore (String backendParameters, + String fastBackendParameters, + int cacheSize, + int cacheAge) : m_backend (createBackend (backendParameters)) - , mCache ("NodeStore", cacheSize, cacheAge) - , mNegativeCache ("HashedObjectNegativeCache", 0, 120) + , m_fastBackend (fastBackendParameters.isNotEmpty () ? 
createBackend (fastBackendParameters) + : nullptr) + , m_cache ("NodeStore", cacheSize, cacheAge) + , m_negativeCache ("NoteStoreNegativeCache", 0, 120) { - if (fastBackendParameters.isNotEmpty ()) - m_fastBackend = createBackend (fastBackendParameters); } void NodeStore::addBackendFactory (BackendFactory& factory) @@ -22,19 +116,19 @@ void NodeStore::addBackendFactory (BackendFactory& factory) float NodeStore::getCacheHitRate () { - return mCache.getHitRate (); + return m_cache.getHitRate (); } void NodeStore::tune (int size, int age) { - mCache.setTargetSize (size); - mCache.setTargetAge (age); + m_cache.setTargetSize (size); + m_cache.setTargetAge (age); } void NodeStore::sweep () { - mCache.sweep (); - mNegativeCache.sweep (); + m_cache.sweep (); + m_negativeCache.sweep (); } void NodeStore::waitWrite () @@ -52,32 +146,49 @@ int NodeStore::getWriteLoad () bool NodeStore::store (NodeObjectType type, uint32 index, Blob const& data, uint256 const& hash) { - // return: false = already in cache, true = added to cache - if (mCache.touch (hash)) - return false; + bool wasStored = false; + bool const keyFoundAndObjectCached = m_cache.refreshIfPresent (hash); + + // VFALCO NOTE What happens if the key is found, but the object + // fell out of the cache? We will end up passing it + // to the backend anyway. + // + if (! keyFoundAndObjectCached) + { + +// VFALCO TODO Rename this to RIPPLE_NODESTORE_VERIFY_HASHES and make +// it be 1 or 0 instead of merely defined or undefined. +// #ifdef PARANOID - assert (hash == Serializer::getSHA512Half (data)); + assert (hash == Serializer::getSHA512Half (data)); #endif - NodeObject::pointer object = boost::make_shared (type, index, data, hash); + NodeObject::pointer object = boost::make_shared (type, index, data, hash); - if (!mCache.canonicalize (hash, object)) - { - m_backend->store (object); - if (m_fastBackend) - m_fastBackend->store (object); + // VFALCO NOTE What does it mean to canonicalize an object? 
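+        //
+        //            As used here, TaggedCache::canonicalize keeps at most one
+        //            in-memory instance per hash: if the cache already holds
+        //            an object for this key it returns true and swaps the
+        //            caller's pointer for the cached instance; otherwise it
+        //            inserts the new object and returns false. Only the
+        //            first-time case falls through to the backend store.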
+ // + if (!m_cache.canonicalize (hash, object)) + { + m_backend->store (object); + + if (m_fastBackend) + m_fastBackend->store (object); + } + + m_negativeCache.del (hash); + + wasStored = true; } - mNegativeCache.del (hash); - return true; + return wasStored; } NodeObject::pointer NodeStore::retrieve (uint256 const& hash) { - NodeObject::pointer obj = mCache.fetch (hash); + NodeObject::pointer obj = m_cache.fetch (hash); - if (obj || mNegativeCache.isPresent (hash)) + if (obj || m_negativeCache.isPresent (hash)) return obj; if (m_fastBackend) @@ -86,7 +197,7 @@ NodeObject::pointer NodeStore::retrieve (uint256 const& hash) if (obj) { - mCache.canonicalize (hash, obj); + m_cache.canonicalize (hash, obj); return obj; } } @@ -97,17 +208,18 @@ NodeObject::pointer NodeStore::retrieve (uint256 const& hash) if (!obj) { - mNegativeCache.add (hash); + m_negativeCache.add (hash); return obj; } } - mCache.canonicalize (hash, obj); + m_cache.canonicalize (hash, obj); if (m_fastBackend) m_fastBackend->store(obj); WriteLog (lsTRACE, NodeObject) << "HOS: " << hash << " fetch: in db"; + return obj; } @@ -115,12 +227,12 @@ void NodeStore::importVisitor ( std::vector & objects, NodeObject::pointer object) { - if (objects.size() >= 128) + if (objects.size() >= bulkWriteBatchSize) { m_backend->bulkStore (objects); objects.clear (); - objects.reserve (128); + objects.reserve (bulkWriteBatchSize); } objects.push_back (object); @@ -136,7 +248,7 @@ int NodeStore::import (String sourceBackendParameters) std::vector objects; - objects.reserve (128); + objects.reserve (bulkWriteBatchSize); srcBackend->visitAll (BIND_TYPE (&NodeStore::importVisitor, this, boost::ref (objects), P_1)); @@ -183,65 +295,3 @@ NodeStore::Backend* NodeStore::createBackend (String const& parameters) return backend; } - -bool NodeStore::Backend::store (NodeObject::ref object) -{ - boost::mutex::scoped_lock sl (mWriteMutex); - mWriteSet.push_back (object); - - if (!mWritePending) - { - mWritePending = true; - getApp().getJobQueue ().addJob (jtWRITE, "NodeObject::store", - BIND_TYPE (&NodeStore::Backend::bulkWrite, this, P_1)); - } - return true; -} - -void NodeStore::Backend::bulkWrite (Job &) -{ - int setSize = 0; - - while (1) - { - std::vector< boost::shared_ptr > set; - set.reserve (128); - - { - boost::mutex::scoped_lock sl (mWriteMutex); - - mWriteSet.swap (set); - assert (mWriteSet.empty ()); - ++mWriteGeneration; - mWriteCondition.notify_all (); - - if (set.empty ()) - { - mWritePending = false; - mWriteLoad = 0; - return; - } - - mWriteLoad = std::max (setSize, static_cast (mWriteSet.size ())); - setSize = set.size (); - } - - bulkStore (set); - } -} - -void NodeStore::Backend::waitWrite () -{ - boost::mutex::scoped_lock sl (mWriteMutex); - int gen = mWriteGeneration; - - while (mWritePending && (mWriteGeneration == gen)) - mWriteCondition.wait (sl); -} - -int NodeStore::Backend::getWriteLoad () -{ - boost::mutex::scoped_lock sl (mWriteMutex); - - return std::max (mWriteLoad, static_cast (mWriteSet.size ())); -} diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h index dc21f4c2f4..df611cbc88 100644 --- a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -12,32 +12,41 @@ class NodeStore : LeakChecked { public: + enum + { + /** This is the largest number of key/value pairs we + will write during a bulk write. 
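+
+            The pending write queue reserves this much storage up front, and
+            NodeStore::importVisitor hands accumulated objects to bulkStore
+            in batches of this size.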
+ */ + // VFALCO TODO Make this a tunable parameter in the key value pairs + bulkWriteBatchSize = 128 + }; + + /** Interface to inform callers of cetain activities. + */ + class Hooks + { + virtual void on + }; + /** Back end used for the store. */ class Backend { public: - // VFALCO TODO Move the function definition to the .cpp - Backend () - : mWriteGeneration(0) - , mWriteLoad(0) - , mWritePending(false) - { - mWriteSet.reserve(128); - } + Backend (); virtual ~Backend () { } - virtual std::string getDataBaseName() = 0; - - // Store/retrieve a single object - // These functions must be thread safe + /** Store a single object. + */ + // VFALCO TODO Why should the Backend know or care about NodeObject? + // It should just deal with a fixed key and raw data. + // virtual bool store (NodeObject::ref); - virtual NodeObject::pointer retrieve (uint256 const &hash) = 0; - // Store a group of objects - // This function will only be called from a single thread - virtual bool bulkStore (const std::vector< NodeObject::pointer >&) = 0; + /** Retrieve an individual object. + */ + virtual NodeObject::pointer retrieve (uint256 const &hash) = 0; // Visit every object in the database // This function will only be called during an import operation @@ -46,19 +55,38 @@ public: // virtual void visitAll (FUNCTION_TYPE ) = 0; + private: + friend class NodeStore; + // VFALCO TODO Put this bulk writing logic into a separate class. - virtual void bulkWrite (Job &); - virtual void waitWrite (); - virtual int getWriteLoad (); + // NOTE Why are these virtual? + void bulkWrite (Job &); + void waitWrite (); + int getWriteLoad (); + + private: + virtual std::string getDataBaseName() = 0; + + // Store a group of objects + // This function will only be called from a single thread + // VFALCO NOTE It looks like NodeStore throws this into the job queue? + virtual bool bulkStore (const std::vector< NodeObject::pointer >&) = 0; protected: // VFALCO TODO Put this bulk writing logic into a separate class. - boost::mutex mWriteMutex; - boost::condition_variable mWriteCondition; - int mWriteGeneration; - int mWriteLoad; - bool mWritePending; - std::vector > mWriteSet; + boost::mutex mWriteMutex; + boost::condition_variable mWriteCondition; + int mWriteGeneration; + int mWriteLoad; + bool mWritePending; + std::vector > mWriteSet; + }; + +public: + // Helper functions for the backend + class BackendHelper + { + public: }; public: @@ -90,6 +118,7 @@ public: */ // VFALCO NOTE Is cacheSize in bytes? objects? KB? // Is cacheAge in minutes? seconds? + // These should be in the parameters. // NodeStore (String backendParameters, String fastBackendParameters, @@ -103,18 +132,32 @@ public: */ static void addBackendFactory (BackendFactory& factory); + // VFALCO TODO Document this. float getCacheHitRate (); + // VFALCO TODO Document this. bool store (NodeObjectType type, uint32 index, Blob const& data, uint256 const& hash); + // VFALCO TODO Document this. NodeObject::pointer retrieve (uint256 const& hash); + // VFALCO TODO Document this. void waitWrite (); + + // VFALCO TODO Document this. + // TODO Document the parameter meanings. void tune (int size, int age); + + // VFALCO TODO Document this. void sweep (); + + // VFALCO TODO Document this. + // What are the units of the return value? int getWriteLoad (); + // VFALCO TODO Document this. + // NOTE What's the return value? int import (String sourceBackendParameters); private: @@ -125,11 +168,15 @@ private: static Array s_factories; private: + // Persistent key/value storage. 
ScopedPointer m_backend; + + // Larger key/value storage, but not necessarily persistent. ScopedPointer m_fastBackend; - TaggedCache mCache; - KeyCache mNegativeCache; + // VFALCO NOTE What are these things for? We need comments. + TaggedCache m_cache; + KeyCache m_negativeCache; }; #endif diff --git a/modules/ripple_basics/containers/ripple_TaggedCache.h b/modules/ripple_basics/containers/ripple_TaggedCache.h index 1551ebda05..5fa75bf150 100644 --- a/modules/ripple_basics/containers/ripple_TaggedCache.h +++ b/modules/ripple_basics/containers/ripple_TaggedCache.h @@ -62,7 +62,58 @@ public: void sweep (); void clear (); - bool touch (const key_type& key); + /** Refresh the expiration time on a key. + + @param key The key to refresh. + @return `true` if the key was found and the object is cached. + */ + bool refreshIfPresent (const key_type& key) + { + bool found = false; + + // If present, make current in cache + boost::recursive_mutex::scoped_lock sl (mLock); + + cache_iterator cit = mCache.find (key); + + if (cit != mCache.end ()) + { + cache_entry& entry = cit->second; + + if (! entry.isCached ()) + { + // Convert weak to strong. + entry.ptr = entry.lock (); + + if (entry.isCached ()) + { + // We just put the object back in cache + ++mCacheCount; + entry.touch (); + found = true; + } + else + { + // Couldn't get strong pointer, + // object fell out of the cache so remove the entry. + mCache.erase (cit); + } + } + else + { + // It's cached so update the timer + entry.touch (); + found = true; + } + } + else + { + // not present + } + + return found; + } + bool del (const key_type& key, bool valid); bool canonicalize (const key_type& key, boost::shared_ptr& data, bool replace = false); bool store (const key_type& key, const c_Data& data); @@ -264,40 +315,6 @@ void TaggedCache::sweep () } } -template -bool TaggedCache::touch (const key_type& key) -{ - // If present, make current in cache - boost::recursive_mutex::scoped_lock sl (mLock); - - cache_iterator cit = mCache.find (key); - - if (cit == mCache.end ()) // Don't have the object - return false; - - cache_entry& entry = cit->second; - - if (entry.isCached ()) - { - entry.touch (); - return true; - } - - entry.ptr = entry.lock (); - - if (entry.isCached ()) - { - // We just put the object back in cache - ++mCacheCount; - entry.touch (); - return true; - } - - // Object fell out - mCache.erase (cit); - return false; -} - template bool TaggedCache::del (const key_type& key, bool valid) { From 664ed784e5be6fded0912d5d036f747a1bebd645 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 16 Jul 2013 06:15:18 -0700 Subject: [PATCH 04/50] Add return values to OutputStream methods --- .../files/beast_FileOutputStream.cpp | 11 ++- .../beast_core/files/beast_FileOutputStream.h | 13 ++- .../streams/beast_MemoryOutputStream.cpp | 6 +- .../streams/beast_MemoryOutputStream.h | 16 ++-- .../beast_core/streams/beast_OutputStream.cpp | 85 +++++++++++-------- .../beast_core/streams/beast_OutputStream.h | 60 ++++++++----- .../zip/beast_GZIPCompressorOutputStream.h | 6 +- 7 files changed, 111 insertions(+), 86 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.cpp b/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.cpp index 368eb9c438..a3a0e5e3c6 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.cpp @@ -114,7 +114,7 @@ bool FileOutputStream::write (const void* const src, const size_t numBytes) return true; } 
-void FileOutputStream::writeRepeatedByte (uint8 byte, size_t numBytes) +bool FileOutputStream::writeRepeatedByte (uint8 byte, size_t numBytes) { bassert (((ssize_t) numBytes) >= 0); @@ -123,9 +123,8 @@ void FileOutputStream::writeRepeatedByte (uint8 byte, size_t numBytes) memset (buffer + bytesInBuffer, byte, numBytes); bytesInBuffer += numBytes; currentPosition += numBytes; + return true; } - else - { - OutputStream::writeRepeatedByte (byte, numBytes); - } -} + + return OutputStream::writeRepeatedByte (byte, numBytes); +} \ No newline at end of file diff --git a/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.h b/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.h index 5f358ecd63..e4110492c9 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.h +++ b/Subtrees/beast/modules/beast_core/files/beast_FileOutputStream.h @@ -27,7 +27,6 @@ #include "beast_File.h" #include "../streams/beast_OutputStream.h" - //============================================================================== /** An output stream that writes into a local file. @@ -87,11 +86,11 @@ public: Result truncate(); //============================================================================== - void flush(); - int64 getPosition(); - bool setPosition (int64 pos); - bool write (const void* data, size_t numBytes); - void writeRepeatedByte (uint8 byte, size_t numTimesToRepeat); + void flush() override; + int64 getPosition() override; + bool setPosition (int64) override; + bool write (const void*, size_t) override; + bool writeRepeatedByte (uint8 byte, size_t numTimesToRepeat) override; private: @@ -111,4 +110,4 @@ private: ssize_t writeInternal (const void*, size_t); }; -#endif // BEAST_FILEOUTPUTSTREAM_BEASTHEADER +#endif \ No newline at end of file diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp index 0505920614..2e2e2f15a9 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp @@ -90,10 +90,12 @@ bool MemoryOutputStream::write (const void* const buffer, size_t howMany) return true; } -void MemoryOutputStream::writeRepeatedByte (uint8 byte, size_t howMany) +bool MemoryOutputStream::writeRepeatedByte (uint8 byte, size_t howMany) { if (howMany > 0) memset (prepareToWrite (howMany), byte, howMany); + + return true; } void MemoryOutputStream::appendUTF8Char (beast_wchar c) @@ -162,4 +164,4 @@ OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const MemoryOutpu stream.write (streamToRead.getData(), dataSize); return stream; -} +} \ No newline at end of file diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h index 1413dcbf3d..c93f1ba0f2 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h @@ -28,7 +28,6 @@ #include "../memory/beast_MemoryBlock.h" #include "../memory/beast_ScopedPointer.h" - //============================================================================== /** Writes data to an internal memory buffer, which grows as required. @@ -36,9 +35,10 @@ The data that was written into the stream can then be accessed later as a contiguous block of memory. 
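
    A brief usage sketch (hedged: getDataSize() is assumed alongside the
    getData() accessor that this stream's operator<< implementation uses):

    @code
    MemoryOutputStream mo;
    mo.writeInt (42);                       // stored little-endian
    mo.writeString ("hello");
    someOtherStream.write (mo.getData(), mo.getDataSize());
    @endcode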
*/ -class BEAST_API MemoryOutputStream +class BEAST_API MemoryOutputStream : public OutputStream , LeakChecked + , Uncopyable { public: //============================================================================== @@ -108,11 +108,11 @@ public: */ void flush(); - bool write (const void* buffer, size_t howMany); - int64 getPosition() { return position; } - bool setPosition (int64 newPosition); - int writeFromInputStream (InputStream& source, int64 maxNumBytesToWrite); - void writeRepeatedByte (uint8 byte, size_t numTimesToRepeat); + bool write (const void*, size_t) override; + int64 getPosition() override { return position; } + bool setPosition (int64) override; + int writeFromInputStream (InputStream&, int64 maxNumBytesToWrite) override; + bool writeRepeatedByte (uint8 byte, size_t numTimesToRepeat) override; private: //============================================================================== @@ -128,4 +128,4 @@ private: OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const MemoryOutputStream& streamToRead); -#endif // BEAST_MEMORYOUTPUTSTREAM_BEASTHEADER +#endif \ No newline at end of file diff --git a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp index 5089153779..c1ac44c04c 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp @@ -61,48 +61,51 @@ OutputStream::~OutputStream() } //============================================================================== -void OutputStream::writeBool (const bool b) +bool OutputStream::writeBool (const bool b) { - writeByte (b ? (char) 1 - : (char) 0); + return writeByte (b ? (char) 1 + : (char) 0); } -void OutputStream::writeByte (char byte) +bool OutputStream::writeByte (char byte) { - write (&byte, 1); + return write (&byte, 1); } -void OutputStream::writeRepeatedByte (uint8 byte, size_t numTimesToRepeat) +bool OutputStream::writeRepeatedByte (uint8 byte, size_t numTimesToRepeat) { for (size_t i = 0; i < numTimesToRepeat; ++i) - writeByte ((char) byte); + if (! writeByte ((char) byte)) + return false; + + return true; } -void OutputStream::writeShort (short value) +bool OutputStream::writeShort (short value) { const unsigned short v = ByteOrder::swapIfBigEndian ((unsigned short) value); - write (&v, 2); + return write (&v, 2); } -void OutputStream::writeShortBigEndian (short value) +bool OutputStream::writeShortBigEndian (short value) { const unsigned short v = ByteOrder::swapIfLittleEndian ((unsigned short) value); - write (&v, 2); + return write (&v, 2); } -void OutputStream::writeInt (int value) +bool OutputStream::writeInt (int value) { const unsigned int v = ByteOrder::swapIfBigEndian ((unsigned int) value); - write (&v, 4); + return write (&v, 4); } -void OutputStream::writeIntBigEndian (int value) +bool OutputStream::writeIntBigEndian (int value) { const unsigned int v = ByteOrder::swapIfLittleEndian ((unsigned int) value); - write (&v, 4); + return write (&v, 4); } -void OutputStream::writeCompressedInt (int value) +bool OutputStream::writeCompressedInt (int value) { unsigned int un = (value < 0) ? 
(unsigned int) -value : (unsigned int) value; @@ -121,60 +124,60 @@ void OutputStream::writeCompressedInt (int value) if (value < 0) data[0] |= 0x80; - write (data, num + 1); + return write (data, num + 1); } -void OutputStream::writeInt64 (int64 value) +bool OutputStream::writeInt64 (int64 value) { const uint64 v = ByteOrder::swapIfBigEndian ((uint64) value); - write (&v, 8); + return write (&v, 8); } -void OutputStream::writeInt64BigEndian (int64 value) +bool OutputStream::writeInt64BigEndian (int64 value) { const uint64 v = ByteOrder::swapIfLittleEndian ((uint64) value); - write (&v, 8); + return write (&v, 8); } -void OutputStream::writeFloat (float value) +bool OutputStream::writeFloat (float value) { union { int asInt; float asFloat; } n; n.asFloat = value; - writeInt (n.asInt); + return writeInt (n.asInt); } -void OutputStream::writeFloatBigEndian (float value) +bool OutputStream::writeFloatBigEndian (float value) { union { int asInt; float asFloat; } n; n.asFloat = value; - writeIntBigEndian (n.asInt); + return writeIntBigEndian (n.asInt); } -void OutputStream::writeDouble (double value) +bool OutputStream::writeDouble (double value) { union { int64 asInt; double asDouble; } n; n.asDouble = value; - writeInt64 (n.asInt); + return writeInt64 (n.asInt); } -void OutputStream::writeDoubleBigEndian (double value) +bool OutputStream::writeDoubleBigEndian (double value) { union { int64 asInt; double asDouble; } n; n.asDouble = value; - writeInt64BigEndian (n.asInt); + return writeInt64BigEndian (n.asInt); } -void OutputStream::writeString (const String& text) +bool OutputStream::writeString (const String& text) { // (This avoids using toUTF8() to prevent the memory bloat that it would leave behind // if lots of large, persistent strings were to be written to streams). const size_t numBytes = text.getNumBytesAsUTF8() + 1; HeapBlock temp (numBytes); text.copyToUTF8 (temp, numBytes); - write (temp, numBytes); + return write (temp, numBytes); } -void OutputStream::writeText (const String& text, const bool asUTF16, +bool OutputStream::writeText (const String& text, const bool asUTF16, const bool writeUTF16ByteOrderMark) { if (asUTF16) @@ -196,7 +199,9 @@ void OutputStream::writeText (const String& text, const bool asUTF16, writeShort ((short) '\r'); lastCharWasReturn = (c == L'\r'); - writeShort ((short) c); + + if (! writeShort ((short) c)) + return false; } } else @@ -209,9 +214,12 @@ void OutputStream::writeText (const String& text, const bool asUTF16, if (*t == '\n') { if (t > src) - write (src, (int) (t - src)); + if (! write (src, (int) (t - src))) + return false; + + if (! write ("\r\n", 2)) + return false; - write ("\r\n", 2); src = t + 1; } else if (*t == '\r') @@ -222,7 +230,8 @@ void OutputStream::writeText (const String& text, const bool asUTF16, else if (*t == 0) { if (t > src) - write (src, (int) (t - src)); + if (! 
write (src, (int) (t - src))) + return false; break; } @@ -230,6 +239,8 @@ void OutputStream::writeText (const String& text, const bool asUTF16, ++t; } } + + return true; } int OutputStream::writeFromInputStream (InputStream& source, int64 numBytesToWrite) @@ -317,4 +328,4 @@ BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, InputSt BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const NewLine&) { return stream << stream.getNewLineString(); -} +} \ No newline at end of file diff --git a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h index 9365041ba8..33b744a7d6 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h @@ -40,9 +40,7 @@ class File; @see InputStream, MemoryOutputStream, FileOutputStream */ -class BEAST_API OutputStream - : public Uncopyable - , LeakChecked +class BEAST_API OutputStream { protected: //============================================================================== @@ -92,75 +90,88 @@ public: //============================================================================== /** Writes a single byte to the stream. - + @returns false if the write operation fails for some reason @see InputStream::readByte */ - virtual void writeByte (char byte); + virtual bool writeByte (char byte); /** Writes a boolean to the stream as a single byte. This is encoded as a binary byte (not as text) with a value of 1 or 0. + @returns false if the write operation fails for some reason @see InputStream::readBool */ - virtual void writeBool (bool boolValue); + virtual bool writeBool (bool boolValue); /** Writes a 16-bit integer to the stream in a little-endian byte order. This will write two bytes to the stream: (value & 0xff), then (value >> 8). + @returns false if the write operation fails for some reason @see InputStream::readShort */ - virtual void writeShort (short value); + virtual bool writeShort (short value); /** Writes a 16-bit integer to the stream in a big-endian byte order. This will write two bytes to the stream: (value >> 8), then (value & 0xff). + @returns false if the write operation fails for some reason @see InputStream::readShortBigEndian */ - virtual void writeShortBigEndian (short value); + virtual bool writeShortBigEndian (short value); /** Writes a 32-bit integer to the stream in a little-endian byte order. + @returns false if the write operation fails for some reason @see InputStream::readInt */ - virtual void writeInt (int value); + virtual bool writeInt (int value); /** Writes a 32-bit integer to the stream in a big-endian byte order. + @returns false if the write operation fails for some reason @see InputStream::readIntBigEndian */ - virtual void writeIntBigEndian (int value); + virtual bool writeIntBigEndian (int value); /** Writes a 64-bit integer to the stream in a little-endian byte order. + @returns false if the write operation fails for some reason @see InputStream::readInt64 */ - virtual void writeInt64 (int64 value); + virtual bool writeInt64 (int64 value); /** Writes a 64-bit integer to the stream in a big-endian byte order. + @returns false if the write operation fails for some reason @see InputStream::readInt64BigEndian */ - virtual void writeInt64BigEndian (int64 value); + virtual bool writeInt64BigEndian (int64 value); /** Writes a 32-bit floating point value to the stream in a binary format. 
The binary 32-bit encoding of the float is written as a little-endian int. + @returns false if the write operation fails for some reason @see InputStream::readFloat */ - virtual void writeFloat (float value); + virtual bool writeFloat (float value); /** Writes a 32-bit floating point value to the stream in a binary format. The binary 32-bit encoding of the float is written as a big-endian int. + @returns false if the write operation fails for some reason @see InputStream::readFloatBigEndian */ - virtual void writeFloatBigEndian (float value); + virtual bool writeFloatBigEndian (float value); /** Writes a 64-bit floating point value to the stream in a binary format. The eight raw bytes of the double value are written out as a little-endian 64-bit int. + @returns false if the write operation fails for some reason @see InputStream::readDouble */ - virtual void writeDouble (double value); + virtual bool writeDouble (double value); /** Writes a 64-bit floating point value to the stream in a binary format. The eight raw bytes of the double value are written out as a big-endian 64-bit int. @see InputStream::readDoubleBigEndian + @returns false if the write operation fails for some reason */ - virtual void writeDoubleBigEndian (double value); + virtual bool writeDoubleBigEndian (double value); - /** Writes a byte to the output stream a given number of times. */ - virtual void writeRepeatedByte (uint8 byte, size_t numTimesToRepeat); + /** Writes a byte to the output stream a given number of times. + @returns false if the write operation fails for some reason + */ + virtual bool writeRepeatedByte (uint8 byte, size_t numTimesToRepeat); /** Writes a condensed binary encoding of a 32-bit integer. @@ -170,9 +181,10 @@ public: The format used is: number of significant bytes + up to 4 bytes in little-endian order. + @returns false if the write operation fails for some reason @see InputStream::readCompressedInt */ - virtual void writeCompressedInt (int value); + virtual bool writeCompressedInt (int value); /** Stores a string in the stream in a binary format. @@ -184,9 +196,10 @@ public: For appending text to a file, instead use writeText, or operator<< + @returns false if the write operation fails for some reason @see InputStream::readString, writeText, operator<< */ - virtual void writeString (const String& text); + virtual bool writeString (const String& text); /** Writes a string of text to the stream. @@ -195,8 +208,9 @@ public: of a file). The method also replaces '\\n' characters in the text with '\\r\\n'. 
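        A lone '\\r' is preserved, and an existing "\\r\\n" pair is not
        doubled into "\\r\\r\\n"; see the lastCharWasReturn and lookahead
        handling in the implementation.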
+ @returns false if the write operation fails for some reason */ - virtual void writeText (const String& text, + virtual bool writeText (const String& text, bool asUTF16, bool writeUTF16ByteOrderMark); @@ -206,6 +220,7 @@ public: @param maxNumBytesToWrite the number of bytes to read from the stream (if this is less than zero, it will keep reading until the input is exhausted) + @returns the number of bytes written */ virtual int writeFromInputStream (InputStream& source, int64 maxNumBytesToWrite); @@ -258,5 +273,4 @@ BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, InputSt */ BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const NewLine&); - -#endif // BEAST_OUTPUTSTREAM_BEASTHEADER +#endif diff --git a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.h b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.h index c083afc45f..d13e72802a 100644 --- a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.h +++ b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.h @@ -80,9 +80,9 @@ public: */ void flush(); - int64 getPosition(); - bool setPosition (int64 newPosition); - bool write (const void* destBuffer, size_t howMany); + int64 getPosition() override; + bool setPosition (int64) override; + bool write (const void*, size_t) override; /** These are preset values that can be used for the constructor's windowBits paramter. For more info about this, see the zlib documentation for its windowBits parameter. From ad933bae9cc9fd07dd8e8059401f6343401527f3 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 16 Jul 2013 08:07:18 -0700 Subject: [PATCH 05/50] Refactor NodeStore --- TODO.txt | 2 + modules/ripple_app/node/ripple_NodeObject.cpp | 74 +++++++++++++++++++ modules/ripple_app/node/ripple_NodeObject.h | 64 +++++++++++++--- modules/ripple_app/node/ripple_NodeStore.cpp | 27 +++++-- modules/ripple_app/node/ripple_NodeStore.h | 17 ++++- .../containers/ripple_TaggedCache.h | 1 + modules/ripple_basics/types/ripple_UInt256.h | 27 ++++++- src/cpp/ripple/ripple_SHAMapTreeNode.cpp | 10 ++- 8 files changed, 202 insertions(+), 20 deletions(-) diff --git a/TODO.txt b/TODO.txt index 70c3119383..5524de7a0a 100644 --- a/TODO.txt +++ b/TODO.txt @@ -11,6 +11,8 @@ Vinnie's Short List (Changes day to day) -------------------------------------------------------------------------------- +- Replace base_uint and uintXXX with UnsignedInteger + - Rewrite boost program_options in Beast - Examples for different backend key/value config settings diff --git a/modules/ripple_app/node/ripple_NodeObject.cpp b/modules/ripple_app/node/ripple_NodeObject.cpp index ac8bce22ee..4b4a0c8aee 100644 --- a/modules/ripple_app/node/ripple_NodeObject.cpp +++ b/modules/ripple_app/node/ripple_NodeObject.cpp @@ -32,6 +32,80 @@ NodeObject::NodeObject ( { } +NodeObject::NodeObject (void const* key, void const* value, int valueBytes) +{ + DecodedBlob decoded (key, value, valueBytes); + + if (decoded.success) + { + mType = decoded.objectType; + mHash = uint256 (key); + mLedgerIndex = decoded.ledgerIndex; + mData = Blob (decoded.objectData, decoded.objectData + decoded.dataBytes); + } + else + { + // VFALCO TODO Write the hex version of key to the string for diagnostics. 
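+            //            (One way, assuming Beast keeps JUCE's
+            //            String::toHexString (const void*, int) helper, is to
+            //            append String::toHexString (key, 32) to the message
+            //            built below.)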
+            String s;
+            s << "NodeStore::DecodedBlob failed";
+            Throw (s);
+        }
+}
+
+NodeObject::DecodedBlob::DecodedBlob (void const* key, void const* value, int valueBytes)
+{
+    /* Data format:
+
+        Bytes
+
+        0...3       LedgerIndex     32-bit big endian integer
+        4...7       Unused?         An unused copy of the LedgerIndex
+        8           char            One of NodeObjectType
+        9...end     The body of the object data
+    */
+
+    success = false;
+    this->key = key;
+    // VFALCO NOTE Ledger indexes should have started at 1
+    ledgerIndex = LedgerIndex (-1);
+    objectType = hotUNKNOWN;
+    objectData = nullptr;
+    dataBytes = bmax (0, valueBytes - 9);
+
+    if (dataBytes > 4)
+    {
+        LedgerIndex const* index = static_cast <LedgerIndex const*> (value);
+        ledgerIndex = ByteOrder::swapIfLittleEndian (*index);
+    }
+
+    // VFALCO NOTE What about bytes 4 through 7 inclusive?
+
+    if (dataBytes > 8)
+    {
+        unsigned char const* byte = static_cast <unsigned char const*> (value);
+        objectType = static_cast <NodeObjectType> (byte [8]);
+    }
+
+    if (dataBytes > 9)
+    {
+        objectData = static_cast <unsigned char const*> (value) + 9;
+
+        switch (objectType)
+        {
+        case hotUNKNOWN:
+        default:
+            break;
+
+        case hotLEDGER:
+        case hotTRANSACTION:
+        case hotACCOUNT_NODE:
+        case hotTRANSACTION_NODE:
+            success = true;
+            break;
+        }
+    }
+}
+
 NodeObjectType NodeObject::getType () const
 {
     return mType;
diff --git a/modules/ripple_app/node/ripple_NodeObject.h b/modules/ripple_app/node/ripple_NodeObject.h
index b889666f48..e6b4e3fb7f 100644
--- a/modules/ripple_app/node/ripple_NodeObject.h
+++ b/modules/ripple_app/node/ripple_NodeObject.h
@@ -34,6 +34,14 @@ class NodeObject : public CountedObject <NodeObject>
 public:
     static char const* getCountedObjectName () { return "NodeObject"; }
 
+    /** The type used to hold the hash.
+
+        The hashes are fixed size, SHA-256.
+
+        @note The key size can be retrieved with `Hash::sizeInBytes`
+    */
+    typedef UnsignedInteger <32> Hash;
+
     typedef boost::shared_ptr <NodeObject> pointer;
     typedef pointer const& ref;
 
     /** Create from a block of data.
 
         @note A copy of the data is created.
     */
     NodeObject (NodeObjectType type,
-        LedgerIndex ledgerIndex,
-        Blob const & binaryDataToCopy,
-        uint256 const & hash);
+                LedgerIndex ledgerIndex,
+                Blob const & binaryDataToCopy,
+                uint256 const & hash);
 
     /** Create from an area of memory.
 
         @note A copy of the data is created.
     */
     NodeObject (NodeObjectType type,
-        LedgerIndex ledgerIndex,
-        void const * bufferToCopy,
-        int bytesInBuffer,
-        uint256 const & hash);
+                LedgerIndex ledgerIndex,
+                void const * bufferToCopy,
+                int bytesInBuffer,
+                uint256 const & hash);
 
+    /** Create from a key/value blob.
+
+        This is the format in which a NodeObject is stored in the
+        persistent storage layer.
+
+        @see NodeStore
+    */
+    NodeObject (void const* key, void const* value, int valueBytes);
+
+    /** Parses a key/value blob into NodeObject components.
+
+        This will extract the information required to construct
+        a NodeObject. It also does consistency checking and records
+        the result, so it is possible to determine if the data
+        is corrupted without throwing an exception. Note that not all
+        forms of corruption are detected, so further analysis may be
+        needed to eliminate false positives.
+
+        This is the format in which a NodeObject is stored in the
+        persistent storage layer.
+    */
+    struct DecodedBlob
+    {
+        DecodedBlob (void const* key, void const* value, int valueBytes);
+
+        bool success;
+
+        void const* key;
+        LedgerIndex ledgerIndex;
+        NodeObjectType objectType;
+        unsigned char const* objectData;
+        int dataBytes;
+    };
+
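+    /*  A sketch of the intended use (the surrounding caller is
+        hypothetical and shown only for illustration):
+
+            NodeObject::DecodedBlob decoded (key, value, valueBytes);
+
+            if (decoded.success)
+                consume (decoded.objectType, decoded.objectData, decoded.dataBytes);
+
+        where `consume` stands in for whatever the caller does with the
+        parsed fields; the NodeObject (key, value, valueBytes)
+        constructor above is one real consumer.
+    */
+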
     /** Retrieve the type of this object.
     */
     NodeObjectType getType () const;
@@ -74,10 +116,10 @@ public:
     Blob const& getData () const;
 
 private:
-    NodeObjectType const mType;
-    uint256 const mHash;
-    LedgerIndex const mLedgerIndex;
-    Blob const mData;
+    NodeObjectType mType;
+    uint256 mHash;
+    LedgerIndex mLedgerIndex;
+    Blob mData;
 };
 
 #endif
diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp
index b575a9a8cc..191b8eba0b 100644
--- a/modules/ripple_app/node/ripple_NodeStore.cpp
+++ b/modules/ripple_app/node/ripple_NodeStore.cpp
@@ -193,7 +193,7 @@ NodeObject::pointer NodeStore::retrieve (uint256 const& hash)
 
     if (m_fastBackend)
     {
-        obj = m_fastBackend->retrieve (hash);
+        obj = retrieve (m_fastBackend, hash);
 
         if (obj)
         {
@@ -203,16 +203,26 @@ NodeObject::pointer NodeStore::retrieve (uint256 const& hash)
     }
 
     {
-        LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve"));
-        obj = m_backend->retrieve(hash);
+        // m_hooks->onRetrieveBegin ()
 
-        if (!obj)
+        // VFALCO TODO Why is this an autoptr? Why can't it just be a plain old object?
+        //
+        LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve"));
+
+        obj = retrieve (m_backend, hash);
+
+        if (obj == nullptr)
         {
             m_negativeCache.add (hash);
-            return obj;
+
+            // VFALCO TODO Eliminate return from middle of function
+
+            return obj; // VFALCO NOTE This is nullptr, why return obj?
         }
+    }
 
+    // VFALCO NOTE What does this do?
     m_cache.canonicalize (hash, obj);
 
     if (m_fastBackend)
@@ -223,6 +233,13 @@ NodeObject::pointer NodeStore::retrieve (uint256 const& hash)
 
     return obj;
 }
 
+//------------------------------------------------------------------------------
+
+NodeObject::pointer NodeStore::retrieve (Backend* backend, uint256 const& hash)
+{
+    return backend->retrieve (hash);
+}
+
 void NodeStore::importVisitor (
     std::vector <NodeObject::pointer>& objects,
     NodeObject::pointer object)
diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h
index df611cbc88..27b4ce3df7 100644
--- a/modules/ripple_app/node/ripple_NodeStore.h
+++ b/modules/ripple_app/node/ripple_NodeStore.h
@@ -19,13 +19,18 @@ public:
         */
         // VFALCO TODO Make this a tunable parameter in the key value pairs
         bulkWriteBatchSize = 128
+
+        /** Size of the fixed keys, in bytes.
+        */
+        ,keyBytes = 32 // 256 bit hash
     };
 
     /** Interface to inform callers of certain activities.
     */
     class Hooks
     {
-        virtual void on
+        virtual void onRetrieveBegin () { }
+        virtual void onRetrieveEnd () { }
     };
 
     /** Back end used for the store.
...
        // It should just deal with a fixed key and raw data.
        //
        virtual bool store (NodeObject::ref);
+        //virtual bool put (void const* key, void const* value, int valueBytes) { return false; }
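+
+        // A put() would presumably store the same layout that
+        // NodeObject::DecodedBlob parses back: 4 bytes of ledger index
+        // (big endian), 4 unused bytes, one NodeObjectType byte, then
+        // the object data. (A sketch only; put() is commented out.)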
        /** Retrieve an individual object.
        */
        virtual NodeObject::pointer retrieve (uint256 const &hash) = 0;
 
+        struct GetCallback
+        {
+            virtual void* getBufferForValue (int valueBytes) = 0;
+        };
+
+        virtual bool get (void const* key, GetCallback* callback) { return false; }
+
        // Visit every object in the database
        // This function will only be called during an import operation
        //
@@ -161,6 +174,8 @@ public:
     int import (String sourceBackendParameters);
 
 private:
+    NodeObject::pointer retrieve (Backend* backend, uint256 const& hash);
+
     void importVisitor (std::vector <NodeObject::pointer>& objects, NodeObject::pointer object);
 
     static Backend* createBackend (String const& parameters);
diff --git a/modules/ripple_basics/containers/ripple_TaggedCache.h b/modules/ripple_basics/containers/ripple_TaggedCache.h
index 5fa75bf150..20263393a1 100644
--- a/modules/ripple_basics/containers/ripple_TaggedCache.h
+++ b/modules/ripple_basics/containers/ripple_TaggedCache.h
@@ -343,6 +343,7 @@ bool TaggedCache::del (const key_type& key, bool valid)
     return ret;
 }
 
+// VFALCO NOTE What does it mean to canonicalize the data?
 template
 bool TaggedCache::canonicalize (const key_type& key, boost::shared_ptr& data, bool replace)
 {
diff --git a/modules/ripple_basics/types/ripple_UInt256.h b/modules/ripple_basics/types/ripple_UInt256.h
index fa135ee218..9605dfc59b 100644
--- a/modules/ripple_basics/types/ripple_UInt256.h
+++ b/modules/ripple_basics/types/ripple_UInt256.h
@@ -19,6 +19,10 @@ inline int Testuint256AdHoc (std::vector vArg);
 
 // We have to keep a separate base class without constructors
 // so the compiler will let us use it in a union
+//
+// VFALCO NOTE This class produces undefined behavior when
+//             BITS is not a multiple of 32!!!
+//
 template <unsigned int BITS>
 class base_uint
 {
@@ -30,6 +34,22 @@ protected:
     unsigned int pn[WIDTH];
 
 public:
+    base_uint ()
+    {
+    }
+
+    /** Construct from a raw pointer.
+
+        The buffer pointed to by `data` must hold at least `BITS / 8` bytes.
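+
+        For example, `uint256 hash (buffer);` copies the first 32 bytes
+        of `buffer` (any caller-provided memory in this sketch) into
+        the integer.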
+ */ + explicit base_uint (void const* data) + { + // BITS must be a multiple of 32 + static_bassert ((BITS % 32) == 0); + + memcpy (&pn [0], data, BITS / 8); + } + bool isZero () const { for (int i = 0; i < WIDTH; i++) @@ -474,6 +494,11 @@ public: *this = b; } + explicit uint256 (void const* data) + : base_uint256 (data) + { + } + uint256& operator= (uint64 uHost) { zero (); @@ -590,7 +615,7 @@ template inline std::ostream& operator<< (std::ostream& out, inline int Testuint256AdHoc (std::vector vArg) { - uint256 g (0); + uint256 g (uint64 (0)); printf ("%s\n", g.ToString ().c_str ()); --g; diff --git a/src/cpp/ripple/ripple_SHAMapTreeNode.cpp b/src/cpp/ripple/ripple_SHAMapTreeNode.cpp index d9b4b7eb77..c406808f0a 100644 --- a/src/cpp/ripple/ripple_SHAMapTreeNode.cpp +++ b/src/cpp/ripple/ripple_SHAMapTreeNode.cpp @@ -4,8 +4,14 @@ */ //============================================================================== -SHAMapTreeNode::SHAMapTreeNode (uint32 seq, const SHAMapNode& nodeID) : SHAMapNode (nodeID), mHash (0), - mSeq (seq), mAccessSeq (seq), mType (tnERROR), mIsBranch (0), mFullBelow (false) +SHAMapTreeNode::SHAMapTreeNode (uint32 seq, const SHAMapNode& nodeID) + : SHAMapNode (nodeID) + , mHash (uint64(0)) + , mSeq (seq) + , mAccessSeq (seq) + , mType (tnERROR) + , mIsBranch (0) + , mFullBelow (false) { } From 74510a95be6c8d3911b538c848efd3d6b6ba6d72 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 07:02:33 -0700 Subject: [PATCH 06/50] Add RandomAccessFile --- .../Builds/VisualStudio2012/beast.vcxproj | 7 + .../VisualStudio2012/beast.vcxproj.filters | 6 + .../beast/modules/beast_core/beast_core.cpp | 1 + .../beast/modules/beast_core/beast_core.h | 1 + .../files/beast_RandomAccessFile.cpp | 214 ++++++++++++++++ .../beast_core/files/beast_RandomAccessFile.h | 242 ++++++++++++++++++ .../beast_core/memory/beast_Uncopyable.h | 5 +- .../native/beast_posix_SharedCode.h | 170 ++++++++++++ .../beast_core/native/beast_win32_Files.cpp | 155 +++++++++++ .../streams/beast_MemoryOutputStream.h | 1 - .../beast_core/streams/beast_OutputStream.h | 2 +- 11 files changed, 801 insertions(+), 3 deletions(-) create mode 100644 Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp create mode 100644 Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj index 8309bb27d2..6cc039c10c 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj @@ -140,6 +140,7 @@ + @@ -437,6 +438,12 @@ true true + + true + true + true + true + true true diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters index 7b777f5085..c70a09c151 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters @@ -623,6 +623,9 @@ beast_core\containers + + beast_core\files + @@ -967,6 +970,9 @@ beast_crypto\math + + beast_core\files + diff --git a/Subtrees/beast/modules/beast_core/beast_core.cpp b/Subtrees/beast/modules/beast_core/beast_core.cpp index 82966a182e..cb3662d5d8 100644 --- a/Subtrees/beast/modules/beast_core/beast_core.cpp +++ b/Subtrees/beast/modules/beast_core/beast_core.cpp @@ -155,6 +155,7 @@ namespace beast #include "files/beast_FileInputStream.cpp" #include "files/beast_FileOutputStream.cpp" #include "files/beast_FileSearchPath.cpp" +#include 
"files/beast_RandomAccessFile.cpp" #include "files/beast_TemporaryFile.cpp" #include "json/beast_JSON.cpp" diff --git a/Subtrees/beast/modules/beast_core/beast_core.h b/Subtrees/beast/modules/beast_core/beast_core.h index a0d9a3042f..8abaf273ea 100644 --- a/Subtrees/beast/modules/beast_core/beast_core.h +++ b/Subtrees/beast/modules/beast_core/beast_core.h @@ -252,6 +252,7 @@ namespace beast #include "files/beast_FileOutputStream.h" #include "files/beast_FileSearchPath.h" #include "files/beast_MemoryMappedFile.h" +#include "files/beast_RandomAccessFile.h" #include "files/beast_TemporaryFile.h" #include "json/beast_JSON.h" #include "logging/beast_FileLogger.h" diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp new file mode 100644 index 0000000000..e9588a7382 --- /dev/null +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -0,0 +1,214 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Portions of this file are from JUCE. + Copyright (c) 2013 - Raw Material Software Ltd. + Please visit http://www.juce.com + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/
+//==============================================================================
+
+RandomAccessFile::RandomAccessFile (int bufferSizeToUse) noexcept
+    : fileHandle (nullptr)
+    , currentPosition (0)
+    , bufferSize (bufferSizeToUse)
+    , bytesInBuffer (0)
+    , writeBuffer (bmax (bufferSizeToUse, 16)) // enforce minimum size of 16
+{
+}
+
+RandomAccessFile::~RandomAccessFile ()
+{
+    close ();
+}
+
+Result RandomAccessFile::open (File const& path, Mode mode)
+{
+    close ();
+
+    return nativeOpen (path, mode);
+}
+
+void RandomAccessFile::close ()
+{
+    if (isOpen ())
+    {
+        flushBuffer ();
+        nativeFlush ();
+        nativeClose ();
+    }
+}
+
+Result RandomAccessFile::setPosition (FileOffset newPosition)
+{
+    Result result (Result::ok ());
+
+    if (newPosition != currentPosition)
+    {
+        flushBuffer ();
+
+        result = nativeSetPosition (newPosition);
+    }
+
+    return result;
+}
+
+Result RandomAccessFile::read (void* buffer, ByteCount numBytes, ByteCount* pActualAmount)
+{
+    return nativeRead (buffer, numBytes, pActualAmount);
+}
+
+Result RandomAccessFile::write (const void* data, ByteCount numBytes, ByteCount* pActualAmount)
+{
+    bassert (data != nullptr && ((ssize_t) numBytes) >= 0);
+
+    Result result (Result::ok ());
+
+    ByteCount amountWritten = 0;
+
+    if (bytesInBuffer + numBytes < bufferSize)
+    {
+        // Fits in the buffer: batch it up.
+        memcpy (writeBuffer + bytesInBuffer, data, numBytes);
+        bytesInBuffer += numBytes;
+        currentPosition += numBytes;
+        amountWritten = numBytes;
+    }
+    else
+    {
+        result = flushBuffer ();
+
+        if (result.wasOk ())
+        {
+            if (numBytes < bufferSize)
+            {
+                bassert (bytesInBuffer == 0);
+
+                memcpy (writeBuffer + bytesInBuffer, data, numBytes);
+                bytesInBuffer += numBytes;
+                currentPosition += numBytes;
+                amountWritten = numBytes;
+            }
+            else
+            {
+                // Too big for the buffer: write it straight through.
+                ByteCount bytesWritten = 0;
+
+                result = nativeWrite (data, numBytes, &bytesWritten);
+
+                if (result.wasOk ())
+                {
+                    currentPosition += bytesWritten;
+                    amountWritten = bytesWritten;
+                }
+            }
+        }
+    }
+
+    if (pActualAmount != nullptr)
+        *pActualAmount = amountWritten;
+
+    return result;
+}
+
+Result RandomAccessFile::truncate ()
+{
+    Result result = flush ();
+
+    if (result.wasOk ())
+        result = nativeTruncate ();
+
+    return result;
+}
+
+Result RandomAccessFile::flush ()
+{
+    Result result = flushBuffer ();
+
+    if (result.wasOk ())
+        result = nativeFlush ();
+
+    return result;
+}
+
+Result RandomAccessFile::flushBuffer ()
+{
+    bassert (isOpen ());
+
+    Result result (Result::ok ());
+
+    if (bytesInBuffer > 0)
+    {
+        result = nativeWrite (writeBuffer, bytesInBuffer);
+        bytesInBuffer = 0;
+    }
+
+    return result;
+}
+
+//------------------------------------------------------------------------------
+
+class RandomAccessFileTests : public UnitTest
+{
+public:
+    RandomAccessFileTests () : UnitTest ("RandomAccessFile")
+    {
+    }
+
+    struct Payload
+    {
+        Payload (int maxBytes)
+            : bufferSize (maxBytes)
+            , data (maxBytes)
+        {
+        }
+
+        // Create a pseudo-random payload
+        void generate (int64 seedValue) noexcept
+        {
+            Random r (seedValue);
+
+            bytes = 1 + r.nextInt (bufferSize);
+
+            bassert (bytes >= 1 && bytes <= bufferSize);
+
+            for (int i = 0; i < bytes; ++i)
+                data [i] = static_cast <char> (r.nextInt ());
+        }
+
+        bool operator== (Payload const& other) const noexcept
+        {
+            if (bytes == other.bytes)
+            {
+                return memcmp (data.getData (), other.data.getData (), bytes) == 0;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        int const bufferSize;
+        int bytes;
+        HeapBlock <char> data;
+    };
+
+    void runTest ()
+    {
+        Result result = file.open (File::createTempFile ("tests"), RandomAccessFile::readWrite);
+
+        expect (result.wasOk (), "Should be ok");
+    }
+
+private:
+    RandomAccessFile file;
+};
+
+static
RandomAccessFileTests randomAccessFileTests; diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h new file mode 100644 index 0000000000..b97e17dcb1 --- /dev/null +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h @@ -0,0 +1,242 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Portions of this file are from JUCE. + Copyright (c) 2013 - Raw Material Software Ltd. + Please visit http://www.juce.com + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_RANDOMACCESSFILE_H_INCLUDED +#define BEAST_RANDOMACCESSFILE_H_INCLUDED + +#include "../misc/beast_Result.h" + +/** Provides random access reading and writing to an operating system file. + + This class wraps the underlying native operating system routines for + opening and closing a file for reading and/or writing, seeking within + the file, and performing read and write operations. There are also methods + provided for obtaining an input or output stream which will work with + the file. + + Writes are batched using an internal buffer. The buffer is flushed when + it fills, the current position is manually changed, or the file + is closed. It is also possible to explicitly flush the buffer. + + @note All files are opened in binary mode. No text newline conversions + are performed. + + @see FileInputStream, FileOutputStream +*/ +class BEAST_API RandomAccessFile : Uncopyable, LeakChecked +{ +public: + /** The type of an FileOffset. + + This can be useful when writing templates. + */ + typedef int64 FileOffset; + + /** The type of a byte count. + + This can be useful when writing templates. + */ + typedef size_t ByteCount; + + /** The access mode. + + @see open + */ + enum Mode + { + readOnly, + readWrite + }; + + //============================================================================== + /** Creates an unopened file object. + + @see open, isOpen + */ + explicit RandomAccessFile (int bufferSizeToUse = 16384) noexcept; + + /** Destroy the file object. + + If the operating system file is open it will be closed. + */ + ~RandomAccessFile (); + + /** Determine if a file is open. + + @return `true` if the operating system file is open. + */ + bool isOpen () const noexcept { return fileHandle != nullptr; } + + /** Opens a file object. + + The file is opened with the specified permissions. The initial + position is set to the beginning of the file. + + @note If a file is already open, it will be closed first. + + @param path The path to the file + @param mode The access permissions + @return An indication of the success of the operation. 
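+
+        A usage sketch (error handling elided; the temporary file is
+        only for illustration):
+
+        @code
+        RandomAccessFile file;
+
+        if (file.open (File::createTempFile ("demo"), RandomAccessFile::readWrite).wasOk ())
+        {
+            file.write ("hello", 5);
+            file.setPosition (0);
+
+            char buffer [5];
+            file.read (buffer, 5);
+        }
+        @endcode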
+ + @see Mode + */ + Result open (File const& path, Mode mode); + + /** Closes the file object. + + Any data that needs to be flushed will be written before the file is closed. + + @note If no file is opened, this call does nothing. + */ + void close (); + + /** Retrieve the @ref File associated with this object. + + @return The associated @ref File. + */ + File const& getFile () const noexcept { return file; } + + /** Get the current position. + + The next read or write will take place from here. + + @return The current position, as an absolute byte FileOffset from the begining. + */ + FileOffset getPosition () const noexcept { return currentPosition; } + + /** Set the current position. + + The next read or write will take place at this location. + + @param newPosition The byte FileOffset from the beginning of the file to move to. + + @return `true` if the operation was successful. + */ + Result setPosition (FileOffset newPosition); + + /** Read data at the current position. + + The caller is responsible for making sure that the memory pointed to + by `buffer` is at least as large as `bytesToRead`. + + @note The file must have been opened with read permission. + + @param buffer The memory to store the incoming data + @param numBytes The number of bytes to read. + @param pActualAmount Pointer to store the actual amount read, or `nullptr`. + + @return `true` if all the bytes were read. + */ + Result read (void* buffer, ByteCount numBytes, ByteCount* pActualAmount = 0); + + /** Write data at the current position. + + The current position is advanced past the data written. If data is + written past the end of the file, the file size is increased on disk. + + The caller is responsible for making sure that the memory pointed to + by `buffer` is at least as large as `bytesToWrite`. + + @note The file must have been opened with write permission. + + @param data A pointer to the data buffer to write to the file. + @param numBytes The number of bytes to write. + @param pActualAmount Pointer to store the actual amount written, or `nullptr`. + + @return `true` if all the data was written. + */ + Result write (const void* data, ByteCount numBytes, ByteCount* pActualAmount = 0); + + /** Truncate the file at the current position. + */ + Result truncate (); + + /** Flush the output buffers. + + This calls the operating system to make sure all data has been written. + */ + Result flush(); + + //============================================================================== +private: + Result flushBuffer (); + + // Some of these these methods are implemented natively on + // the corresponding platform. 
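+    // (The shared code in beast_RandomAccessFile.cpp layers the write
+    //  buffering and position bookkeeping on top of these primitives.)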
+ // + // See beast_posix_SharedCode.h and beast_win32_Files.cpp + Result nativeOpen (File const& path, Mode mode); + void nativeClose (); + Result nativeSetPosition (FileOffset newPosition); + Result nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount = 0); + Result nativeWrite (const void* data, ByteCount numBytes, ByteCount* pActualAmount = 0); + Result nativeTruncate (); + Result nativeFlush (); + +private: + File file; + void* fileHandle; + FileOffset currentPosition; + ByteCount const bufferSize; + ByteCount bytesInBuffer; + HeapBlock writeBuffer; +}; + +class BEAST_API RandomAccessFileInputStream : public InputStream +{ +public: + explicit RandomAccessFileInputStream (RandomAccessFile& file) : m_file (file) { } + + int64 getTotalLength() { return m_file.getFile ().getSize (); } + bool isExhausted() { return getPosition () == getTotalLength (); } + int read (void* destBuffer, int maxBytesToRead) + { + size_t actualBytes = 0; + m_file.read (destBuffer, maxBytesToRead, &actualBytes); + return actualBytes; + } + + int64 getPosition() { return m_file.getPosition (); } + bool setPosition (int64 newPosition) { return m_file.setPosition (newPosition); } + void skipNextBytes (int64 numBytesToSkip) { m_file.setPosition (getPosition () + numBytesToSkip); } + +private: + RandomAccessFile& m_file; +}; + +class BEAST_API RandomAccessFileOutputStream : public OutputStream +{ +public: + explicit RandomAccessFileOutputStream (RandomAccessFile& file) : m_file (file) { } + + void flush() { m_file.flush (); } + int64 getPosition() { return m_file.getPosition (); } + bool setPosition (int64 newPosition) { return m_file.setPosition (newPosition); } + bool write (const void* dataToWrite, size_t numberOfBytes) { return m_file.write (dataToWrite, numberOfBytes); } + +private: + RandomAccessFile& m_file; +}; + +#endif + diff --git a/Subtrees/beast/modules/beast_core/memory/beast_Uncopyable.h b/Subtrees/beast/modules/beast_core/memory/beast_Uncopyable.h index e1f1a614b1..349dde0a10 100644 --- a/Subtrees/beast/modules/beast_core/memory/beast_Uncopyable.h +++ b/Subtrees/beast/modules/beast_core/memory/beast_Uncopyable.h @@ -45,13 +45,16 @@ @code - class MyClass : Uncopyable + class MyClass : public Uncopyable { public: //... }; @endcode + + @note The derivation should be public or else child classes which + also derive from Uncopyable may not compile. 
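+
+    In other words, write `class MyClass : public Uncopyable` rather
+    than `class MyClass : Uncopyable`; the private inheritance in the
+    latter is what can make further-derived classes fail to compile.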
 */
 class Uncopyable
 {
diff --git a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h
index 936be35f40..7de9d0ebbd 100644
--- a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h
+++ b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h
@@ -504,6 +504,176 @@ Result FileOutputStream::truncate()
     return getResultForReturnValue (ftruncate (getFD (fileHandle), (off_t) currentPosition));
 }
 
+//==============================================================================
+// The buffered, portable RandomAccessFile logic lives in
+// beast_RandomAccessFile.cpp; only the native primitives are defined here.
+
+Result RandomAccessFile::nativeOpen (File const& path, Mode mode)
+{
+    bassert (! isOpen ());
+
+    Result result (Result::ok ());
+
+    if (path.exists())
+    {
+        int oflag;
+        switch (mode)
+        {
+        case readOnly:
+            oflag = O_RDONLY;
+            break;
+
+        default:
+        case readWrite:
+            oflag = O_RDWR;
+            break;
+        };
+
+        const int f = ::open (path.getFullPathName().toUTF8(), oflag, 00644);
+
+        if (f != -1)
+        {
+            currentPosition = lseek (f, 0, SEEK_SET);
+
+            if (currentPosition >= 0)
+            {
+                file = path;
+                fileHandle = fdToVoidPointer (f);
+            }
+            else
+            {
+                result = getResultForErrno();
+                ::close (f);
+            }
+        }
+        else
+        {
+            result = getResultForErrno();
+        }
+    }
+    else if (mode == readWrite)
+    {
+        const int f = ::open (path.getFullPathName().toUTF8(), O_RDWR | O_CREAT, 00644);
+
+        if (f != -1)
+        {
+            file = path;
+            fileHandle = fdToVoidPointer (f);
+        }
+        else
+        {
+            result = getResultForErrno();
+        }
+    }
+    else
+    {
+        // file doesn't exist and we're opening read-only
+        result = Result::fail (String (strerror (ENOENT)));
+    }
+
+    return result;
+}
+
+void RandomAccessFile::nativeClose ()
+{
+    if (fileHandle != nullptr)
+    {
+        file = File::nonexistent ();
+        ::close (getFD (fileHandle));
+        fileHandle = nullptr;
+        currentPosition = 0;
+    }
+}
+
+Result RandomAccessFile::nativeSetPosition (FileOffset newPosition)
+{
+    bassert (isOpen ());
+
+    Result result (Result::ok ());
+
+    off_t const actual = lseek (getFD (fileHandle), newPosition, SEEK_SET);
+
+    if (actual == newPosition)
+        currentPosition = actual;
+    else
+        result = getResultForErrno();
+
+    return result;
+}
+
+Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount)
+{
+    bassert (isOpen ());
+
+    Result result (Result::ok ());
+
+    ssize_t amount = ::read (getFD (fileHandle), buffer, numBytes);
+
+    if (amount < 0)
+    {
+        result = getResultForErrno();
+        amount = 0;
+    }
+
+    if (pActualAmount != nullptr)
+        *pActualAmount = amount;
+
+    return result;
+}
+
+Result RandomAccessFile::nativeWrite (void const* data, ByteCount numBytes, ByteCount* pActualAmount)
+{
+    bassert (isOpen ());
+
+    Result result (Result::ok ());
+
+    ssize_t actual = ::write (getFD (fileHandle), data, numBytes);
+
+    if (actual == -1)
+    {
+        result = getResultForErrno();
+        actual = 0;
+    }
+
+    if (pActualAmount != nullptr)
+        *pActualAmount = actual;
+
+    return result;
+}
+
+Result RandomAccessFile::nativeTruncate ()
+{
+    bassert (isOpen ());
+
+    return getResultForReturnValue (ftruncate (getFD (fileHandle), (off_t) currentPosition));
+}
+
+static void updateFileMetadata (File const& file); // defined below
+
+Result RandomAccessFile::nativeFlush ()
+{
+    bassert (isOpen ());
+
+    Result result (Result::ok ());
+
+    if (fsync (getFD (fileHandle)) == -1)
+        result = getResultForErrno();
+
+    updateFileMetadata (file);
+
+    return result;
+}
+
+static void updateFileMetadata (File const& file)
+{
+    (void) file; // only used on Android
+
+    {
+       #if BEAST_ANDROID
+        // This stuff tells the OS to asynchronously update the metadata
+        // that the OS has cached about the file - this metadata is used
+        // when the device is acting as a USB drive, and
unless it's explicitly + // refreshed, it'll get out of step with the real file. + const LocalRef t (javaString (file.getFullPathName())); + android.activity.callVoidMethod (BeastAppActivity.scanFile, t.get()); + #endif + } +} + //============================================================================== String SystemStats::getEnvironmentVariable (const String& name, const String& defaultValue) { diff --git a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp index 444bc51c3e..cb5933a69b 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp +++ b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp @@ -307,6 +307,161 @@ Result FileOutputStream::truncate() : WindowsFileHelpers::getResultForLastError(); } +//============================================================================== + +Result RandomAccessFile::nativeOpen (File const& path, Mode mode) +{ + bassert (! isOpen ()); + + Result result (Result::ok ()); + + DWORD dwDesiredAccess; + switch (mode) + { + case readOnly: + dwDesiredAccess = GENERIC_READ; + break; + + default: + case readWrite: + dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + break; + }; + + DWORD dwCreationDisposition; + switch (mode) + { + case readOnly: + dwCreationDisposition = OPEN_EXISTING; + break; + + default: + case readWrite: + dwCreationDisposition = OPEN_ALWAYS; + break; + }; + + HANDLE h = CreateFile (path.getFullPathName().toWideCharPointer(), + dwDesiredAccess, + FILE_SHARE_READ, + 0, + dwCreationDisposition, + FILE_ATTRIBUTE_NORMAL, + 0); + + if (h != INVALID_HANDLE_VALUE) + { + file = path; + fileHandle = h; + + result = setPosition (0); + + if (result.failed ()) + nativeClose (); + } + else + { + result = WindowsFileHelpers::getResultForLastError(); + } + + return result; +} + +void RandomAccessFile::nativeClose () +{ + bassert (isOpen ()); + + CloseHandle ((HANDLE) fileHandle); + + file = File::nonexistent (); + fileHandle = nullptr; + currentPosition = 0; +} + +Result RandomAccessFile::nativeSetPosition (FileOffset newPosition) +{ + bassert (isOpen ()); + + Result result (Result::ok ()); + + LARGE_INTEGER li; + li.QuadPart = newPosition; + li.LowPart = SetFilePointer ((HANDLE) fileHandle, + (LONG) li.LowPart, + &li.HighPart, + FILE_BEGIN); + + if (li.LowPart != INVALID_SET_FILE_POINTER) + { + currentPosition = li.QuadPart; + } + else + { + result = WindowsFileHelpers::getResultForLastError(); + } + + return result; +} + +Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount ) +{ + bassert (isOpen ()); + + Result result (Result::ok ()); + + DWORD actualNum = 0; + + if (! ReadFile ((HANDLE) fileHandle, buffer, (DWORD) numBytes, &actualNum, 0)) + result = WindowsFileHelpers::getResultForLastError(); + + if (pActualAmount != nullptr) + *pActualAmount = actualNum; + + return result; +} + +Result RandomAccessFile::nativeWrite (void const* data, ByteCount numBytes, size_t* pActualAmount) +{ + bassert (isOpen ()); + + Result result (Result::ok ()); + + DWORD actualNum = 0; + + if (! WriteFile ((HANDLE) fileHandle, data, (DWORD) numBytes, &actualNum, 0)) + result = WindowsFileHelpers::getResultForLastError(); + + if (pActualAmount != nullptr) + *pActualAmount = actualNum; + + return result; +} + +Result RandomAccessFile::nativeTruncate () +{ + bassert (isOpen ()); + + Result result (Result::ok ()); + + if (! 
SetEndOfFile ((HANDLE) fileHandle)) + result = WindowsFileHelpers::getResultForLastError(); + + return result; +} + +Result RandomAccessFile::nativeFlush () +{ + bassert (isOpen ()); + + Result result (Result::ok ()); + + if (! FlushFileBuffers ((HANDLE) fileHandle)) + result = WindowsFileHelpers::getResultForLastError(); + + return result; +} + + //============================================================================== void MemoryMappedFile::openInternal (const File& file, AccessMode mode) { diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h index c93f1ba0f2..f2e8f7ad8c 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h @@ -38,7 +38,6 @@ class BEAST_API MemoryOutputStream : public OutputStream , LeakChecked - , Uncopyable { public: //============================================================================== diff --git a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h index 33b744a7d6..0528f0fcac 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h @@ -40,7 +40,7 @@ class File; @see InputStream, MemoryOutputStream, FileOutputStream */ -class BEAST_API OutputStream +class BEAST_API OutputStream : public Uncopyable { protected: //============================================================================== From b81fc4e0c8dbf62b33c7307d380b3ea74edbb00e Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 07:02:58 -0700 Subject: [PATCH 07/50] Use RandomAccessFile in KeyvaDB --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 263 +++++++++++++-------- 1 file changed, 166 insertions(+), 97 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 276bb93b8c..9b8672b147 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -31,12 +31,15 @@ private: // Accessed by multiple threads struct State { - ScopedPointer keyIn; - ScopedPointer keyOut; - KeyIndex newKeyIndex; + State () + : keyFile (16384) // buffer size + , valFile (16384) // buffer size + { + } - ScopedPointer valIn; - ScopedPointer valOut; + RandomAccessFile keyFile; + RandomAccessFile valFile; + KeyIndex newKeyIndex; FileOffset valFileSize; bool hasKeys () const noexcept @@ -83,26 +86,24 @@ public: { SharedState::WriteAccess state (m_state); - // Output must be opened first, in case it has - // to created, or else opening for input will fail. 
- state->keyOut = openForWrite (keyPath); - state->keyIn = openForRead (keyPath); + openFile (&state->keyFile, keyPath); - int64 const fileSize = state->keyIn->getFile ().getSize (); + int64 const fileSize = state->keyFile.getFile ().getSize (); if (fileSize == 0) { // initialize the key file - state->keyOut->setPosition (keyFileHeaderBytes - 1); - state->keyOut->writeByte (0); - state->keyOut->flush (); + RandomAccessFileOutputStream stream (state->keyFile); + stream.setPosition (keyFileHeaderBytes - 1); + stream.writeByte (0); + stream.flush (); } - state->newKeyIndex = 1 + (state->keyIn->getFile ().getSize () - keyFileHeaderBytes) / m_keyRecordBytes; + state->newKeyIndex = 1 + (state->keyFile.getFile ().getSize () - keyFileHeaderBytes) / m_keyRecordBytes; - state->valOut = openForWrite (valPath); - state->valIn = openForRead (valPath); - state->valFileSize = state->valIn->getFile ().getSize (); + openFile (&state->valFile, valPath); + + state->valFileSize = state->valFile.getFile ().getSize (); } ~KeyvaDBImp () @@ -111,22 +112,19 @@ public: flushInternal (state); - state->keyOut = nullptr; - state->valOut = nullptr; - // Delete the database files if requested. // if (m_filesAreTemporary) { { - File const path = state->keyIn->getFile (); - state->keyIn = nullptr; + File const path = state->keyFile.getFile (); + state->keyFile.close (); path.deleteFile (); } { - File const path = state->valIn->getFile (); - state->valIn = nullptr; + File const path = state->valFile.getFile (); + state->valFile.close (); path.deleteFile (); } } @@ -161,29 +159,33 @@ public: } // Read a key record into memory. + // VFALCO TODO Return a Result and do validity checking on all inputs + // void readKeyRecord (KeyRecord* const keyRecord, KeyIndex const keyIndex, SharedState::WriteAccess& state) { FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); - bool const success = state->keyIn->setPosition (byteOffset); + RandomAccessFileInputStream stream (state->keyFile); + + bool const success = stream.setPosition (byteOffset); if (success) { // This defines the file format! - keyRecord->valFileOffset = state->keyIn->readInt64BigEndian (); - keyRecord->valSize = state->keyIn->readIntBigEndian (); - keyRecord->leftIndex = state->keyIn->readIntBigEndian (); - keyRecord->rightIndex = state->keyIn->readIntBigEndian (); + keyRecord->valFileOffset = stream.readInt64BigEndian (); + keyRecord->valSize = stream.readIntBigEndian (); + keyRecord->leftIndex = stream.readIntBigEndian (); + keyRecord->rightIndex = stream.readIntBigEndian (); // Grab the key - state->keyIn->read (keyRecord->key, m_keyBytes); + stream.read (keyRecord->key, m_keyBytes); } else { String s; - s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + s << "KeyvaDB: Seek failed in " << state->keyFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -196,65 +198,70 @@ public: { FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); - bool const success = state->keyOut->setPosition (byteOffset); + RandomAccessFileOutputStream stream (state->keyFile); + + bool const success = stream.setPosition (byteOffset); if (success) { // This defines the file format! 
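+            //
+            // The record written below is, in order (big-endian):
+            //
+            //     8 bytes     valFileOffset
+            //     4 bytes     valSize
+            //     4 bytes     leftIndex
+            //     4 bytes     rightIndex
+            //     m_keyBytes  bytes of key data (when includingKey is true)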
// VFALCO TODO Make OutputStream return the bool errors here // - state->keyOut->writeInt64BigEndian (keyRecord.valFileOffset); - state->keyOut->writeIntBigEndian (keyRecord.valSize); - state->keyOut->writeIntBigEndian (keyRecord.leftIndex); - state->keyOut->writeIntBigEndian (keyRecord.rightIndex); + stream.writeInt64BigEndian (keyRecord.valFileOffset); + stream.writeIntBigEndian (keyRecord.valSize); + stream.writeIntBigEndian (keyRecord.leftIndex); + stream.writeIntBigEndian (keyRecord.rightIndex); // Write the key if (includingKey) { - bool const success = state->keyOut->write (keyRecord.key, m_keyBytes); + bool const success = stream.write (keyRecord.key, m_keyBytes); if (! success) { String s; - s << "KeyvaDB: Write failed in " << state->valOut->getFile ().getFileName (); + s << "KeyvaDB: Write failed in " << state->keyFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } - state->keyOut->flush (); + //stream.flush (); } else { String s; - s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + s << "KeyvaDB: Seek failed in " << state->keyFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } // Append a value to the value file. + // VFALCO TODO return a Result void writeValue (void const* const value, ByteSize valueBytes, SharedState::WriteAccess& state) { - bool const success = state->valOut->setPosition (state->valFileSize); + RandomAccessFileOutputStream stream (state->valFile); + + bool const success = stream.setPosition (state->valFileSize); if (success) { - bool const success = state->valOut->write (value, static_cast (valueBytes)); + bool const success = stream.write (value, static_cast (valueBytes)); if (! success) { String s; - s << "KeyvaDB: Write failed in " << state->valOut->getFile ().getFileName (); + s << "KeyvaDB: Write failed in " << state->valFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } state->valFileSize += valueBytes; - state->valOut->flush (); + //stream.flush (); } else { String s; - s << "KeyvaDB: Seek failed in " << state->valOut->getFile ().getFileName (); + s << "KeyvaDB: Seek failed in " << state->valFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -329,9 +336,9 @@ public: bool get (void const* key, GetCallback* callback) { - FindResult findResult (m_keyStorage.getData ()); - + // VFALCO TODD Swap these two lines SharedState::WriteAccess state (m_state); + FindResult findResult (m_keyStorage.getData ()); bool found = false; @@ -343,21 +350,23 @@ public: { void* const destStorage = callback->createStorageForValue (findResult.keyRecord.valSize); - bool const success = state->valIn->setPosition (findResult.keyRecord.valFileOffset); + RandomAccessFileInputStream stream (state->valFile); + + bool const success = stream.setPosition (findResult.keyRecord.valFileOffset); if (! 
success)
+            {
+                String s;
+                s << "KeyvaDB: Seek failed in " << state->valFile.getFile ().getFileName ();
+                Throw (std::runtime_error (s.toStdString ()));
+            }
 
-            int const bytesRead = state->valIn->read (destStorage, findResult.keyRecord.valSize);
+            int const bytesRead = stream.read (destStorage, findResult.keyRecord.valSize);
 
             if (bytesRead != findResult.keyRecord.valSize)
             {
                 String s;
-                s << "KeyvaDB: Couldn't read a value from " << state->valIn->getFile ().getFileName ();
+                s << "KeyvaDB: Couldn't read a value from " << state->valFile.getFile ().getFileName ();
                 Throw (std::runtime_error (s.toStdString ()));
             }
         }
@@ -421,9 +430,12 @@ public:
         }
         else
         {
+            // Do nothing
+            /*
             String s;
             s << "KeyvaDB: Attempt to write a duplicate key!";
             Throw (std::runtime_error (s.toStdString ()));
+            */
         }
     }
     else
@@ -467,41 +479,25 @@ public:
 
     void flushInternal (SharedState::WriteAccess& state)
     {
-        state->keyOut->flush ();
-        state->valOut->flush ();
+        state->keyFile.flush ();
+        state->valFile.flush ();
     }
 
     //--------------------------------------------------------------------------
 
 private:
-    // Open a file for reading.
-    static FileInputStream* openForRead (File path)
+    // Open a file for reading and writing.
+    // Creates the file if it doesn't exist.
+    static void openFile (RandomAccessFile* file, File path)
     {
-        FileInputStream* stream = path.createInputStream ();
+        Result const result = file->open (path, RandomAccessFile::readWrite);
 
-        if (stream == nullptr)
-        {
-            String s;
-            s << "KeyvaDB: Couldn't open " << path.getFileName () << " for reading.";
-            Throw (std::runtime_error (s.toStdString ()));
-        }
-
-        return stream;
-    }
-
-    // Open a file for writing.
-    static FileOutputStream* openForWrite (File path)
-    {
-        FileOutputStream* stream = path.createOutputStream ();
-
-        if (stream == nullptr)
+        if (! result)
         {
             String s;
             s << "KeyvaDB: Couldn't open " << path.getFileName () << " for writing.";
             Throw (std::runtime_error (s.toStdString ()));
         }
-
-        return stream;
     }
 
 private:
@@ -521,11 +517,70 @@ KeyvaDB* KeyvaDB::New (int keyBytes, File keyPath, File valPath, bool filesAreTe
 
 class KeyvaDBTests : public UnitTest
 {
+public:
+    enum
+    {
+        maxPayloadBytes = 8 * 1024
+    };
+
+    // The payload is used as the value to store
+    struct Payload
+    {
+        Payload (int maxBytes)
+            : bufferSize (maxBytes)
+            , data (maxBytes)
+        {
+        }
+
+        // Create a pseudo-random payload
+        void generate (int64 seedValue) noexcept
+        {
+            Random r (seedValue);
+
+            bytes = 1 + r.nextInt (bufferSize);
+
+            bassert (bytes >= 1 && bytes <= bufferSize);
+
+            for (int i = 0; i < bytes; ++i)
+                data [i] = static_cast <char> (r.nextInt ());
+        }
+
+        bool operator== (Payload const& other) const noexcept
+        {
+            if (bytes == other.bytes)
+            {
+                return memcmp (data.getData (), other.data.getData (), bytes) == 0;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        int const bufferSize;
+        int bytes;
+        HeapBlock <char> data;
+    };
+
 public:
     KeyvaDBTests () : UnitTest ("KeyvaDB")
     {
     }
 
+    // Make sure the Payload object works first!
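+    // (Payload::generate is deterministic for a given seed, so two
+    //  payloads generated from the same seed must compare equal.)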
+ void testPayload () + { + beginTest ("Payload"); + Payload p1 (maxPayloadBytes); + Payload p2 (maxPayloadBytes); + for (int i = 0; i < 256; ++i) + { + p1.generate (i); + p2.generate (i); + expect (p1 == p2, "Should be equal"); + } + } + template void repeatableShuffle (int const numberOfItems, HeapBlock & items) { @@ -539,11 +594,30 @@ public: } } + // Retrieval callback stores the value in a Payload object for comparison + struct PayloadGetCallback : KeyvaDB::GetCallback + { + Payload payload; + + PayloadGetCallback () : payload (maxPayloadBytes) + { + } + + void* createStorageForValue (int valueBytes) + { + bassert (valueBytes <= maxPayloadBytes); + + payload.bytes = valueBytes; + + return payload.data.getData (); + } + }; + template void testSize (unsigned int const maxItems) { typedef UnsignedInteger KeyType; - + String s; s << "keyBytes=" << String (KeyBytes); beginTest (s); @@ -553,6 +627,8 @@ public: File const valPath = File::createTempFile ("").withFileExtension (".val"); ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, true)); + Payload payload (maxPayloadBytes); + { // Create an array of ascending integers. HeapBlock items (maxItems); @@ -565,51 +641,44 @@ public: // Write all the keys of integers. for (unsigned int i = 0; i < maxItems; ++i) { - unsigned int const num = items [i]; - KeyType const v = KeyType::createFromInteger (num); + unsigned int num = items [i]; + + KeyType const key = KeyType::createFromInteger (num); - // The value is the same as the key, for ease of comparison. - db->put (v.cbegin (), v.cbegin (), KeyBytes); + payload.generate (num); + + db->put (key.cbegin (), payload.data.getData (), payload.bytes); } } { - // This callback creates storage for the value. - struct MyGetCallback : KeyvaDB::GetCallback - { - KeyType v; - - void* createStorageForValue (int valueBytes) - { - bassert (valueBytes == KeyBytes); - - return v.begin (); - } - }; - // Go through all of our keys and try to retrieve them. // since this is done in ascending order, we should get // random seeks at this point. 
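+            //  (The keys were written in shuffled order above, so an
+            //   ascending read-back touches the key file out of write
+            //   order.)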
// + PayloadGetCallback cb; for (unsigned int i = 0; i < maxItems; ++i) { KeyType const v = KeyType::createFromInteger (i); - MyGetCallback cb; - bool const found = db->get (v.cbegin (), &cb); expect (found, "Should be found"); - expect (v == cb.v, "Should be equal"); + payload.generate (i); + + expect (payload == cb.payload, "Should be equal"); } } } void runTest () { - testSize <4> (512); - testSize <32> (4096); + testPayload (); + + //testSize <4> (512); + //testSize <32> (4096); + testSize <4> (4); } }; From 159aabd8d95248bba9e508cbcdb847a3e16cec8d Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 09:43:48 -0700 Subject: [PATCH 08/50] Run a named unit test --- .../beast_core/diagnostic/beast_UnitTest.cpp | 27 +++++++++++++++++++ .../beast_core/diagnostic/beast_UnitTest.h | 11 +++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp index 41903dc311..6cc4ebfae5 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp @@ -105,6 +105,33 @@ void UnitTests::resultsUpdated() { } +void UnitTests::runTest (String const& name) +{ + results.clear(); + resultsUpdated(); + + Array& tests = UnitTest::getAllTests (); + + for (int i = 0; i < tests.size(); ++i) + { + UnitTest& test = *tests [i]; + + if (test.getName () == name) + { + try + { + test.performTest (this); + } + catch (...) + { + addFail ("An unhandled exception was thrown!"); + } + + break; + } + } +} + void UnitTests::runTests (const Array& tests) { results.clear(); diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h index 237f1ba89e..6945bf7660 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h @@ -56,9 +56,10 @@ class UnitTests; } }; - // Explicit template instantiation is required to make the unit - // test get automatically added to the set of unit tests. - template class UnitTestType ; + // This makes the unit test available in the global list + // It doesn't have to be static. + // + static MyTest myTest; @endcode @@ -187,6 +188,10 @@ public: /** Destructor. */ virtual ~UnitTests(); + /** Run a particular test. + */ + void runTest (String const& name); + /** Runs a set of tests. The tests are performed in order, and the results are logged. 
To run all the From a4c63f4c5d1ac7ec6cd3c7b1a5f6cc3835265ed2 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 10:28:28 -0700 Subject: [PATCH 09/50] Run an individual unit test --- BeastConfig.h | 2 -- .../Builds/VisualStudio2012/BeastConfig.h | 2 -- .../containers/beast_AbstractFifo.cpp | 2 -- .../beast_core/diagnostic/beast_UnitTest.cpp | 8 ++++---- .../modules/beast_core/files/beast_File.cpp | 2 -- .../files/beast_RandomAccessFile.cpp | 6 ++++-- .../modules/beast_core/json/beast_JSON.cpp | 2 -- .../modules/beast_core/maths/beast_Random.cpp | 2 -- .../streams/beast_MemoryInputStream.cpp | 2 -- .../modules/beast_core/text/beast_String.cpp | 2 -- .../modules/beast_core/text/beast_TextDiff.cpp | 2 -- .../beast_core/threads/beast_ChildProcess.cpp | 2 -- .../beast_core/threads/beast_Thread.cpp | 2 -- .../zip/beast_GZIPCompressorOutputStream.cpp | 2 -- .../math/beast_UnsignedInteger.cpp | 4 +--- modules/ripple_app/node/ripple_KeyvaDB.cpp | 2 +- src/cpp/ripple/ripple_Main.cpp | 18 ++++++++++++++---- 17 files changed, 24 insertions(+), 38 deletions(-) diff --git a/BeastConfig.h b/BeastConfig.h index 47adbde46b..4c35072c81 100644 --- a/BeastConfig.h +++ b/BeastConfig.h @@ -120,6 +120,4 @@ //#define BEAST_BIND_USES_TR1 1 //#define BEAST_BIND_USES_BOOST 1 -//#define BEAST_UNIT_TESTS 1 - #endif diff --git a/Subtrees/beast/Builds/VisualStudio2012/BeastConfig.h b/Subtrees/beast/Builds/VisualStudio2012/BeastConfig.h index ebf0d9a7bd..0a19d0402d 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/BeastConfig.h +++ b/Subtrees/beast/Builds/VisualStudio2012/BeastConfig.h @@ -120,6 +120,4 @@ //#define BEAST_BIND_USES_TR1 1 //#define BEAST_BIND_USES_BOOST 1 -#define BEAST_UNIT_TESTS 1 - #endif diff --git a/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp b/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp index 0e3dd1236d..004a8b94e2 100644 --- a/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp +++ b/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp @@ -224,6 +224,4 @@ public: } }; -#if BEAST_UNIT_TESTS static AbstractFifoTests abstractFifoTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp index 6cc4ebfae5..056d5fb248 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp @@ -182,9 +182,9 @@ void UnitTests::beginNewTest (UnitTest* const test, const String& subCategory) r->passes = 0; r->failures = 0; - logMessage ("Test: " + r->unitTestName + "/" + subCategory + "..."); + logMessage ("Test '" + r->unitTestName + "': " + subCategory); - resultsUpdated(); + resultsUpdated (); } void UnitTests::endTest() @@ -241,8 +241,8 @@ void UnitTests::addFail (const String& failureMessage) r->failures++; - String message ("!!! 
Test "); - message << (r->failures + r->passes) << " failed"; + String message ("Failure, #"); + message << (r->failures + r->passes); if (failureMessage.isNotEmpty()) message << ": " << failureMessage; diff --git a/Subtrees/beast/modules/beast_core/files/beast_File.cpp b/Subtrees/beast/modules/beast_core/files/beast_File.cpp index 7ef709e324..f3a4f8713a 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_File.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_File.cpp @@ -1106,7 +1106,5 @@ public: } }; -#if BEAST_UNIT_TESTS static FileTests fileTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index e9588a7382..ee6df2b5a1 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -199,16 +199,18 @@ public: HeapBlock data; }; - void runTest () { + RandomAccessFile file; + + beginTest ("open"); + Result result = file.open (File::createTempFile ("tests"), RandomAccessFile::readWrite); expect (result.wasOk (), "Should be ok"); } private: - RandomAccessFile file; }; static RandomAccessFileTests randomAccessFileTests; diff --git a/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp b/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp index d712896a1f..518c5d45a8 100644 --- a/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp +++ b/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp @@ -639,6 +639,4 @@ public: } }; -#if BEAST_UNIT_TESTS static JSONTests jsonTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp index ce3199699c..90628f605e 100644 --- a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp +++ b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp @@ -165,6 +165,4 @@ public: } }; -#if BEAST_UNIT_TESTS static RandomTests randomTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp index 59c6078562..eef9e80fd0 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp @@ -148,6 +148,4 @@ public: } }; -#if BEAST_UNIT_TESTS static MemoryStreamTests memoryStreamTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/text/beast_String.cpp b/Subtrees/beast/modules/beast_core/text/beast_String.cpp index d8771bb31f..dff203b2eb 100644 --- a/Subtrees/beast/modules/beast_core/text/beast_String.cpp +++ b/Subtrees/beast/modules/beast_core/text/beast_String.cpp @@ -2402,6 +2402,4 @@ public: } }; -#if BEAST_UNIT_TESTS static StringTests stringTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp b/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp index 7a1f2a894c..8589683327 100644 --- a/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp +++ b/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp @@ -229,6 +229,4 @@ public: } }; -#if BEAST_UNIT_TESTS static DiffTests diffTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp b/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp index 4e809a7197..da7a9accd9 100644 --- a/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp +++ b/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp @@ 
-82,6 +82,4 @@ public: } }; -#if BEAST_UNIT_TESTS static ChildProcessTests childProcessTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp b/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp index 999f35d444..1ffb1b9c6e 100644 --- a/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp +++ b/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp @@ -350,6 +350,4 @@ public: }; }; -#if BEAST_UNIT_TESTS static AtomicTests atomicTests; -#endif diff --git a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp index d8febfa5ad..da68fe9941 100644 --- a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp @@ -205,6 +205,4 @@ public: } }; -#if BEAST_UNIT_TESTS static GZIPTests gzipTests; -#endif diff --git a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp index 916e6bad0d..a0532d38fa 100644 --- a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp +++ b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp @@ -29,7 +29,7 @@ public: { String s; - s << "UnsignedInteger <" << String(Bytes) << ">"; + s << "Bytes=" << String(Bytes); beginTest (s); @@ -82,6 +82,4 @@ public: private: }; -#if BEAST_UNIT_TESTS static UnsignedIntegerTests unsignedIntegerTests; -#endif diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 9b8672b147..c4cefb89bb 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -619,7 +619,7 @@ public: typedef UnsignedInteger KeyType; String s; - s << "keyBytes=" << String (KeyBytes); + s << "keyBytes=" << String (KeyBytes) << ", maxItems=" << String (maxItems); beginTest (s); // Set up the key and value files and open the db. diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index 2384ca4126..658d2147bc 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -141,14 +141,21 @@ public: /** Run the Beast unit tests. 
*/ -static void runBeastUnitTests () +static void runBeastUnitTests (std::string const& individualTest = "") { RippleUnitTests tr; tr.setAssertOnFailure (false); tr.setPassesAreLogged (false); - tr.runAllTests (); + if (individualTest.empty ()) + { + tr.runAllTests (); + } + else + { + tr.runTest (individualTest.c_str ()); + } // Report for (int i = 0; i < tr.getNumResults (); ++i) @@ -232,7 +239,7 @@ int rippleMain (int argc, char** argv) ("standalone,a", "Run with no peers.") ("testnet,t", "Run in test net mode.") ("unittest,u", "Perform unit tests.") - ("unittest2", "Perform new unit tests.") + ("unittest2", po::value <std::string> ()->implicit_value (""), "Perform new unit tests.") ("parameters", po::value< vector <string> > (), "Specify comma separated parameters.") ("quiet,q", "Reduce diagnostics.") ("verbose,v", "Verbose logging.") @@ -332,7 +339,10 @@ int rippleMain (int argc, char** argv) if (vm.count ("unittest2")) { - runBeastUnitTests (); + std::string const test = vm ["unittest2"].as <std::string> (); + + runBeastUnitTests (test); + return 0; } From 62f5324cfac1b8ab6d4174a90aa9a718ac6c32be Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 11:56:45 -0700 Subject: [PATCH 10/50] RandomAccessFile unit tests --- .../Builds/VisualStudio2012/beast.vcxproj | 7 + .../VisualStudio2012/beast.vcxproj.filters | 6 + .../beast/modules/beast_core/beast_core.cpp | 1 + .../beast/modules/beast_core/beast_core.h | 1 + .../diagnostic/beast_UnitTestUtilities.cpp | 56 ++++++ .../diagnostic/beast_UnitTestUtilities.h | 100 +++++++++++ .../files/beast_RandomAccessFile.cpp | 166 ++++++++++++++---- 7 files changed, 298 insertions(+), 39 deletions(-) create mode 100644 Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp create mode 100644 Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.h diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj index 6cc039c10c..18f2243de8 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj @@ -134,6 +134,7 @@ + @@ -408,6 +409,12 @@ true true + + true + true + true + true + true true diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters index c70a09c151..2e5383b85e 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters @@ -626,6 +626,9 @@ beast_core\files + + beast_core\diagnostic + @@ -973,6 +976,9 @@ beast_core\files + + beast_core\diagnostic + diff --git a/Subtrees/beast/modules/beast_core/beast_core.cpp b/Subtrees/beast/modules/beast_core/beast_core.cpp index cb3662d5d8..f2387873fe 100644 --- a/Subtrees/beast/modules/beast_core/beast_core.cpp +++ b/Subtrees/beast/modules/beast_core/beast_core.cpp @@ -149,6 +149,7 @@ namespace beast #include "diagnostic/beast_FPUFlags.cpp" #include "diagnostic/beast_LeakChecked.cpp" #include "diagnostic/beast_UnitTest.cpp" +#include "diagnostic/beast_UnitTestUtilities.cpp" #include "files/beast_DirectoryIterator.cpp" #include "files/beast_File.cpp" diff --git a/Subtrees/beast/modules/beast_core/beast_core.h b/Subtrees/beast/modules/beast_core/beast_core.h index 8abaf273ea..cbc27191a0 100644 --- a/Subtrees/beast/modules/beast_core/beast_core.h +++ b/Subtrees/beast/modules/beast_core/beast_core.h @@ -226,6 +226,7 @@ namespace beast #include "diagnostic/beast_Error.h" #include "diagnostic/beast_FPUFlags.h" #include
"diagnostic/beast_UnitTest.h" +#include "diagnostic/beast_UnitTestUtilities.h" #include "diagnostic/beast_Throw.h" #include "containers/beast_AbstractFifo.h" #include "containers/beast_Array.h" diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp new file mode 100644 index 0000000000..5d94a0bba0 --- /dev/null +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp @@ -0,0 +1,56 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +class UnitTestUtilitiesTests : public UnitTest +{ +public: + UnitTestUtilitiesTests () : UnitTest ("UnitTestUtilities") + { + } + + void testPayload () + { + using namespace UnitTestUtilities; + + int const maxBufferSize = 4000; + int const minimumBytes = 1; + int const numberOfItems = 100; + int64 const seedValue = 50; + + beginTest ("Payload"); + + Payload p1 (maxBufferSize); + Payload p2 (maxBufferSize); + + for (int i = 0; i < numberOfItems; ++i) + { + p1.repeatableRandomFill (minimumBytes, maxBufferSize, seedValue); + p2.repeatableRandomFill (minimumBytes, maxBufferSize, seedValue); + + expect (p1 == p2, "Should be equal"); + } + } + + void runTest () + { + testPayload (); + } +}; + +static UnitTestUtilitiesTests unitTestUtilitiesTests; diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.h b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.h new file mode 100644 index 0000000000..b2fa7792c0 --- /dev/null +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.h @@ -0,0 +1,100 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_UNITTESTUTILITIES_H_INCLUDED +#define BEAST_UNITTESTUTILITIES_H_INCLUDED + +#include "../maths/beast_Random.h" + +namespace UnitTestUtilities +{ + +/** Fairly shuffle an array pseudo-randomly. +*/ +template <class T> +void repeatableShuffle (int const numberOfItems, T& arrayOfItems, int64 seedValue) +{ + Random r (seedValue); + + for (int i = numberOfItems - 1; i > 0; --i) + { + int const choice = r.nextInt (i + 1); + + std::swap (arrayOfItems [i], arrayOfItems [choice]); + } +} + +/** A block of memory used for test data. +*/ +struct Payload +{ + /** Construct a payload with a buffer of the specified maximum size. + + @param maxBufferSize The size of the buffer, in bytes. + */ + explicit Payload (int maxBufferSize) + : bufferSize (maxBufferSize) + , data (maxBufferSize) + { + } + + /** Generate a random block of data within a certain size range. + + @param minimumBytes The smallest number of bytes in the resulting payload. + @param maximumBytes The largest number of bytes in the resulting payload. + @param seedValue The value to seed the random number generator with. + */ + void repeatableRandomFill (int minimumBytes, int maximumBytes, int64 seedValue) noexcept + { + bassert (minimumBytes >= 0 && maximumBytes <= bufferSize); + + Random r (seedValue); + + bytes = minimumBytes + r.nextInt (1 + maximumBytes - minimumBytes); + + bassert (bytes >= minimumBytes && bytes <= bufferSize); + + for (int i = 0; i < bytes; ++i) + data [i] = static_cast <char> (r.nextInt ()); + } + + /** Compare two payloads for equality. + */ + bool operator== (Payload const& other) const noexcept + { + if (bytes == other.bytes) + { + return memcmp (data.getData (), other.data.getData (), bytes) == 0; + } + else + { + return false; + } + } + +public: + int const bufferSize; + + int bytes; + HeapBlock <char> data; +}; + +} + +#endif diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index ee6df2b5a1..200b40a13f 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -157,60 +157,148 @@ Result RandomAccessFile::flushBuffer () class RandomAccessFileTests : public UnitTest { public: - RandomAccessFileTests () : UnitTest ("RandomAccessFile") + RandomAccessFileTests () + : UnitTest ("RandomAccessFile") + , numRecords (1000) + , seedValue (50) { } - struct Payload + /* For this test we will create a file which consists of a fixed + number of variable length records. Each record is numbered sequentially + starting at 1. To calculate the position of each record we first build + a table of size/offset pairs using a pseudorandom number generator.
+ */ + struct Record { - Payload (int maxBytes) - : bufferSize (maxBytes) - , data (maxBytes) - { - } - - // Create a pseudo-random payload - void generate (int64 seedValue) noexcept - { - Random r (seedValue); - - bytes = 1 + r.nextInt (bufferSize); - - bassert (bytes >= 1 && bytes <= bufferSize); - - for (int i = 0; i < bytes; ++i) - data [i] = static_cast (r.nextInt ()); - } - - bool operator== (Payload const& other) const noexcept - { - if (bytes == other.bytes) - { - return memcmp (data.getData (), other.data.getData (), bytes) == 0; - } - else - { - return false; - } - } - - int const bufferSize; + int index; int bytes; - HeapBlock data; + int offset; }; - void runTest () - { - RandomAccessFile file; + typedef HeapBlock Records; - beginTest ("open"); + // Produce the pseudo-random set of records. + static void createRecords (HeapBlock & records, + int numRecords, + int maxBytes, + int64 seedValue) + { + using namespace UnitTestUtilities; + + Random r (seedValue); + + records.malloc (numRecords); + + int offset = 0; + + for (int i = 0; i < numRecords; ++i) + { + int const bytes = r.nextInt (maxBytes) + 1; + + records [i].index = i; + records [i].bytes = bytes; + records [i].offset = offset; + + offset += bytes; + } + + repeatableShuffle (numRecords, records, seedValue); + } + + void writeRecords (RandomAccessFile& file, + int numRecords, + HeapBlock const& records, + int64 seedValue) + { + using namespace UnitTestUtilities; + + for (int i = 0; i < numRecords; ++i) + { + Payload p (records [i].bytes); + + p.repeatableRandomFill (records [i].bytes, + records [i].bytes, + records [i].index + seedValue); + + file.setPosition (records [i].offset); + + Result result = file.write (p.data.getData (), p.bytes); + + expect (result.wasOk (), "Should be ok"); + } + } + + void readRecords (RandomAccessFile& file, + int numRecords, + HeapBlock const & records, + int64 seedValue) + { + using namespace UnitTestUtilities; + + for (int i = 0; i < numRecords; ++i) + { + int const bytes = records [i].bytes; + + Payload p1 (bytes); + Payload p2 (bytes); + + p1.repeatableRandomFill (bytes, bytes, records [i].index + seedValue); + + file.setPosition (records [i].offset); + + Result result = file.read (p2.data.getData (), bytes); + + expect (result.wasOk (), "Should be ok"); + + if (result.wasOk ()) + { + p2.bytes = bytes; + + expect (p1 == p2, "Should be equal"); + } + } + } + + void testFile (int const bufferSize) + { + using namespace UnitTestUtilities; + + String s; + s << "bufferSize = " << String (bufferSize); + beginTest (s); + + int const maxPayload = bmax (1000, bufferSize * 2); + + RandomAccessFile file (bufferSize); Result result = file.open (File::createTempFile ("tests"), RandomAccessFile::readWrite); expect (result.wasOk (), "Should be ok"); + + HeapBlock records (numRecords); + + createRecords (records, numRecords, maxPayload, seedValue); + + writeRecords (file, numRecords, records, seedValue); + + readRecords (file, numRecords, records, seedValue); + + repeatableShuffle (numRecords, records, seedValue); + + readRecords (file, numRecords, records, seedValue); + } + + void runTest () + { + testFile (0); + testFile (1000); + testFile (10000); } private: + int const numRecords; + int64 const seedValue; }; static RandomAccessFileTests randomAccessFileTests; From d94db9a8938783e75a7939917fd4c27bc24c37b8 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 11:56:53 -0700 Subject: [PATCH 11/50] Use unit test utilities --- TODO.txt | 1 - modules/ripple_app/node/ripple_KeyvaDB.cpp | 97 
++++------------------ 2 files changed, 16 insertions(+), 82 deletions(-) diff --git a/TODO.txt b/TODO.txt index 5524de7a0a..5ea4f1d09f 100644 --- a/TODO.txt +++ b/TODO.txt @@ -5,7 +5,6 @@ RIPPLE TODO Vinnie's Short List (Changes day to day) - Convert some Ripple boost unit tests to Beast. - Eliminate new technical in NodeStore::Backend -- Improve NodeObject to construct with just a size. - Work on KeyvaDB - Finish unit tests and code for Validators diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index c4cefb89bb..814b27506b 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -32,8 +32,8 @@ private: struct State { State () - : keyFile (16384) // buffer size - , valFile (16384) // buffer size + : keyFile (0)//16384) // buffer size + , valFile (0)//16384) // buffer size { } @@ -523,81 +523,14 @@ public: maxPayloadBytes = 8 * 1024 }; - // The payload is used as the value to store - struct Payload - { - Payload (int maxBytes) - : bufferSize (maxBytes) - , data (maxBytes) - { - } - - // Create a pseudo-random payload - void generate (int64 seedValue) noexcept - { - Random r (seedValue); - - bytes = 1 + r.nextInt (bufferSize); - - bassert (bytes >= 1 && bytes <= bufferSize); - - for (int i = 0; i < bytes; ++i) - data [i] = static_cast (r.nextInt ()); - } - - bool operator== (Payload const& other) const noexcept - { - if (bytes == other.bytes) - { - return memcmp (data.getData (), other.data.getData (), bytes) == 0; - } - else - { - return false; - } - } - - int const bufferSize; - int bytes; - HeapBlock data; - }; - -public: KeyvaDBTests () : UnitTest ("KevyaDB") { } - // Make sure the Payload object works first! - void testPayload () - { - beginTest ("Payload"); - Payload p1 (maxPayloadBytes); - Payload p2 (maxPayloadBytes); - for (int i = 0; i < 256; ++i) - { - p1.generate (i); - p2.generate (i); - expect (p1 == p2, "Should be equal"); - } - } - - template - void repeatableShuffle (int const numberOfItems, HeapBlock & items) - { - Random r (69); - - for (int i = numberOfItems - 1; i > 0; --i) - { - int const choice = r.nextInt (i + 1); - - std::swap (items [i], items [choice]); - } - } - // Retrieval callback stores the value in a Payload object for comparison struct PayloadGetCallback : KeyvaDB::GetCallback { - Payload payload; + UnitTestUtilities::Payload payload; PayloadGetCallback () : payload (maxPayloadBytes) { @@ -608,16 +541,20 @@ public: bassert (valueBytes <= maxPayloadBytes); payload.bytes = valueBytes; - + return payload.data.getData (); } }; template - void testSize (unsigned int const maxItems) + void testKeySize (unsigned int const maxItems) { + using namespace UnitTestUtilities; + typedef UnsignedInteger KeyType; - + + int64 const seedValue = 50; + String s; s << "keyBytes=" << String (KeyBytes) << ", maxItems=" << String (maxItems); beginTest (s); @@ -636,7 +573,7 @@ public: items [i] = i; // Now shuffle it deterministically. - repeatableShuffle (maxItems, items); + repeatableShuffle (maxItems, items, seedValue); // Write all the keys of integers. 
for (unsigned int i = 0; i < maxItems; ++i) @@ -645,7 +582,7 @@ public: KeyType const key = KeyType::createFromInteger (num); - payload.generate (num); + payload.repeatableRandomFill (1, maxPayloadBytes, i + seedValue); db->put (key.cbegin (), payload.data.getData (), payload.bytes); } @@ -665,7 +602,7 @@ public: expect (found, "Should be found"); - payload.generate (i); + payload.repeatableRandomFill (1, maxPayloadBytes, i + seedValue); expect (payload == cb.payload, "Should be equal"); } @@ -674,11 +611,9 @@ public: void runTest () { - testPayload (); - - //testSize <4> (512); - //testSize <32> (4096); - testSize <4> (4); + //testKeySize <4> (512); + //testKeySize <32> (4096); + testKeySize <8> (64); } }; From 5e35fe8db425b8cd4097e17933d503cfb394378b Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 12:17:11 -0700 Subject: [PATCH 12/50] Fix RandomAccessFile read --- Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp index cb5933a69b..cb0a2cd63b 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp +++ b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp @@ -414,6 +414,8 @@ Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount if (! ReadFile ((HANDLE) fileHandle, buffer, (DWORD) numBytes, &actualNum, 0)) result = WindowsFileHelpers::getResultForLastError(); + currentPosition += actualNum; + if (pActualAmount != nullptr) *pActualAmount = actualNum; From a84c3debf591aed46e6009a6a3eb6a469453e96d Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 12:17:17 -0700 Subject: [PATCH 13/50] Tidy up KeyvaDB unit test --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 814b27506b..75186920fe 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -523,7 +523,7 @@ public: maxPayloadBytes = 8 * 1024 }; - KeyvaDBTests () : UnitTest ("KevyaDB") + KeyvaDBTests () : UnitTest ("KeyvaDB") { } @@ -565,6 +565,7 @@ public: ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, true)); Payload payload (maxPayloadBytes); + Payload check (maxPayloadBytes); { // Create an array of ascending integers. @@ -578,13 +579,18 @@ public: // Write all the keys of integers. for (unsigned int i = 0; i < maxItems; ++i) { - unsigned int num = items [i]; + unsigned int keyIndex = items [i]; - KeyType const key = KeyType::createFromInteger (num); + KeyType const key = KeyType::createFromInteger (keyIndex); - payload.repeatableRandomFill (1, maxPayloadBytes, i + seedValue); + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); db->put (key.cbegin (), payload.data.getData (), payload.bytes); + + { + // VFALCO TODO Check what we just wrote? + //db->get (key.cbegin (), check.data.getData (), payload.bytes); + } } } @@ -594,15 +600,15 @@ public: // random seeks at this point. 
// PayloadGetCallback cb; - for (unsigned int i = 0; i < maxItems; ++i) + for (unsigned int keyIndex = 0; keyIndex < maxItems; ++keyIndex) { - KeyType const v = KeyType::createFromInteger (i); + KeyType const v = KeyType::createFromInteger (keyIndex); bool const found = db->get (v.cbegin (), &cb); expect (found, "Should be found"); - payload.repeatableRandomFill (1, maxPayloadBytes, i + seedValue); + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); expect (payload == cb.payload, "Should be equal"); } @@ -611,9 +617,8 @@ public: void runTest () { - //testKeySize <4> (512); - //testKeySize <32> (4096); - testKeySize <8> (64); + testKeySize <4> (512); + testKeySize <32> (4096); } }; From 3a85ad04d37ee811cf0135a5ffbbc08fb197c10c Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 12:47:40 -0700 Subject: [PATCH 14/50] Fix NodeObject retrieve in KeyvaDB backend --- .../ripple_app/node/ripple_KeyvaDBBackendFactory.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp index 865bafaada..687d06e387 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp @@ -33,19 +33,17 @@ public: return m_path.toStdString (); } - void writeObject (NodeObject const& object) + void writeObject (NodeObject::ref object) { - m_db->put ( - object.getHash ().begin (), - &object.getData () [0], - object.getData ().size ()); + Blob blob (toBlob (object)); + m_db->put (object->getHash ().begin (), &blob [0], blob.size ()); } bool bulkStore (std::vector const& objs) { for (size_t i = 0; i < objs.size (); ++i) { - writeObject (*objs [i]); + writeObject (objs [i]); } return true; From 748de3b2e1d8572a46d6a6049862979f8fe27654 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 12:50:55 -0700 Subject: [PATCH 15/50] Add POSIX RandomAccessFile native routines --- .../native/beast_posix_SharedCode.h | 78 +++++++++---------- .../beast_core/native/beast_win32_Files.cpp | 2 +- .../beast_crypto/math/beast_UnsignedInteger.h | 2 +- 3 files changed, 37 insertions(+), 45 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h index 7de9d0ebbd..880981f6c8 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h +++ b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h @@ -505,21 +505,10 @@ Result FileOutputStream::truncate() } //============================================================================== -RandomAccessFile::RandomAccessFile (int bufferSizeToUse) noexcept - : fileHandle (nullptr) - , currentPosition (0) - , writeBuffer (bufferSizeToUse) -{ -} -RandomAccessFile::~RandomAccessFile () +Result RandomAccessFile::nativeOpen (File const& path, Mode mode) { - close (); -} - -Result RandomAccessFile::open (File const& path, Mode mode) -{ - close (); + bassert (! 
isOpen ()); Result result (Result::ok ()); @@ -533,7 +522,7 @@ Result RandomAccessFile::open (File const& path, Mode mode) break; default: - case readWRite: + case readWrite: oflag = O_RDWR; break; }; @@ -562,7 +551,7 @@ Result RandomAccessFile::open (File const& path, Mode mode) } else if (mode == readWrite) { - const int f = open (file.getFullPathName().toUTF8(), O_RDWR + O_CREAT, 00644); + const int f = ::open (file.getFullPathName().toUTF8(), O_RDWR + O_CREAT, 00644); if (f != -1) { @@ -583,17 +572,17 @@ Result RandomAccessFile::open (File const& path, Mode mode) return result; } -void RandomAccessFile::close () +void RandomAccessFile::nativeClose () { - if (fileHandle != nullptr) - { - file = File::nonexistent (); - ::close (getFD (fileHandle)); - fileHandle = nullptr; - } + bassert (isOpen ()); + + file = File::nonexistent (); + ::close (getFD (fileHandle)); + fileHandle = nullptr; + currentPosition = 0; } -Result RandomAccessFile::setPosition (Offset newPosition) +Result RandomAccessFile::nativeSetPosition (FileOffset newPosition) { bassert (isOpen ()); @@ -607,7 +596,7 @@ Result RandomAccessFile::read (void* buffer, ByteCount numBytes, ByteCount* pActualAmount ) +Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount) { bassert (isOpen ()); @@ -624,20 +613,20 @@ if (pActualAmount != nullptr) *pActualAmount = amount; - return (size_t) result; + return result; } -Result RandomAccessFile::write (void const* data, ByteCount numBytes, size_t* pActualAmount) +Result RandomAccessFile::nativeWrite (void const* data, ByteCount numBytes, size_t* pActualAmount) { bassert (isOpen ()); Result result (Result::ok ()); - ssize_t const actual = ::write (getFD (fileHandle), data, numBytes); + ssize_t actual = ::write (getFD (fileHandle), data, numBytes); if (actual == -1) { - status = getResultForErrno(); + result = getResultForErrno(); actual = 0; } @@ -647,31 +636,34 @@ return result; } -Result RandomAccessFile::truncate () +Result RandomAccessFile::nativeTruncate () { + bassert (isOpen ()); + flush(); return getResultForReturnValue (ftruncate (getFD (fileHandle), (off_t) currentPosition)); } -void RandomAccessFile::flush () +Result RandomAccessFile::nativeFlush () { bassert (isOpen ()); - if (fileHandle != nullptr) - { - if (fsync (getFD (fileHandle)) == -1) - status = getResultForErrno(); + Result result (Result::ok ()); - #if BEAST_ANDROID - // This stuff tells the OS to asynchronously update the metadata - // that the OS has cached aboud the file - this metadata is used - // when the device is acting as a USB drive, and unless it's explicitly - // refreshed, it'll get out of step with the real file. - const LocalRef t (javaString (file.getFullPathName())); - android.activity.callVoidMethod (BeastAppActivity.scanFile, t.get()); - #endif - } + if (fsync (getFD (fileHandle)) == -1) + result = getResultForErrno(); + + #if BEAST_ANDROID + // This stuff tells the OS to asynchronously update the metadata + // that the OS has cached about the file - this metadata is used + // when the device is acting as a USB drive, and unless it's explicitly + // refreshed, it'll get out of step with the real file.
+ const LocalRef t (javaString (file.getFullPathName())); + android.activity.callVoidMethod (BeastAppActivity.scanFile, t.get()); + #endif + + return result; } //============================================================================== diff --git a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp index cb0a2cd63b..302ba9a960 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp +++ b/Subtrees/beast/modules/beast_core/native/beast_win32_Files.cpp @@ -403,7 +403,7 @@ Result RandomAccessFile::nativeSetPosition (FileOffset newPosition) return result; } -Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount ) +Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount) { bassert (isOpen ()); diff --git a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h index a7460ddb0c..b6676ea637 100644 --- a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h +++ b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h @@ -28,7 +28,7 @@ @tparam Bytes The number of bytes of storage. */ -template +template class UnsignedInteger : public SafeBool > { public: From b7dc115813825e87364d6b0fe92193f5a6ee5dde Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 13:31:01 -0700 Subject: [PATCH 16/50] Fix POSIX RandomAccessFile --- .../files/beast_RandomAccessFile.cpp | 21 +++++++++++-------- .../native/beast_posix_SharedCode.h | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index 200b40a13f..33febec9c4 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -216,11 +216,11 @@ public: for (int i = 0; i < numRecords; ++i) { Payload p (records [i].bytes); - + p.repeatableRandomFill (records [i].bytes, records [i].bytes, records [i].index + seedValue); - + file.setPosition (records [i].offset); Result result = file.write (p.data.getData (), p.bytes); @@ -228,7 +228,7 @@ public: expect (result.wasOk (), "Should be ok"); } } - + void readRecords (RandomAccessFile& file, int numRecords, HeapBlock const & records, @@ -276,17 +276,20 @@ public: expect (result.wasOk (), "Should be ok"); - HeapBlock records (numRecords); + if (result.wasOk ()) + { + HeapBlock records (numRecords); - createRecords (records, numRecords, maxPayload, seedValue); + createRecords (records, numRecords, maxPayload, seedValue); - writeRecords (file, numRecords, records, seedValue); + writeRecords (file, numRecords, records, seedValue); - readRecords (file, numRecords, records, seedValue); + readRecords (file, numRecords, records, seedValue); - repeatableShuffle (numRecords, records, seedValue); + repeatableShuffle (numRecords, records, seedValue); - readRecords (file, numRecords, records, seedValue); + readRecords (file, numRecords, records, seedValue); + } } void runTest () diff --git a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h index 880981f6c8..622d48a141 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h +++ b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h @@ 
-551,7 +551,7 @@ Result RandomAccessFile::nativeOpen (File const& path, Mode mode) } else if (mode == readWrite) { - const int f = ::open (file.getFullPathName().toUTF8(), O_RDWR + O_CREAT, 00644); + const int f = ::open (path.getFullPathName().toUTF8(), O_RDWR + O_CREAT, 00644); if (f != -1) { From 586ac0be6f3a8863d69af944d159c3c43caa87e9 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 13:47:16 -0700 Subject: [PATCH 17/50] Fix compare() --- .../modules/beast_crypto/math/beast_UnsignedInteger.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h index b6676ea637..fc0fa167cb 100644 --- a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h +++ b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.h @@ -234,28 +234,28 @@ public: */ bool operator< (UnsignedInteger const& other) const noexcept { - return compare (other) == -1; + return compare (other) < 0; } /** Ordered comparison. */ bool operator<= (UnsignedInteger const& other) const noexcept { - return compare (other) != 1; + return compare (other) <= 0; } /** Ordered comparison. */ bool operator> (UnsignedInteger const& other) const noexcept { - return compare (other) == 1; + return compare (other) > 0; } /** Ordered comparison. */ bool operator>= (UnsignedInteger const& other) const noexcept { - return compare (other) != -1; + return compare (other) >= 0; } /** Perform bitwise logical-not. From 133508cbaa44408f31f8915fc442b8b5ef577764 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 14:14:53 -0700 Subject: [PATCH 18/50] Fix check against findResult.compare --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 75186920fe..864b649640 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -398,7 +398,7 @@ public: // Binary tree insertion. 
// Link the last key record to the new key { - if (findResult.compare == -1) + if (findResult.compare < 0) { findResult.keyRecord.leftIndex = state->newKeyIndex; } From 37cc3569454264360028256bfccaa460b3539f51 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 14:21:19 -0700 Subject: [PATCH 19/50] Use write buffer in KeyvaDB --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 864b649640..7f48c77ec5 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -32,8 +32,8 @@ private: struct State { State () - : keyFile (0)//16384) // buffer size - , valFile (0)//16384) // buffer size + : keyFile (16384) // buffer size + , valFile (16384) // buffer size { } From 25f1a729cebaa1df54cb7ba04d0794ca2b6dece5 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 18:23:13 -0700 Subject: [PATCH 20/50] Fix warning --- src/cpp/ripple/ripple_Pathfinder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/ripple/ripple_Pathfinder.cpp b/src/cpp/ripple/ripple_Pathfinder.cpp index a2fa47d717..dc798110ec 100644 --- a/src/cpp/ripple/ripple_Pathfinder.cpp +++ b/src/cpp/ripple/ripple_Pathfinder.cpp @@ -818,7 +818,7 @@ int Pathfinder::getPathsOut (const uint160& currencyID, const uint160& accountID return it->second; int aFlags = mLedger->getSLEi(Ledger::getAccountRootIndex(accountID))->getFieldU32(sfFlags); - bool bAuthRequired = aFlags & lsfRequireAuth; + bool const bAuthRequired = (aFlags & lsfRequireAuth) != 0; int count = 0; AccountItems& rippleLines (mRLCache->getRippleLines (accountID)); From b2e764bf2148815d46a62a0c9d4faf986f5a6540 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 21:11:37 -0700 Subject: [PATCH 21/50] Add Random::nextBlob --- .../modules/beast_core/maths/beast_Random.cpp | 17 +++++++++++++++++ .../modules/beast_core/maths/beast_Random.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp index 90628f605e..ecc5921f69 100644 --- a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp +++ b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp @@ -98,6 +98,23 @@ double Random::nextDouble() noexcept return static_cast (nextInt()) / (double) 0xffffffff; } +void Random::nextBlob (void* buffer, size_t bytes) +{ + int const remainder = bytes % sizeof (int64); + + { + int64* dest = static_cast (buffer); + for (int i = bytes / sizeof (int64); i > 0; --i) + *dest++ = nextInt64 (); + buffer = dest; + } + + { + int64 const val = nextInt64 (); + memcpy (buffer, &val, remainder); + } +} + BigInteger Random::nextLargeNumber (const BigInteger& maximumValue) { BigInteger n; diff --git a/Subtrees/beast/modules/beast_core/maths/beast_Random.h b/Subtrees/beast/modules/beast_core/maths/beast_Random.h index f35c0eed92..1e68b1959c 100644 --- a/Subtrees/beast/modules/beast_core/maths/beast_Random.h +++ b/Subtrees/beast/modules/beast_core/maths/beast_Random.h @@ -89,6 +89,10 @@ public: */ bool nextBool() noexcept; + /** Fills a piece of memory with random data. + */ + void nextBlob (void* buffer, size_t bytes); + /** Returns a BigInteger containing a random number. @returns a random value in the range 0 to (maximumValue - 1). 
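The nextBlob routine added in the patch above fills a buffer eight bytes at a time from nextInt64 and finishes any sub-eight-byte tail with a memcpy, so two generators constructed from the same seed yield identical blobs. A minimal usage sketch under that assumption; the function name exampleNextBlob, the seed, and the buffer size below are illustrative and not part of the patches:

    // Fill two buffers from identically seeded generators and check
    // that the resulting byte streams match, including the tail bytes.
    void exampleNextBlob ()
    {
        size_t const bytes = 20; // deliberately not a multiple of sizeof (int64)

        HeapBlock <char> a (bytes);
        HeapBlock <char> b (bytes);

        Random r1 (42);
        Random r2 (42);

        r1.nextBlob (a.getData (), bytes);
        r2.nextBlob (b.getData (), bytes);

        bassert (memcmp (a.getData (), b.getData (), bytes) == 0);
    }
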
From df5512cf456074957ee44ed6d1194e2234022318 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Fri, 19 Jul 2013 10:35:17 -0700 Subject: [PATCH 22/50] Hack to fix Random --- Subtrees/beast/modules/beast_core/maths/beast_Random.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp index ecc5921f69..4b8ac22b51 100644 --- a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp +++ b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp @@ -24,6 +24,7 @@ Random::Random (const int64 seedValue) noexcept : seed (seedValue) { + nextInt (); // fixes a bug where the first int is always 0 } Random::Random() @@ -39,6 +40,8 @@ Random::~Random() noexcept void Random::setSeed (const int64 newSeed) noexcept { seed = newSeed; + + nextInt (); // fixes a bug where the first int is always 0 } void Random::combineSeed (const int64 seedValue) noexcept @@ -56,6 +59,8 @@ void Random::setSeedRandomly() combineSeed (Time::getHighResolutionTicksPerSecond()); combineSeed (Time::currentTimeMillis()); globalSeed ^= seed; + + nextInt (); // fixes a bug where the first int is always 0 } Random& Random::getSystemRandom() noexcept From 5caaea60c9261545e5e17dbaba0c0c96998f2ce6 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 18 Jul 2013 06:01:10 -0700 Subject: [PATCH 23/50] Add RecycledObjectPool --- .../Builds/VisualStudio2012/beast.vcxproj | 1 + .../VisualStudio2012/beast.vcxproj.filters | 3 + Subtrees/beast/TODO.txt | 2 + .../beast/modules/beast_core/beast_core.h | 1 + .../memory/beast_RecycledObjectPool.h | 126 ++++++++++++++++++ 5 files changed, 133 insertions(+) create mode 100644 Subtrees/beast/modules/beast_core/memory/beast_RecycledObjectPool.h diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj index 18f2243de8..b53c3918ec 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj @@ -164,6 +164,7 @@ + diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters index 2e5383b85e..2692e37621 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters @@ -629,6 +629,9 @@ beast_core\diagnostic + + beast_core\memory + diff --git a/Subtrees/beast/TODO.txt b/Subtrees/beast/TODO.txt index 1402c9af32..ae7183418e 100644 --- a/Subtrees/beast/TODO.txt +++ b/Subtrees/beast/TODO.txt @@ -2,6 +2,8 @@ BEAST TODO -------------------------------------------------------------------------------- +- Macro for acquiring a ScopedLock that records file and line. 
+ - Rename HeapBlock routines to not conflict with _CRTDBG_MAP_ALLOC macros - Design a WeakPtr / SharedPtr / SharedObject intrusive system diff --git a/Subtrees/beast/modules/beast_core/beast_core.h b/Subtrees/beast/modules/beast_core/beast_core.h index cbc27191a0..c19c149f27 100644 --- a/Subtrees/beast/modules/beast_core/beast_core.h +++ b/Subtrees/beast/modules/beast_core/beast_core.h @@ -276,6 +276,7 @@ namespace beast #include "memory/beast_WeakReference.h" #include "memory/beast_MemoryAlignment.h" #include "memory/beast_CacheLine.h" +#include "memory/beast_RecycledObjectPool.h" #include "misc/beast_Result.h" #include "misc/beast_Uuid.h" #include "misc/beast_WindowsRegistry.h" diff --git a/Subtrees/beast/modules/beast_core/memory/beast_RecycledObjectPool.h b/Subtrees/beast/modules/beast_core/memory/beast_RecycledObjectPool.h new file mode 100644 index 0000000000..6981427bf5 --- /dev/null +++ b/Subtrees/beast/modules/beast_core/memory/beast_RecycledObjectPool.h @@ -0,0 +1,126 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_RECYCLEDOBJECTPOOL_H_INCLUDED +#define BEAST_RECYCLEDOBJECTPOOL_H_INCLUDED + +/** A pool of objects which may be recycled. + + This is a thread safe pool of objects that get re-used. It is + primarily designed to eliminate the need for many memory allocations + and frees when temporary buffers are needed for operations. + + To use it, first declare a structure containing the information + that you want to recycle. 
Then when you want to use a recycled object + put a ScopedItem on your stack: + + @code + + struct StdString + { + std::string data; + }; + + RecycledObjectPool <StdString> pool; + + void foo () + { + RecycledObjectPool <StdString>::ScopedItem item (pool); + + item.getObject ().data = "text"; + } + + @endcode +*/ +template <class Object> +class RecycledObjectPool +{ +public: + struct Item : Object, LockFreeStack <Item>::Node, LeakChecked <Item> + { + }; + + class ScopedItem + { + public: + explicit ScopedItem (RecycledObjectPool <Object>& pool) + : m_pool (pool) + , m_item (pool.get ()) + { + } + + ~ScopedItem () + { + m_pool.release (m_item); + } + + Object& getObject () noexcept + { + return *m_item; + } + + private: + RecycledObjectPool <Object>& m_pool; + Item* const m_item; + }; + +public: + RecycledObjectPool () noexcept + { + } + + ~RecycledObjectPool () + { + for (;;) + { + Item* const item = m_stack.pop_front (); + + if (item != nullptr) + delete item; + else + break; + } + } + +private: + Item* get () + { + Item* item = m_stack.pop_front (); + + if (item == nullptr) + { + item = new Item; + + if (item == nullptr) + Throw (std::bad_alloc ()); + } + + return item; + } + + void release (Item* item) noexcept + { + m_stack.push_front (item); + } + +private: + LockFreeStack <Item> m_stack; +}; + +#endif From 8b1592e494e91b272897cb70adb8507888a092ab Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 Jul 2013 19:52:13 -0700 Subject: [PATCH 24/50] Refactor NodeStore --- Builds/VisualStudio2012/RippleD.vcxproj | 7 + .../VisualStudio2012/RippleD.vcxproj.filters | 18 +- TODO.txt | 4 + modules/ripple_app/data/ripple_DBInit.cpp | 30 +- modules/ripple_app/data/ripple_DBInit.h | 8 - .../ripple_HyperLevelDBBackendFactory.cpp | 18 +- .../node/ripple_HyperLevelDBBackendFactory.h | 2 +- modules/ripple_app/node/ripple_KeyvaDB.cpp | 21 +- modules/ripple_app/node/ripple_KeyvaDB.h | 7 +- .../node/ripple_KeyvaDBBackendFactory.cpp | 61 +- .../node/ripple_KeyvaDBBackendFactory.h | 2 +- .../node/ripple_LevelDBBackendFactory.cpp | 108 ++- .../node/ripple_LevelDBBackendFactory.h | 2 +- modules/ripple_app/node/ripple_NodeObject.cpp | 105 +-- modules/ripple_app/node/ripple_NodeObject.h | 38 +- modules/ripple_app/node/ripple_NodeStore.cpp | 647 +++++++++++++----- modules/ripple_app/node/ripple_NodeStore.h | 106 +-- .../node/ripple_NullBackendFactory.cpp | 2 +- .../node/ripple_NullBackendFactory.h | 2 +- .../node/ripple_SqliteBackendFactory.cpp | 72 +- .../node/ripple_SqliteBackendFactory.h | 2 +- modules/ripple_app/ripple_app.cpp | 10 +- .../containers/ripple_TaggedCache.h | 15 + src/cpp/ripple/NetworkOPs.cpp | 3 + src/cpp/ripple/ripple_Application.cpp | 70 +- src/cpp/ripple/ripple_Application.h | 10 +- src/cpp/ripple/ripple_Main.cpp | 7 +- 27 files changed, 869 insertions(+), 508 deletions(-) diff --git a/Builds/VisualStudio2012/RippleD.vcxproj b/Builds/VisualStudio2012/RippleD.vcxproj index 49b7b01210..b741d0fa69 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj +++ b/Builds/VisualStudio2012/RippleD.vcxproj @@ -169,6 +169,12 @@ true true + + true + true + true + true + true true @@ -1416,6 +1422,7 @@ + diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index 0d116ab957..8754763744 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -807,9 +807,6 @@ [1] Ripple\ripple_app\node - - [1] Ripple\ripple_app\node - [1] Ripple\ripple_mdb @@ -903,6 +900,12 @@ [1] Ripple\ripple_app\node + + [1] Ripple\ripple_app\node + + + [1] Ripple\ripple_app\node + @@ -1587,9
+1590,6 @@ [1] Ripple\ripple_app\node - - [1] Ripple\ripple_app\node - [1] Ripple\ripple_mdb @@ -1686,6 +1686,12 @@ [1] Ripple\ripple_app\node + + [1] Ripple\ripple_app\node + + + [1] Ripple\ripple_app\node + diff --git a/TODO.txt b/TODO.txt index 5ea4f1d09f..91f07811af 100644 --- a/TODO.txt +++ b/TODO.txt @@ -10,7 +10,11 @@ Vinnie's Short List (Changes day to day) -------------------------------------------------------------------------------- +- Replace master lock with + - Replace base_uint and uintXXX with UnsignedInteger + * Need to specialize UnsignedInteger to work efficiently with 4 and 8 byte + multiples of the size. - Rewrite boost program_options in Beast diff --git a/modules/ripple_app/data/ripple_DBInit.cpp b/modules/ripple_app/data/ripple_DBInit.cpp index 8639fe035b..a2d6ba1a7d 100644 --- a/modules/ripple_app/data/ripple_DBInit.cpp +++ b/modules/ripple_app/data/ripple_DBInit.cpp @@ -283,32 +283,15 @@ const char* WalletDBInit[] = int WalletDBCount = NUMBER (WalletDBInit); // Hash node database holds nodes indexed by hash -const char* HashNodeDBInit[] = -{ - "PRAGMA synchronous=NORMAL;", - "PRAGMA journal_mode=WAL;", - "PRAGMA journal_size_limit=1582080;", - -#if (ULONG_MAX > UINT_MAX) && !defined (NO_SQLITE_MMAP) - "PRAGMA mmap_size=171798691840;", -#endif - - "BEGIN TRANSACTION;", - - "CREATE TABLE CommittedObjects ( \ - Hash CHARACTER(64) PRIMARY KEY, \ - ObjType CHAR(1) NOT NULL, \ - LedgerIndex BIGINT UNSIGNED, \ - Object BLOB \ - );", - - "END TRANSACTION;" -}; +// VFALCO TODO Remove this since it looks unused +/* int HashNodeDBCount = NUMBER (HashNodeDBInit); +*/ // Net node database holds nodes seen on the network // XXX Not really used needs replacement. +/* const char* NetNodeDBInit[] = { "CREATE TABLE KnownNodes ( \ @@ -320,7 +303,10 @@ const char* NetNodeDBInit[] = }; int NetNodeDBCount = NUMBER (NetNodeDBInit); +*/ +// This appears to be unused +/* const char* PathFindDBInit[] = { "PRAGMA synchronous = OFF; ", @@ -353,5 +339,5 @@ const char* PathFindDBInit[] = }; int PathFindDBCount = NUMBER (PathFindDBInit); +*/ -// vim:ts=4 diff --git a/modules/ripple_app/data/ripple_DBInit.h b/modules/ripple_app/data/ripple_DBInit.h index d6111f9612..489b511588 100644 --- a/modules/ripple_app/data/ripple_DBInit.h +++ b/modules/ripple_app/data/ripple_DBInit.h @@ -12,19 +12,11 @@ extern const char* RpcDBInit[]; extern const char* TxnDBInit[]; extern const char* LedgerDBInit[]; extern const char* WalletDBInit[]; -extern const char* HashNodeDBInit[]; // VFALCO TODO Figure out what these counts are for extern int RpcDBCount; extern int TxnDBCount; extern int LedgerDBCount; extern int WalletDBCount; -extern int HashNodeDBCount; - -// VFALCO TODO Seems these two aren't used so delete EVERYTHING. 
-extern const char* NetNodeDBInit[]; -extern const char* PathFindDBInit[]; -extern int NetNodeDBCount; -extern int PathFindDBCount; #endif diff --git a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp index 12a3892378..6d5ff693f1 100644 --- a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp @@ -9,8 +9,9 @@ class HyperLevelDBBackendFactory::Backend : public NodeStore::Backend { public: - Backend (StringPairArray const& keyValues) - : mName(keyValues ["path"].toStdString ()) + Backend (size_t keyBytes, StringPairArray const& keyValues) + : m_keyBytes (keyBytes) + , mName(keyValues ["path"].toStdString ()) , mDB(NULL) { if (mName.empty()) @@ -58,7 +59,7 @@ public: { Blob blob (toBlob (obj)); batch.Put ( - hyperleveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), 256 / 8), + hyperleveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), m_keyBytes), hyperleveldb::Slice (reinterpret_cast(&blob.front ()), blob.size ())); } return mDB->Write (hyperleveldb::WriteOptions (), &batch).ok (); @@ -68,7 +69,7 @@ public: { std::string sData; if (!mDB->Get (hyperleveldb::ReadOptions (), - hyperleveldb::Slice (reinterpret_cast(hash.begin ()), 256 / 8), &sData).ok ()) + hyperleveldb::Slice (reinterpret_cast(hash.begin ()), m_keyBytes), &sData).ok ()) { return NodeObject::pointer(); } @@ -80,10 +81,10 @@ public: hyperleveldb::Iterator* it = mDB->NewIterator (hyperleveldb::ReadOptions ()); for (it->SeekToFirst (); it->Valid (); it->Next ()) { - if (it->key ().size () == 256 / 8) + if (it->key ().size () == m_keyBytes) { uint256 hash; - memcpy(hash.begin(), it->key ().data(), 256 / 8); + memcpy(hash.begin(), it->key ().data(), m_keyBytes); func (fromBinary (hash, it->value ().data (), it->value ().size ())); } } @@ -116,6 +117,7 @@ public: } private: + size_t const m_keyBytes; std::string mName; hyperleveldb::DB* mDB; }; @@ -142,9 +144,9 @@ String HyperLevelDBBackendFactory::getName () const return "HyperLevelDB"; } -NodeStore::Backend* HyperLevelDBBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* HyperLevelDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) { - return new HyperLevelDBBackendFactory::Backend (keyValues); + return new HyperLevelDBBackendFactory::Backend (keyBytes, keyValues); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h index 1b44e4f9d1..6691681c72 100644 --- a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h @@ -23,7 +23,7 @@ public: static HyperLevelDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); }; #endif diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 7f48c77ec5..70d5954f72 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -3,6 +3,19 @@ Copyright (c) 2011-2013, OpenCoin, Inc. 
*/ //============================================================================== +/* + +TODO + +- Check consistency / range checking on read + +- Cache top level tree nodes + +- Coalesce I/O in RandomAccessFile + +- Delete / file compaction + +*/ class KeyvaDBImp : public KeyvaDB { @@ -336,10 +349,10 @@ public: bool get (void const* key, GetCallback* callback) { - // VFALCO TODD Swap these two lines - SharedState::WriteAccess state (m_state); FindResult findResult (m_keyStorage.getData ()); + SharedState::WriteAccess state (m_state); + bool found = false; if (state->hasKeys ()) @@ -348,7 +361,7 @@ public: if (found) { - void* const destStorage = callback->createStorageForValue (findResult.keyRecord.valSize); + void* const destStorage = callback->getStorageForValue (findResult.keyRecord.valSize); RandomAccessFileInputStream stream (state->valFile); @@ -536,7 +549,7 @@ public: { } - void* createStorageForValue (int valueBytes) + void* getStorageForValue (int valueBytes) { bassert (valueBytes <= maxPayloadBytes); diff --git a/modules/ripple_app/node/ripple_KeyvaDB.h b/modules/ripple_app/node/ripple_KeyvaDB.h index 9ff1b2ec22..7c36c5051c 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.h +++ b/modules/ripple_app/node/ripple_KeyvaDB.h @@ -15,7 +15,7 @@ public: class GetCallback { public: - virtual void* createStorageForValue (int valueBytes) = 0; + virtual void* getStorageForValue (int valueBytes) = 0; }; static KeyvaDB* New (int keyBytes, @@ -25,8 +25,13 @@ public: virtual ~KeyvaDB () { } + // VFALCO TODO Make the return value a Result so we can + // detect corruption and errors! + // virtual bool get (void const* key, GetCallback* callback) = 0; + // VFALCO TODO Use Result for return value + // virtual void put (void const* key, void const* value, int valueBytes) = 0; virtual void flush () = 0; diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp index 687d06e387..f9b65a0193 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp @@ -7,15 +7,9 @@ class KeyvaDBBackendFactory::Backend : public NodeStore::Backend { public: - typedef UnsignedInteger <32> Key; - - enum - { - keyBytes = Key::sizeInBytes - }; - - explicit Backend (StringPairArray const& keyValues) - : m_path (keyValues ["path"]) + Backend (size_t keyBytes, StringPairArray const& keyValues) + : m_keyBytes (keyBytes) + , m_path (keyValues ["path"]) , m_db (KeyvaDB::New ( keyBytes, File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("key"), @@ -33,6 +27,48 @@ public: return m_path.toStdString (); } + //-------------------------------------------------------------------------- + + Status get (void const* key, GetCallback* callback) + { + Status status (ok); + + struct ForwardingGetCallback : KeyvaDB::GetCallback + { + ForwardingGetCallback (Backend::GetCallback* callback) + : m_callback (callback) + { + } + + void* getStorageForValue (int valueBytes) + { + return m_callback->getStorageForValue (valueBytes); + } + + private: + Backend::GetCallback* const m_callback; + }; + + ForwardingGetCallback cb (callback); + + // VFALCO TODO Can't we get KeyvaDB to provide a proper status? 
+ // + bool const found = m_db->get (key, &cb); + + if (found) + { + status = ok; + } + else + { + status = notFound; + } + + return status; + } + + //-------------------------------------------------------------------------- + void writeObject (NodeObject::ref object) { Blob blob (toBlob (object)); @@ -54,7 +90,7 @@ public: int valueBytes; HeapBlock data; - void* createStorageForValue (int valueBytes_) + void* getStorageForValue (int valueBytes_) { valueBytes = valueBytes_; @@ -112,6 +148,7 @@ public: } private: + size_t const m_keyBytes; String m_path; ScopedPointer m_db; }; @@ -138,9 +175,9 @@ String KeyvaDBBackendFactory::getName () const return "KeyvaDB"; } -NodeStore::Backend* KeyvaDBBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* KeyvaDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) { - return new KeyvaDBBackendFactory::Backend (keyValues); + return new KeyvaDBBackendFactory::Backend (keyBytes, keyValues); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h index 2587315d86..4ee95c7b25 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h @@ -21,7 +21,7 @@ public: static KeyvaDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); }; #endif diff --git a/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp b/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp index b00fd0f287..3dbdcd3301 100644 --- a/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp @@ -7,11 +7,12 @@ class LevelDBBackendFactory::Backend : public NodeStore::Backend { public: - Backend (StringPairArray const& keyValues) - : mName(keyValues ["path"].toStdString ()) - , mDB(NULL) + Backend (int keyBytes, StringPairArray const& keyValues) + : m_keyBytes (keyBytes) + , m_name(keyValues ["path"].toStdString ()) + , m_db(NULL) { - if (mName.empty()) + if (m_name.empty()) throw std::runtime_error ("Missing path in LevelDB backend"); leveldb::Options options; @@ -33,21 +34,83 @@ public: if (!keyValues["open_files"].isEmpty()) options.max_open_files = keyValues["open_files"].getIntValue(); - leveldb::Status status = leveldb::DB::Open (options, mName, &mDB); - if (!status.ok () || !mDB) + leveldb::Status status = leveldb::DB::Open (options, m_name, &m_db); + if (!status.ok () || !m_db) throw (std::runtime_error (std::string("Unable to open/create leveldb: ") + status.ToString())); } ~Backend () { - delete mDB; + delete m_db; } std::string getDataBaseName() { - return mName; + return m_name; } + //-------------------------------------------------------------------------- + + struct StdString + { + std::string blob; + }; + + typedef RecycledObjectPool StdStringPool; + + //-------------------------------------------------------------------------- + + Status get (void const* key, GetCallback* callback) + { + Status status (ok); + + leveldb::ReadOptions const options; + leveldb::Slice const slice (static_cast (key), m_keyBytes); + + { + // These are reused std::string objects, + // required for leveldb's funky interface. 
+ // + StdStringPool::ScopedItem item (m_stringPool); + std::string& blob = item.getObject ().blob; + + leveldb::Status getStatus = m_db->Get (options, slice, &blob); + + if (getStatus.ok ()) + { + void* const buffer = callback->getStorageForValue (blob.size ()); + + if (buffer != nullptr) + { + memcpy (buffer, blob.data (), blob.size ()); + } + else + { + Throw (std::bad_alloc ()); + } + } + else + { + if (getStatus.IsCorruption ()) + { + status = dataCorrupt; + } + else if (getStatus.IsNotFound ()) + { + status = notFound; + } + else + { + status = unknown; + } + } + } + + return status; + } + + //-------------------------------------------------------------------------- + bool bulkStore (const std::vector< NodeObject::pointer >& objs) { leveldb::WriteBatch batch; @@ -56,17 +119,17 @@ public: { Blob blob (toBlob (obj)); batch.Put ( - leveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), 256 / 8), + leveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), m_keyBytes), leveldb::Slice (reinterpret_cast(&blob.front ()), blob.size ())); } - return mDB->Write (leveldb::WriteOptions (), &batch).ok (); + return m_db->Write (leveldb::WriteOptions (), &batch).ok (); } NodeObject::pointer retrieve (uint256 const& hash) { std::string sData; - if (!mDB->Get (leveldb::ReadOptions (), - leveldb::Slice (reinterpret_cast(hash.begin ()), 256 / 8), &sData).ok ()) + if (!m_db->Get (leveldb::ReadOptions (), + leveldb::Slice (reinterpret_cast(hash.begin ()), m_keyBytes), &sData).ok ()) { return NodeObject::pointer(); } @@ -75,15 +138,20 @@ public: void visitAll (FUNCTION_TYPE func) { - leveldb::Iterator* it = mDB->NewIterator (leveldb::ReadOptions ()); + leveldb::Iterator* it = m_db->NewIterator (leveldb::ReadOptions ()); for (it->SeekToFirst (); it->Valid (); it->Next ()) { - if (it->key ().size () == 256 / 8) + if (it->key ().size () == m_keyBytes) { uint256 hash; - memcpy(hash.begin(), it->key ().data(), 256 / 8); + memcpy(hash.begin(), it->key ().data(), m_keyBytes); func (fromBinary (hash, it->value ().data (), it->value ().size ())); } + else + { + // VFALCO NOTE What does it mean to find an + // incorrectly sized key? Corruption? 
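For reference, a standalone sketch of the kind of scan visitAll performs, including the key-size check; the database path and the 32-byte key size are example values, and the leveldb headers and library are assumed to be available:

    #include <cstddef>
    #include <iostream>
    #include <leveldb/db.h>

    int main ()
    {
        std::size_t const keyBytes = 32; // fixed key size, as in the node store

        leveldb::Options options;
        options.create_if_missing = true;

        leveldb::DB* db = nullptr;
        leveldb::Status openStatus =
            leveldb::DB::Open (options, "/tmp/example-nodestore", &db);

        if (! openStatus.ok ())
        {
            std::cerr << openStatus.ToString () << "\n";
            return 1;
        }

        leveldb::Iterator* it = db->NewIterator (leveldb::ReadOptions ());
        for (it->SeekToFirst (); it->Valid (); it->Next ())
        {
            if (it->key ().size () == keyBytes)
            {
                // A well formed record: hand the key and value to the visitor.
            }
            else
            {
                // Unexpected key size: written by another schema, or corrupt.
                std::cerr << "odd key of " << it->key ().size () << " bytes\n";
            }
        }

        bool const clean = it->status ().ok (); // surface any iteration error
        delete it;
        delete db;
        return clean ? 0 : 1;
    }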
+ } } } @@ -114,8 +182,10 @@ public: } private: - std::string mName; - leveldb::DB* mDB; + size_t const m_keyBytes; + StdStringPool m_stringPool; + std::string m_name; + leveldb::DB* m_db; }; //------------------------------------------------------------------------------ @@ -140,9 +210,9 @@ String LevelDBBackendFactory::getName () const return "LevelDB"; } -NodeStore::Backend* LevelDBBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* LevelDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) { - return new LevelDBBackendFactory::Backend (keyValues); + return new LevelDBBackendFactory::Backend (keyBytes, keyValues); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_LevelDBBackendFactory.h b/modules/ripple_app/node/ripple_LevelDBBackendFactory.h index b2f324f927..5843221c0d 100644 --- a/modules/ripple_app/node/ripple_LevelDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_LevelDBBackendFactory.h @@ -21,7 +21,7 @@ public: static LevelDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); }; #endif diff --git a/modules/ripple_app/node/ripple_NodeObject.cpp b/modules/ripple_app/node/ripple_NodeObject.cpp index 4b4a0c8aee..b3de59ed95 100644 --- a/modules/ripple_app/node/ripple_NodeObject.cpp +++ b/modules/ripple_app/node/ripple_NodeObject.cpp @@ -6,6 +6,8 @@ SETUP_LOG (NodeObject) +//------------------------------------------------------------------------------ + NodeObject::NodeObject ( NodeObjectType type, LedgerIndex ledgerIndex, @@ -32,80 +34,6 @@ NodeObject::NodeObject ( { } -NodeObject::NodeObject (void const* key, void const* value, int valueBytes) -{ - DecodedBlob decoded (key, value, valueBytes); - - if (decoded.success) - { - mType = decoded.objectType; - mHash = uint256 (key); - mLedgerIndex = decoded.ledgerIndex; - mData = Blob (decoded.objectData, decoded.objectData + decoded.dataBytes); - } - else - { - // VFALCO TODO Write the hex version of key to the string for diagnostics. - String s; - s << "NodeStore:: DecodedBlob failed"; - Throw (s); - } -} - -NodeObject::DecodedBlob::DecodedBlob (void const* key, void const* value, int valueBytes) -{ - /* Data format: - - Bytes - - 0...3 LedgerIndex 32-bit big endian integer - 4...7 Unused? An unused copy of the LedgerIndex - 8 char One of NodeObjectType - 9...end The body of the object data - */ - - success = false; - key = key; - // VFALCO NOTE Ledger indexes should have started at 1 - ledgerIndex = LedgerIndex (-1); - objectType = hotUNKNOWN; - objectData = nullptr; - dataBytes = bmin (0, valueBytes - 9); - - if (dataBytes > 4) - { - LedgerIndex const* index = static_cast (value); - ledgerIndex = ByteOrder::swapIfLittleEndian (*index); - } - - // VFALCO NOTE What about bytes 4 through 7 inclusive? 
- - if (dataBytes > 8) - { - unsigned char const* byte = static_cast (value); - objectType = static_cast (byte [8]); - } - - if (dataBytes > 9) - { - objectData = static_cast (value) + 9; - - switch (objectType) - { - case hotUNKNOWN: - default: - break; - - case hotLEDGER: - case hotTRANSACTION: - case hotACCOUNT_NODE: - case hotTRANSACTION_NODE: - success = true; - break; - } - } -} - NodeObjectType NodeObject::getType () const { return mType; @@ -125,3 +53,32 @@ Blob const& NodeObject::getData () const { return mData; } + +bool NodeObject::isCloneOf (NodeObject const& other) const +{ + return + mType == other.mType && + mHash == other.mHash && + mLedgerIndex == other.mLedgerIndex && + mData == other.mData + ; +} + +//------------------------------------------------------------------------------ + +class NodeObjectTests : public UnitTest +{ +public: + + NodeObjectTests () : UnitTest ("NodeObject") + { + } + + + void runTest () + { + } +}; + +static NodeObjectTests nodeObjectTests; + diff --git a/modules/ripple_app/node/ripple_NodeObject.h b/modules/ripple_app/node/ripple_NodeObject.h index e6b4e3fb7f..0637b29426 100644 --- a/modules/ripple_app/node/ripple_NodeObject.h +++ b/modules/ripple_app/node/ripple_NodeObject.h @@ -64,40 +64,6 @@ public: int bytesInBuffer, uint256 const & hash); - /** Create from a key/value blob. - - This is the format in which a NodeObject is stored in the - persistent storage layer. - - @see NodeStore - */ - NodeObject (void const* key, void const* value, int valueBytes); - - /** Parsed key/value blob into NodeObject components. - - This will extract the information required to construct - a NodeObject. It also does consistency checking and returns - the result, so it is possible to determine if the data - is corrupted without throwing an exception. Note all forms - of corruption are detected so further analysis will be - needed to eliminate false positives. - - This is the format in which a NodeObject is stored in the - persistent storage layer. - */ - struct DecodedBlob - { - DecodedBlob (void const* key, void const* value, int valueBytes); - - bool success; - - void const* key; - LedgerIndex ledgerIndex; - NodeObjectType objectType; - unsigned char const* objectData; - int dataBytes; - }; - /** Retrieve the type of this object. */ NodeObjectType getType () const; @@ -115,6 +81,10 @@ public: */ Blob const& getData () const; + /** See if this object has the same data as another object. + */ + bool isCloneOf (NodeObject const& other) const; + private: NodeObjectType mType; uint256 mHash; diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 191b8eba0b..ef7e7a965e 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -95,220 +95,519 @@ int NodeStore::Backend::getWriteLoad () // NodeStore // -Array NodeStore::s_factories; - -NodeStore::NodeStore (String backendParameters, - String fastBackendParameters, - int cacheSize, - int cacheAge) - : m_backend (createBackend (backendParameters)) - , m_fastBackend (fastBackendParameters.isNotEmpty () ? 
createBackend (fastBackendParameters) - : nullptr) - , m_cache ("NodeStore", cacheSize, cacheAge) - , m_negativeCache ("NoteStoreNegativeCache", 0, 120) +class NodeStoreImp : public NodeStore { -} - -void NodeStore::addBackendFactory (BackendFactory& factory) -{ - s_factories.add (&factory); -} - -float NodeStore::getCacheHitRate () -{ - return m_cache.getHitRate (); -} - -void NodeStore::tune (int size, int age) -{ - m_cache.setTargetSize (size); - m_cache.setTargetAge (age); -} - -void NodeStore::sweep () -{ - m_cache.sweep (); - m_negativeCache.sweep (); -} - -void NodeStore::waitWrite () -{ - m_backend->waitWrite (); - if (m_fastBackend) - m_fastBackend->waitWrite (); -} - -int NodeStore::getWriteLoad () -{ - return m_backend->getWriteLoad (); -} - -bool NodeStore::store (NodeObjectType type, uint32 index, - Blob const& data, uint256 const& hash) -{ - bool wasStored = false; - - bool const keyFoundAndObjectCached = m_cache.refreshIfPresent (hash); - - // VFALCO NOTE What happens if the key is found, but the object - // fell out of the cache? We will end up passing it - // to the backend anyway. - // - if (! keyFoundAndObjectCached) +public: + /** Size of a key. + */ + enum { + keyBytes = 32 + }; -// VFALCO TODO Rename this to RIPPLE_NODESTORE_VERIFY_HASHES and make -// it be 1 or 0 instead of merely defined or undefined. -// -#ifdef PARANOID - assert (hash == Serializer::getSHA512Half (data)); -#endif + /** Parsed key/value blob into NodeObject components. - NodeObject::pointer object = boost::make_shared (type, index, data, hash); + This will extract the information required to construct + a NodeObject. It also does consistency checking and returns + the result, so it is possible to determine if the data + is corrupted without throwing an exception. Note all forms + of corruption are detected so further analysis will be + needed to eliminate false positives. - // VFALCO NOTE What does it mean to canonicalize an object? - // - if (!m_cache.canonicalize (hash, object)) + This is the format in which a NodeObject is stored in the + persistent storage layer. + */ + struct DecodedBlob + { + /** Construct the decoded blob from raw data. + + The `success` member will indicate if the operation was succesful. + */ + DecodedBlob (void const* keyParam, void const* value, int valueBytes) { - m_backend->store (object); + /* Data format: - if (m_fastBackend) - m_fastBackend->store (object); + Bytes + + 0...3 LedgerIndex 32-bit big endian integer + 4...7 Unused? An unused copy of the LedgerIndex + 8 char One of NodeObjectType + 9...end The body of the object data + */ + + success = false; + key = keyParam; + // VFALCO NOTE Ledger indexes should have started at 1 + ledgerIndex = LedgerIndex (-1); + objectType = hotUNKNOWN; + objectData = nullptr; + dataBytes = bmax (0, valueBytes - 9); + + if (dataBytes > 4) + { + LedgerIndex const* index = static_cast (value); + ledgerIndex = ByteOrder::swapIfLittleEndian (*index); + } + + // VFALCO NOTE What about bytes 4 through 7 inclusive? 
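To make that layout concrete, here is a self-contained round trip over the value format described above; the big-endian helpers are illustrative stand-ins for ByteOrder::swapIfLittleEndian:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // bytes 0..3  ledger index, 32-bit big endian
    // bytes 4..7  a second (unused) copy of the ledger index
    // byte  8     the object type code
    // bytes 9..   the object payload

    static void putBigEndian32 (unsigned char* out, std::uint32_t v)
    {
        out [0] = (unsigned char) (v >> 24);
        out [1] = (unsigned char) (v >> 16);
        out [2] = (unsigned char) (v >> 8);
        out [3] = (unsigned char) (v);
    }

    static std::uint32_t getBigEndian32 (unsigned char const* in)
    {
        return (std::uint32_t (in [0]) << 24) | (std::uint32_t (in [1]) << 16)
             | (std::uint32_t (in [2]) << 8)  |  std::uint32_t (in [3]);
    }

    int main ()
    {
        std::uint32_t const ledgerIndex = 12345;
        unsigned char const type = 1; // stand-in for one NodeObjectType code
        unsigned char const payload [] = { 0xde, 0xad, 0xbe, 0xef };

        // Encode: a 9 byte header followed by the payload.
        std::vector <unsigned char> blob (9 + sizeof (payload));
        putBigEndian32 (&blob [0], ledgerIndex);
        putBigEndian32 (&blob [4], ledgerIndex); // the unused duplicate
        blob [8] = type;
        std::memcpy (&blob [9], payload, sizeof (payload));

        // Decode with the same bounds logic as DecodedBlob: a value is
        // only useful if it holds more than the 9 byte header.
        assert (blob.size () > 9);
        assert (getBigEndian32 (&blob [0]) == ledgerIndex);
        assert (blob [8] == type);
        assert (std::memcmp (&blob [9], payload, blob.size () - 9) == 0);

        return 0;
    }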
+ + if (dataBytes > 8) + { + unsigned char const* byte = static_cast (value); + objectType = static_cast (byte [8]); + } + + if (dataBytes > 9) + { + objectData = static_cast (value) + 9; + + switch (objectType) + { + case hotUNKNOWN: + default: + break; + + case hotLEDGER: + case hotTRANSACTION: + case hotACCOUNT_NODE: + case hotTRANSACTION_NODE: + success = true; + break; + } + } } - m_negativeCache.del (hash); - - wasStored = true; - } - - return wasStored; -} - -NodeObject::pointer NodeStore::retrieve (uint256 const& hash) -{ - NodeObject::pointer obj = m_cache.fetch (hash); - - if (obj || m_negativeCache.isPresent (hash)) - return obj; - - if (m_fastBackend) - { - obj = retrieve (m_fastBackend, hash); - - if (obj) + /** Create a NodeObject from this data. + */ + NodeObject::pointer createObject () { - m_cache.canonicalize (hash, obj); - return obj; + NodeObject::pointer object; + + if (success) + { + // VFALCO NOTE I dislke these shared pointers from boost + object = boost::make_shared ( + objectType, ledgerIndex, objectData, dataBytes, uint256 (key)); + } + + return object; } + + bool success; + + void const* key; + LedgerIndex ledgerIndex; + NodeObjectType objectType; + unsigned char const* objectData; + int dataBytes; + }; + + //-------------------------------------------------------------------------- + + class EncodedBlob + { + HeapBlock data; + }; + +public: + NodeStoreImp (String backendParameters, + String fastBackendParameters, + int cacheSize, + int cacheAge) + : m_backend (createBackend (backendParameters)) + , m_fastBackend (fastBackendParameters.isNotEmpty () ? createBackend (fastBackendParameters) + : nullptr) + , m_cache ("NodeStore", cacheSize, cacheAge) + , m_negativeCache ("NoteStoreNegativeCache", 0, 120) + { } + ~NodeStoreImp () { - // m_hooks->onRetrieveBegin () - - // VFALCO TODO Why is this an autoptr? Why can't it just be a plain old object? + // VFALCO NOTE This shouldn't be necessary, the backend can + // just handle it in the destructor. // - LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve")); + m_backend->waitWrite (); - obj = retrieve (m_backend, hash); + if (m_fastBackend) + m_fastBackend->waitWrite (); + } + + float getCacheHitRate () + { + return m_cache.getHitRate (); + } + + void tune (int size, int age) + { + m_cache.setTargetSize (size); + m_cache.setTargetAge (age); + } + + void sweep () + { + m_cache.sweep (); + m_negativeCache.sweep (); + } + + int getWriteLoad () + { + return m_backend->getWriteLoad (); + } + + bool store (NodeObjectType type, + uint32 index, + Blob const& data, + uint256 const& hash) + { + bool wasStored = false; + + bool const keyFoundAndObjectCached = m_cache.refreshIfPresent (hash); + + // VFALCO NOTE What happens if the key is found, but the object + // fell out of the cache? We will end up passing it + // to the backend anyway. + // + if (! keyFoundAndObjectCached) + { + + // VFALCO TODO Rename this to RIPPLE_NODESTORE_VERIFY_HASHES and make + // it be 1 or 0 instead of merely defined or undefined. + // + #ifdef PARANOID + assert (hash == Serializer::getSHA512Half (data)); + #endif + + NodeObject::pointer object = boost::make_shared (type, index, data, hash); + + // VFALCO NOTE What does it mean to canonicalize an object? 
+ // + if (!m_cache.canonicalize (hash, object)) + { + m_backend->store (object); + + if (m_fastBackend) + m_fastBackend->store (object); + } + + m_negativeCache.del (hash); + + wasStored = true; + } + + return wasStored; + } + + //------------------------------------------------------------------------------ + + NodeObject::pointer retrieve (uint256 const& hash) + { + // See if the object already exists in the cache + // + NodeObject::pointer obj = m_cache.fetch (hash); if (obj == nullptr) { - m_negativeCache.add (hash); + // It's not in the cache, see if we can skip checking the db. + // + if (! m_negativeCache.isPresent (hash)) + { + // There's still a chance it could be in one of the databases. - // VFALCO TODO Eliminate return from middle of function + bool foundInFastBackend = false; - return obj; // VFALCO NOTE This is nullptr, why return obj? + // Check the fast backend database if we have one + // + if (m_fastBackend != nullptr) + { + obj = retrieveInternal (m_fastBackend, hash); + + // If we found the object, avoid storing it again later. + if (obj != nullptr) + foundInFastBackend = true; + } + + // Are we still without an object? + // + if (obj == nullptr) + { + // Yes so at last we will try the main database. + // + { + // Monitor this operation's load since it is expensive. + + // m_hooks->onRetrieveBegin () + + // VFALCO TODO Why is this an autoptr? Why can't it just be a plain old object? + // + LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve")); + + obj = retrieveInternal (m_backend, hash); + + // m_hooks->onRetrieveEnd () + } + + // If it's not in the main database, remember that so we + // can skip the lookup for the same object again later. + // + if (obj == nullptr) + m_negativeCache.add (hash); + } + + // Did we finally get something? + // + if (obj != nullptr) + { + // Yes it so canonicalize. This solves the problem where + // more than one thread has its own copy of the same object. + // + m_cache.canonicalize (hash, obj); + + if (! foundInFastBackend) + { + // If we have a fast back end, store it there for later. + // + if (m_fastBackend != nullptr) + m_fastBackend->store (obj); + + // Since this was a 'hard' fetch, we will log it. + // + WriteLog (lsTRACE, NodeObject) << "HOS: " << hash << " fetch: in db"; + } + } + } + else + { + // hash is known not to be in the database + } + } + else + { + // found it! } + return obj; } - // VFALCO NOTE What does this do? - m_cache.canonicalize (hash, obj); + NodeObject::pointer retrieveInternal (Backend* backend, uint256 const& hash) + { + // VFALCO TODO Make this not allocate and free on each call + // + struct MyGetCallback : Backend::GetCallback + { + void* getStorageForValue (size_t sizeInBytes) + { + bytes = sizeInBytes; + data.malloc (sizeInBytes); - if (m_fastBackend) - m_fastBackend->store(obj); + return &data [0]; + } - WriteLog (lsTRACE, NodeObject) << "HOS: " << hash << " fetch: in db"; + size_t bytes; + HeapBlock data; + }; - return obj; + NodeObject::pointer object; + + MyGetCallback cb; + Backend::Status const status = backend->get (hash.begin (), &cb); + + if (status == Backend::ok) + { + // Deserialize the payload into its components. + // + DecodedBlob decoded (hash.begin (), cb.data.getData (), cb.bytes); + + if (decoded.success) + { + object = decoded.createObject (); + } + else + { + // Houston, we've had a problem. Data is likely corrupt. + + // VFALCO TODO Deal with encountering corrupt data! 
+ + WriteLog (lsFATAL, NodeObject) << "Corrupt NodeObject #" << hash; + } + } + + return object; + } + + //------------------------------------------------------------------------------ + + void importVisitor ( + std::vector & objects, + NodeObject::pointer object) + { + if (objects.size() >= bulkWriteBatchSize) + { + m_backend->bulkStore (objects); + + objects.clear (); + objects.reserve (bulkWriteBatchSize); + } + + objects.push_back (object); + } + + int import (String sourceBackendParameters) + { + ScopedPointer srcBackend (createBackend (sourceBackendParameters)); + + WriteLog (lsWARNING, NodeObject) << + "Node import from '" << srcBackend->getDataBaseName() << "' to '" + << m_backend->getDataBaseName() << "'."; + + std::vector objects; + + objects.reserve (bulkWriteBatchSize); + + srcBackend->visitAll (BIND_TYPE (&NodeStoreImp::importVisitor, this, boost::ref (objects), P_1)); + + if (!objects.empty ()) + m_backend->bulkStore (objects); + + return 0; + } + + NodeStore::Backend* createBackend (String const& parameters) + { + Backend* backend = nullptr; + + StringPairArray keyValues = parseKeyValueParameters (parameters, '|'); + + String const& type = keyValues ["type"]; + + if (type.isNotEmpty ()) + { + BackendFactory* factory = nullptr; + + for (int i = 0; i < s_factories.size (); ++i) + { + if (s_factories [i]->getName () == type) + { + factory = s_factories [i]; + break; + } + } + + if (factory != nullptr) + { + backend = factory->createInstance (keyBytes, keyValues); + } + else + { + throw std::runtime_error ("unkown backend type"); + } + } + else + { + throw std::runtime_error ("missing backend type"); + } + + return backend; + } + + static void addBackendFactory (BackendFactory& factory) + { + s_factories.add (&factory); + } + +private: + static Array s_factories; + + RecycledObjectPool m_blobPool; + + // Persistent key/value storage. + ScopedPointer m_backend; + + // Larger key/value storage, but not necessarily persistent. + ScopedPointer m_fastBackend; + + // VFALCO NOTE What are these things for? We need comments. 
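createBackend () above relies on a small configuration convention: a single string of '|' separated key=value pairs, of which "type" selects the factory. The parser below is a hypothetical stand-in for parseKeyValueParameters () from the ripple sources:

    #include <iostream>
    #include <map>
    #include <sstream>
    #include <string>

    std::map <std::string, std::string> parseParams (std::string const& s)
    {
        std::map <std::string, std::string> result;
        std::istringstream in (s);
        std::string pair;

        while (std::getline (in, pair, '|'))
        {
            std::string::size_type const eq = pair.find ('=');
            if (eq != std::string::npos)
                result [pair.substr (0, eq)] = pair.substr (eq + 1);
        }

        return result;
    }

    int main ()
    {
        std::map <std::string, std::string> params (
            parseParams ("type=LevelDB|path=db/nodes|open_files=128"));

        std::cout << "backend: " << params ["type"] << "\n"
                  << "path:    " << params ["path"] << "\n";
        return 0;
    }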
+ TaggedCache m_cache; + KeyCache m_negativeCache; +}; + +Array NodeStoreImp::s_factories; + +//------------------------------------------------------------------------------ + +void NodeStore::addBackendFactory (BackendFactory& factory) +{ + NodeStoreImp::addBackendFactory (factory); +} + +NodeStore* NodeStore::New (String backendParameters, + String fastBackendParameters, + int cacheSize, + int cacheAge) +{ + return new NodeStoreImp (backendParameters, + fastBackendParameters, + cacheSize, + cacheAge); } //------------------------------------------------------------------------------ -NodeObject::pointer NodeStore::retrieve (Backend* backend, uint256 const& hash) +class NodeStoreTests : public UnitTest { - return backend->retrieve (hash); -} - -void NodeStore::importVisitor ( - std::vector & objects, - NodeObject::pointer object) -{ - if (objects.size() >= bulkWriteBatchSize) +public: + enum { - m_backend->bulkStore (objects); + maxPayloadBytes = 1000, - objects.clear (); - objects.reserve (bulkWriteBatchSize); + numObjects = 1000 + }; + + NodeStoreTests () : UnitTest ("NodeStore") + { } - objects.push_back (object); -} - -int NodeStore::import (String sourceBackendParameters) -{ - ScopedPointer srcBackend (createBackend (sourceBackendParameters)); - - WriteLog (lsWARNING, NodeObject) << - "Node import from '" << srcBackend->getDataBaseName() << "' to '" - << m_backend->getDataBaseName() << "'."; - - std::vector objects; - - objects.reserve (bulkWriteBatchSize); - - srcBackend->visitAll (BIND_TYPE (&NodeStore::importVisitor, this, boost::ref (objects), P_1)); - - if (!objects.empty ()) - m_backend->bulkStore (objects); - - return 0; -} - -NodeStore::Backend* NodeStore::createBackend (String const& parameters) -{ - Backend* backend = nullptr; - - StringPairArray keyValues = parseKeyValueParameters (parameters, '|'); - - String const& type = keyValues ["type"]; - - if (type.isNotEmpty ()) + // Create a pseudo-random object + static NodeObject* createNodeObject (int index, int64 seedValue, HeapBlock & payloadBuffer) { - BackendFactory* factory = nullptr; + Random r (seedValue + index); - for (int i = 0; i < s_factories.size (); ++i) + NodeObjectType type; + switch (r.nextInt (4)) { - if (s_factories [i]->getName () == type) - { - factory = s_factories [i]; - break; - } - } + case 0: type = hotLEDGER; break; + case 1: type = hotTRANSACTION; break; + case 2: type = hotACCOUNT_NODE; break; + case 3: type = hotTRANSACTION_NODE; break; + default: + type = hotUNKNOWN; + break; + }; - if (factory != nullptr) - { - backend = factory->createInstance (keyValues); - } - else - { - throw std::runtime_error ("unkown backend type"); - } - } - else - { - throw std::runtime_error ("missing backend type"); + LedgerIndex ledgerIndex = 1 + r.nextInt (1024 * 1024); + + uint256 hash; + r.nextBlob (hash.begin (), hash.size ()); + + int payloadBytes = 1 + r.nextInt (maxPayloadBytes); + r.nextBlob (payloadBuffer.getData (), payloadBytes); + + return new NodeObject (type, ledgerIndex, payloadBuffer.getData (), payloadBytes, hash); } - return backend; -} + void runTest () + { + beginTest ("create"); + + int64 const seedValue = 50; + + HeapBlock payloadBuffer (maxPayloadBytes); + + for (int i = 0; i < numObjects; ++i) + { + ScopedPointer object (createNodeObject (i, seedValue, payloadBuffer)); + } + } +}; + +static NodeStoreTests nodeStoreTests; diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h index 27b4ce3df7..60c3ff3a4a 100644 --- 
a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -34,14 +34,55 @@ public: }; /** Back end used for the store. + + A Backend implements a persistent key/value storage system. + Keys sizes are all fixed within the same database. */ class Backend { public: + /** Return codes from operations. + */ + enum Status + { + ok, + notFound, + dataCorrupt, + unknown + }; + Backend (); virtual ~Backend () { } + /** Provides storage for retrieved objects. + */ + struct GetCallback + { + /** Get storage for an object. + + @param sizeInBytes The number of bytes needed to store the value. + + @return A pointer to a buffer large enough to hold all the bytes. + */ + virtual void* getStorageForValue (size_t sizeInBytes) = 0; + }; + + /** Retrieve a single object. + + If the object is not found or an error is encountered, the + result will indicate the condition. + + @param key A pointer to the key data. + @param callback The callback used to obtain storage for the value. + + @return The result of the operation. + */ + virtual Status get (void const* key, GetCallback* callback) { return notFound; } + + + + /** Store a single object. */ // VFALCO TODO Why should the Backend know or care about NodeObject? @@ -54,13 +95,6 @@ public: */ virtual NodeObject::pointer retrieve (uint256 const &hash) = 0; - struct GetCallback - { - virtual void* getBufferForValue (int valueBytes) = 0; - }; - - virtual bool get (void const* key, GetCallback* callback) { return false; } - // Visit every object in the database // This function will only be called during an import operation // @@ -69,7 +103,7 @@ public: virtual void visitAll (FUNCTION_TYPE ) = 0; private: - friend class NodeStore; + friend class NodeStoreImp; // VFALCO TODO Put this bulk writing logic into a separate class. // NOTE Why are these virtual? @@ -115,8 +149,13 @@ public: virtual String getName () const = 0; /** Create an instance of this factory's backend. + + @param keyBytes The fixed number of bytes per key. + @param keyValues A set of key/value configuration pairs. + + @return A pointer to the Backend object. */ - virtual Backend* createInstance (StringPairArray const& keyValues) = 0; + virtual Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues) = 0; }; public: @@ -133,10 +172,10 @@ public: // Is cacheAge in minutes? seconds? // These should be in the parameters. // - NodeStore (String backendParameters, - String fastBackendParameters, - int cacheSize, - int cacheAge); + static NodeStore* New (String backendParameters, + String fastBackendParameters, + int cacheSize, + int cacheAge); /** Add the specified backend factory to the list of available factories. @@ -146,52 +185,31 @@ public: static void addBackendFactory (BackendFactory& factory); // VFALCO TODO Document this. - float getCacheHitRate (); + virtual float getCacheHitRate () = 0; // VFALCO TODO Document this. - bool store (NodeObjectType type, uint32 index, Blob const& data, - uint256 const& hash); + virtual bool store (NodeObjectType type, uint32 index, Blob const& data, + uint256 const& hash) = 0; // VFALCO TODO Document this. - NodeObject::pointer retrieve (uint256 const& hash); - - // VFALCO TODO Document this. - void waitWrite (); + // TODO Replace uint256 with void* + // + virtual NodeObject::pointer retrieve (uint256 const& hash) = 0; // VFALCO TODO Document this. // TODO Document the parameter meanings. - void tune (int size, int age); + virtual void tune (int size, int age) = 0; // VFALCO TODO Document this. 
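Taken together, the Status codes and GetCallback define the whole Backend read contract. A minimal in-memory model of that contract (class and member names here are illustrative; only the shapes follow the header):

    #include <cstddef>
    #include <cstring>
    #include <map>
    #include <string>
    #include <vector>

    enum Status { ok, notFound, dataCorrupt, unknown };

    struct GetCallback
    {
        virtual ~GetCallback () { }
        virtual void* getStorageForValue (std::size_t sizeInBytes) = 0;
    };

    class MemoryBackend
    {
    public:
        void put (std::string const& key, std::vector <char> const& value)
        {
            m_table [key] = value;
        }

        Status get (std::string const& key, GetCallback* callback)
        {
            std::map <std::string, std::vector <char> >::const_iterator
                iter = m_table.find (key);

            if (iter == m_table.end ())
                return notFound;

            void* dest = callback->getStorageForValue (iter->second.size ());

            if (dest == nullptr)
                return unknown; // the caller could not provide storage

            if (! iter->second.empty ())
                std::memcpy (dest, &iter->second [0], iter->second.size ());

            return ok;
        }

    private:
        std::map <std::string, std::vector <char> > m_table;
    };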
- void sweep (); + virtual void sweep () = 0; // VFALCO TODO Document this. // What are the units of the return value? - int getWriteLoad (); + virtual int getWriteLoad () = 0; // VFALCO TODO Document this. // NOTE What's the return value? - int import (String sourceBackendParameters); - -private: - NodeObject::pointer retrieve (Backend* backend, uint256 const& hash); - - void importVisitor (std::vector & objects, NodeObject::pointer object); - - static Backend* createBackend (String const& parameters); - - static Array s_factories; - -private: - // Persistent key/value storage. - ScopedPointer m_backend; - - // Larger key/value storage, but not necessarily persistent. - ScopedPointer m_fastBackend; - - // VFALCO NOTE What are these things for? We need comments. - TaggedCache m_cache; - KeyCache m_negativeCache; + virtual int import (String sourceBackendParameters) = 0; }; #endif diff --git a/modules/ripple_app/node/ripple_NullBackendFactory.cpp b/modules/ripple_app/node/ripple_NullBackendFactory.cpp index 6ffb0d8299..79607fa923 100644 --- a/modules/ripple_app/node/ripple_NullBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_NullBackendFactory.cpp @@ -62,7 +62,7 @@ String NullBackendFactory::getName () const return "none"; } -NodeStore::Backend* NullBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* NullBackendFactory::createInstance (size_t, StringPairArray const&) { return new NullBackendFactory::Backend; } diff --git a/modules/ripple_app/node/ripple_NullBackendFactory.h b/modules/ripple_app/node/ripple_NullBackendFactory.h index 7112473384..2284fed2d2 100644 --- a/modules/ripple_app/node/ripple_NullBackendFactory.h +++ b/modules/ripple_app/node/ripple_NullBackendFactory.h @@ -23,7 +23,7 @@ public: static NullBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); }; #endif diff --git a/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp b/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp index 0b421ac5be..aa4d4096dc 100644 --- a/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp @@ -4,32 +4,65 @@ */ //============================================================================== +static const char* s_nodeStoreDBInit [] = +{ + "PRAGMA synchronous=NORMAL;", + "PRAGMA journal_mode=WAL;", + "PRAGMA journal_size_limit=1582080;", + +#if (ULONG_MAX > UINT_MAX) && !defined (NO_SQLITE_MMAP) + "PRAGMA mmap_size=171798691840;", +#endif + + "BEGIN TRANSACTION;", + + "CREATE TABLE CommittedObjects ( \ + Hash CHARACTER(64) PRIMARY KEY, \ + ObjType CHAR(1) NOT NULL, \ + LedgerIndex BIGINT UNSIGNED, \ + Object BLOB \ + );", + + "END TRANSACTION;" +}; + +static int s_nodeStoreDBCount = NUMBER (s_nodeStoreDBInit); + class SqliteBackendFactory::Backend : public NodeStore::Backend { public: - Backend(std::string const& path) : mName(path) + Backend (size_t keyBytes, std::string const& path) + : m_keyBytes (keyBytes) + , m_name (path) + , m_db (new DatabaseCon(path, s_nodeStoreDBInit, s_nodeStoreDBCount)) { - mDb = new DatabaseCon(path, HashNodeDBInit, HashNodeDBCount); - mDb->getDB()->executeSQL(boost::str(boost::format("PRAGMA cache_size=-%d;") % - (theConfig.getSize(siHashNodeDBCache) * 1024))); + String s; + + // VFALCO TODO Remove this dependency on theConfig + // + s << "PRAGMA cache_size=-" << String 
(theConfig.getSize(siHashNodeDBCache) * 1024); + m_db->getDB()->executeSQL (s.toStdString ().c_str ()); + + //m_db->getDB()->executeSQL (boost::str (boost::format ("PRAGMA cache_size=-%d;") % + // (theConfig.getSize(siHashNodeDBCache) * 1024))); } - Backend() + ~Backend() { - delete mDb; + delete m_db; } std::string getDataBaseName() { - return mName; + return m_name; } - bool bulkStore(const std::vector< NodeObject::pointer >& objects) + bool bulkStore (const std::vector< NodeObject::pointer >& objects) { - ScopedLock sl(mDb->getDBLock()); - static SqliteStatement pStB(mDb->getDB()->getSqliteDB(), "BEGIN TRANSACTION;"); - static SqliteStatement pStE(mDb->getDB()->getSqliteDB(), "END TRANSACTION;"); - static SqliteStatement pSt(mDb->getDB()->getSqliteDB(), + ScopedLock sl(m_db->getDBLock()); + static SqliteStatement pStB(m_db->getDB()->getSqliteDB(), "BEGIN TRANSACTION;"); + static SqliteStatement pStE(m_db->getDB()->getSqliteDB(), "END TRANSACTION;"); + static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), "INSERT OR IGNORE INTO CommittedObjects " "(Hash,ObjType,LedgerIndex,Object) VALUES (?, ?, ?, ?);"); @@ -55,8 +88,8 @@ public: NodeObject::pointer ret; { - ScopedLock sl(mDb->getDBLock()); - static SqliteStatement pSt(mDb->getDB()->getSqliteDB(), + ScopedLock sl(m_db->getDBLock()); + static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), "SELECT ObjType,LedgerIndex,Object FROM CommittedObjects WHERE Hash = ?;"); pSt.bind(1, hash.GetHex()); @@ -74,7 +107,7 @@ public: { uint256 hash; - static SqliteStatement pSt(mDb->getDB()->getSqliteDB(), + static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), "SELECT ObjType,LedgerIndex,Object,Hash FROM CommittedObjects;"); while (pSt.isRow(pSt.step())) @@ -121,8 +154,9 @@ public: } private: - std::string mName; - DatabaseCon* mDb; + size_t const m_keyBytes; + std::string const m_name; + ScopedPointer m_db; }; //------------------------------------------------------------------------------ @@ -147,7 +181,7 @@ String SqliteBackendFactory::getName () const return "Sqlite"; } -NodeStore::Backend* SqliteBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* SqliteBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) { - return new Backend (keyValues ["path"].toStdString ()); + return new Backend (keyBytes, keyValues ["path"].toStdString ()); } diff --git a/modules/ripple_app/node/ripple_SqliteBackendFactory.h b/modules/ripple_app/node/ripple_SqliteBackendFactory.h index e6420cbde2..dfb10b1bd7 100644 --- a/modules/ripple_app/node/ripple_SqliteBackendFactory.h +++ b/modules/ripple_app/node/ripple_SqliteBackendFactory.h @@ -21,7 +21,7 @@ public: static SqliteBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); }; #endif diff --git a/modules/ripple_app/ripple_app.cpp b/modules/ripple_app/ripple_app.cpp index 43f9e5b4a0..62ad772745 100644 --- a/modules/ripple_app/ripple_app.cpp +++ b/modules/ripple_app/ripple_app.cpp @@ -102,9 +102,9 @@ namespace ripple #include "node/ripple_NodeObject.h" #include "node/ripple_NodeStore.h" -#include "node/ripple_LevelDBBackendFactory.h" #include "node/ripple_HyperLevelDBBackendFactory.h" #include "node/ripple_KeyvaDBBackendFactory.h" +#include "node/ripple_LevelDBBackendFactory.h" #include "node/ripple_MdbBackendFactory.h" #include "node/ripple_NullBackendFactory.h" #include 
"node/ripple_SqliteBackendFactory.h" @@ -245,14 +245,14 @@ static const uint64 tenTo17m1 = tenTo17 - 1; #include "basics/ripple_RPCServerHandler.cpp" #include "node/ripple_NodeObject.cpp" #include "node/ripple_NodeStore.cpp" -#include "node/ripple_LevelDBBackendFactory.cpp" #include "node/ripple_HyperLevelDBBackendFactory.cpp" -#include "node/ripple_MdbBackendFactory.cpp" -#include "node/ripple_NullBackendFactory.cpp" -#include "node/ripple_SqliteBackendFactory.cpp" #include "node/ripple_KeyvaDB.h" // private #include "node/ripple_KeyvaDB.cpp" #include "node/ripple_KeyvaDBBackendFactory.cpp" +#include "node/ripple_LevelDBBackendFactory.cpp" +#include "node/ripple_NullBackendFactory.cpp" +#include "node/ripple_MdbBackendFactory.cpp" +#include "node/ripple_SqliteBackendFactory.cpp" #include "ledger/Ledger.cpp" #include "src/cpp/ripple/ripple_SHAMapDelta.cpp" diff --git a/modules/ripple_basics/containers/ripple_TaggedCache.h b/modules/ripple_basics/containers/ripple_TaggedCache.h index 20263393a1..1f3c294887 100644 --- a/modules/ripple_basics/containers/ripple_TaggedCache.h +++ b/modules/ripple_basics/containers/ripple_TaggedCache.h @@ -115,7 +115,22 @@ public: } bool del (const key_type& key, bool valid); + + /** Replace aliased objects with originals. + + Due to concurrency it is possible for two separate objects with + the same content and referring to the same unique "thing" to exist. + This routine eliminates the duplicate and performs a replacement + on the callers shared pointer if needed. + + @param key The key corresponding to the object + @param data A shared pointer to the data corresponding to the object. + @param replace `true` if `data` is the up to date version of the object. + + @return `true` if the operation was successful. + */ bool canonicalize (const key_type& key, boost::shared_ptr& data, bool replace = false); + bool store (const key_type& key, const c_Data& data); boost::shared_ptr fetch (const key_type& key); bool retrieve (const key_type& key, c_Data& data); diff --git a/src/cpp/ripple/NetworkOPs.cpp b/src/cpp/ripple/NetworkOPs.cpp index 13694d7180..2f95ddffdc 100644 --- a/src/cpp/ripple/NetworkOPs.cpp +++ b/src/cpp/ripple/NetworkOPs.cpp @@ -44,6 +44,9 @@ void NetworkOPs::processNetTimer () { ScopedLock sl (getApp().getMasterLock ()); + Application& app (getApp ()); + ILoadManager& mgr (app.getLoadManager ()); + getApp().getLoadManager ().resetDeadlockDetector (); std::size_t const numPeers = getApp().getPeers ().getPeerVector ().size (); diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp index 8c87ba70bf..8419fd7873 100644 --- a/src/cpp/ripple/ripple_Application.cpp +++ b/src/cpp/ripple/ripple_Application.cpp @@ -46,10 +46,11 @@ public: , mNetOps (&mLedgerMaster) , m_rpcServerHandler (mNetOps) , mTempNodeCache ("NodeCache", 16384, 90) - , m_nodeStore ( + , m_nodeStore (NodeStore::New ( theConfig.NODE_DB, theConfig.FASTNODE_DB, - 16384, 300) + 16384, + 300)) , mSLECache ("LedgerEntryCache", 4096, 120) , mSNTPClient (mAuxService) , mJobQueue (mIOService) @@ -70,11 +71,6 @@ public: , mTxnDB (NULL) , mLedgerDB (NULL) , mWalletDB (NULL) // VFALCO NOTE are all these 'NULL' ctor params necessary? 
- , mNetNodeDB (NULL) - , mPathFindDB (NULL) - , mHashNodeDB (NULL) - , mHashNodeLDB (NULL) - , mEphemeralLDB (NULL) , mPeerDoor (NULL) , mRPCDoor (NULL) , mWSPublicDoor (NULL) @@ -92,13 +88,6 @@ public: delete mTxnDB; delete mLedgerDB; delete mWalletDB; - delete mHashNodeDB; - delete mNetNodeDB; - delete mPathFindDB; - delete mHashNodeLDB; - - if (mEphemeralLDB != nullptr) - delete mEphemeralLDB; } LocalCredentials& getLocalCredentials () @@ -138,7 +127,7 @@ public: NodeStore& getNodeStore () { - return m_nodeStore; + return *m_nodeStore; } JobQueue& getJobQueue () @@ -247,27 +236,6 @@ public: { return mWalletDB; } - DatabaseCon* getNetNodeDB () - { - return mNetNodeDB; - } - DatabaseCon* getPathFindDB () - { - return mPathFindDB; - } - DatabaseCon* getHashNodeDB () - { - return mHashNodeDB; - } - - leveldb::DB* getHashNodeLDB () - { - return mHashNodeLDB; - } - leveldb::DB* getEphemeralLDB () - { - return mEphemeralLDB; - } bool isShutdown () { @@ -302,7 +270,7 @@ private: NetworkOPs mNetOps; RPCServerHandler m_rpcServerHandler; NodeCache mTempNodeCache; - NodeStore m_nodeStore; + ScopedPointer m_nodeStore; SLECache mSLECache; SNTPClient mSNTPClient; JobQueue mJobQueue; @@ -326,13 +294,6 @@ private: DatabaseCon* mTxnDB; DatabaseCon* mLedgerDB; DatabaseCon* mWalletDB; - DatabaseCon* mNetNodeDB; - DatabaseCon* mPathFindDB; - DatabaseCon* mHashNodeDB; - - // VFALCO TODO Wrap this in an interface - leveldb::DB* mHashNodeLDB; - leveldb::DB* mEphemeralLDB; ScopedPointer mPeerDoor; ScopedPointer mRPCDoor; @@ -353,19 +314,11 @@ void ApplicationImp::stop () StopSustain (); mShutdown = true; mIOService.stop (); - // VFALCO TODO We shouldn't have to explicitly call this function. - // The NodeStore destructor should take care of it. - m_nodeStore.waitWrite (); + m_nodeStore = nullptr; mValidations->flush (); mAuxService.stop (); mJobQueue.shutdown (); - delete mHashNodeLDB; - mHashNodeLDB = NULL; - - delete mEphemeralLDB; - mEphemeralLDB = NULL; - WriteLog (lsINFO, Application) << "Stopped: " << mIOService.stopped (); mShutdown = false; } @@ -445,16 +398,11 @@ void ApplicationImp::setup () boost::thread t1 (BIND_TYPE (&InitDB, &mRpcDB, "rpc.db", RpcDBInit, RpcDBCount)); boost::thread t2 (BIND_TYPE (&InitDB, &mTxnDB, "transaction.db", TxnDBInit, TxnDBCount)); boost::thread t3 (BIND_TYPE (&InitDB, &mLedgerDB, "ledger.db", LedgerDBInit, LedgerDBCount)); + boost::thread t4 (BIND_TYPE (&InitDB, &mWalletDB, "wallet.db", WalletDBInit, WalletDBCount)); t1.join (); t2.join (); t3.join (); - - boost::thread t4 (BIND_TYPE (&InitDB, &mWalletDB, "wallet.db", WalletDBInit, WalletDBCount)); - boost::thread t6 (BIND_TYPE (&InitDB, &mNetNodeDB, "netnode.db", NetNodeDBInit, NetNodeDBCount)); - boost::thread t7 (BIND_TYPE (&InitDB, &mPathFindDB, "pathfind.db", PathFindDBInit, PathFindDBCount)); t4.join (); - t6.join (); - t7.join (); leveldb::Options options; options.create_if_missing = true; @@ -515,7 +463,7 @@ void ApplicationImp::setup () getUNL ().nodeBootstrap (); mValidations->tune (theConfig.getSize (siValidationsSize), theConfig.getSize (siValidationsAge)); - m_nodeStore.tune (theConfig.getSize (siNodeCacheSize), theConfig.getSize (siNodeCacheAge)); + m_nodeStore->tune (theConfig.getSize (siNodeCacheSize), theConfig.getSize (siNodeCacheAge)); mLedgerMaster.tune (theConfig.getSize (siLedgerSize), theConfig.getSize (siLedgerAge)); mSLECache.setTargetSize (theConfig.getSize (siSLECacheSize)); mSLECache.setTargetAge (theConfig.getSize (siSLECacheAge)); @@ -697,7 +645,7 @@ void ApplicationImp::doSweep(Job& j) // 
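// Periodic maintenance: each cache below expires its aged entries.
// Note that m_nodeStore->sweep () covers both the object cache and
// the negative cache inside the node store.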
mMasterTransaction.sweep (); - m_nodeStore.sweep (); + m_nodeStore->sweep (); mLedgerMaster.sweep (); mTempNodeCache.sweep (); mValidations->sweep (); diff --git a/src/cpp/ripple/ripple_Application.h b/src/cpp/ripple/ripple_Application.h index 6917442b9e..2b0aee4cf3 100644 --- a/src/cpp/ripple/ripple_Application.h +++ b/src/cpp/ripple/ripple_Application.h @@ -89,15 +89,9 @@ public: It looks like this is used to store the unique node list. */ // VFALCO TODO Rename, document this + // NOTE This will be replaced by class Validators + // virtual DatabaseCon* getWalletDB () = 0; - // VFALCO NOTE It looks like this isn't used... - //virtual DatabaseCon* getNetNodeDB () = 0; - // VFALCO NOTE It looks like this isn't used... - //virtual DatabaseCon* getPathFindDB () = 0; - virtual DatabaseCon* getHashNodeDB () = 0; - - virtual leveldb::DB* getHashNodeLDB () = 0; - virtual leveldb::DB* getEphemeralLDB () = 0; virtual bool getSystemTimeOffset (int& offset) = 0; virtual bool isShutdown () = 0; diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index 658d2147bc..6e0a0919aa 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -257,15 +257,16 @@ int rippleMain (int argc, char** argv) p.add ("parameters", -1); // These must be added before the Application object is created - NodeStore::addBackendFactory (SqliteBackendFactory::getInstance ()); - NodeStore::addBackendFactory (LevelDBBackendFactory::getInstance ()); - NodeStore::addBackendFactory (KeyvaDBBackendFactory::getInstance ()); #if RIPPLE_HYPERLEVELDB_AVAILABLE NodeStore::addBackendFactory (HyperLevelDBBackendFactory::getInstance ()); #endif + NodeStore::addBackendFactory (KeyvaDBBackendFactory::getInstance ()); + NodeStore::addBackendFactory (LevelDBBackendFactory::getInstance ()); #if RIPPLE_MDB_AVAILABLE NodeStore::addBackendFactory (MdbBackendFactory::getInstance ()); #endif + NodeStore::addBackendFactory (NullBackendFactory::getInstance ()); + NodeStore::addBackendFactory (SqliteBackendFactory::getInstance ()); if (! 
RandomNumbers::getInstance ().initialize ()) { From 959e0bd512dc97d2bab319bd0bf2a3e4b13f9c10 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 18 Jul 2013 09:22:04 -0700 Subject: [PATCH 25/50] New MasterLockType which tracks ownership --- .../VisualStudio2012/RippleD.vcxproj.filters | 6 +- modules/ripple_app/ripple_app.cpp | 2 +- .../ripple_basics/utility/ripple_ScopedLock.h | 3 +- src/cpp/ripple/NetworkOPs.cpp | 388 +++++++------- src/cpp/ripple/RPCHandler.cpp | 230 +++++---- src/cpp/ripple/RPCHandler.h | 128 ++--- src/cpp/ripple/ripple_Application.cpp | 28 +- src/cpp/ripple/ripple_Application.h | 107 +++- src/cpp/ripple/ripple_LedgerConsensus.cpp | 260 +++++----- src/cpp/ripple/ripple_LoadManager.cpp | 5 + src/cpp/ripple/ripple_Peer.cpp | 487 +++++++++--------- src/cpp/ripple/ripple_SHAMap.cpp | 4 + src/cpp/ripple/ripple_TransactionAcquire.cpp | 12 +- 13 files changed, 899 insertions(+), 761 deletions(-) diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index 8754763744..964828c715 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -972,9 +972,6 @@ [1] Ripple\ripple_basics\utility - - [1] Ripple\ripple_basics\utility - [1] Ripple\ripple_basics\utility @@ -1692,6 +1689,9 @@ [1] Ripple\ripple_app\node + + [1] Ripple\ripple_basics\utility + diff --git a/modules/ripple_app/ripple_app.cpp b/modules/ripple_app/ripple_app.cpp index 62ad772745..305bd0c829 100644 --- a/modules/ripple_app/ripple_app.cpp +++ b/modules/ripple_app/ripple_app.cpp @@ -155,10 +155,10 @@ namespace ripple #include "src/cpp/ripple/TransactionMaster.h" #include "src/cpp/ripple/ripple_LocalCredentials.h" #include "src/cpp/ripple/WSDoor.h" +#include "src/cpp/ripple/ripple_Application.h" #include "src/cpp/ripple/RPCHandler.h" #include "src/cpp/ripple/TransactionQueue.h" #include "ledger/OrderBookDB.h" -#include "src/cpp/ripple/ripple_Application.h" #include "src/cpp/ripple/CallRPC.h" #include "src/cpp/ripple/Transactor.h" #include "src/cpp/ripple/ChangeTransactor.h" diff --git a/modules/ripple_basics/utility/ripple_ScopedLock.h b/modules/ripple_basics/utility/ripple_ScopedLock.h index a51dd1dd3a..a38a539251 100644 --- a/modules/ripple_basics/utility/ripple_ScopedLock.h +++ b/modules/ripple_basics/utility/ripple_ScopedLock.h @@ -37,7 +37,7 @@ public: }; // A class that unlocks on construction and locks on destruction - +/* class ScopedUnlock { protected: @@ -80,5 +80,6 @@ private: ScopedUnlock (const ScopedUnlock&); // no implementation ScopedUnlock& operator= (const ScopedUnlock&); // no implementation }; +*/ #endif diff --git a/src/cpp/ripple/NetworkOPs.cpp b/src/cpp/ripple/NetworkOPs.cpp index 2f95ddffdc..6b08e8bb65 100644 --- a/src/cpp/ripple/NetworkOPs.cpp +++ b/src/cpp/ripple/NetworkOPs.cpp @@ -42,50 +42,53 @@ NetworkOPs::NetworkOPs (LedgerMaster* pLedgerMaster) void NetworkOPs::processNetTimer () { - ScopedLock sl (getApp().getMasterLock ()); - - Application& app (getApp ()); - ILoadManager& mgr (app.getLoadManager ()); - - getApp().getLoadManager ().resetDeadlockDetector (); - - std::size_t const numPeers = getApp().getPeers ().getPeerVector ().size (); - - // do we have sufficient peers? If not, we are disconnected. 
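The pattern above, passing __FILE__ and __LINE__ into the scoped lock, is the heart of this commit. A minimal sketch of a mutex that remembers its acquisition site (a hypothetical TrackedMutex, much simpler than the real MasterLockType):

    #include <mutex>

    class TrackedMutex
    {
    public:
        class ScopedLockType
        {
        public:
            ScopedLockType (TrackedMutex& mutex, char const* file, int line)
                : m_mutex (mutex)
            {
                m_mutex.m_lock.lock ();
                m_mutex.m_file = file; // safe to write: we hold the lock
                m_mutex.m_line = line;
            }

            ~ScopedLockType ()
            {
                m_mutex.m_file = nullptr;
                m_mutex.m_line = 0;
                m_mutex.m_lock.unlock ();
            }

        private:
            TrackedMutex& m_mutex;
        };

        // Note: with a recursive lock, re-entry would overwrite the
        // recorded site; a fuller implementation would keep a count.

    private:
        std::mutex m_lock;
        char const* m_file = nullptr;
        int m_line = 0;
    };

    void example (TrackedMutex& masterLock)
    {
        TrackedMutex::ScopedLockType lock (masterLock, __FILE__, __LINE__);
        // ... work performed while holding the master lock ...
    }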
- if (numPeers < theConfig.NETWORK_QUORUM) { - if (mMode != omDISCONNECTED) + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); + + // VFALCO NOTE This is for diagnosing a crash on exit + Application& app (getApp ()); + ILoadManager& mgr (app.getLoadManager ()); + + getApp().getLoadManager ().resetDeadlockDetector (); + + std::size_t const numPeers = getApp().getPeers ().getPeerVector ().size (); + + // do we have sufficient peers? If not, we are disconnected. + if (numPeers < theConfig.NETWORK_QUORUM) { - setMode (omDISCONNECTED); - WriteLog (lsWARNING, NetworkOPs) - << "Node count (" << numPeers << ") " - << "has fallen below quorum (" << theConfig.NETWORK_QUORUM << ")."; + if (mMode != omDISCONNECTED) + { + setMode (omDISCONNECTED); + WriteLog (lsWARNING, NetworkOPs) + << "Node count (" << numPeers << ") " + << "has fallen below quorum (" << theConfig.NETWORK_QUORUM << ")."; + } + + return; } - return; - } + if (mMode == omDISCONNECTED) + { + setMode (omCONNECTED); + WriteLog (lsINFO, NetworkOPs) << "Node count (" << numPeers << ") is sufficient."; + } - if (mMode == omDISCONNECTED) - { - setMode (omCONNECTED); - WriteLog (lsINFO, NetworkOPs) << "Node count (" << numPeers << ") is sufficient."; - } + // Check if the last validated ledger forces a change between these states + if (mMode == omSYNCING) + { + setMode (omSYNCING); + } + else if (mMode == omCONNECTED) + { + setMode (omCONNECTED); + } - // Check if the last validated ledger forces a change between these states - if (mMode == omSYNCING) - { - setMode (omSYNCING); - } - else if (mMode == omCONNECTED) - { - setMode (omCONNECTED); - } + if (!mConsensus) + tryStartConsensus (); - if (!mConsensus) - tryStartConsensus (); - - if (mConsensus) - mConsensus->timerEntry (); + if (mConsensus) + mConsensus->timerEntry (); + } } void NetworkOPs::onDeadlineTimer (DeadlineTimer& timer) @@ -309,71 +312,72 @@ void NetworkOPs::runTransactionQueue () { LoadEvent::autoptr ev = getApp().getJobQueue ().getLoadEventAP (jtTXN_PROC, "runTxnQ"); - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - - Transaction::pointer dbtx = getApp().getMasterTransaction ().fetch (txn->getID (), true); - assert (dbtx); - - bool didApply; - TER r = mLedgerMaster->doTransaction (dbtx->getSTransaction (), - tapOPEN_LEDGER | tapNO_CHECK_SIGN, didApply); - dbtx->setResult (r); - - if (isTemMalformed (r)) // malformed, cache bad - getApp().getHashRouter ().setFlag (txn->getID (), SF_BAD); -// else if (isTelLocal (r) || isTerRetry (r)) // can be retried -// getApp().getHashRouter ().setFlag (txn->getID (), SF_RETRY); - - - if (isTerRetry (r)) { - // transaction should be held - WriteLog (lsDEBUG, NetworkOPs) << "QTransaction should be held: " << r; - dbtx->setStatus (HELD); - getApp().getMasterTransaction ().canonicalize (dbtx); - mLedgerMaster->addHeldTransaction (dbtx); - } - else if (r == tefPAST_SEQ) - { - // duplicate or conflict - WriteLog (lsINFO, NetworkOPs) << "QTransaction is obsolete"; - dbtx->setStatus (OBSOLETE); - } - else if (r == tesSUCCESS) - { - WriteLog (lsINFO, NetworkOPs) << "QTransaction is now included in open ledger"; - dbtx->setStatus (INCLUDED); - getApp().getMasterTransaction ().canonicalize (dbtx); - } - else - { - WriteLog (lsDEBUG, NetworkOPs) << "QStatus other than success " << r; - dbtx->setStatus (INVALID); - } + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); -// if (didApply || (mMode != omFULL)) - if (didApply) - { - std::set peers; + Transaction::pointer 
dbtx = getApp().getMasterTransaction ().fetch (txn->getID (), true); + assert (dbtx); - if (getApp().getHashRouter ().swapSet (txn->getID (), peers, SF_RELAYED)) + bool didApply; + TER r = mLedgerMaster->doTransaction (dbtx->getSTransaction (), + tapOPEN_LEDGER | tapNO_CHECK_SIGN, didApply); + dbtx->setResult (r); + + if (isTemMalformed (r)) // malformed, cache bad + getApp().getHashRouter ().setFlag (txn->getID (), SF_BAD); + // else if (isTelLocal (r) || isTerRetry (r)) // can be retried + // getApp().getHashRouter ().setFlag (txn->getID (), SF_RETRY); + + + if (isTerRetry (r)) { - WriteLog (lsDEBUG, NetworkOPs) << "relaying"; - protocol::TMTransaction tx; - Serializer s; - dbtx->getSTransaction ()->add (s); - tx.set_rawtransaction (&s.getData ().front (), s.getLength ()); - tx.set_status (protocol::tsCURRENT); - tx.set_receivetimestamp (getNetworkTimeNC ()); // FIXME: This should be when we received it - - PackedMessage::pointer packet = boost::make_shared (tx, protocol::mtTRANSACTION); - getApp().getPeers ().relayMessageBut (peers, packet); + // transaction should be held + WriteLog (lsDEBUG, NetworkOPs) << "QTransaction should be held: " << r; + dbtx->setStatus (HELD); + getApp().getMasterTransaction ().canonicalize (dbtx); + mLedgerMaster->addHeldTransaction (dbtx); + } + else if (r == tefPAST_SEQ) + { + // duplicate or conflict + WriteLog (lsINFO, NetworkOPs) << "QTransaction is obsolete"; + dbtx->setStatus (OBSOLETE); + } + else if (r == tesSUCCESS) + { + WriteLog (lsINFO, NetworkOPs) << "QTransaction is now included in open ledger"; + dbtx->setStatus (INCLUDED); + getApp().getMasterTransaction ().canonicalize (dbtx); } else - WriteLog(lsDEBUG, NetworkOPs) << "recently relayed"; - } + { + WriteLog (lsDEBUG, NetworkOPs) << "QStatus other than success " << r; + dbtx->setStatus (INVALID); + } - txn->doCallbacks (r); + if (didApply /*|| (mMode != omFULL)*/ ) + { + std::set peers; + + if (getApp().getHashRouter ().swapSet (txn->getID (), peers, SF_RELAYED)) + { + WriteLog (lsDEBUG, NetworkOPs) << "relaying"; + protocol::TMTransaction tx; + Serializer s; + dbtx->getSTransaction ()->add (s); + tx.set_rawtransaction (&s.getData ().front (), s.getLength ()); + tx.set_status (protocol::tsCURRENT); + tx.set_receivetimestamp (getNetworkTimeNC ()); // FIXME: This should be when we received it + + PackedMessage::pointer packet = boost::make_shared (tx, protocol::mtTRANSACTION); + getApp().getPeers ().relayMessageBut (peers, packet); + } + else + WriteLog(lsDEBUG, NetworkOPs) << "recently relayed"; + } + + txn->doCallbacks (r); + } } } @@ -410,77 +414,79 @@ Transaction::pointer NetworkOPs::processTransaction (Transaction::pointer trans, getApp().getHashRouter ().setFlag (trans->getID (), SF_SIGGOOD); } - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - bool didApply; - TER r = mLedgerMaster->doTransaction (trans->getSTransaction (), - bAdmin ? (tapOPEN_LEDGER | tapNO_CHECK_SIGN | tapADMIN) : (tapOPEN_LEDGER | tapNO_CHECK_SIGN), didApply); - trans->setResult (r); + { + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); - if (isTemMalformed (r)) // malformed, cache bad - getApp().getHashRouter ().setFlag (trans->getID (), SF_BAD); -// else if (isTelLocal (r) || isTerRetry (r)) // can be retried -// getApp().getHashRouter ().setFlag (trans->getID (), SF_RETRY); + bool didApply; + TER r = mLedgerMaster->doTransaction (trans->getSTransaction (), + bAdmin ? 
(tapOPEN_LEDGER | tapNO_CHECK_SIGN | tapADMIN) : (tapOPEN_LEDGER | tapNO_CHECK_SIGN), didApply); + trans->setResult (r); + + if (isTemMalformed (r)) // malformed, cache bad + getApp().getHashRouter ().setFlag (trans->getID (), SF_BAD); + // else if (isTelLocal (r) || isTerRetry (r)) // can be retried + // getApp().getHashRouter ().setFlag (trans->getID (), SF_RETRY); #ifdef BEAST_DEBUG - - if (r != tesSUCCESS) - { - std::string token, human; - CondLog (transResultInfo (r, token, human), lsINFO, NetworkOPs) << "TransactionResult: " << token << ": " << human; - } + if (r != tesSUCCESS) + { + std::string token, human; + CondLog (transResultInfo (r, token, human), lsINFO, NetworkOPs) << "TransactionResult: " << token << ": " << human; + } #endif - if (callback) - callback (trans, r); + if (callback) + callback (trans, r); - if (r == tefFAILURE) - throw Fault (IO_ERROR); + if (r == tefFAILURE) + throw Fault (IO_ERROR); - if (r == tesSUCCESS) - { - WriteLog (lsINFO, NetworkOPs) << "Transaction is now included in open ledger"; - trans->setStatus (INCLUDED); - getApp().getMasterTransaction ().canonicalize (trans); - } - else if (r == tefPAST_SEQ) - { - // duplicate or conflict - WriteLog (lsINFO, NetworkOPs) << "Transaction is obsolete"; - trans->setStatus (OBSOLETE); - } - else if (isTerRetry (r)) - { - if (!bFailHard) + if (r == tesSUCCESS) { - // transaction should be held - WriteLog (lsDEBUG, NetworkOPs) << "Transaction should be held: " << r; - trans->setStatus (HELD); - getApp().getMasterTransaction ().canonicalize (trans); - mLedgerMaster->addHeldTransaction (trans); + WriteLog (lsINFO, NetworkOPs) << "Transaction is now included in open ledger"; + trans->setStatus (INCLUDED); + getApp().getMasterTransaction ().canonicalize (trans); } - } - else - { - WriteLog (lsDEBUG, NetworkOPs) << "Status other than success " << r; - trans->setStatus (INVALID); - } - - if (didApply || ((mMode != omFULL) && !bFailHard)) - { - std::set peers; - - if (getApp().getHashRouter ().swapSet (trans->getID (), peers, SF_RELAYED)) + else if (r == tefPAST_SEQ) { - protocol::TMTransaction tx; - Serializer s; - trans->getSTransaction ()->add (s); - tx.set_rawtransaction (&s.getData ().front (), s.getLength ()); - tx.set_status (protocol::tsCURRENT); - tx.set_receivetimestamp (getNetworkTimeNC ()); // FIXME: This should be when we received it + // duplicate or conflict + WriteLog (lsINFO, NetworkOPs) << "Transaction is obsolete"; + trans->setStatus (OBSOLETE); + } + else if (isTerRetry (r)) + { + if (!bFailHard) + { + // transaction should be held + WriteLog (lsDEBUG, NetworkOPs) << "Transaction should be held: " << r; + trans->setStatus (HELD); + getApp().getMasterTransaction ().canonicalize (trans); + mLedgerMaster->addHeldTransaction (trans); + } + } + else + { + WriteLog (lsDEBUG, NetworkOPs) << "Status other than success " << r; + trans->setStatus (INVALID); + } - PackedMessage::pointer packet = boost::make_shared (tx, protocol::mtTRANSACTION); - getApp().getPeers ().relayMessageBut (peers, packet); + if (didApply || ((mMode != omFULL) && !bFailHard)) + { + std::set peers; + + if (getApp().getHashRouter ().swapSet (trans->getID (), peers, SF_RELAYED)) + { + protocol::TMTransaction tx; + Serializer s; + trans->getSTransaction ()->add (s); + tx.set_rawtransaction (&s.getData ().front (), s.getLength ()); + tx.set_status (protocol::tsCURRENT); + tx.set_receivetimestamp (getNetworkTimeNC ()); // FIXME: This should be when we received it + + PackedMessage::pointer packet = boost::make_shared (tx, 
protocol::mtTRANSACTION); + getApp().getPeers ().relayMessageBut (peers, packet); + } } } @@ -958,49 +964,53 @@ uint256 NetworkOPs::getConsensusLCL () void NetworkOPs::processTrustedProposal (LedgerProposal::pointer proposal, boost::shared_ptr set, RippleAddress nodePublic, uint256 checkLedger, bool sigGood) { - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - - bool relay = true; - - if (!haveConsensusObject ()) { - WriteLog (lsINFO, NetworkOPs) << "Received proposal outside consensus window"; + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); - if (mMode == omFULL) - relay = false; - } - else - { - storeProposal (proposal, nodePublic); + bool relay = true; - uint256 consensusLCL = mConsensus->getLCL (); - - if (!set->has_previousledger () && (checkLedger != consensusLCL)) + if (!haveConsensusObject ()) { - WriteLog (lsWARNING, NetworkOPs) << "Have to re-check proposal signature due to consensus view change"; - assert (proposal->hasSignature ()); - proposal->setPrevLedger (consensusLCL); + WriteLog (lsINFO, NetworkOPs) << "Received proposal outside consensus window"; - if (proposal->checkSign ()) - sigGood = true; + if (mMode == omFULL) + relay = false; + } + else + { + storeProposal (proposal, nodePublic); + + uint256 consensusLCL = mConsensus->getLCL (); + + if (!set->has_previousledger () && (checkLedger != consensusLCL)) + { + WriteLog (lsWARNING, NetworkOPs) << "Have to re-check proposal signature due to consensus view change"; + assert (proposal->hasSignature ()); + proposal->setPrevLedger (consensusLCL); + + if (proposal->checkSign ()) + sigGood = true; + } + + if (sigGood && (consensusLCL == proposal->getPrevLedger ())) + { + relay = mConsensus->peerPosition (proposal); + WriteLog (lsTRACE, NetworkOPs) << "Proposal processing finished, relay=" << relay; + } } - if (sigGood && (consensusLCL == proposal->getPrevLedger ())) + if (relay) { - relay = mConsensus->peerPosition (proposal); - WriteLog (lsTRACE, NetworkOPs) << "Proposal processing finished, relay=" << relay; + std::set peers; + getApp().getHashRouter ().swapSet (proposal->getHashRouter (), peers, SF_RELAYED); + PackedMessage::pointer message = boost::make_shared (*set, protocol::mtPROPOSE_LEDGER); + getApp().getPeers ().relayMessageBut (peers, message); + } + else + { + WriteLog (lsINFO, NetworkOPs) << "Not relaying trusted proposal"; } } - - if (relay) - { - std::set peers; - getApp().getHashRouter ().swapSet (proposal->getHashRouter (), peers, SF_RELAYED); - PackedMessage::pointer message = boost::make_shared (*set, protocol::mtPROPOSE_LEDGER); - getApp().getPeers ().relayMessageBut (peers, message); - } - else - WriteLog (lsINFO, NetworkOPs) << "Not relaying trusted proposal"; } SHAMap::pointer NetworkOPs::getTXMap (uint256 const& hash) @@ -1042,8 +1052,10 @@ SHAMapAddNode NetworkOPs::gotTXData (const boost::shared_ptr& peer, uint25 { boost::shared_ptr consensus; + { - ScopedLock mlh(getApp().getMasterLock()); + Application::ScopedLockType lock (getApp ().getMasterLock (), __FILE__, __LINE__); + consensus = mConsensus; } diff --git a/src/cpp/ripple/RPCHandler.cpp b/src/cpp/ripple/RPCHandler.cpp index 3329e05cab..5de7f0e1d8 100644 --- a/src/cpp/ripple/RPCHandler.cpp +++ b/src/cpp/ripple/RPCHandler.cpp @@ -58,7 +58,7 @@ RPCHandler::RPCHandler (NetworkOPs* netOps, InfoSub::pointer infoSub) : mNetOps ; } -Json::Value RPCHandler::transactionSign (Json::Value params, bool bSubmit, bool bFailHard, ScopedLock& mlh) +Json::Value RPCHandler::transactionSign (Json::Value params, 
bool bSubmit, bool bFailHard, Application::ScopedLockType& mlh) { if (getApp().getFeeTrack().isLoadedCluster() && (mRole != ADMIN)) return rpcError(rpcTOO_BUSY); @@ -595,7 +595,7 @@ Json::Value RPCHandler::accountFromString (Ledger::ref lrLedger, RippleAddress& // ledger_hash : // ledger_index : // } -Json::Value RPCHandler::doAccountInfo (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doAccountInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -640,7 +640,7 @@ Json::Value RPCHandler::doAccountInfo (Json::Value params, LoadType* loadType, S // port: // } // XXX Might allow domain for manual connections. -Json::Value RPCHandler::doConnect (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doConnect (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (theConfig.RUN_STANDALONE) return "cannot connect in standalone mode"; @@ -661,7 +661,7 @@ Json::Value RPCHandler::doConnect (Json::Value params, LoadType* loadType, Scope // { // key: // } -Json::Value RPCHandler::doDataDelete (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doDataDelete (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("key")) return rpcError (rpcINVALID_PARAMS); @@ -687,7 +687,7 @@ Json::Value RPCHandler::doDataDelete (Json::Value params, LoadType* loadType, Sc // { // key: // } -Json::Value RPCHandler::doDataFetch (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doDataFetch (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("key")) return rpcError (rpcINVALID_PARAMS); @@ -711,7 +711,7 @@ Json::Value RPCHandler::doDataFetch (Json::Value params, LoadType* loadType, Sco // key: // value: // } -Json::Value RPCHandler::doDataStore (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doDataStore (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("key") || !params.isMember ("value")) @@ -772,7 +772,7 @@ Json::Value RPCHandler::doNicknameInfo (Json::Value params) // 'account_index' : // optional // } // XXX This would be better if it took the ledger. 
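Every hunk in this file makes the same mechanical change: RPC handlers now receive the instrumented Application::ScopedLockType instead of a bare ScopedLock, so the deadlock detector can report who holds the master lock. A minimal sketch of the shape these handlers converge on -- illustrative only; doExample is not a handler from this patch, but lookupLedger, isImmutable, and unlock are used exactly this way in the hunks below:

// Illustrative handler, not part of the patch. doCommand () invokes the
// handler with the master lock already held.
Json::Value RPCHandler::doExample (Json::Value params, LoadType* loadType,
                                   Application::ScopedLockType& masterLockHolder)
{
    Ledger::pointer lpLedger;
    Json::Value jvResult = lookupLedger (params, lpLedger);

    if (!lpLedger)
        return jvResult;            // lookupLedger already filled in the error

    if (lpLedger->isImmutable ())
        masterLockHolder.unlock (); // an immutable snapshot needs no lock

    // ... read-only work against lpLedger ...

    return jvResult;
}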
-Json::Value RPCHandler::doOwnerInfo (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doOwnerInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("account") && !params.isMember ("ident")) return rpcError (rpcINVALID_PARAMS); @@ -797,7 +797,7 @@ Json::Value RPCHandler::doOwnerInfo (Json::Value params, LoadType* loadType, Sco return ret; } -Json::Value RPCHandler::doPeers (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doPeers (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value jvResult (Json::objectValue); @@ -808,7 +808,7 @@ Json::Value RPCHandler::doPeers (Json::Value, LoadType* loadType, ScopedLock& Ma return jvResult; } -Json::Value RPCHandler::doPing (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doPing (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { return Json::Value (Json::objectValue); } @@ -818,7 +818,7 @@ Json::Value RPCHandler::doPing (Json::Value, LoadType* loadType, ScopedLock& Mas // issuer is the offering account // --> submit: 'submit|true|false': defaults to false // Prior to running allow each to have a credit line of what they will be getting from the other account. -Json::Value RPCHandler::doProfile (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doProfile (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { /* need to fix now that sharedOfferCreate is gone int iArgs = params.size(); @@ -910,9 +910,9 @@ Json::Value RPCHandler::doProfile (Json::Value params, LoadType* loadType, Scope // difficulty: // optional // secret: // optional // } -Json::Value RPCHandler::doProofCreate (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doProofCreate (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); // XXX: Add ability to create proof with arbitrary time Json::Value jvResult (Json::objectValue); @@ -955,9 +955,9 @@ Json::Value RPCHandler::doProofCreate (Json::Value params, LoadType* loadType, S // { // token: // } -Json::Value RPCHandler::doProofSolve (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doProofSolve (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); Json::Value jvResult; @@ -985,9 +985,9 @@ Json::Value RPCHandler::doProofSolve (Json::Value params, LoadType* loadType, Sc // difficulty: // optional // secret: // optional // } -Json::Value RPCHandler::doProofVerify (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doProofVerify (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); // XXX Add ability to check proof against arbitrary time Json::Value jvResult; @@ -1055,7 +1055,7 @@ Json::Value RPCHandler::doProofVerify (Json::Value params, LoadType* loadType, S // ledger_hash : // ledger_index : // } -Json::Value RPCHandler::doAccountLines (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doAccountLines (Json::Value params, LoadType* loadType, 
Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -1067,7 +1067,7 @@ Json::Value RPCHandler::doAccountLines (Json::Value params, LoadType* loadType, if (lpLedger->isImmutable ()) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); bUnlocked = true; } @@ -1140,7 +1140,7 @@ Json::Value RPCHandler::doAccountLines (Json::Value params, LoadType* loadType, } if (!bUnlocked) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); } else { @@ -1168,7 +1168,7 @@ static void offerAdder (Json::Value& jvLines, SLE::ref offer) // ledger_hash : // ledger_index : // } -Json::Value RPCHandler::doAccountOffers (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doAccountOffers (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -1180,7 +1180,7 @@ Json::Value RPCHandler::doAccountOffers (Json::Value params, LoadType* loadType, if (lpLedger->isImmutable ()) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); bUnlocked = true; } @@ -1212,7 +1212,7 @@ Json::Value RPCHandler::doAccountOffers (Json::Value params, LoadType* loadType, lpLedger->visitAccountItems (raAccount.getAccountID (), BIND_TYPE (&offerAdder, boost::ref (jvsOffers), P_1)); if (!bUnlocked) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); return jvResult; } @@ -1227,7 +1227,7 @@ Json::Value RPCHandler::doAccountOffers (Json::Value params, LoadType* loadType, // "limit" : integer, // Optional. // "proof" : boolean // Defaults to false. // } -Json::Value RPCHandler::doBookOffers (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doBookOffers (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (getApp().getJobQueue ().getJobCountGE (jtCLIENT) > 200) { @@ -1241,7 +1241,7 @@ Json::Value RPCHandler::doBookOffers (Json::Value params, LoadType* loadType, Sc return jvResult; if (lpLedger->isImmutable ()) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); if (!params.isMember ("taker_pays") || !params.isMember ("taker_gets") || !params["taker_pays"].isObject () || !params["taker_gets"].isObject ()) return rpcError (rpcINVALID_PARAMS); @@ -1328,9 +1328,9 @@ Json::Value RPCHandler::doBookOffers (Json::Value params, LoadType* loadType, Sc // { // random: // } -Json::Value RPCHandler::doRandom (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doRandom (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); uint256 uRandom; try @@ -1349,7 +1349,7 @@ Json::Value RPCHandler::doRandom (Json::Value params, LoadType* loadType, Scoped } } -Json::Value RPCHandler::doPathFind (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doPathFind (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("subcommand") || !params["subcommand"].isString ()) return rpcError (rpcINVALID_PARAMS); @@ -1404,7 +1404,7 @@ Json::Value RPCHandler::doPathFind (Json::Value params, LoadType* loadType, Scop // - Allows clients to verify path exists. // - Return canonicalized path. // - From a trusted server, allows clients to use path without manipulation. 
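The doRipplePathFind hunk below relies on a snapshot-then-unlock idiom: the ledger is copied while the master lock is still held, after which the copy is private to the handler and the expensive path search can run unlocked. The idiom reduced to a sketch (variable names follow the surrounding code; the Ledger template argument to make_shared is an assumption restored for readability):

// Snapshot under the lock; the copy is then ours alone, so release early.
Ledger::pointer lSnapShot = boost::make_shared<Ledger> (
    boost::ref (*lpLedger), false);

masterLockHolder.unlock (); // safe: path finding touches only lSnapShot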
-Json::Value RPCHandler::doRipplePathFind (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doRipplePathFind (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { int jc = getApp().getJobQueue ().getJobCountGE (jtCLIENT); @@ -1487,7 +1487,7 @@ Json::Value RPCHandler::doRipplePathFind (Json::Value params, LoadType* loadType *loadType = LT_RPCBurden; Ledger::pointer lSnapShot = boost::make_shared (boost::ref (*lpLedger), false); - MasterLockHolder.unlock (); // As long as we have a locked copy of the ledger, we can unlock. + masterLockHolder.unlock (); // As long as we have a locked copy of the ledger, we can unlock. // Fill in currencies destination will accept Json::Value jvDestCur (Json::arrayValue); @@ -1636,23 +1636,23 @@ Json::Value RPCHandler::doRipplePathFind (Json::Value params, LoadType* loadType // tx_json: , // secret: // } -Json::Value RPCHandler::doSign (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doSign (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { *loadType = LT_RPCBurden; bool bFailHard = params.isMember ("fail_hard") && params["fail_hard"].asBool (); - return transactionSign (params, false, bFailHard, MasterLockHolder); + return transactionSign (params, false, bFailHard, masterLockHolder); } // { // tx_json: , // secret: // } -Json::Value RPCHandler::doSubmit (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doSubmit (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("tx_blob")) { bool bFailHard = params.isMember ("fail_hard") && params["fail_hard"].asBool (); - return transactionSign (params, true, bFailHard, MasterLockHolder); + return transactionSign (params, true, bFailHard, masterLockHolder); } Json::Value jvResult; @@ -1710,7 +1710,7 @@ Json::Value RPCHandler::doSubmit (Json::Value params, LoadType* loadType, Scoped return jvResult; } - MasterLockHolder.unlock (); + masterLockHolder.unlock (); try { @@ -1740,7 +1740,7 @@ Json::Value RPCHandler::doSubmit (Json::Value params, LoadType* loadType, Scoped } } -Json::Value RPCHandler::doConsensusInfo (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doConsensusInfo (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value ret (Json::objectValue); @@ -1749,7 +1749,7 @@ Json::Value RPCHandler::doConsensusInfo (Json::Value, LoadType* loadType, Scoped return ret; } -Json::Value RPCHandler::doServerInfo (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doServerInfo (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value ret (Json::objectValue); @@ -1758,7 +1758,7 @@ Json::Value RPCHandler::doServerInfo (Json::Value, LoadType* loadType, ScopedLoc return ret; } -Json::Value RPCHandler::doServerState (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doServerState (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value ret (Json::objectValue); @@ -1770,9 +1770,9 @@ Json::Value RPCHandler::doServerState (Json::Value, LoadType* loadType, ScopedLo // { // start: // } -Json::Value RPCHandler::doTxHistory (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doTxHistory (Json::Value params, 
LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); if (!params.isMember ("start")) return rpcError (rpcINVALID_PARAMS); @@ -1807,7 +1807,7 @@ Json::Value RPCHandler::doTxHistory (Json::Value params, LoadType* loadType, Sco // { // transaction: // } -Json::Value RPCHandler::doTx (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doTx (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("transaction")) return rpcError (rpcINVALID_PARAMS); @@ -1873,7 +1873,7 @@ Json::Value RPCHandler::doTx (Json::Value params, LoadType* loadType, ScopedLock return rpcError (rpcNOT_IMPL); } -Json::Value RPCHandler::doLedgerClosed (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedgerClosed (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value jvResult; @@ -1886,7 +1886,7 @@ Json::Value RPCHandler::doLedgerClosed (Json::Value, LoadType* loadType, ScopedL return jvResult; } -Json::Value RPCHandler::doLedgerCurrent (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedgerCurrent (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value jvResult; @@ -1900,7 +1900,7 @@ Json::Value RPCHandler::doLedgerCurrent (Json::Value, LoadType* loadType, Scoped // ledger: 'current' | 'closed' | | , // optional // full: true | false // optional, defaults to false. // } -Json::Value RPCHandler::doLedger (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedger (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("ledger") && !params.isMember ("ledger_hash") && !params.isMember ("ledger_index")) { @@ -1922,7 +1922,7 @@ Json::Value RPCHandler::doLedger (Json::Value params, LoadType* loadType, Scoped return jvResult; if (lpLedger->isImmutable ()) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); bool bFull = params.isMember ("full") && params["full"].asBool (); bool bTransactions = params.isMember ("transactions") && params["transactions"].asBool (); @@ -1949,7 +1949,7 @@ Json::Value RPCHandler::doLedger (Json::Value params, LoadType* loadType, Scoped // offset: integer, // optional, defaults to 0 // limit: integer // optional // } -Json::Value RPCHandler::doAccountTransactions (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doAccountTransactions (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { RippleAddress raAccount; uint32 offset = params.isMember ("offset") ? params["offset"].asUInt () : 0; @@ -2018,7 +2018,7 @@ Json::Value RPCHandler::doAccountTransactions (Json::Value params, LoadType* loa try { #endif - MasterLockHolder.unlock (); + masterLockHolder.unlock (); Json::Value ret (Json::objectValue); @@ -2094,7 +2094,7 @@ Json::Value RPCHandler::doAccountTransactions (Json::Value params, LoadType* loa // } // // This command requires admin access because it makes no sense to ask an untrusted server for this. 
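Handlers such as doTxHistory above touch only local resources (the transaction database), never state guarded by the master lock, so they release the lock on entry and run concurrently with the rest of the server. The pattern in isolation -- a sketch, with doLocalOnly a made-up name:

Json::Value RPCHandler::doLocalOnly (Json::Value params, LoadType* loadType,
                                     Application::ScopedLockType& masterLockHolder)
{
    // Nothing below reads shared network state, so drop the lock at once.
    masterLockHolder.unlock ();

    // ... purely local work: database reads, crypto, JSON formatting ...

    return Json::Value (Json::objectValue);
}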
-Json::Value RPCHandler::doValidationCreate (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doValidationCreate (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { RippleAddress raSeed; Json::Value obj (Json::objectValue); @@ -2120,7 +2120,7 @@ Json::Value RPCHandler::doValidationCreate (Json::Value params, LoadType* loadTy // { // secret: // } -Json::Value RPCHandler::doValidationSeed (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doValidationSeed (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value obj (Json::objectValue); @@ -2191,7 +2191,7 @@ Json::Value RPCHandler::accounts (Ledger::ref lrLedger, const RippleAddress& naM // ledger_hash : // ledger_index : // } -Json::Value RPCHandler::doWalletAccounts (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doWalletAccounts (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -2234,7 +2234,7 @@ Json::Value RPCHandler::doWalletAccounts (Json::Value params, LoadType* loadType } } -Json::Value RPCHandler::doLogRotate (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLogRotate (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { return Log::rotateLog (); } @@ -2242,9 +2242,9 @@ Json::Value RPCHandler::doLogRotate (Json::Value, LoadType* loadType, ScopedLock // { // passphrase: // } -Json::Value RPCHandler::doWalletPropose (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doWalletPropose (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); RippleAddress naSeed; RippleAddress naAccount; @@ -2274,7 +2274,7 @@ Json::Value RPCHandler::doWalletPropose (Json::Value params, LoadType* loadType, // { // secret: // } -Json::Value RPCHandler::doWalletSeed (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doWalletSeed (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { RippleAddress raSeed; bool bSecret = params.isMember ("secret"); @@ -2313,7 +2313,7 @@ Json::Value RPCHandler::doWalletSeed (Json::Value params, LoadType* loadType, Sc // username: , // password: // } -Json::Value RPCHandler::doLogin (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLogin (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("username") || !params.isMember ("password")) @@ -2351,7 +2351,7 @@ static void textTime (std::string& text, int& seconds, const char* unitName, int text += "s"; } -Json::Value RPCHandler::doFeature (Json::Value params, LoadType* loadType, ScopedLock& mlh) +Json::Value RPCHandler::doFeature (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh) { if (!params.isMember ("feature")) { @@ -2380,7 +2380,7 @@ Json::Value RPCHandler::doFeature (Json::Value params, LoadType* loadType, Scope // { // min_count: // optional, defaults to 10 // } -Json::Value RPCHandler::doGetCounts (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doGetCounts (Json::Value params, 
LoadType* loadType, Application::ScopedLockType& masterLockHolder) { int minCount = 10; @@ -2432,7 +2432,7 @@ Json::Value RPCHandler::doGetCounts (Json::Value params, LoadType* loadType, Sco return ret; } -Json::Value RPCHandler::doLogLevel (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLogLevel (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { // log_level if (!params.isMember ("severity")) @@ -2485,7 +2485,7 @@ Json::Value RPCHandler::doLogLevel (Json::Value params, LoadType* loadType, Scop // node: |, // comment: // optional // } -Json::Value RPCHandler::doUnlAdd (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlAdd (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { std::string strNode = params.isMember ("node") ? params["node"].asString () : ""; std::string strComment = params.isMember ("comment") ? params["comment"].asString () : ""; @@ -2509,7 +2509,7 @@ Json::Value RPCHandler::doUnlAdd (Json::Value params, LoadType* loadType, Scoped // { // node: | // } -Json::Value RPCHandler::doUnlDelete (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlDelete (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("node")) return rpcError (rpcINVALID_PARAMS); @@ -2532,7 +2532,7 @@ Json::Value RPCHandler::doUnlDelete (Json::Value params, LoadType* loadType, Sco } } -Json::Value RPCHandler::doUnlList (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlList (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value obj (Json::objectValue); @@ -2542,7 +2542,7 @@ Json::Value RPCHandler::doUnlList (Json::Value, LoadType* loadType, ScopedLock& } // Populate the UNL from a local validators.txt file. -Json::Value RPCHandler::doUnlLoad (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlLoad (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (theConfig.VALIDATORS_FILE.empty () || !getApp().getUNL ().nodeLoad (theConfig.VALIDATORS_FILE)) { @@ -2554,7 +2554,7 @@ Json::Value RPCHandler::doUnlLoad (Json::Value, LoadType* loadType, ScopedLock& // Populate the UNL from ripple.com's validators.txt file. 
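All of these commands are dispatched through RPCHandler::doCommand, which in this patch acquires the master lock once, stamps it with __FILE__ and __LINE__, and hands the holder down so each handler can decide whether to keep it; the doCommand hunk appears later in this file. Its core, reduced to a sketch with the error paths elided:

{
    // One instrumented acquisition per command; the handler may release
    // the lock early through the reference it receives.
    Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__);

    Json::Value jvRaw = (this->* (commandsA[i].dfpFunc)) (params, loadType, lock);

    // ... regularize jvRaw into an object and return it ...
}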
-Json::Value RPCHandler::doUnlNetwork (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlNetwork (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { getApp().getUNL ().nodeNetwork (); @@ -2562,7 +2562,7 @@ Json::Value RPCHandler::doUnlNetwork (Json::Value params, LoadType* loadType, Sc } // unl_reset -Json::Value RPCHandler::doUnlReset (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlReset (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { getApp().getUNL ().nodeReset (); @@ -2570,14 +2570,14 @@ Json::Value RPCHandler::doUnlReset (Json::Value params, LoadType* loadType, Scop } // unl_score -Json::Value RPCHandler::doUnlScore (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnlScore (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { getApp().getUNL ().nodeScore (); return "scoring requested"; } -Json::Value RPCHandler::doSMS (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doSMS (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { if (!params.isMember ("text")) return rpcError (rpcINVALID_PARAMS); @@ -2586,14 +2586,14 @@ Json::Value RPCHandler::doSMS (Json::Value params, LoadType* loadType, ScopedLoc return "sms dispatched"; } -Json::Value RPCHandler::doStop (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doStop (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { getApp().stop (); return SYSTEM_NAME " server stopping"; } -Json::Value RPCHandler::doLedgerAccept (Json::Value, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedgerAccept (Json::Value, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Json::Value jvResult; @@ -2616,7 +2616,7 @@ Json::Value RPCHandler::doLedgerAccept (Json::Value, LoadType* loadType, ScopedL // ledger_index : // } // XXX In this case, not specify either ledger does not mean ledger current. It means any ledger. -Json::Value RPCHandler::doTransactionEntry (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doTransactionEntry (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -2785,7 +2785,7 @@ Json::Value RPCHandler::lookupLedger (Json::Value params, Ledger::pointer& lpLed // ledger_index : // ... 
// } -Json::Value RPCHandler::doLedgerEntry (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedgerEntry (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -2794,7 +2794,7 @@ Json::Value RPCHandler::doLedgerEntry (Json::Value params, LoadType* loadType, S return jvResult; if (lpLedger->isImmutable ()) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); uint256 uNodeIndex; bool bNodeBinary = false; @@ -2998,7 +2998,7 @@ Json::Value RPCHandler::doLedgerEntry (Json::Value params, LoadType* loadType, S // ledger_hash : // ledger_index : // } -Json::Value RPCHandler::doLedgerHeader (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doLedgerHeader (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { Ledger::pointer lpLedger; Json::Value jvResult = lookupLedger (params, lpLedger); @@ -3040,7 +3040,7 @@ boost::unordered_set RPCHandler::parseAccountIds (const Json::Val return usnaResult; } -Json::Value RPCHandler::doSubscribe (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doSubscribe (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { InfoSub::pointer ispSub; Json::Value jvResult (Json::objectValue); @@ -3332,7 +3332,7 @@ Json::Value RPCHandler::doSubscribe (Json::Value params, LoadType* loadType, Sco } // FIXME: This leaks RPCSub objects for JSON-RPC. Shouldn't matter for anyone sane. -Json::Value RPCHandler::doUnsubscribe (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doUnsubscribe (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { InfoSub::pointer ispSub; Json::Value jvResult (Json::objectValue); @@ -3554,7 +3554,7 @@ Json::Value RPCHandler::doRpcCommand (const std::string& strMethod, Json::Value return jvResult; } -Json::Value RPCHandler::doInternal (Json::Value params, LoadType* loadType, ScopedLock& MasterLockHolder) +Json::Value RPCHandler::doInternal (Json::Value params, LoadType* loadType, Application::ScopedLockType& masterLockHolder) { // Used for debug or special-purpose RPC commands if (!params.isMember ("internal_command")) @@ -3676,53 +3676,55 @@ Json::Value RPCHandler::doCommand (const Json::Value& params, int iRole, LoadTyp return rpcError (rpcNO_PERMISSION); } - ScopedLock MasterLockHolder (getApp().getMasterLock ()); + { + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); - if ((commandsA[i].iOptions & optNetwork) && (mNetOps->getOperatingMode () < NetworkOPs::omSYNCING)) - { - WriteLog (lsINFO, RPCHandler) << "Insufficient network mode for RPC: " << mNetOps->strOperatingMode (); - - return rpcError (rpcNO_NETWORK); - } - - if (!theConfig.RUN_STANDALONE && (commandsA[i].iOptions & optCurrent) && (getApp().getLedgerMaster().getValidatedLedgerAge() > 120)) - { - return rpcError (rpcNO_CURRENT); - } - else if ((commandsA[i].iOptions & optClosed) && !mNetOps->getClosedLedger ()) - { - return rpcError (rpcNO_CLOSED); - } - else - { - try + if ((commandsA[i].iOptions & optNetwork) && (mNetOps->getOperatingMode () < NetworkOPs::omSYNCING)) { - Json::Value jvRaw = (this->* (commandsA[i].dfpFunc)) (params, loadType, MasterLockHolder); + WriteLog (lsINFO, RPCHandler) << "Insufficient network mode for RPC: " << 
mNetOps->strOperatingMode (); - // Regularize result. - if (jvRaw.isObject ()) - { - // Got an object. - return jvRaw; - } - else - { - // Probably got a string. - Json::Value jvResult (Json::objectValue); - - jvResult["message"] = jvRaw; - - return jvResult; - } + return rpcError (rpcNO_NETWORK); } - catch (std::exception& e) + + if (!theConfig.RUN_STANDALONE && (commandsA[i].iOptions & optCurrent) && (getApp().getLedgerMaster().getValidatedLedgerAge() > 120)) { - WriteLog (lsINFO, RPCHandler) << "Caught throw: " << e.what (); + return rpcError (rpcNO_CURRENT); + } + else if ((commandsA[i].iOptions & optClosed) && !mNetOps->getClosedLedger ()) + { + return rpcError (rpcNO_CLOSED); + } + else + { + try + { + Json::Value jvRaw = (this->* (commandsA[i].dfpFunc)) (params, loadType, lock); - if (*loadType == LT_RPCReference) - *loadType = LT_RPCException; + // Regularize result. + if (jvRaw.isObject ()) + { + // Got an object. + return jvRaw; + } + else + { + // Probably got a string. + Json::Value jvResult (Json::objectValue); - return rpcError (rpcINTERNAL); + jvResult["message"] = jvRaw; + + return jvResult; + } + } + catch (std::exception& e) + { + WriteLog (lsINFO, RPCHandler) << "Caught throw: " << e.what (); + + if (*loadType == LT_RPCReference) + *loadType = LT_RPCException; + + return rpcError (rpcINTERNAL); + } } } } diff --git a/src/cpp/ripple/RPCHandler.h b/src/cpp/ripple/RPCHandler.h index f2931c6cad..bc5f86596d 100644 --- a/src/cpp/ripple/RPCHandler.h +++ b/src/cpp/ripple/RPCHandler.h @@ -40,7 +40,7 @@ private: typedef Json::Value (RPCHandler::*doFuncPtr) ( Json::Value params, LoadType* loadType, - ScopedLock& MasterLockHolder); + Application::ScopedLockType& MasterLockHolder); // VFALCO TODO Document these and give the enumeration a label. 
enum @@ -57,7 +57,7 @@ private: boost::unordered_set parseAccountIds (const Json::Value& jvArray); - Json::Value transactionSign (Json::Value jvRequest, bool bSubmit, bool bFailHard, ScopedLock& mlh); + Json::Value transactionSign (Json::Value jvRequest, bool bSubmit, bool bFailHard, Application::ScopedLockType& mlh); Json::Value lookupLedger (Json::Value jvRequest, Ledger::pointer& lpLedger); @@ -89,70 +89,70 @@ private: const int iIndex, const bool bStrict); - Json::Value doAccountInfo (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doAccountLines (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doAccountOffers (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doAccountTransactions (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doBookOffers (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doConnect (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doConsensusInfo (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doFeature (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doGetCounts (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doInternal (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedger (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedgerAccept (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedgerClosed (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedgerCurrent (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedgerEntry (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLedgerHeader (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLogLevel (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLogRotate (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doNicknameInfo (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doOwnerInfo (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doPathFind (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doPeers (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doPing (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doProfile (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doProofCreate (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doProofSolve (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doProofVerify (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doRandom (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doRipplePathFind (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doSMS (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doServerInfo (Json::Value params, LoadType* loadType, ScopedLock& mlh); // for humans - Json::Value doServerState (Json::Value params, LoadType* loadType, ScopedLock& mlh); // for machines - Json::Value doSessionClose (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doSessionOpen (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doSign (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doStop (Json::Value params, LoadType* 
loadType, ScopedLock& mlh); - Json::Value doSubmit (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doSubscribe (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doTransactionEntry (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doTx (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doTxHistory (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlAdd (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlDelete (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlFetch (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlList (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlLoad (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlNetwork (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlReset (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnlScore (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doUnsubscribe (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doValidationCreate (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doValidationSeed (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletAccounts (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletLock (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletPropose (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletSeed (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletUnlock (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doWalletVerify (Json::Value params, LoadType* loadType, ScopedLock& mlh); + Json::Value doAccountInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doAccountLines (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doAccountOffers (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doAccountTransactions (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doBookOffers (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doConnect (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doConsensusInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doFeature (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doGetCounts (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doInternal (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedger (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedgerAccept (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedgerClosed (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedgerCurrent (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedgerEntry (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLedgerHeader (Json::Value params, LoadType* loadType, 
Application::ScopedLockType& mlh); + Json::Value doLogLevel (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLogRotate (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doNicknameInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doOwnerInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doPathFind (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doPeers (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doPing (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doProfile (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doProofCreate (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doProofSolve (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doProofVerify (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doRandom (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doRipplePathFind (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doSMS (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doServerInfo (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); // for humans + Json::Value doServerState (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); // for machines + Json::Value doSessionClose (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doSessionOpen (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doSign (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doStop (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doSubmit (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doSubscribe (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doTransactionEntry (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doTx (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doTxHistory (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlAdd (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlDelete (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlFetch (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlList (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlLoad (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlNetwork (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlReset (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnlScore (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doUnsubscribe (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doValidationCreate 
(Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doValidationSeed (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletAccounts (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletLock (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletPropose (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletSeed (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletUnlock (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doWalletVerify (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); #if ENABLE_INSECURE - Json::Value doDataDelete (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doDataFetch (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doDataStore (Json::Value params, LoadType* loadType, ScopedLock& mlh); - Json::Value doLogin (Json::Value params, LoadType* loadType, ScopedLock& mlh); + Json::Value doDataDelete (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doDataFetch (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doDataStore (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); + Json::Value doLogin (Json::Value params, LoadType* loadType, Application::ScopedLockType& mlh); #endif private: diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp index 8419fd7873..1b7c31c0f8 100644 --- a/src/cpp/ripple/ripple_Application.cpp +++ b/src/cpp/ripple/ripple_Application.cpp @@ -43,8 +43,8 @@ public: #endif , mIOService ((theConfig.NODE_SIZE >= 2) ? 2 : 1) , mIOWork (mIOService) - , mNetOps (&mLedgerMaster) - , m_rpcServerHandler (mNetOps) + , mNetOps (new NetworkOPs (&mLedgerMaster)) + , m_rpcServerHandler (*mNetOps) , mTempNodeCache ("NodeCache", 16384, 90) , m_nodeStore (NodeStore::New ( theConfig.NODE_DB, @@ -84,6 +84,8 @@ public: ~ApplicationImp () { + mNetOps = nullptr; + // VFALCO TODO Wrap these in ScopedPointer delete mTxnDB; delete mLedgerDB; @@ -97,7 +99,7 @@ public: NetworkOPs& getOPs () { - return mNetOps; + return *mNetOps; } boost::asio::io_service& getIOService () @@ -135,7 +137,7 @@ public: return mJobQueue; } - boost::recursive_mutex& getMasterLock () + MasterLockType& getMasterLock () { return mMasterLock; } @@ -261,13 +263,13 @@ private: // boost::asio::io_service::work mIOWork; - boost::recursive_mutex mMasterLock; + MasterLockType mMasterLock; LocalCredentials m_localCredentials; LedgerMaster mLedgerMaster; InboundLedgers m_inboundLedgers; TransactionMaster mMasterTransaction; - NetworkOPs mNetOps; + ScopedPointer mNetOps; RPCServerHandler m_rpcServerHandler; NodeCache mTempNodeCache; ScopedPointer m_nodeStore; @@ -441,7 +443,7 @@ void ApplicationImp::setup () { // This should probably become the default once we have a stable network if (!theConfig.RUN_STANDALONE) - mNetOps.needNetworkLedger (); + mNetOps->needNetworkLedger (); startNewLedger (); } @@ -572,13 +574,13 @@ void ApplicationImp::setup () { WriteLog (lsWARNING, Application) << "Running in standalone mode"; - mNetOps.setStandAlone (); + mNetOps->setStandAlone (); } else { // VFALCO NOTE the state timer resets the deadlock detector. 
        //
-        mNetOps.setStateTimer ();
+        mNetOps->setStateTimer ();
    }
}
@@ -653,8 +655,8 @@ void ApplicationImp::doSweep(Job& j)
     mSLECache.sweep ();
     AcceptedLedger::sweep (); // VFALCO NOTE AcceptedLedger is/has a singleton?
     SHAMap::sweep (); // VFALCO NOTE SHAMap is/has a singleton?
-    mNetOps.sweepFetchPack ();
-
+    mNetOps->sweepFetchPack ();
+
     // VFALCO NOTE does the call to sweep() happen on another thread?
     mSweepTimer.expires_from_now (boost::posix_time::seconds (theConfig.getSize (siSweepInterval)));
     mSweepTimer.async_wait (BIND_TYPE (&ApplicationImp::sweep, this));
 }
@@ -685,7 +687,7 @@ void ApplicationImp::startNewLedger ()
         secondLedger->setAccepted ();
         mLedgerMaster.pushLedger (secondLedger, boost::make_shared <Ledger> (true, boost::ref (*secondLedger)), false);
         assert (!!secondLedger->getAccountState (rootAddress));
-        mNetOps.setLastCloseTime (secondLedger->getCloseTimeNC ());
+        mNetOps->setLastCloseTime (secondLedger->getCloseTimeNC ());
     }
 }
@@ -753,7 +755,7 @@ bool ApplicationImp::loadOldLedger (const std::string& l, bool bReplay)
         Ledger::pointer openLedger = boost::make_shared <Ledger> (false, boost::ref (*loadLedger));
         mLedgerMaster.switchLedgers (loadLedger, openLedger);
         mLedgerMaster.forceValid(loadLedger);
-        mNetOps.setLastCloseTime (loadLedger->getCloseTimeNC ());
+        mNetOps->setLastCloseTime (loadLedger->getCloseTimeNC ());
         if (bReplay)
         {
             // inject transaction from replayLedger into consensus set
diff --git a/src/cpp/ripple/ripple_Application.h b/src/cpp/ripple/ripple_Application.h
index 2b0aee4cf3..605674d4d1 100644
--- a/src/cpp/ripple/ripple_Application.h
+++ b/src/cpp/ripple/ripple_Application.h
@@ -38,8 +38,6 @@ typedef TaggedCache <uint256, SLE> SLECache
 class Application
 {
 public:
-    virtual ~Application () { }
-
     /* VFALCO NOTE The master lock protects:
@@ -51,7 +49,110 @@ public:
         other things
     */
-    virtual boost::recursive_mutex& getMasterLock () = 0;
+#if 1
+    class ScopedLockType;
+
+    class MasterLockType
+    {
+    public:
+        MasterLockType ()
+            : m_fileName ("")
+            , m_lineNumber (0)
+        {
+        }
+
+        // Note that these are not exactly thread safe.
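        // (Annotation, not part of the patch: setOwner and resetOwner run
        // while m_mutex is held, but LoadManager's deadlock reporter calls
        // getFileName/getLineNumber from another thread with no
        // synchronization, so it can observe a stale owner. For a
        // diagnostic log message that trade-off is acceptable.)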
+
+        char const* getFileName () const noexcept
+        {
+            return m_fileName;
+        }
+
+        int getLineNumber () const noexcept
+        {
+            return m_lineNumber;
+        }
+
+    private:
+        friend class ScopedLockType;
+
+        void setOwner (char const* fileName, int lineNumber)
+        {
+            m_fileName = fileName;
+            m_lineNumber = lineNumber;
+        }
+
+        void resetOwner ()
+        {
+            m_fileName = "";
+            m_lineNumber = 0;
+        }
+
+        boost::recursive_mutex m_mutex;
+        char const* m_fileName;
+        int m_lineNumber;
+    };
+
+    class ScopedLockType
+    {
+    public:
+        explicit ScopedLockType (MasterLockType& mutex,
+                                 char const* fileName,
+                                 int lineNumber)
+            : m_mutex (mutex)
+            , m_lock (mutex.m_mutex)
+        {
+            mutex.setOwner (fileName, lineNumber);
+        }
+
+        ~ScopedLockType ()
+        {
+            if (m_lock.owns_lock ())
+                m_mutex.resetOwner ();
+        }
+
+        void unlock ()
+        {
+            if (m_lock.owns_lock ())
+                m_mutex.resetOwner ();
+
+            m_lock.unlock ();
+        }
+
+    private:
+        MasterLockType& m_mutex;
+        boost::recursive_mutex::scoped_lock m_lock;
+    };
+
+#else
+    typedef boost::recursive_mutex MasterLockType;
+
+    typedef boost::recursive_mutex::scoped_lock ScopedLockType;
+
+#endif
+
+    virtual MasterLockType& getMasterLock () = 0;
+
+
+
+
+public:
+    struct State
+    {
+        // Stuff in here is accessed concurrently and requires a WriteAccess
+    };
+
+    typedef SharedData <State> SharedState;
+
+    SharedState& getSharedState () noexcept { return m_sharedState; }
+
+    SharedState const& getSharedState () const noexcept { return m_sharedState; }
+
+private:
+    SharedState m_sharedState;
+
+public:
+    virtual ~Application () { }
 
     virtual boost::asio::io_service& getIOService () = 0;
diff --git a/src/cpp/ripple/ripple_LedgerConsensus.cpp b/src/cpp/ripple/ripple_LedgerConsensus.cpp
index 704ae9c4f7..d400d68766 100644
--- a/src/cpp/ripple/ripple_LedgerConsensus.cpp
+++ b/src/cpp/ripple/ripple_LedgerConsensus.cpp
@@ -1179,155 +1179,157 @@ void LedgerConsensus::accept (SHAMap::ref set, LoadEvent::pointer)
     if (set->getHash ().isNonZero ()) // put our set where others can get it later
         getApp().getOPs ().takePosition (mPreviousLedger->getLedgerSeq (), set);
-    boost::recursive_mutex::scoped_lock masterLock (getApp().getMasterLock ());
-    assert (set->getHash () == mOurPosition->getCurrentHash ());
-
-    getApp().getOPs ().peekStoredProposals ().clear (); // these are now obsolete
-
-    uint32 closeTime = roundCloseTime (mOurPosition->getCloseTime ());
-    bool closeTimeCorrect = true;
-
-    if (closeTime == 0)
     {
-        // we agreed to disagree
-        closeTimeCorrect = false;
-        closeTime = mPreviousLedger->getCloseTimeNC () + 1;
-    }
+        Application::ScopedLockType lock (getApp ().getMasterLock (), __FILE__, __LINE__);
-    WriteLog (lsDEBUG, LedgerConsensus) << "Report: Prop=" << (mProposing ? "yes" : "no") << " val=" << (mValidating ? "yes" : "no") <<
-        " corLCL=" << (mHaveCorrectLCL ? "yes" : "no") << " fail=" << (mConsensusFail ? "yes" : "no");
-    WriteLog (lsDEBUG, LedgerConsensus) << "Report: Prev = " << mPrevLedgerHash << ":" << mPreviousLedger->getLedgerSeq ();
-    WriteLog (lsDEBUG, LedgerConsensus) << "Report: TxSt = " << set->getHash () << ", close " << closeTime << (closeTimeCorrect ?
"" : "X"); + assert (set->getHash () == mOurPosition->getCurrentHash ()); - CanonicalTXSet failedTransactions (set->getHash ()); + getApp().getOPs ().peekStoredProposals ().clear (); // these are now obsolete - Ledger::pointer newLCL = boost::make_shared (false, boost::ref (*mPreviousLedger)); + uint32 closeTime = roundCloseTime (mOurPosition->getCloseTime ()); + bool closeTimeCorrect = true; - newLCL->peekTransactionMap ()->armDirty (); - newLCL->peekAccountStateMap ()->armDirty (); - WriteLog (lsDEBUG, LedgerConsensus) << "Applying consensus set transactions to the last closed ledger"; - applyTransactions (set, newLCL, newLCL, failedTransactions, false); - newLCL->updateSkipList (); - newLCL->setClosed (); - boost::shared_ptr acctNodes = newLCL->peekAccountStateMap ()->disarmDirty (); - boost::shared_ptr txnNodes = newLCL->peekTransactionMap ()->disarmDirty (); - - // write out dirty nodes (temporarily done here) Most come before setAccepted - int fc; - - while ((fc = SHAMap::flushDirty (*acctNodes, 256, hotACCOUNT_NODE, newLCL->getLedgerSeq ())) > 0) - { - WriteLog (lsTRACE, LedgerConsensus) << "Flushed " << fc << " dirty state nodes"; - } - - while ((fc = SHAMap::flushDirty (*txnNodes, 256, hotTRANSACTION_NODE, newLCL->getLedgerSeq ())) > 0) - { - WriteLog (lsTRACE, LedgerConsensus) << "Flushed " << fc << " dirty transaction nodes"; - } - - newLCL->setAccepted (closeTime, mCloseResolution, closeTimeCorrect); - newLCL->updateHash (); - newLCL->setImmutable (); - - WriteLog (lsDEBUG, LedgerConsensus) << "Report: NewL = " << newLCL->getHash () << ":" << newLCL->getLedgerSeq (); - uint256 newLCLHash = newLCL->getHash (); - - if (ShouldLog (lsTRACE, LedgerConsensus)) - { - WriteLog (lsTRACE, LedgerConsensus) << "newLCL"; - Json::Value p; - newLCL->addJson (p, LEDGER_JSON_DUMP_TXRP | LEDGER_JSON_DUMP_STATE); - WriteLog (lsTRACE, LedgerConsensus) << p; - } - - statusChange (protocol::neACCEPTED_LEDGER, *newLCL); - - if (mValidating && !mConsensusFail) - { - uint256 signingHash; - SerializedValidation::pointer v = boost::make_shared - (newLCLHash, getApp().getOPs ().getValidationTimeNC (), mValPublic, mProposing); - v->setFieldU32 (sfLedgerSequence, newLCL->getLedgerSeq ()); - addLoad(v); - - if (((newLCL->getLedgerSeq () + 1) % 256) == 0) // next ledger is flag ledger + if (closeTime == 0) { - getApp().getFeeVote ().doValidation (newLCL, *v); - getApp().getFeatureTable ().doValidation (newLCL, *v); + // we agreed to disagree + closeTimeCorrect = false; + closeTime = mPreviousLedger->getCloseTimeNC () + 1; } - v->sign (signingHash, mValPrivate); - v->setTrusted (); - getApp().getHashRouter ().addSuppression (signingHash); // suppress it if we receive it - getApp().getValidations ().addValidation (v, "local"); - getApp().getOPs ().setLastValidation (v); - Blob validation = v->getSigned (); - protocol::TMValidation val; - val.set_validation (&validation[0], validation.size ()); - int j = getApp().getPeers ().relayMessage (NULL, - boost::make_shared (val, protocol::mtVALIDATION)); - WriteLog (lsINFO, LedgerConsensus) << "CNF Val " << newLCLHash << " to " << j << " peers"; - } - else - WriteLog (lsINFO, LedgerConsensus) << "CNF newLCL " << newLCLHash; + WriteLog (lsDEBUG, LedgerConsensus) << "Report: Prop=" << (mProposing ? "yes" : "no") << " val=" << (mValidating ? "yes" : "no") << + " corLCL=" << (mHaveCorrectLCL ? "yes" : "no") << " fail=" << (mConsensusFail ? 
"yes" : "no"); + WriteLog (lsDEBUG, LedgerConsensus) << "Report: Prev = " << mPrevLedgerHash << ":" << mPreviousLedger->getLedgerSeq (); + WriteLog (lsDEBUG, LedgerConsensus) << "Report: TxSt = " << set->getHash () << ", close " << closeTime << (closeTimeCorrect ? "" : "X"); - Ledger::pointer newOL = boost::make_shared (true, boost::ref (*newLCL)); - ScopedLock sl ( getApp().getLedgerMaster ().getLock ()); + CanonicalTXSet failedTransactions (set->getHash ()); - // Apply disputed transactions that didn't get in - TransactionEngine engine (newOL); - BOOST_FOREACH (u256_lct_pair & it, mDisputes) - { - if (!it.second->getOurVote ()) + Ledger::pointer newLCL = boost::make_shared (false, boost::ref (*mPreviousLedger)); + + newLCL->peekTransactionMap ()->armDirty (); + newLCL->peekAccountStateMap ()->armDirty (); + WriteLog (lsDEBUG, LedgerConsensus) << "Applying consensus set transactions to the last closed ledger"; + applyTransactions (set, newLCL, newLCL, failedTransactions, false); + newLCL->updateSkipList (); + newLCL->setClosed (); + boost::shared_ptr acctNodes = newLCL->peekAccountStateMap ()->disarmDirty (); + boost::shared_ptr txnNodes = newLCL->peekTransactionMap ()->disarmDirty (); + + // write out dirty nodes (temporarily done here) Most come before setAccepted + int fc; + + while ((fc = SHAMap::flushDirty (*acctNodes, 256, hotACCOUNT_NODE, newLCL->getLedgerSeq ())) > 0) { - // we voted NO - try - { - WriteLog (lsDEBUG, LedgerConsensus) << "Test applying disputed transaction that did not get in"; - SerializerIterator sit (it.second->peekTransaction ()); - SerializedTransaction::pointer txn = boost::make_shared (boost::ref (sit)); + WriteLog (lsTRACE, LedgerConsensus) << "Flushed " << fc << " dirty state nodes"; + } - if (applyTransaction (engine, txn, newOL, true, false)) - failedTransactions.push_back (txn); - } - catch (...) 
+ while ((fc = SHAMap::flushDirty (*txnNodes, 256, hotTRANSACTION_NODE, newLCL->getLedgerSeq ())) > 0) + { + WriteLog (lsTRACE, LedgerConsensus) << "Flushed " << fc << " dirty transaction nodes"; + } + + newLCL->setAccepted (closeTime, mCloseResolution, closeTimeCorrect); + newLCL->updateHash (); + newLCL->setImmutable (); + + WriteLog (lsDEBUG, LedgerConsensus) << "Report: NewL = " << newLCL->getHash () << ":" << newLCL->getLedgerSeq (); + uint256 newLCLHash = newLCL->getHash (); + + if (ShouldLog (lsTRACE, LedgerConsensus)) + { + WriteLog (lsTRACE, LedgerConsensus) << "newLCL"; + Json::Value p; + newLCL->addJson (p, LEDGER_JSON_DUMP_TXRP | LEDGER_JSON_DUMP_STATE); + WriteLog (lsTRACE, LedgerConsensus) << p; + } + + statusChange (protocol::neACCEPTED_LEDGER, *newLCL); + + if (mValidating && !mConsensusFail) + { + uint256 signingHash; + SerializedValidation::pointer v = boost::make_shared + (newLCLHash, getApp().getOPs ().getValidationTimeNC (), mValPublic, mProposing); + v->setFieldU32 (sfLedgerSequence, newLCL->getLedgerSeq ()); + addLoad(v); + + if (((newLCL->getLedgerSeq () + 1) % 256) == 0) // next ledger is flag ledger { - WriteLog (lsDEBUG, LedgerConsensus) << "Failed to apply transaction we voted NO on"; + getApp().getFeeVote ().doValidation (newLCL, *v); + getApp().getFeatureTable ().doValidation (newLCL, *v); + } + + v->sign (signingHash, mValPrivate); + v->setTrusted (); + getApp().getHashRouter ().addSuppression (signingHash); // suppress it if we receive it + getApp().getValidations ().addValidation (v, "local"); + getApp().getOPs ().setLastValidation (v); + Blob validation = v->getSigned (); + protocol::TMValidation val; + val.set_validation (&validation[0], validation.size ()); + int j = getApp().getPeers ().relayMessage (NULL, + boost::make_shared (val, protocol::mtVALIDATION)); + WriteLog (lsINFO, LedgerConsensus) << "CNF Val " << newLCLHash << " to " << j << " peers"; + } + else + WriteLog (lsINFO, LedgerConsensus) << "CNF newLCL " << newLCLHash; + + Ledger::pointer newOL = boost::make_shared (true, boost::ref (*newLCL)); + ScopedLock sl ( getApp().getLedgerMaster ().getLock ()); + + // Apply disputed transactions that didn't get in + TransactionEngine engine (newOL); + BOOST_FOREACH (u256_lct_pair & it, mDisputes) + { + if (!it.second->getOurVote ()) + { + // we voted NO + try + { + WriteLog (lsDEBUG, LedgerConsensus) << "Test applying disputed transaction that did not get in"; + SerializerIterator sit (it.second->peekTransaction ()); + SerializedTransaction::pointer txn = boost::make_shared (boost::ref (sit)); + + if (applyTransaction (engine, txn, newOL, true, false)) + failedTransactions.push_back (txn); + } + catch (...) 
+ { + WriteLog (lsDEBUG, LedgerConsensus) << "Failed to apply transaction we voted NO on"; + } } } - } - WriteLog (lsDEBUG, LedgerConsensus) << "Applying transactions from current open ledger"; - applyTransactions (getApp().getLedgerMaster ().getCurrentLedger ()->peekTransactionMap (), newOL, newLCL, - failedTransactions, true); - getApp().getLedgerMaster ().pushLedger (newLCL, newOL, !mConsensusFail); - mNewLedgerHash = newLCL->getHash (); - mState = lcsACCEPTED; - sl.unlock (); + WriteLog (lsDEBUG, LedgerConsensus) << "Applying transactions from current open ledger"; + applyTransactions (getApp().getLedgerMaster ().getCurrentLedger ()->peekTransactionMap (), newOL, newLCL, + failedTransactions, true); + getApp().getLedgerMaster ().pushLedger (newLCL, newOL, !mConsensusFail); + mNewLedgerHash = newLCL->getHash (); + mState = lcsACCEPTED; + sl.unlock (); - if (mValidating) - { - // see how close our close time is to other nodes' close time reports - WriteLog (lsINFO, LedgerConsensus) << "We closed at " << boost::lexical_cast <std::string> (mCloseTime); - uint64 closeTotal = mCloseTime; - int closeCount = 1; - - for (std::map <uint32, int>::iterator it = mCloseTimes.begin (), end = mCloseTimes.end (); it != end; ++it) + if (mValidating) { - // FIXME: Use median, not average - WriteLog (lsINFO, LedgerConsensus) << boost::lexical_cast <std::string> (it->second) << " time votes for " - << boost::lexical_cast <std::string> (it->first); - closeCount += it->second; - closeTotal += static_cast <uint64> (it->first) * static_cast <uint64> (it->second); + // see how close our close time is to other nodes' close time reports + WriteLog (lsINFO, LedgerConsensus) << "We closed at " << boost::lexical_cast <std::string> (mCloseTime); + uint64 closeTotal = mCloseTime; + int closeCount = 1; + + for (std::map <uint32, int>::iterator it = mCloseTimes.begin (), end = mCloseTimes.end (); it != end; ++it) + { + // FIXME: Use median, not average + WriteLog (lsINFO, LedgerConsensus) << boost::lexical_cast <std::string> (it->second) << " time votes for " + << boost::lexical_cast <std::string> (it->first); + closeCount += it->second; + closeTotal += static_cast <uint64> (it->first) * static_cast <uint64> (it->second); + } + + closeTotal += (closeCount / 2); + closeTotal /= closeCount; + int offset = static_cast <int> (closeTotal) - static_cast <int> (mCloseTime); + WriteLog (lsINFO, LedgerConsensus) << "Our close offset is estimated at " << offset << " (" << closeCount << ")"; + getApp().getOPs ().closeTimeOffset (offset); } - - closeTotal += (closeCount / 2); - closeTotal /= closeCount; - int offset = static_cast <int> (closeTotal) - static_cast <int> (mCloseTime); - WriteLog (lsINFO, LedgerConsensus) << "Our close offset is estimated at " << offset << " (" << closeCount << ")"; - getApp().getOPs ().closeTimeOffset (offset); } - } void LedgerConsensus::endConsensus () diff --git a/src/cpp/ripple/ripple_LoadManager.cpp b/src/cpp/ripple/ripple_LoadManager.cpp index df4ec66980..4b97611f1c 100644 --- a/src/cpp/ripple/ripple_LoadManager.cpp +++ b/src/cpp/ripple/ripple_LoadManager.cpp @@ -250,6 +250,11 @@ private: static void logDeadlock (int dlTime) { WriteLog (lsWARNING, LoadManager) << "Server stalled for " << dlTime << " seconds."; + + char const* fileName = getApp ().getMasterLock ().getFileName (); + int lineNumber = getApp ().getMasterLock ().getLineNumber (); + + WriteLog (lsWARNING, LoadManager) << "Master lock owned by " << File (fileName).getFileName ().toStdString () << ", line " << lineNumber; } private: diff --git a/src/cpp/ripple/ripple_Peer.cpp b/src/cpp/ripple/ripple_Peer.cpp index d7bfe89478..f6d33e00c8 100644 --- a/src/cpp/ripple/ripple_Peer.cpp +++
b/src/cpp/ripple/ripple_Peer.cpp @@ -165,12 +165,12 @@ private: void recvHello (protocol::TMHello & packet); void recvCluster (protocol::TMCluster & packet); - void recvTransaction (protocol::TMTransaction & packet, ScopedLock & MasterLockHolder); - void recvValidation (const boost::shared_ptr& packet, ScopedLock & MasterLockHolder); + void recvTransaction (protocol::TMTransaction & packet, Application::ScopedLockType& masterLockHolder); + void recvValidation (const boost::shared_ptr& packet, Application::ScopedLockType& masterLockHolder); void recvGetValidation (protocol::TMGetValidations & packet); void recvContact (protocol::TMContact & packet); void recvGetContacts (protocol::TMGetContacts & packet); - void recvGetPeers (protocol::TMGetPeers & packet, ScopedLock & MasterLockHolder); + void recvGetPeers (protocol::TMGetPeers & packet, Application::ScopedLockType& masterLockHolder); void recvPeers (protocol::TMPeers & packet); void recvGetObjectByHash (const boost::shared_ptr& packet); void recvPing (protocol::TMPing & packet); @@ -178,8 +178,8 @@ private: void recvSearchTransaction (protocol::TMSearchTransaction & packet); void recvGetAccount (protocol::TMGetAccount & packet); void recvAccount (protocol::TMAccount & packet); - void recvGetLedger (protocol::TMGetLedger & packet, ScopedLock & MasterLockHolder); - void recvLedger (const boost::shared_ptr& packet, ScopedLock & MasterLockHolder); + void recvGetLedger (protocol::TMGetLedger & packet, Application::ScopedLockType& masterLockHolder); + void recvLedger (const boost::shared_ptr& packet, Application::ScopedLockType& masterLockHolder); void recvStatus (protocol::TMStatusChange & packet); void recvPropose (const boost::shared_ptr& packet); void recvHaveTxSet (protocol::TMHaveTransactionSet & packet); @@ -630,8 +630,11 @@ void PeerImp::handleReadBody (const boost::system::error_code& error) WriteLog (lsINFO, Peer) << "Peer: Body: Error: " << getIP () << ": " << error.category ().name () << ": " << error.message () << ": " << error; } - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - detach ("hrb", true); + { + Application::ScopedLockType lock (getApp ().getMasterLock (), __FILE__, __LINE__); + + detach ("hrb", true); + } return; } @@ -651,276 +654,278 @@ void PeerImp::processReadBuffer () LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtPEER, "PeerImp::read")); - ScopedLock sl (getApp().getMasterLock ()); - - // If connected and get a mtHELLO or if not connected and get a non-mtHELLO, wrong message was sent. - if (mHelloed == (type == protocol::mtHELLO)) { - WriteLog (lsWARNING, Peer) << "Wrong message type: " << type; - detach ("prb1", true); - } - else - { - switch (type) - { - case protocol::mtHELLO: - { - event->reName ("PeerImp::hello"); - protocol::TMHello msg; + Application::ScopedLockType lock (getApp ().getMasterLock (), __FILE__, __LINE__); - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvHello (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; + // If connected and get a mtHELLO or if not connected and get a non-mtHELLO, wrong message was sent. 
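The Application::ScopedLockType constructor in this hunk takes __FILE__ and __LINE__, which is what lets the LoadManager::logDeadlock change above report who currently owns the master lock. The lock class itself is not part of this patch, so the following is only a minimal sketch of how such a tracked recursive mutex could work; the names TrackedRecursiveMutex and ScopedLock, and the use of boost::recursive_mutex underneath, are assumptions made for illustration.

#include <boost/thread/recursive_mutex.hpp>

// Hypothetical sketch: records the source location of the most recent
// acquisition so a watchdog thread can report a stuck holder.
class TrackedRecursiveMutex
{
public:
    TrackedRecursiveMutex () : m_fileName (""), m_lineNumber (0) { }

    // Diagnostic accessors, read by the watchdog without synchronization;
    // the values are advisory only.
    char const* getFileName () const noexcept { return m_fileName; }
    int getLineNumber () const noexcept { return m_lineNumber; }

    class ScopedLock
    {
    public:
        ScopedLock (TrackedRecursiveMutex& mutex, char const* fileName, int lineNumber)
            : m_mutex (mutex)
            , m_lock (mutex.m_mutex) // acquire first...
        {
            // ...then record the holder's location while we own the lock
            m_mutex.m_fileName = fileName;
            m_mutex.m_lineNumber = lineNumber;
        }

        void unlock () { m_lock.unlock (); }

    private:
        TrackedRecursiveMutex& m_mutex;
        boost::recursive_mutex::scoped_lock m_lock;
    };

private:
    friend class ScopedLock;

    boost::recursive_mutex m_mutex;
    char const* m_fileName;
    int m_lineNumber;
};

A caller would then write TrackedRecursiveMutex::ScopedLock lock (mutex, __FILE__, __LINE__);, matching the call sites being converted in this file.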
+ if (mHelloed == (type == protocol::mtHELLO)) + { + WriteLog (lsWARNING, Peer) << "Wrong message type: " << type; + detach ("prb1", true); } - break; - - case protocol::mtCLUSTER: + else { - event->reName ("PeerImp::cluster"); - protocol::TMCluster msg; + switch (type) + { + case protocol::mtHELLO: + { + event->reName ("PeerImp::hello"); + protocol::TMHello msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvCluster (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvHello (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtERROR_MSG: - { - event->reName ("PeerImp::errormessage"); - protocol::TMErrorMsg msg; + case protocol::mtCLUSTER: + { + event->reName ("PeerImp::cluster"); + protocol::TMCluster msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvErrorMessage (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvCluster (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } - case protocol::mtPING: - { - event->reName ("PeerImp::ping"); - protocol::TMPing msg; + case protocol::mtERROR_MSG: + { + event->reName ("PeerImp::errormessage"); + protocol::TMErrorMsg msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvPing (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvErrorMessage (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtGET_CONTACTS: - { - event->reName ("PeerImp::getcontacts"); - protocol::TMGetContacts msg; + case protocol::mtPING: + { + event->reName ("PeerImp::ping"); + protocol::TMPing msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvGetContacts (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvPing (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtCONTACT: - { - event->reName ("PeerImp::contact"); - protocol::TMContact msg; + case protocol::mtGET_CONTACTS: + { + event->reName ("PeerImp::getcontacts"); + protocol::TMGetContacts msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvContact (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvGetContacts (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtGET_PEERS: - { - event->reName ("PeerImp::getpeers"); - protocol::TMGetPeers msg; + case protocol::mtCONTACT: + { + event->reName ("PeerImp::contact"); + protocol::TMContact msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], 
mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvGetPeers (msg, sl); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvContact (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtPEERS: - { - event->reName ("PeerImp::peers"); - protocol::TMPeers msg; + case protocol::mtGET_PEERS: + { + event->reName ("PeerImp::getpeers"); + protocol::TMGetPeers msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvPeers (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvGetPeers (msg, lock); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtSEARCH_TRANSACTION: - { - event->reName ("PeerImp::searchtransaction"); - protocol::TMSearchTransaction msg; + case protocol::mtPEERS: + { + event->reName ("PeerImp::peers"); + protocol::TMPeers msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvSearchTransaction (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvPeers (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtGET_ACCOUNT: - { - event->reName ("PeerImp::getaccount"); - protocol::TMGetAccount msg; + case protocol::mtSEARCH_TRANSACTION: + { + event->reName ("PeerImp::searchtransaction"); + protocol::TMSearchTransaction msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvGetAccount (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvSearchTransaction (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtACCOUNT: - { - event->reName ("PeerImp::account"); - protocol::TMAccount msg; + case protocol::mtGET_ACCOUNT: + { + event->reName ("PeerImp::getaccount"); + protocol::TMGetAccount msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvAccount (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvGetAccount (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtTRANSACTION: - { - event->reName ("PeerImp::transaction"); - protocol::TMTransaction msg; + case protocol::mtACCOUNT: + { + event->reName ("PeerImp::account"); + protocol::TMAccount msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvTransaction (msg, sl); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvAccount (msg); + else + WriteLog (lsWARNING, Peer) << 
"parse error: " << type; + } + break; - case protocol::mtSTATUS_CHANGE: - { - event->reName ("PeerImp::statuschange"); - protocol::TMStatusChange msg; + case protocol::mtTRANSACTION: + { + event->reName ("PeerImp::transaction"); + protocol::TMTransaction msg; - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvStatus (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvTransaction (msg, lock); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtPROPOSE_LEDGER: - { - event->reName ("PeerImp::propose"); - boost::shared_ptr msg = boost::make_shared (); + case protocol::mtSTATUS_CHANGE: + { + event->reName ("PeerImp::statuschange"); + protocol::TMStatusChange msg; - if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvPropose (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvStatus (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtGET_LEDGER: - { - event->reName ("PeerImp::getledger"); - protocol::TMGetLedger msg; + case protocol::mtPROPOSE_LEDGER: + { + event->reName ("PeerImp::propose"); + boost::shared_ptr msg = boost::make_shared (); - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvGetLedger (msg, sl); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvPropose (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtLEDGER_DATA: - { - event->reName ("PeerImp::ledgerdata"); - boost::shared_ptr msg = boost::make_shared (); + case protocol::mtGET_LEDGER: + { + event->reName ("PeerImp::getledger"); + protocol::TMGetLedger msg; - if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvLedger (msg, sl); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvGetLedger (msg, lock); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtHAVE_SET: - { - event->reName ("PeerImp::haveset"); - protocol::TMHaveTransactionSet msg; + case protocol::mtLEDGER_DATA: + { + event->reName ("PeerImp::ledgerdata"); + boost::shared_ptr msg = boost::make_shared (); - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvHaveTxSet (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvLedger (msg, lock); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtVALIDATION: - { - event->reName ("PeerImp::validation"); - boost::shared_ptr msg = boost::make_shared (); + case protocol::mtHAVE_SET: + { + event->reName 
("PeerImp::haveset"); + protocol::TMHaveTransactionSet msg; - if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvValidation (msg, sl); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; -#if 0 + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvHaveTxSet (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - case protocol::mtGET_VALIDATION: - { - protocol::TM msg; + case protocol::mtVALIDATION: + { + event->reName ("PeerImp::validation"); + boost::shared_ptr msg = boost::make_shared (); - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recv (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvValidation (msg, lock); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; + #if 0 -#endif + case protocol::mtGET_VALIDATION: + { + protocol::TM msg; - case protocol::mtGET_OBJECTS: - { - event->reName ("PeerImp::getobjects"); - boost::shared_ptr msg = boost::make_shared (); + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recv (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvGetObjectByHash (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + #endif - case protocol::mtPROOFOFWORK: - { - event->reName ("PeerImp::proofofwork"); - protocol::TMProofWork msg; + case protocol::mtGET_OBJECTS: + { + event->reName ("PeerImp::getobjects"); + boost::shared_ptr msg = boost::make_shared (); - if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) - recvProofWork (msg); - else - WriteLog (lsWARNING, Peer) << "parse error: " << type; - } - break; + if (msg->ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvGetObjectByHash (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; + + case protocol::mtPROOFOFWORK: + { + event->reName ("PeerImp::proofofwork"); + protocol::TMProofWork msg; + + if (msg.ParseFromArray (&mReadbuf[PackedMessage::kHeaderBytes], mReadbuf.size () - PackedMessage::kHeaderBytes)) + recvProofWork (msg); + else + WriteLog (lsWARNING, Peer) << "parse error: " << type; + } + break; - default: - event->reName ("PeerImp::unknown"); - WriteLog (lsWARNING, Peer) << "Unknown Msg: " << type; - WriteLog (lsWARNING, Peer) << strHex (&mReadbuf[0], mReadbuf.size ()); + default: + event->reName ("PeerImp::unknown"); + WriteLog (lsWARNING, Peer) << "Unknown Msg: " << type; + WriteLog (lsWARNING, Peer) << strHex (&mReadbuf[0], mReadbuf.size ()); + } } } } @@ -1111,9 +1116,9 @@ static void checkTransaction (Job&, int flags, SerializedTransaction::pointer st #endif } -void PeerImp::recvTransaction (protocol::TMTransaction& packet, ScopedLock& MasterLockHolder) +void PeerImp::recvTransaction (protocol::TMTransaction& packet, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); Transaction::pointer tx; #ifndef TRUST_NETWORK @@ 
-1375,9 +1380,9 @@ static void checkValidation (Job&, SerializedValidation::pointer val, uint256 si #endif } -void PeerImp::recvValidation (const boost::shared_ptr& packet, ScopedLock& MasterLockHolder) +void PeerImp::recvValidation (const boost::shared_ptr& packet, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); if (packet->validation ().size () < 50) { @@ -1463,9 +1468,9 @@ void PeerImp::recvGetContacts (protocol::TMGetContacts& packet) // Return a list of your favorite people // TODO: filter out all the LAN peers // TODO: filter out the peer you are talking to -void PeerImp::recvGetPeers (protocol::TMGetPeers& packet, ScopedLock& MasterLockHolder) +void PeerImp::recvGetPeers (protocol::TMGetPeers& packet, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); std::vector addrs; getApp().getPeers ().getTopNAddrs (30, addrs); @@ -1779,7 +1784,7 @@ void PeerImp::recvStatus (protocol::TMStatusChange& packet) mMaxLedger = packet.lastseq (); } -void PeerImp::recvGetLedger (protocol::TMGetLedger& packet, ScopedLock& MasterLockHolder) +void PeerImp::recvGetLedger (protocol::TMGetLedger& packet, Application::ScopedLockType& masterLockHolder) { SHAMap::pointer map; protocol::TMLedgerData reply; @@ -1929,7 +1934,7 @@ void PeerImp::recvGetLedger (protocol::TMGetLedger& packet, ScopedLock& MasterLo } if (ledger->isImmutable ()) - MasterLockHolder.unlock (); + masterLockHolder.unlock (); else { WriteLog (lsWARNING, Peer) << "Request for data from mutable ledger"; @@ -2063,9 +2068,9 @@ void PeerImp::recvGetLedger (protocol::TMGetLedger& packet, ScopedLock& MasterLo sendPacket (oPacket, true); } -void PeerImp::recvLedger (const boost::shared_ptr& packet_ptr, ScopedLock& MasterLockHolder) +void PeerImp::recvLedger (const boost::shared_ptr& packet_ptr, Application::ScopedLockType& masterLockHolder) { - MasterLockHolder.unlock (); + masterLockHolder.unlock (); protocol::TMLedgerData& packet = *packet_ptr; if (packet.nodes ().size () <= 0) diff --git a/src/cpp/ripple/ripple_SHAMap.cpp b/src/cpp/ripple/ripple_SHAMap.cpp index 3782e7a4a9..4964f1c1cb 100644 --- a/src/cpp/ripple/ripple_SHAMap.cpp +++ b/src/cpp/ripple/ripple_SHAMap.cpp @@ -822,6 +822,10 @@ SHAMapTreeNode::pointer SHAMap::fetchNodeExternalNT (const SHAMapNode& id, uint2 if (!getApp().running ()) return ret; + // These are for diagnosing a crash on exit + Application& app (getApp ()); + NodeStore& nodeStore (app.getNodeStore ()); + NodeObject::pointer obj (getApp().getNodeStore ().retrieve (hash)); if (!obj) diff --git a/src/cpp/ripple/ripple_TransactionAcquire.cpp b/src/cpp/ripple/ripple_TransactionAcquire.cpp index 6f4a020fc9..f7cbef4fb7 100644 --- a/src/cpp/ripple/ripple_TransactionAcquire.cpp +++ b/src/cpp/ripple/ripple_TransactionAcquire.cpp @@ -18,9 +18,13 @@ TransactionAcquire::TransactionAcquire (uint256 const& hash) : PeerSet (hash, TX static void TACompletionHandler (uint256 hash, SHAMap::pointer map) { - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - getApp().getOPs ().mapComplete (hash, map); - getApp().getInboundLedgers ().dropLedger (hash); + { + Application::ScopedLockType lock (getApp ().getMasterLock (), __FILE__, __LINE__); + + getApp().getOPs ().mapComplete (hash, map); + + getApp().getInboundLedgers ().dropLedger (hash); + } } void TransactionAcquire::done () @@ -50,7 +54,7 @@ void TransactionAcquire::onTimer (bool progress) { WriteLog (lsWARNING, TransactionAcquire) << "Ten timeouts on TX 
set " << getHash (); { - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); + Application::ScopedLockType lock (getApp().getMasterLock (), __FILE__, __LINE__); if (getApp().getOPs ().stillNeedTXSet (mHash)) { From a762781343ae85d65d495ffebb865e8744d8ab76 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 18 Jul 2013 10:23:57 -0700 Subject: [PATCH 26/50] Convert to beast UnitTest --- Builds/VisualStudio2012/RippleD.vcxproj | 6 -- .../VisualStudio2012/RippleD.vcxproj.filters | 3 - TODO.txt | 2 + modules/ripple_app/ripple_app.cpp | 1 - src/cpp/ripple/ripple_ProofOfWork.cpp | 3 +- src/cpp/ripple/ripple_ProofOfWorkFactory.cpp | 69 +++++++++++++++++++ .../ripple_ProofOfWorkFactoryUnitTests.cpp | 61 ---------------- 7 files changed, 73 insertions(+), 72 deletions(-) diff --git a/Builds/VisualStudio2012/RippleD.vcxproj b/Builds/VisualStudio2012/RippleD.vcxproj index b741d0fa69..f4e259fdc9 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj +++ b/Builds/VisualStudio2012/RippleD.vcxproj @@ -820,12 +820,6 @@ true true - - true - true - true - true - true true diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index 964828c715..a2d81919ff 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -594,9 +594,6 @@ [1] Ripple\ripple_app\_misc - - [1] Ripple\ripple_app\_misc - [1] Ripple\ripple_app\_misc diff --git a/TODO.txt b/TODO.txt index 91f07811af..38c8cdd5b9 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,6 +3,8 @@ RIPPLE TODO -------------------------------------------------------------------------------- Vinnie's Short List (Changes day to day) +- Memory NodeStore::Backend for unit tests +- Improved Mutex to track deadlocks - Convert some Ripple boost unit tests to Beast. 
- Eliminate new technical in NodeStore::Backend - Work on KeyvaDB diff --git a/modules/ripple_app/ripple_app.cpp b/modules/ripple_app/ripple_app.cpp index 305bd0c829..1d5bb112e0 100644 --- a/modules/ripple_app/ripple_app.cpp +++ b/modules/ripple_app/ripple_app.cpp @@ -431,7 +431,6 @@ static DH* handleTmpDh (SSL* ssl, int is_export, int iKeyLength) #include "ledger/LedgerUnitTests.cpp" #include "src/cpp/ripple/ripple_SHAMapUnitTests.cpp" #include "src/cpp/ripple/ripple_SHAMapSyncUnitTests.cpp" -#include "src/cpp/ripple/ripple_ProofOfWorkFactoryUnitTests.cpp" // Requires ProofOfWorkFactory.h #include "src/cpp/ripple/ripple_SerializedTransactionUnitTests.cpp" //------------------------------------------------------------------------------ diff --git a/src/cpp/ripple/ripple_ProofOfWork.cpp b/src/cpp/ripple/ripple_ProofOfWork.cpp index 03f407f7e7..3a6f7a0839 100644 --- a/src/cpp/ripple/ripple_ProofOfWork.cpp +++ b/src/cpp/ripple/ripple_ProofOfWork.cpp @@ -177,4 +177,5 @@ bool ProofOfWork::validateToken (const std::string& strToken) return boost::regex_match (strToken, smMatch, reToken); } -// vim:ts=4 +//------------------------------------------------------------------------------ + diff --git a/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp b/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp index 0cd7c776dd..c749b287cf 100644 --- a/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp +++ b/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp @@ -231,3 +231,72 @@ IProofOfWorkFactory* IProofOfWorkFactory::New () return new ProofOfWorkFactory; } +//------------------------------------------------------------------------------ + +class ProofOfWorkTests : public UnitTest +{ +public: + ProofOfWorkTests () : UnitTest ("ProofOfWork") + { + } + + void runTest () + { + using namespace ripple; + + ProofOfWorkFactory gen; + ProofOfWork pow = gen.getProof (); + + String s; + + s << "solve difficulty " << String (pow.getDifficulty ()); + beginTest ("solve"); + + uint256 solution = pow.solve (16777216); + + expect (! solution.isZero (), "Should be solved"); + + expect (pow.checkSolution (solution), "Should be checked"); + + // Why is this emitted? 
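For readers unfamiliar with the solve/checkSolution contract this test exercises: a proof-of-work solver simply iterates nonces until a hash of the token plus the nonce falls at or below a difficulty target, and the issuer re-runs the same hash to check a claimed solution. The sketch below shows only that shape; the stand-in FNV-1a hash and the names ToyProofOfWork and fnv1a are hypothetical, not rippled's actual hashing or target encoding.

#include <sstream>
#include <string>

typedef unsigned long long uint64; // stand-in for the codebase's uint64

// Stand-in 64-bit FNV-1a hash, chosen only because it is self-contained.
static uint64 fnv1a (std::string const& data)
{
    uint64 h = 14695981039346656037ULL;

    for (std::size_t i = 0; i < data.size (); ++i)
    {
        h ^= static_cast <unsigned char> (data [i]);
        h *= 1099511628211ULL;
    }

    return h;
}

struct ToyProofOfWork
{
    std::string token; // issued by the factory
    uint64 target;     // lower target == higher difficulty

    bool checkSolution (uint64 nonce) const
    {
        std::ostringstream ss;
        ss << token << nonce;

        return fnv1a (ss.str ()) <= target;
    }

    // Try nonces up to maxIterations, as pow.solve (16777216) does above;
    // the real solve() returns a zero uint256 on failure, which the test
    // checks with isZero().
    bool solve (uint64 maxIterations, uint64& nonceOut) const
    {
        for (uint64 nonce = 0; nonce < maxIterations; ++nonce)
        {
            if (checkSolution (nonce))
            {
                nonceOut = nonce;
                return true;
            }
        }

        return false;
    }
};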
+ //WriteLog (lsDEBUG, ProofOfWork) << "A bad nonce error is expected"; + + POWResult r = gen.checkProof (pow.getToken (), uint256 ()); + + expect (r == powBADNONCE, "Should show bad nonce for empty solution"); + + expect (gen.checkProof (pow.getToken (), solution) == powOK, "Solution should check with issuer"); + + //WriteLog (lsDEBUG, ProofOfWork) << "A reused nonce error is expected"; + + expect (gen.checkProof (pow.getToken (), solution) == powREUSED, "Reuse solution should be detected"); + + #ifdef SOLVE_POWS + + for (int i = 0; i < 12; ++i) + { + gen.setDifficulty (i); + ProofOfWork pow = gen.getProof (); + WriteLog (lsINFO, ProofOfWork) << "Level: " << i << ", Estimated difficulty: " << pow.getDifficulty (); + uint256 solution = pow.solve (131072); + + if (solution.isZero ()) + { + //WriteLog (lsINFO, ProofOfWork) << "Giving up"; + } + else + { + //WriteLog (lsINFO, ProofOfWork) << "Solution found"; + + if (gen.checkProof (pow.getToken (), solution) != powOK) + { + //WriteLog (lsFATAL, ProofOfWork) << "Solution fails"; + } + } + } + + #endif + } +}; + +static ProofOfWorkTests proofOfWorkTests; diff --git a/src/cpp/ripple/ripple_ProofOfWorkFactoryUnitTests.cpp b/src/cpp/ripple/ripple_ProofOfWorkFactoryUnitTests.cpp index e96eccd485..121e1743be 100644 --- a/src/cpp/ripple/ripple_ProofOfWorkFactoryUnitTests.cpp +++ b/src/cpp/ripple/ripple_ProofOfWorkFactoryUnitTests.cpp @@ -4,64 +4,3 @@ */ //============================================================================== -BOOST_AUTO_TEST_SUITE (ProofOfWork_suite) - -BOOST_AUTO_TEST_CASE ( ProofOfWork_test ) -{ - using namespace ripple; - - ProofOfWorkFactory gen; - ProofOfWork pow = gen.getProof (); - WriteLog (lsINFO, ProofOfWork) << "Estimated difficulty: " << pow.getDifficulty (); - uint256 solution = pow.solve (16777216); - - if (solution.isZero ()) - BOOST_FAIL ("Unable to solve proof of work"); - - if (!pow.checkSolution (solution)) - BOOST_FAIL ("Solution did not check"); - - WriteLog (lsDEBUG, ProofOfWork) << "A bad nonce error is expected"; - POWResult r = gen.checkProof (pow.getToken (), uint256 ()); - - if (r != powBADNONCE) - { - Log (lsFATAL) << "POWResult = " << static_cast (r); - BOOST_FAIL ("Empty solution didn't show bad nonce"); - } - - if (gen.checkProof (pow.getToken (), solution) != powOK) - BOOST_FAIL ("Solution did not check with issuer"); - - WriteLog (lsDEBUG, ProofOfWork) << "A reused nonce error is expected"; - - if (gen.checkProof (pow.getToken (), solution) != powREUSED) - BOOST_FAIL ("Reuse solution not detected"); - -#ifdef SOLVE_POWS - - for (int i = 0; i < 12; ++i) - { - gen.setDifficulty (i); - ProofOfWork pow = gen.getProof (); - WriteLog (lsINFO, ProofOfWork) << "Level: " << i << ", Estimated difficulty: " << pow.getDifficulty (); - uint256 solution = pow.solve (131072); - - if (solution.isZero ()) - WriteLog (lsINFO, ProofOfWork) << "Giving up"; - else - { - WriteLog (lsINFO, ProofOfWork) << "Solution found"; - - if (gen.checkProof (pow.getToken (), solution) != powOK) - { - WriteLog (lsFATAL, ProofOfWork) << "Solution fails"; - } - } - } - -#endif - -} - -BOOST_AUTO_TEST_SUITE_END () From 46c6489fc8afb195b0754ec5e91241e03c8c3360 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Thu, 18 Jul 2013 12:24:52 -0700 Subject: [PATCH 27/50] Update config example --- rippled-example.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/rippled-example.cfg b/rippled-example.cfg index a4173c6ced..8f760c3e68 100644 --- a/rippled-example.cfg +++ b/rippled-example.cfg @@ -233,6 +233,7 @@ # HyperLevelDB Use an 
improved version of LevelDB (preferred) # LevelDB Use Google's LevelDB database (deprecated) # MDB Use MDB +# none Use no backend # KeyvaDB Use OpenCoin's KeyvaDB (experimental) # Required keys: # path Location to store the database (all types) From 48935083487a0112f632f2e77e64a835825d573d Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 07:17:40 -0700 Subject: [PATCH 28/50] Add group and run type to UnitTest --- .../containers/beast_AbstractFifo.cpp | 2 +- .../beast_core/diagnostic/beast_UnitTest.cpp | 101 +++++++++++------- .../beast_core/diagnostic/beast_UnitTest.h | 54 +++++++--- .../modules/beast_core/files/beast_File.cpp | 2 +- .../files/beast_RandomAccessFile.cpp | 2 +- .../modules/beast_core/json/beast_JSON.cpp | 2 +- .../modules/beast_core/maths/beast_Random.cpp | 2 +- .../streams/beast_MemoryInputStream.cpp | 2 +- .../modules/beast_core/text/beast_String.cpp | 2 +- .../beast_core/text/beast_TextDiff.cpp | 2 +- .../beast_core/threads/beast_ChildProcess.cpp | 2 +- .../beast_core/threads/beast_Thread.cpp | 2 +- .../zip/beast_GZIPCompressorOutputStream.cpp | 2 +- .../math/beast_UnsignedInteger.cpp | 2 +- 14 files changed, 111 insertions(+), 68 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp b/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp index 004a8b94e2..32f3ff5f70 100644 --- a/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp +++ b/Subtrees/beast/modules/beast_core/containers/beast_AbstractFifo.cpp @@ -129,7 +129,7 @@ void AbstractFifo::finishedRead (int numRead) noexcept class AbstractFifoTests : public UnitTest { public: - AbstractFifoTests() : UnitTest ("Abstract Fifo") + AbstractFifoTests() : UnitTest ("Abstract Fifo", "beast") { } diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp index 056d5fb248..8905ed2a12 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.cpp @@ -21,8 +21,13 @@ */ //============================================================================== -UnitTest::UnitTest (const String& name_) - : name (name_), runner (nullptr) +UnitTest::UnitTest (String const& name, + String const& group, + When when) + : m_name (name) + , m_group (group) + , m_when (when) + , m_runner (nullptr) { getAllTests().add (this); } @@ -32,19 +37,25 @@ UnitTest::~UnitTest() getAllTests().removeFirstMatchingValue (this); } -Array& UnitTest::getAllTests() +UnitTest::TestList& UnitTest::getAllTests() { - static Array tests; - return tests; + static TestList s_tests; + + return s_tests; } -void UnitTest::initialise() {} -void UnitTest::shutdown() {} - -void UnitTest::performTest (UnitTests* const runner_) +void UnitTest::initialise() { - bassert (runner_ != nullptr); - runner = runner_; +} + +void UnitTest::shutdown() +{ +} + +void UnitTest::performTest (UnitTests* const runner) +{ + bassert (runner != nullptr); + m_runner = runner; initialise(); runTest(); @@ -53,23 +64,24 @@ void UnitTest::performTest (UnitTests* const runner_) void UnitTest::logMessage (const String& message) { - runner->logMessage (message); + m_runner->logMessage (message); } void UnitTest::beginTest (const String& testName) { - runner->beginNewTest (this, testName); + m_runner->beginNewTest (this, testName); } void UnitTest::expect (const bool result, const String& failureMessage) { if (result) - runner->addPass(); + 
m_runner->addPass(); else - runner->addFail (failureMessage); + m_runner->addFail (failureMessage); } //============================================================================== + UnitTests::UnitTests() : currentTest (nullptr), assertOnFailure (true), @@ -105,35 +117,52 @@ void UnitTests::resultsUpdated() { } +void UnitTests::runTest (UnitTest& test) +{ + try + { + test.performTest (this); + } + catch (std::exception& e) + { + String s; + s << "Got an exception: " << e.what (); + addFail (s); + } + catch (...) + { + addFail ("Got an unhandled exception"); + } +} + void UnitTests::runTest (String const& name) { results.clear(); resultsUpdated(); - Array <UnitTest*>& tests = UnitTest::getAllTests (); + UnitTest::TestList& tests (UnitTest::getAllTests ()); for (int i = 0; i < tests.size(); ++i) { UnitTest& test = *tests [i]; - if (test.getName () == name) + if (test.getGroup () == name && test.getWhen () == UnitTest::runAlways) { - try - { - test.performTest (this); - } - catch (...) - { - addFail ("An unhandled exception was thrown!"); - } - + runTest (test); + } + else if (test.getName () == name) + { + runTest (test); break; } + } } -void UnitTests::runTests (const Array <UnitTest*>& tests) +void UnitTests::runAllTests () { + UnitTest::TestList& tests (UnitTest::getAllTests ()); + results.clear(); resultsUpdated(); @@ -142,22 +171,14 @@ void UnitTests::runTests (const Array <UnitTest*>& tests) if (shouldAbortTests()) break; - try - { - tests.getUnchecked(i)->performTest (this); - } - catch (...) - { - addFail ("An unhandled exception was thrown!"); - } + UnitTest& test = *tests [i]; + + if (test.getWhen () == UnitTest::runAlways) + runTest (test); } endTest(); -} -void UnitTests::runAllTests() -{ - runTests (UnitTest::getAllTests()); } void UnitTests::logMessage (const String& message) @@ -177,7 +198,7 @@ void UnitTests::beginNewTest (UnitTest* const test, const String& subCategory) TestResult* const r = new TestResult(); results.add (r); - r->unitTestName = test->getName(); + r->unitTestName = test->getGroup() + "::" + test->getName(); r->subcategoryName = subCategory; r->passes = 0; r->failures = 0; diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h index 6945bf7660..f7e8466c18 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTest.h @@ -28,7 +28,6 @@ #include "../containers/beast_OwnedArray.h" class UnitTests; - /** This is a base class for classes that perform a unit test. To write a test using this class, your code should look something like this: @@ -70,15 +69,38 @@ class UnitTests; class BEAST_API UnitTest : Uncopyable { public: + enum When + { + runAlways, + runManual + }; + + /** The type of a list of tests. + */ + typedef Array <UnitTest*> TestList; + //============================================================================== - /** Creates a test with the given name. */ - explicit UnitTest (String const& name); + /** Creates a test with the given name, group, and run option. + + The group is used when you want to run all tests in a particular group + instead of all tests in general. The run option allows you to write some + tests that are only available manually. For example, a performance unit + test that takes a long time, which you might not want to run every time + you run all tests. + */ + explicit UnitTest (String const& name, String const& group = "", When when = runAlways); /** Destructor.
*/ virtual ~UnitTest(); /** Returns the name of the test. */ - const String& getName() const noexcept { return name; } + const String& getName() const noexcept { return m_name; } + + /** Returns the group of the test. */ + String const& getGroup () const noexcept { return m_group; } + + /** Returns the run option of the test. */ + When getWhen () const noexcept { return m_when; } /** Runs the test, using the specified UnitTests. You shouldn't need to call this method directly - use @@ -87,7 +109,7 @@ public: void performTest (UnitTests* runner); /** Returns the set of all UnitTest objects that currently exist. */ - static Array& getAllTests(); + static TestList& getAllTests(); //============================================================================== /** You can optionally implement this method to set up your test. @@ -156,14 +178,16 @@ public: //============================================================================== /** Writes a message to the test log. - This can only be called from within your runTest() method. + This can only be called during your runTest() method. */ void logMessage (const String& message); private: //============================================================================== - const String name; - UnitTests* runner; + String const m_name; + String const m_group; + When const m_when; + UnitTests* m_runner; }; //============================================================================== @@ -188,17 +212,15 @@ public: /** Destructor. */ virtual ~UnitTests(); - /** Run a particular test. + /** Run the specified unit test. + + Subclasses can override this to do extra stuff. */ + virtual void runTest (UnitTest& test); + + /** Run a particular test or group. */ void runTest (String const& name); - /** Runs a set of tests. - - The tests are performed in order, and the results are logged. To run all the - registered UnitTest objects that exist, use runAllTests(). - */ - void runTests (const Array& tests); - /** Runs all the UnitTest objects that currently exist. This calls runTests() for all the objects listed in UnitTest::getAllTests(). 
*/ diff --git a/Subtrees/beast/modules/beast_core/files/beast_File.cpp b/Subtrees/beast/modules/beast_core/files/beast_File.cpp index f3a4f8713a..55bc1cafbd 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_File.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_File.cpp @@ -926,7 +926,7 @@ MemoryMappedFile::MemoryMappedFile (const File& file, const Range& fileRa class FileTests : public UnitTest { public: - FileTests() : UnitTest ("File") {} + FileTests() : UnitTest ("File", "beast") {} void runTest() { diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index 33febec9c4..0131f520a0 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -158,7 +158,7 @@ class RandomAccessFileTests : public UnitTest { public: RandomAccessFileTests () - : UnitTest ("RandomAccessFile") + : UnitTest ("RandomAccessFile", "beast") , numRecords (1000) , seedValue (50) { diff --git a/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp b/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp index 518c5d45a8..216bdf7741 100644 --- a/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp +++ b/Subtrees/beast/modules/beast_core/json/beast_JSON.cpp @@ -535,7 +535,7 @@ void JSON::writeToStream (OutputStream& output, const var& data, const bool allO class JSONTests : public UnitTest { public: - JSONTests() : UnitTest ("JSON") { } + JSONTests() : UnitTest ("JSON", "beast") { } static String createRandomWideCharString (Random& r) { diff --git a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp index 4b8ac22b51..9f381e8479 100644 --- a/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp +++ b/Subtrees/beast/modules/beast_core/maths/beast_Random.cpp @@ -159,7 +159,7 @@ void Random::fillBitsRandomly (BigInteger& arrayToChange, int startBit, int numB class RandomTests : public UnitTest { public: - RandomTests() : UnitTest ("Random") {} + RandomTests() : UnitTest ("Random", "beast") {} void runTest() { diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp index eef9e80fd0..2cdd4198a2 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryInputStream.cpp @@ -92,7 +92,7 @@ int64 MemoryInputStream::getPosition() class MemoryStreamTests : public UnitTest { public: - MemoryStreamTests() : UnitTest ("MemoryStream") { } + MemoryStreamTests() : UnitTest ("MemoryStream", "beast") { } void runTest() { diff --git a/Subtrees/beast/modules/beast_core/text/beast_String.cpp b/Subtrees/beast/modules/beast_core/text/beast_String.cpp index dff203b2eb..55ad6d5929 100644 --- a/Subtrees/beast/modules/beast_core/text/beast_String.cpp +++ b/Subtrees/beast/modules/beast_core/text/beast_String.cpp @@ -2078,7 +2078,7 @@ String String::fromUTF8 (const char* const buffer, int bufferSizeBytes) class StringTests : public UnitTest { public: - StringTests() : UnitTest ("String") { } + StringTests() : UnitTest ("String", "beast") { } template struct TestUTFConversion diff --git a/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp b/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp index 8589683327..6da5b587bd 100644 --- a/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp 
+++ b/Subtrees/beast/modules/beast_core/text/beast_TextDiff.cpp @@ -177,7 +177,7 @@ String TextDiff::Change::appliedTo (const String& text) const noexcept class DiffTests : public UnitTest { public: - DiffTests() : UnitTest ("TextDiff") {} + DiffTests() : UnitTest ("TextDiff", "beast") {} static String createString() { diff --git a/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp b/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp index da7a9accd9..0c08aa82d4 100644 --- a/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp +++ b/Subtrees/beast/modules/beast_core/threads/beast_ChildProcess.cpp @@ -61,7 +61,7 @@ String ChildProcess::readAllProcessOutput() class ChildProcessTests : public UnitTest { public: - ChildProcessTests() : UnitTest ("ChildProcess") {} + ChildProcessTests() : UnitTest ("ChildProcess", "beast") {} void runTest() { diff --git a/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp b/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp index 1ffb1b9c6e..7685cfa00a 100644 --- a/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp +++ b/Subtrees/beast/modules/beast_core/threads/beast_Thread.cpp @@ -255,7 +255,7 @@ void SpinLock::enter() const noexcept class AtomicTests : public UnitTest { public: - AtomicTests() : UnitTest ("Atomic") {} + AtomicTests() : UnitTest ("Atomic", "beast") {} void runTest() { diff --git a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp index da68fe9941..7d66da47a0 100644 --- a/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/zip/beast_GZIPCompressorOutputStream.cpp @@ -161,7 +161,7 @@ bool GZIPCompressorOutputStream::setPosition (int64 /*newPosition*/) class GZIPTests : public UnitTest { public: - GZIPTests() : UnitTest ("GZIP") {} + GZIPTests() : UnitTest ("GZIP", "beast") {} void runTest() { diff --git a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp index a0532d38fa..b9a2b5d18f 100644 --- a/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp +++ b/Subtrees/beast/modules/beast_crypto/math/beast_UnsignedInteger.cpp @@ -20,7 +20,7 @@ class UnsignedIntegerTests : public UnitTest { public: - UnsignedIntegerTests () : UnitTest ("UnsignedInteger") + UnsignedIntegerTests () : UnitTest ("UnsignedInteger", "beast") { } From 1975d81153006ea1c43612001a8489c7d2ef0fae Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 09:00:34 -0700 Subject: [PATCH 29/50] Update RandomAccessFile unit test --- .../files/beast_RandomAccessFile.cpp | 97 +++++++++++-------- .../beast_core/files/beast_RandomAccessFile.h | 7 +- .../native/beast_posix_SharedCode.h | 64 +++++++----- 3 files changed, 103 insertions(+), 65 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index 0131f520a0..8ca6843491 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -3,10 +3,6 @@ This file is part of Beast: https://github.com/vinniefalco/Beast Copyright 2013, Vinnie Falco - Portions of this file are from JUCE. - Copyright (c) 2013 - Raw Material Software Ltd. 
- Please visit http://www.juce.com - Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. @@ -54,16 +50,17 @@ void RandomAccessFile::close () Result RandomAccessFile::setPosition (FileOffset newPosition) { - Result result (Result::ok ()); - if (newPosition != currentPosition) { flushBuffer (); - result = nativeSetPosition (newPosition); + // VFALCO NOTE I dislike return from the middle but + // Result::ok() is showing up in the profile + // + return nativeSetPosition (newPosition); } - return result; + return Result::ok (); } Result RandomAccessFile::read (void* buffer, ByteCount numBytes, ByteCount* pActualAmount) @@ -157,16 +154,13 @@ Result RandomAccessFile::flushBuffer () class RandomAccessFileTests : public UnitTest { public: - RandomAccessFileTests () - : UnitTest ("RandomAccessFile", "beast") - , numRecords (1000) - , seedValue (50) + RandomAccessFileTests () : UnitTest ("RandomAccessFile", "beast") { } /* For this test we will create a file which consists of a fixed number of variable length records. Each record is numbered sequentially - start at 1. To calculate the position of each record we first build + starting at 0. To calculate the position of each record we first build a table of size/offset pairs using a pseudorandom number generator. */ struct Record @@ -206,6 +200,8 @@ public: repeatableShuffle (numRecords, records, seedValue); } + // Write all the records to the file. + // The payload is pseudo-randomly generated. void writeRecords (RandomAccessFile& file, int numRecords, HeapBlock const& records, @@ -229,23 +225,26 @@ public: } } + // Read the records and verify the consistency. void readRecords (RandomAccessFile& file, int numRecords, - HeapBlock const & records, + HeapBlock const& records, int64 seedValue) { using namespace UnitTestUtilities; for (int i = 0; i < numRecords; ++i) { - int const bytes = records [i].bytes; + Record const& record (records [i]); + + int const bytes = record.bytes; Payload p1 (bytes); Payload p2 (bytes); - p1.repeatableRandomFill (bytes, bytes, records [i].index + seedValue); + p1.repeatableRandomFill (bytes, bytes, record.index + seedValue); - file.setPosition (records [i].offset); + file.setPosition (record.offset); Result result = file.read (p2.data.getData (), bytes); @@ -260,48 +259,68 @@ public: } } - void testFile (int const bufferSize) + // Perform the test at the given buffer size. 
+ void testFile (int const numRecords, int const bufferSize) { using namespace UnitTestUtilities; - String s; - s << "bufferSize = " << String (bufferSize); - beginTest (s); + int const seedValue = 50; + + beginTest (String ("numRecords=") + String (numRecords) + ", bufferSize=" + String (bufferSize)); int const maxPayload = bmax (1000, bufferSize * 2); - RandomAccessFile file (bufferSize); + // Calculate the path + File const path (File::createTempFile ("RandomAccessFile")); - Result result = file.open (File::createTempFile ("tests"), RandomAccessFile::readWrite); + // Create a predictable set of records + HeapBlock records (numRecords); + createRecords (records, numRecords, maxPayload, seedValue); - expect (result.wasOk (), "Should be ok"); + Result result (Result::ok ()); + + { + // Create the file + RandomAccessFile file (bufferSize); + result = file.open (path, RandomAccessFile::readWrite); + expect (result.wasOk (), "Should be ok"); + + if (result.wasOk ()) + { + writeRecords (file, numRecords, records, seedValue); + + readRecords (file, numRecords, records, seedValue); + + repeatableShuffle (numRecords, records, seedValue); + + readRecords (file, numRecords, records, seedValue); + } + } if (result.wasOk ()) { - HeapBlock records (numRecords); + // Re-open the file in read only mode + RandomAccessFile file (bufferSize); + result = file.open (path, RandomAccessFile::readOnly); + expect (result.wasOk (), "Should be ok"); - createRecords (records, numRecords, maxPayload, seedValue); - - writeRecords (file, numRecords, records, seedValue); - - readRecords (file, numRecords, records, seedValue); - - repeatableShuffle (numRecords, records, seedValue); - - readRecords (file, numRecords, records, seedValue); + if (result.wasOk ()) + { + readRecords (file, numRecords, records, seedValue); + } } } void runTest () { - testFile (0); - testFile (1000); - testFile (10000); + int const numRecords = 1000; + + testFile (numRecords, 0); + testFile (numRecords, 1000); + testFile (numRecords, 10000); } private: - int const numRecords; - int64 const seedValue; }; static RandomAccessFileTests randomAccessFileTests; diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h index b97e17dcb1..f2eaca1ccf 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h @@ -41,6 +41,9 @@ @note All files are opened in binary mode. No text newline conversions are performed. + @note None of these members are thread safe. The caller is responsible + for synchronization. + @see FileInputStream, FileOutputStream */ class BEAST_API RandomAccessFile : Uncopyable, LeakChecked @@ -140,7 +143,7 @@ public: by `buffer` is at least as large as `bytesToRead`. @note The file must have been opened with read permission. - + @param buffer The memory to store the incoming data @param numBytes The number of bytes to read. @param pActualAmount Pointer to store the actual amount read, or `nullptr`. 
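Putting the pieces of this test together, a typical RandomAccessFile round trip looks like the sketch below. It is assembled from the calls the unit test exercises (open, setPosition, read, write); the exact write() signature is an assumption based on nativeWrite() further down, so treat the whole fragment as illustrative rather than authoritative.

// Hypothetical usage sketch for RandomAccessFile.
void randomAccessFileExample ()
{
    File const path (File::createTempFile ("RandomAccessFileExample"));

    RandomAccessFile file (0); // bufferSize == 0, as in testFile (numRecords, 0)

    Result result = file.open (path, RandomAccessFile::readWrite);

    if (result.wasOk ())
    {
        char const payload [] = "hello";

        // Write at offset 100, then seek back and read the bytes again.
        file.setPosition (100);
        file.write (payload, sizeof (payload));

        char check [sizeof (payload)] = { 0 };

        file.setPosition (100);
        file.read (check, sizeof (check));

        // check now holds the same bytes as payload.
    }
}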
@@ -212,7 +215,7 @@ public: int read (void* destBuffer, int maxBytesToRead) { size_t actualBytes = 0; - m_file.read (destBuffer, maxBytesToRead, &actualBytes); + m_file.read (destBuffer, maxBytesToRead, &actualBytes); return actualBytes; } diff --git a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h index 622d48a141..222490176e 100644 --- a/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h +++ b/Subtrees/beast/modules/beast_core/native/beast_posix_SharedCode.h @@ -586,54 +586,70 @@ Result RandomAccessFile::nativeSetPosition (FileOffset newPosition) { bassert (isOpen ()); - Result result (Result::ok ()); + off_t const actualPosition = lseek (getFD (fileHandle), newPosition, SEEK_SET); - off_t const actual = lseek (getFD (fileHandle), newPosition, SEEK_SET); + currentPosition = actualPosition; - if (actual != newPosition) - result = getResultForErrno(); + if (actualPosition != newPosition) + { + // VFALCO NOTE I dislike return from the middle but + // Result::ok() is showing up in the profile + // + return getResultForErrno(); + } - return result; + return Result::ok(); } Result RandomAccessFile::nativeRead (void* buffer, ByteCount numBytes, ByteCount* pActualAmount) { bassert (isOpen ()); - Result result (Result::ok ()); + ssize_t bytesRead = ::read (getFD (fileHandle), buffer, numBytes); - ssize_t amount = ::read (getFD (fileHandle), buffer, numBytes); - - if (amount < 0) + if (bytesRead < 0) { - result = getResultForErrno(); - amount = 0; + if (pActualAmount != nullptr) + *pActualAmount = 0; + + // VFALCO NOTE I dislike return from the middle but + // Result::ok() is showing up in the profile + // + return getResultForErrno(); } - if (pActualAmount != nullptr) - *pActualAmount = amount; + currentPosition += bytesRead; - return result; + if (pActualAmount != nullptr) + *pActualAmount = bytesRead; + + return Result::ok(); } Result RandomAccessFile::nativeWrite (void const* data, ByteCount numBytes, size_t* pActualAmount) { bassert (isOpen ()); - Result result (Result::ok ()); + ssize_t bytesWritten = ::write (getFD (fileHandle), data, numBytes); - ssize_t actual = ::write (getFD (fileHandle), data, numBytes); - - if (actual == -1) + // write(3) says that the actual return will be exactly -1 on + // error, but we will assume anything negative indicates failure. + // + if (bytesWritten < 0) { - result = getResultForErrno(); - actual = 0; + if (pActualAmount != nullptr) + *pActualAmount = 0; + + // VFALCO NOTE I dislike return from the middle but + // Result::ok() is showing up in the profile + // + return getResultForErrno(); } if (pActualAmount != nullptr) - *pActualAmount = actual; + *pActualAmount = bytesWritten; - return result; + return Result::ok(); } Result RandomAccessFile::nativeTruncate () @@ -654,14 +670,14 @@ Result RandomAccessFile::nativeFlush () if (fsync (getFD (fileHandle)) == -1) result = getResultForErrno(); - #if BEAST_ANDROID +#if BEAST_ANDROID // This stuff tells the OS to asynchronously update the metadata // that the OS has cached about the file - this metadata is used // when the device is acting as a USB drive, and unless it's explicitly // refreshed, it'll get out of step with the real file.
const LocalRef t (javaString (file.getFullPathName())); android.activity.callVoidMethod (BeastAppActivity.scanFile, t.get()); - #endif +#endif return result; } From db26c3715639936e560986c9fea2cac66d704716 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 07:17:47 -0700 Subject: [PATCH 30/50] Refactor NodeStore --- BeastConfig.h | 15 + TODO.txt | 9 +- modules/ripple_app/ledger/Ledger.cpp | 10 +- .../ledger/ripple_InboundLedger.cpp | 4 +- .../ripple_HyperLevelDBBackendFactory.cpp | 215 ++- .../node/ripple_HyperLevelDBBackendFactory.h | 5 +- modules/ripple_app/node/ripple_KeyvaDB.cpp | 96 +- .../node/ripple_KeyvaDBBackendFactory.cpp | 151 +-- .../node/ripple_KeyvaDBBackendFactory.h | 5 +- .../node/ripple_LevelDBBackendFactory.cpp | 175 +-- .../node/ripple_LevelDBBackendFactory.h | 5 +- .../node/ripple_MdbBackendFactory.cpp | 268 ++-- .../node/ripple_MdbBackendFactory.h | 5 +- modules/ripple_app/node/ripple_NodeObject.cpp | 47 +- modules/ripple_app/node/ripple_NodeObject.h | 57 +- modules/ripple_app/node/ripple_NodeStore.cpp | 1191 ++++++++++++----- modules/ripple_app/node/ripple_NodeStore.h | 381 ++++-- .../node/ripple_NullBackendFactory.cpp | 30 +- .../node/ripple_NullBackendFactory.h | 5 +- .../node/ripple_SqliteBackendFactory.cpp | 167 ++- .../node/ripple_SqliteBackendFactory.h | 5 +- modules/ripple_basics/types/ripple_UInt256.h | 18 +- .../validator/ripple_Validators.cpp | 2 +- rippled-example.cfg | 14 +- src/cpp/ripple/NetworkOPs.cpp | 3 +- src/cpp/ripple/ripple_Application.cpp | 95 +- src/cpp/ripple/ripple_Application.h | 16 +- src/cpp/ripple/ripple_Main.cpp | 17 +- src/cpp/ripple/ripple_Peer.cpp | 2 +- src/cpp/ripple/ripple_ProofOfWorkFactory.cpp | 2 +- src/cpp/ripple/ripple_SHAMap.cpp | 30 +- src/cpp/ripple/ripple_SHAMapNode.cpp | 2 +- src/cpp/ripple/ripple_SHAMapSync.cpp | 6 +- src/cpp/ripple/ripple_SHAMapSyncFilter.h | 21 +- src/cpp/ripple/ripple_SHAMapSyncFilters.cpp | 6 +- src/cpp/ripple/ripple_SHAMapSyncFilters.h | 9 +- src/cpp/ripple/ripple_SHAMapTreeNode.cpp | 4 +- 37 files changed, 2075 insertions(+), 1018 deletions(-) diff --git a/BeastConfig.h b/BeastConfig.h index 4c35072c81..3b82014da9 100644 --- a/BeastConfig.h +++ b/BeastConfig.h @@ -120,4 +120,19 @@ //#define BEAST_BIND_USES_TR1 1 //#define BEAST_BIND_USES_BOOST 1 +//------------------------------------------------------------------------------ +// +// Ripple compilation settings +// +//------------------------------------------------------------------------------ + +/** Config: RIPPLE_VERIFY_NODEOBJECT_KEYS + + This verifies that the hash of node objects matches the payload. + It is quite expensive so normally this is turned off! +*/ +#ifndef RIPPLE_VERIFY_NODEOBJECT_KEYS +//#define RIPPLE_VERIFY_NODEOBJECT_KEYS 1 +#endif + #endif diff --git a/TODO.txt b/TODO.txt index 38c8cdd5b9..3fb99e9139 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,16 +3,19 @@ RIPPLE TODO -------------------------------------------------------------------------------- Vinnie's Short List (Changes day to day) +- Finish writing the NodeStore unit tests +- Finish converting backends to new API - Memory NodeStore::Backend for unit tests +- Performance test for NodeStore::Backend - Improved Mutex to track deadlocks -- Convert some Ripple boost unit tests to Beast. -- Eliminate new technical in NodeStore::Backend - Work on KeyvaDB +- Import beast::db and use it in SQliteBackend - Finish unit tests and code for Validators +- Convert some Ripple boost unit tests to Beast. 
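The RIPPLE_VERIFY_NODEOBJECT_KEYS setting introduced in BeastConfig.h above is consumed later in this patch by the new NodeStore store path. Enabling it makes every store re-derive the key, so each write pays for a SHA-512 half; the whole gate reduces to:

    // Opt in from BeastConfig.h by uncommenting the define:
    // #define RIPPLE_VERIFY_NODEOBJECT_KEYS 1

    // Inside NodeStoreImp::store, shown later in this patch:
    #if RIPPLE_VERIFY_NODEOBJECT_KEYS
        assert (hash == Serializer::getSHA512Half (data));
    #endif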
-------------------------------------------------------------------------------- -- Replace master lock with +- Replace all throw with beast::Throw - Replace base_uint and uintXXX with UnsignedInteger * Need to specialize UnsignedInteger to work efficiently with 4 and 8 byte diff --git a/modules/ripple_app/ledger/Ledger.cpp b/modules/ripple_app/ledger/Ledger.cpp index 4b0300a554..6878b37d15 100644 --- a/modules/ripple_app/ledger/Ledger.cpp +++ b/modules/ripple_app/ledger/Ledger.cpp @@ -530,10 +530,12 @@ void Ledger::saveAcceptedLedger (Job&, bool fromConsensus) assert (getTransHash () == mTransactionMap->getHash ()); // Save the ledger header in the hashed object store - Serializer s (128); - s.add32 (HashPrefix::ledgerMaster); - addRaw (s); - getApp().getNodeStore ().store (hotLEDGER, mLedgerSeq, s.peekData (), mHash); + { + Serializer s (128); + s.add32 (HashPrefix::ledgerMaster); + addRaw (s); + getApp().getNodeStore ().store (hotLEDGER, mLedgerSeq, s.modData (), mHash); + } AcceptedLedger::pointer aLedger = AcceptedLedger::makeAcceptedLedger (shared_from_this ()); diff --git a/modules/ripple_app/ledger/ripple_InboundLedger.cpp b/modules/ripple_app/ledger/ripple_InboundLedger.cpp index e9101345dd..c38ed45474 100644 --- a/modules/ripple_app/ledger/ripple_InboundLedger.cpp +++ b/modules/ripple_app/ledger/ripple_InboundLedger.cpp @@ -48,7 +48,7 @@ bool InboundLedger::tryLocal () if (!mHaveBase) { // Nothing we can do without the ledger base - NodeObject::pointer node = getApp().getNodeStore ().retrieve (mHash); + NodeObject::pointer node = getApp().getNodeStore ().fetch (mHash); if (!node) { @@ -672,7 +672,7 @@ bool InboundLedger::takeBase (const std::string& data) // data must not have has Serializer s (data.size () + 4); s.add32 (HashPrefix::ledgerMaster); s.addRaw (data); - getApp().getNodeStore ().store (hotLEDGER, mLedger->getLedgerSeq (), s.peekData (), mHash); + getApp().getNodeStore ().store (hotLEDGER, mLedger->getLedgerSeq (), s.modData (), mHash); progress (); diff --git a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp index 6d5ff693f1..860a53b4b5 100644 --- a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.cpp @@ -6,120 +6,208 @@ #if RIPPLE_HYPERLEVELDB_AVAILABLE -class HyperLevelDBBackendFactory::Backend : public NodeStore::Backend +class HyperLevelDBBackendFactory::Backend + : public NodeStore::Backend + , public NodeStore::BatchWriter::Callback + , LeakChecked { public: - Backend (size_t keyBytes, StringPairArray const& keyValues) + typedef RecycledObjectPool StringPool; + + Backend (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) : m_keyBytes (keyBytes) - , mName(keyValues ["path"].toStdString ()) - , mDB(NULL) + , m_scheduler (scheduler) + , m_batch (*this, scheduler) + , m_name (keyValues ["path"].toStdString ()) { - if (mName.empty()) - throw std::runtime_error ("Missing path in LevelDB backend"); + if (m_name.empty ()) + Throw (std::runtime_error ("Missing path in LevelDB backend")); hyperleveldb::Options options; options.create_if_missing = true; - if (keyValues["cache_mb"].isEmpty()) + if (keyValues ["cache_mb"].isEmpty ()) + { options.block_cache = hyperleveldb::NewLRUCache (theConfig.getSize (siHashNodeDBCache) * 1024 * 1024); + } else + { options.block_cache = hyperleveldb::NewLRUCache (keyValues["cache_mb"].getIntValue() * 1024L * 1024L); + } - if 
(keyValues["filter_bits"].isEmpty()) + if (keyValues ["filter_bits"].isEmpty()) { if (theConfig.NODE_SIZE >= 2) options.filter_policy = hyperleveldb::NewBloomFilterPolicy (10); } - else if (keyValues["filter_bits"].getIntValue() != 0) - options.filter_policy = hyperleveldb::NewBloomFilterPolicy (keyValues["filter_bits"].getIntValue()); + else if (keyValues ["filter_bits"].getIntValue() != 0) + { + options.filter_policy = hyperleveldb::NewBloomFilterPolicy (keyValues ["filter_bits"].getIntValue ()); + } - if (!keyValues["open_files"].isEmpty()) - options.max_open_files = keyValues["open_files"].getIntValue(); + if (! keyValues["open_files"].isEmpty ()) + { + options.max_open_files = keyValues ["open_files"].getIntValue(); + } - hyperleveldb::Status status = hyperleveldb::DB::Open (options, mName, &mDB); - if (!status.ok () || !mDB) - throw (std::runtime_error (std::string("Unable to open/create leveldb: ") + status.ToString())); + hyperleveldb::DB* db = nullptr; + hyperleveldb::Status status = hyperleveldb::DB::Open (options, m_name, &db); + if (!status.ok () || !db) + Throw (std::runtime_error (std::string("Unable to open/create leveldb: ") + status.ToString())); + + m_db = db; } ~Backend () { - delete mDB; } - std::string getDataBaseName() + std::string getName() { - return mName; + return m_name; } - bool bulkStore (const std::vector< NodeObject::pointer >& objs) - { - hyperleveldb::WriteBatch batch; + //-------------------------------------------------------------------------- + + Status fetch (void const* key, NodeObject::Ptr* pObject) + { + pObject->reset (); + + Status status (ok); + + hyperleveldb::ReadOptions const options; + hyperleveldb::Slice const slice (static_cast (key), m_keyBytes); - BOOST_FOREACH (NodeObject::ref obj, objs) { - Blob blob (toBlob (obj)); - batch.Put ( - hyperleveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), m_keyBytes), - hyperleveldb::Slice (reinterpret_cast(&blob.front ()), blob.size ())); + // These are reused std::string objects, + // required for leveldb's funky interface. + // + StringPool::ScopedItem item (m_stringPool); + std::string& string = item.getObject (); + + hyperleveldb::Status getStatus = m_db->Get (options, slice, &string); + + if (getStatus.ok ()) + { + NodeStore::DecodedBlob decoded (key, string.data (), string.size ()); + + if (decoded.wasOk ()) + { + *pObject = decoded.createObject (); + } + else + { + // Decoding failed, probably corrupted! 
+ // + status = dataCorrupt; + } + } + else + { + if (getStatus.IsCorruption ()) + { + status = dataCorrupt; + } + else if (getStatus.IsNotFound ()) + { + status = notFound; + } + else + { + status = unknown; + } + } } - return mDB->Write (hyperleveldb::WriteOptions (), &batch).ok (); + + return status; } - NodeObject::pointer retrieve (uint256 const& hash) + void store (NodeObject::ref object) { - std::string sData; - if (!mDB->Get (hyperleveldb::ReadOptions (), - hyperleveldb::Slice (reinterpret_cast(hash.begin ()), m_keyBytes), &sData).ok ()) + m_batch.store (object); + } + + void storeBatch (NodeStore::Batch const& batch) + { + hyperleveldb::WriteBatch wb; + { - return NodeObject::pointer(); + NodeStore::EncodedBlob::Pool::ScopedItem item (m_blobPool); + + BOOST_FOREACH (NodeObject::ref object, batch) + { + item.getObject ().prepare (object); + + wb.Put ( + hyperleveldb::Slice (reinterpret_cast ( + item.getObject ().getKey ()), m_keyBytes), + hyperleveldb::Slice (reinterpret_cast ( + item.getObject ().getData ()), item.getObject ().getSize ())); + } } - return fromBinary(hash, &sData[0], sData.size ()); + + hyperleveldb::WriteOptions const options; + + m_db->Write (options, &wb).ok (); } - void visitAll (FUNCTION_TYPE func) + void visitAll (VisitCallback& callback) { - hyperleveldb::Iterator* it = mDB->NewIterator (hyperleveldb::ReadOptions ()); + hyperleveldb::ReadOptions const options; + + ScopedPointer it (m_db->NewIterator (options)); + for (it->SeekToFirst (); it->Valid (); it->Next ()) { if (it->key ().size () == m_keyBytes) { - uint256 hash; - memcpy(hash.begin(), it->key ().data(), m_keyBytes); - func (fromBinary (hash, it->value ().data (), it->value ().size ())); + NodeStore::DecodedBlob decoded (it->key ().data (), + it->value ().data (), + it->value ().size ()); + + if (decoded.wasOk ()) + { + NodeObject::Ptr object (decoded.createObject ()); + + callback.visitObject (object); + } + else + { + // Uh oh, corrupted data! + WriteLog (lsFATAL, NodeObject) << "Corrupt NodeObject #" << uint256 (it->key ().data ()); + } + } + else + { + // VFALCO NOTE What does it mean to find an + // incorrectly sized key? Corruption? 
+ WriteLog (lsFATAL, NodeObject) << "Bad key size = " << it->key ().size (); } } } - Blob toBlob(NodeObject::ref obj) + int getWriteLoad () { - Blob rawData (9 + obj->getData ().size ()); - unsigned char* bufPtr = &rawData.front(); - - *reinterpret_cast (bufPtr + 0) = ntohl (obj->getIndex ()); - *reinterpret_cast (bufPtr + 4) = ntohl (obj->getIndex ()); - * (bufPtr + 8) = static_cast (obj->getType ()); - memcpy (bufPtr + 9, &obj->getData ().front (), obj->getData ().size ()); - - return rawData; + return m_batch.getWriteLoad (); } - NodeObject::pointer fromBinary(uint256 const& hash, - char const* data, int size) + //-------------------------------------------------------------------------- + + void writeBatch (NodeStore::Batch const& batch) { - if (size < 9) - throw std::runtime_error ("undersized object"); - - uint32 index = htonl (*reinterpret_cast (data)); - int htype = data[8]; - - return boost::make_shared (static_cast (htype), index, - data + 9, size - 9, hash); + storeBatch (batch); } private: size_t const m_keyBytes; - std::string mName; - hyperleveldb::DB* mDB; + NodeStore::Scheduler& m_scheduler; + NodeStore::BatchWriter m_batch; + StringPool m_stringPool; + NodeStore::EncodedBlob::Pool m_blobPool; + std::string m_name; + ScopedPointer m_db; }; //------------------------------------------------------------------------------ @@ -144,9 +232,12 @@ String HyperLevelDBBackendFactory::getName () const return "HyperLevelDB"; } -NodeStore::Backend* HyperLevelDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) +NodeStore::Backend* HyperLevelDBBackendFactory::createInstance ( + size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) { - return new HyperLevelDBBackendFactory::Backend (keyBytes, keyValues); + return new HyperLevelDBBackendFactory::Backend (keyBytes, keyValues, scheduler); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h index 6691681c72..43920477d8 100644 --- a/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_HyperLevelDBBackendFactory.h @@ -23,7 +23,10 @@ public: static HyperLevelDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 70d5954f72..6b5d68f395 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -463,9 +463,9 @@ public: keyRecord.valSize = valueBytes; keyRecord.leftIndex = 0; keyRecord.rightIndex = 0; - + memcpy (keyRecord.key, key, m_keyBytes); - + writeKeyRecord (keyRecord, state->newKeyIndex, state, true); // Key file has grown by one. @@ -536,7 +536,7 @@ public: maxPayloadBytes = 8 * 1024 }; - KeyvaDBTests () : UnitTest ("KeyvaDB") + KeyvaDBTests () : UnitTest ("KeyvaDB", "ripple") { } @@ -567,51 +567,78 @@ public: typedef UnsignedInteger KeyType; int64 const seedValue = 50; - + String s; s << "keyBytes=" << String (KeyBytes) << ", maxItems=" << String (maxItems); beginTest (s); - // Set up the key and value files and open the db. 
- File const keyPath = File::createTempFile ("").withFileExtension (".key"); - File const valPath = File::createTempFile ("").withFileExtension (".val"); - ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, true)); - - Payload payload (maxPayloadBytes); - Payload check (maxPayloadBytes); + // Set up the key and value files + File const tempFile (File::createTempFile ("")); + File const keyPath = tempFile.withFileExtension (".key"); + File const valPath = tempFile.withFileExtension (".val"); { - // Create an array of ascending integers. - HeapBlock items (maxItems); - for (unsigned int i = 0; i < maxItems; ++i) - items [i] = i; + // open the db + ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, false)); - // Now shuffle it deterministically. - repeatableShuffle (maxItems, items, seedValue); + Payload payload (maxPayloadBytes); + Payload check (maxPayloadBytes); - // Write all the keys of integers. - for (unsigned int i = 0; i < maxItems; ++i) { - unsigned int keyIndex = items [i]; - - KeyType const key = KeyType::createFromInteger (keyIndex); + // Create an array of ascending integers. + HeapBlock items (maxItems); + for (unsigned int i = 0; i < maxItems; ++i) + items [i] = i; - payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); - - db->put (key.cbegin (), payload.data.getData (), payload.bytes); + // Now shuffle it deterministically. + repeatableShuffle (maxItems, items, seedValue); + // Write all the keys of integers. + for (unsigned int i = 0; i < maxItems; ++i) { - // VFALCO TODO Check what we just wrote? - //db->get (key.cbegin (), check.data.getData (), payload.bytes); + unsigned int keyIndex = items [i]; + + KeyType const key = KeyType::createFromInteger (keyIndex); + + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); + + db->put (key.cbegin (), payload.data.getData (), payload.bytes); + + { + // VFALCO TODO Check what we just wrote? + //db->get (key.cbegin (), check.data.getData (), payload.bytes); + } + } + } + + { + // Go through all of our keys and try to retrieve them. + // since this is done in ascending order, we should get + // random seeks at this point. + // + PayloadGetCallback cb; + for (unsigned int keyIndex = 0; keyIndex < maxItems; ++keyIndex) + { + KeyType const v = KeyType::createFromInteger (keyIndex); + + bool const found = db->get (v.cbegin (), &cb); + + expect (found, "Should be found"); + + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); + + expect (payload == cb.payload, "Should be equal"); } } } { - // Go through all of our keys and try to retrieve them. - // since this is done in ascending order, we should get - // random seeks at this point. 
- // + // Re-open the database and confirm the data + ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, false)); + + Payload payload (maxPayloadBytes); + Payload check (maxPayloadBytes); + PayloadGetCallback cb; for (unsigned int keyIndex = 0; keyIndex < maxItems; ++keyIndex) { @@ -626,12 +653,15 @@ public: expect (payload == cb.payload, "Should be equal"); } } + + keyPath.deleteFile (); + valPath.deleteFile (); } void runTest () { - testKeySize <4> (512); - testKeySize <32> (4096); + testKeySize <4> (500); + testKeySize <32> (4000); } }; diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp index f9b65a0193..676fc7ecb1 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp @@ -6,9 +6,16 @@ class KeyvaDBBackendFactory::Backend : public NodeStore::Backend { +private: + typedef RecycledObjectPool MemoryPool; + typedef RecycledObjectPool EncodedBlobPool; + public: - Backend (size_t keyBytes, StringPairArray const& keyValues) + Backend (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) : m_keyBytes (keyBytes) + , m_scheduler (scheduler) , m_path (keyValues ["path"]) , m_db (KeyvaDB::New ( keyBytes, @@ -22,34 +29,53 @@ public: { } - std::string getDataBaseName () + std::string getName () { return m_path.toStdString (); } //-------------------------------------------------------------------------- - Status get (void const* key, GetCallback* callback) + Status fetch (void const* key, NodeObject::Ptr* pObject) { + pObject->reset (); + Status status (ok); - struct ForwardingGetCallback : KeyvaDB::GetCallback + struct Callback : KeyvaDB::GetCallback { - ForwardingGetCallback (Backend::GetCallback* callback) - : m_callback (callback) + explicit Callback (MemoryBlock& block) + : m_block (block) { } void* getStorageForValue (int valueBytes) { - return m_callback->getStorageForValue (valueBytes); + m_size = valueBytes; + m_block.ensureSize (valueBytes); + + return m_block.getData (); + } + + void const* getData () const noexcept + { + return m_block.getData (); + } + + size_t getSize () const noexcept + { + return m_size; } private: - Backend::GetCallback* const m_callback; + MemoryBlock& m_block; + size_t m_size; }; - ForwardingGetCallback cb (callback); + MemoryPool::ScopedItem item (m_memoryPool); + MemoryBlock& block (item.getObject ()); + + Callback cb (block); // VFALCO TODO Can't we get KeyvaDB to provide a proper status? 
// @@ -57,7 +83,18 @@ public: if (found) { - status = ok; + NodeStore::DecodedBlob decoded (key, cb.getData (), cb.getSize ()); + + if (decoded.wasOk ()) + { + *pObject = decoded.createObject (); + + status = ok; + } + else + { + status = dataCorrupt; + } } else { @@ -67,90 +104,45 @@ public: return status; } - //-------------------------------------------------------------------------- - - void writeObject (NodeObject::ref object) + void store (NodeObject::ref object) { - Blob blob (toBlob (object)); - m_db->put (object->getHash ().begin (), &blob [0], blob.size ()); + EncodedBlobPool::ScopedItem item (m_blobPool); + NodeStore::EncodedBlob& encoded (item.getObject ()); + + encoded.prepare (object); + + m_db->put (encoded.getKey (), encoded.getData (), encoded.getSize ()); } - bool bulkStore (std::vector const& objs) + void storeBatch (NodeStore::Batch const& batch) { - for (size_t i = 0; i < objs.size (); ++i) - { - writeObject (objs [i]); - } - - return true; + for (int i = 0; i < batch.size (); ++i) + store (batch [i]); } - struct MyGetCallback : KeyvaDB::GetCallback - { - int valueBytes; - HeapBlock data; - - void* getStorageForValue (int valueBytes_) - { - valueBytes = valueBytes_; - - data.malloc (valueBytes); - - return data.getData (); - } - }; - - NodeObject::pointer retrieve (uint256 const& hash) - { - NodeObject::pointer result; - - MyGetCallback cb; - - bool const found = m_db->get (hash.begin (), &cb); - - if (found) - { - result = fromBinary (hash, cb.data.getData (), cb.valueBytes); - } - - return result; - } - - void visitAll (FUNCTION_TYPE func) + void visitAll (VisitCallback& callback) { + // VFALCO TODO Implement this! + // bassertfalse; + //m_db->visitAll (); } - Blob toBlob (NodeObject::ref obj) + int getWriteLoad () { - Blob rawData (9 + obj->getData ().size ()); - unsigned char* bufPtr = &rawData.front(); - - *reinterpret_cast (bufPtr + 0) = ntohl (obj->getIndex ()); - *reinterpret_cast (bufPtr + 4) = ntohl (obj->getIndex ()); - * (bufPtr + 8) = static_cast (obj->getType ()); - memcpy (bufPtr + 9, &obj->getData ().front (), obj->getData ().size ()); - - return rawData; + // we dont do pending writes + return 0; } - NodeObject::pointer fromBinary (uint256 const& hash, char const* data, int size) - { - if (size < 9) - throw std::runtime_error ("undersized object"); - - uint32 index = htonl (*reinterpret_cast (data)); - - int htype = data[8]; - - return boost::make_shared (static_cast (htype), index, - data + 9, size - 9, hash); - } + //-------------------------------------------------------------------------- private: size_t const m_keyBytes; + NodeStore::Scheduler& m_scheduler; String m_path; ScopedPointer m_db; + MemoryPool m_memoryPool; + EncodedBlobPool m_blobPool; }; //------------------------------------------------------------------------------ @@ -175,9 +167,12 @@ String KeyvaDBBackendFactory::getName () const return "KeyvaDB"; } -NodeStore::Backend* KeyvaDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) +NodeStore::Backend* KeyvaDBBackendFactory::createInstance ( + size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) { - return new KeyvaDBBackendFactory::Backend (keyBytes, keyValues); + return new KeyvaDBBackendFactory::Backend (keyBytes, keyValues, scheduler); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h index 
4ee95c7b25..40e76f1994 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.h @@ -21,7 +21,10 @@ public: static KeyvaDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp b/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp index 3dbdcd3301..0beb2d5c1b 100644 --- a/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_LevelDBBackendFactory.cpp @@ -4,24 +4,38 @@ */ //============================================================================== -class LevelDBBackendFactory::Backend : public NodeStore::Backend +class LevelDBBackendFactory::Backend + : public NodeStore::Backend + , public NodeStore::BatchWriter::Callback + , LeakChecked { public: - Backend (int keyBytes, StringPairArray const& keyValues) + typedef RecycledObjectPool StringPool; + + //-------------------------------------------------------------------------- + + Backend (int keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) : m_keyBytes (keyBytes) - , m_name(keyValues ["path"].toStdString ()) - , m_db(NULL) + , m_scheduler (scheduler) + , m_batch (*this, scheduler) + , m_name (keyValues ["path"].toStdString ()) { if (m_name.empty()) - throw std::runtime_error ("Missing path in LevelDB backend"); + Throw (std::runtime_error ("Missing path in LevelDB backend")); leveldb::Options options; options.create_if_missing = true; if (keyValues["cache_mb"].isEmpty()) + { options.block_cache = leveldb::NewLRUCache (theConfig.getSize (siHashNodeDBCache) * 1024 * 1024); + } else + { options.block_cache = leveldb::NewLRUCache (keyValues["cache_mb"].getIntValue() * 1024L * 1024L); + } if (keyValues["filter_bits"].isEmpty()) { @@ -29,39 +43,38 @@ public: options.filter_policy = leveldb::NewBloomFilterPolicy (10); } else if (keyValues["filter_bits"].getIntValue() != 0) + { options.filter_policy = leveldb::NewBloomFilterPolicy (keyValues["filter_bits"].getIntValue()); + } - if (!keyValues["open_files"].isEmpty()) + if (! 
keyValues["open_files"].isEmpty()) + { options.max_open_files = keyValues["open_files"].getIntValue(); + } - leveldb::Status status = leveldb::DB::Open (options, m_name, &m_db); - if (!status.ok () || !m_db) - throw (std::runtime_error (std::string("Unable to open/create leveldb: ") + status.ToString())); + leveldb::DB* db = nullptr; + leveldb::Status status = leveldb::DB::Open (options, m_name, &db); + if (!status.ok () || !db) + Throw (std::runtime_error (std::string("Unable to open/create leveldb: ") + status.ToString())); + + m_db = db; } ~Backend () { - delete m_db; } - std::string getDataBaseName() + std::string getName() { return m_name; } //-------------------------------------------------------------------------- - struct StdString + Status fetch (void const* key, NodeObject::Ptr* pObject) { - std::string blob; - }; + pObject->reset (); - typedef RecycledObjectPool StdStringPool; - - //-------------------------------------------------------------------------- - - Status get (void const* key, GetCallback* callback) - { Status status (ok); leveldb::ReadOptions const options; @@ -71,22 +84,24 @@ public: // These are reused std::string objects, // required for leveldb's funky interface. // - StdStringPool::ScopedItem item (m_stringPool); - std::string& blob = item.getObject ().blob; + StringPool::ScopedItem item (m_stringPool); + std::string& string = item.getObject (); - leveldb::Status getStatus = m_db->Get (options, slice, &blob); + leveldb::Status getStatus = m_db->Get (options, slice, &string); if (getStatus.ok ()) { - void* const buffer = callback->getStorageForValue (blob.size ()); + NodeStore::DecodedBlob decoded (key, string.data (), string.size ()); - if (buffer != nullptr) + if (decoded.wasOk ()) { - memcpy (buffer, blob.data (), blob.size ()); + *pObject = decoded.createObject (); } else { - Throw (std::bad_alloc ()); + // Decoding failed, probably corrupted! 
+ // + status = dataCorrupt; } } else @@ -109,83 +124,90 @@ public: return status; } - //-------------------------------------------------------------------------- - - bool bulkStore (const std::vector< NodeObject::pointer >& objs) + void store (NodeObject::ref object) { - leveldb::WriteBatch batch; - - BOOST_FOREACH (NodeObject::ref obj, objs) - { - Blob blob (toBlob (obj)); - batch.Put ( - leveldb::Slice (reinterpret_cast(obj->getHash ().begin ()), m_keyBytes), - leveldb::Slice (reinterpret_cast(&blob.front ()), blob.size ())); - } - return m_db->Write (leveldb::WriteOptions (), &batch).ok (); + m_batch.store (object); } - NodeObject::pointer retrieve (uint256 const& hash) + void storeBatch (NodeStore::Batch const& batch) { - std::string sData; - if (!m_db->Get (leveldb::ReadOptions (), - leveldb::Slice (reinterpret_cast(hash.begin ()), m_keyBytes), &sData).ok ()) + leveldb::WriteBatch wb; + { - return NodeObject::pointer(); + NodeStore::EncodedBlob::Pool::ScopedItem item (m_blobPool); + + BOOST_FOREACH (NodeObject::ref object, batch) + { + item.getObject ().prepare (object); + + wb.Put ( + leveldb::Slice (reinterpret_cast (item.getObject ().getKey ()), + m_keyBytes), + leveldb::Slice (reinterpret_cast (item.getObject ().getData ()), + item.getObject ().getSize ())); + } } - return fromBinary(hash, &sData[0], sData.size ()); + + leveldb::WriteOptions const options; + + m_db->Write (options, &wb).ok (); } - void visitAll (FUNCTION_TYPE func) + void visitAll (VisitCallback& callback) { - leveldb::Iterator* it = m_db->NewIterator (leveldb::ReadOptions ()); + leveldb::ReadOptions const options; + + ScopedPointer it (m_db->NewIterator (options)); + for (it->SeekToFirst (); it->Valid (); it->Next ()) { if (it->key ().size () == m_keyBytes) { - uint256 hash; - memcpy(hash.begin(), it->key ().data(), m_keyBytes); - func (fromBinary (hash, it->value ().data (), it->value ().size ())); + NodeStore::DecodedBlob decoded (it->key ().data (), + it->value ().data (), + it->value ().size ()); + + if (decoded.wasOk ()) + { + NodeObject::Ptr object (decoded.createObject ()); + + callback.visitObject (object); + } + else + { + // Uh oh, corrupted data! + WriteLog (lsFATAL, NodeObject) << "Corrupt NodeObject #" << uint256 (it->key ().data ()); + } } else { // VFALCO NOTE What does it mean to find an // incorrectly sized key? Corruption? 
+ WriteLog (lsFATAL, NodeObject) << "Bad key size = " << it->key ().size (); } } } - Blob toBlob(NodeObject::ref obj) + int getWriteLoad () { - Blob rawData (9 + obj->getData ().size ()); - unsigned char* bufPtr = &rawData.front(); - - *reinterpret_cast (bufPtr + 0) = ntohl (obj->getIndex ()); - *reinterpret_cast (bufPtr + 4) = ntohl (obj->getIndex ()); - * (bufPtr + 8) = static_cast (obj->getType ()); - memcpy (bufPtr + 9, &obj->getData ().front (), obj->getData ().size ()); - - return rawData; + return m_batch.getWriteLoad (); } - NodeObject::pointer fromBinary(uint256 const& hash, - char const* data, int size) + //-------------------------------------------------------------------------- + + void writeBatch (NodeStore::Batch const& batch) { - if (size < 9) - throw std::runtime_error ("undersized object"); - - uint32 index = htonl (*reinterpret_cast (data)); - int htype = data[8]; - - return boost::make_shared (static_cast (htype), index, - data + 9, size - 9, hash); + storeBatch (batch); } private: size_t const m_keyBytes; - StdStringPool m_stringPool; + NodeStore::Scheduler& m_scheduler; + NodeStore::BatchWriter m_batch; + StringPool m_stringPool; + NodeStore::EncodedBlob::Pool m_blobPool; std::string m_name; - leveldb::DB* m_db; + ScopedPointer m_db; }; //------------------------------------------------------------------------------ @@ -210,9 +232,12 @@ String LevelDBBackendFactory::getName () const return "LevelDB"; } -NodeStore::Backend* LevelDBBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) +NodeStore::Backend* LevelDBBackendFactory::createInstance ( + size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) { - return new LevelDBBackendFactory::Backend (keyBytes, keyValues); + return new LevelDBBackendFactory::Backend (keyBytes, keyValues, scheduler); } //------------------------------------------------------------------------------ diff --git a/modules/ripple_app/node/ripple_LevelDBBackendFactory.h b/modules/ripple_app/node/ripple_LevelDBBackendFactory.h index 5843221c0d..3646125d1d 100644 --- a/modules/ripple_app/node/ripple_LevelDBBackendFactory.h +++ b/modules/ripple_app/node/ripple_LevelDBBackendFactory.h @@ -21,7 +21,10 @@ public: static LevelDBBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_app/node/ripple_MdbBackendFactory.cpp b/modules/ripple_app/node/ripple_MdbBackendFactory.cpp index 0b74349ab3..4be1def928 100644 --- a/modules/ripple_app/node/ripple_MdbBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_MdbBackendFactory.cpp @@ -6,170 +6,227 @@ #if RIPPLE_MDB_AVAILABLE -class MdbBackendFactory::Backend : public NodeStore::Backend +class MdbBackendFactory::Backend + : public NodeStore::Backend + , public NodeStore::BatchWriter::Callback + , LeakChecked { public: - explicit Backend (StringPairArray const& keyValues) - : m_env (nullptr) + typedef NodeStore::Batch Batch; + typedef NodeStore::EncodedBlob EncodedBlob; + typedef NodeStore::DecodedBlob DecodedBlob; + + explicit Backend (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) + : m_keyBytes (keyBytes) + , m_scheduler (scheduler) + , m_batch (*this, scheduler) + , m_env (nullptr) { - if (keyValues ["path"].isEmpty ()) - throw std::runtime_error 
("Missing path in MDB backend"); + String path (keyValues ["path"]); + + m_name = path.toStdString(); + + if (path.isEmpty ()) + Throw (std::runtime_error ("Missing path in MDB backend")); int error = 0; error = mdb_env_create (&m_env); if (error == 0) // Should use the size of the file plus the free space on the disk - error = mdb_env_set_mapsize(m_env, 512L * 1024L * 1024L * 1024L); + error = mdb_env_set_mapsize (m_env, 512L * 1024L * 1024L * 1024L); if (error == 0) error = mdb_env_open ( m_env, - keyValues ["path"].toStdString().c_str (), + m_name.c_str (), MDB_NOTLS, 0664); - MDB_txn * txn; - if (error == 0) - error = mdb_txn_begin(m_env, NULL, 0, &txn); - if (error == 0) - error = mdb_dbi_open(txn, NULL, 0, &m_dbi); - if (error == 0) - error = mdb_txn_commit(txn); + MDB_txn* txn; + if (error == 0) + error = mdb_txn_begin (m_env, NULL, 0, &txn); + + if (error == 0) + error = mdb_dbi_open (txn, NULL, 0, &m_dbi); + + if (error == 0) + error = mdb_txn_commit (txn); if (error != 0) { String s; s << "Error #" << error << " creating mdb environment"; - throw std::runtime_error (s.toStdString ()); + Throw (std::runtime_error (s.toStdString ())); } - m_name = keyValues ["path"].toStdString(); } ~Backend () { if (m_env != nullptr) { - mdb_dbi_close(m_env, m_dbi); + mdb_dbi_close (m_env, m_dbi); mdb_env_close (m_env); } } - std::string getDataBaseName() + std::string getName() { return m_name; } - bool bulkStore (std::vector const& objs) + //-------------------------------------------------------------------------- + + template + unsigned char* mdb_cast (T* p) { - MDB_txn *txn = nullptr; - int rc = 0; + return const_cast (static_cast (p)); + } - rc = mdb_txn_begin(m_env, NULL, 0, &txn); + Status fetch (void const* key, NodeObject::Ptr* pObject) + { + pObject->reset (); - if (rc == 0) + Status status (ok); + + MDB_txn* txn = nullptr; + + int error = 0; + + error = mdb_txn_begin (m_env, NULL, MDB_RDONLY, &txn); + + if (error == 0) { - BOOST_FOREACH (NodeObject::ref obj, objs) - { - MDB_val key, data; - Blob blob (toBlob (obj)); + MDB_val dbkey; + MDB_val data; - key.mv_size = (256 / 8); - key.mv_data = const_cast(obj->getHash().begin()); + dbkey.mv_size = m_keyBytes; + dbkey.mv_data = mdb_cast (key); - data.mv_size = blob.size(); - data.mv_data = &blob.front(); + error = mdb_get (txn, m_dbi, &dbkey, &data); - rc = mdb_put(txn, m_dbi, &key, &data, 0); - if (rc != 0) + if (error == 0) + { + DecodedBlob decoded (key, data.mv_data, data.mv_size); + + if (decoded.wasOk ()) { - assert(false); + *pObject = decoded.createObject (); + } + else + { + status = dataCorrupt; + } + } + else if (error == MDB_NOTFOUND) + { + status = notFound; + } + else + { + status = unknown; + + WriteLog (lsWARNING, NodeObject) << "MDB txn failed, code=" << error; + } + + mdb_txn_abort (txn); + } + else + { + status = unknown; + + WriteLog (lsWARNING, NodeObject) << "MDB txn failed, code=" << error; + } + + return status; + } + + void store (NodeObject::ref object) + { + m_batch.store (object); + } + + void storeBatch (Batch const& batch) + { + MDB_txn* txn = nullptr; + + int error = 0; + + error = mdb_txn_begin (m_env, NULL, 0, &txn); + + if (error == 0) + { + EncodedBlob::Pool::ScopedItem item (m_blobPool); + + BOOST_FOREACH (NodeObject::Ptr const& object, batch) + { + EncodedBlob& encoded (item.getObject ()); + + encoded.prepare (object); + + MDB_val key; + key.mv_size = m_keyBytes; + key.mv_data = mdb_cast (encoded.getKey ()); + + MDB_val data; + data.mv_size = encoded.getSize (); + data.mv_data = mdb_cast (encoded.getData 
()); + + error = mdb_put (txn, m_dbi, &key, &data, 0); + + if (error != 0) + { + WriteLog (lsWARNING, NodeObject) << "mdb_put failed, error=" << error; break; } - } + } + + if (error == 0) + { + error = mdb_txn_commit(txn); + + if (error != 0) + { + WriteLog (lsWARNING, NodeObject) << "mdb_txn_commit failed, error=" << error; + } + } + else + { + mdb_txn_abort (txn); + } } else - assert(false); - - if (rc == 0) - rc = mdb_txn_commit(txn); - else if (txn) - mdb_txn_abort(txn); - - assert(rc == 0); - return rc == 0; - } - - NodeObject::pointer retrieve (uint256 const& hash) - { - NodeObject::pointer ret; - - MDB_txn *txn = nullptr; - int rc = 0; - - rc = mdb_txn_begin(m_env, NULL, MDB_RDONLY, &txn); - - if (rc == 0) { - MDB_val key, data; - - key.mv_size = (256 / 8); - key.mv_data = const_cast(hash.begin()); - - rc = mdb_get(txn, m_dbi, &key, &data); - if (rc == 0) - ret = fromBinary(hash, static_cast(data.mv_data), data.mv_size); - else - assert(rc == MDB_NOTFOUND); + WriteLog (lsWARNING, NodeObject) << "mdb_txn_begin failed, error=" << error; } - else - assert(false); - - mdb_txn_abort(txn); - - return ret; } - void visitAll (FUNCTION_TYPE func) - { // WRITEME - assert(false); - } - - Blob toBlob (NodeObject::ref obj) const + void visitAll (VisitCallback& callback) { - Blob rawData (9 + obj->getData ().size ()); - unsigned char* bufPtr = &rawData.front(); - - *reinterpret_cast (bufPtr + 0) = ntohl (obj->getIndex ()); - - *reinterpret_cast (bufPtr + 4) = ntohl (obj->getIndex ()); - - *(bufPtr + 8) = static_cast (obj->getType ()); - - memcpy (bufPtr + 9, &obj->getData ().front (), obj->getData ().size ()); - - return rawData; + // VFALCO TODO Implement this! + bassertfalse; } - NodeObject::pointer fromBinary (uint256 const& hash, char const* data, int size) const + int getWriteLoad () { - if (size < 9) - throw std::runtime_error ("undersized object"); + return m_batch.getWriteLoad (); + } - uint32 const index = htonl (*reinterpret_cast (data)); + //-------------------------------------------------------------------------- - int const htype = data [8]; - - return boost::make_shared ( - static_cast (htype), - index, - data + 9, - size - 9, - hash); + void writeBatch (Batch const& batch) + { + storeBatch (batch); } private: + size_t const m_keyBytes; + NodeStore::Scheduler& m_scheduler; + NodeStore::BatchWriter m_batch; + NodeStore::EncodedBlob::Pool m_blobPool; std::string m_name; MDB_env* m_env; MDB_dbi m_dbi; @@ -197,9 +254,12 @@ String MdbBackendFactory::getName () const return "mdb"; } -NodeStore::Backend* MdbBackendFactory::createInstance (StringPairArray const& keyValues) +NodeStore::Backend* MdbBackendFactory::createInstance ( + size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) { - return new MdbBackendFactory::Backend (keyValues); + return new MdbBackendFactory::Backend (keyBytes, keyValues, scheduler); } #endif diff --git a/modules/ripple_app/node/ripple_MdbBackendFactory.h b/modules/ripple_app/node/ripple_MdbBackendFactory.h index 702ca3a14a..2e1cd7db65 100644 --- a/modules/ripple_app/node/ripple_MdbBackendFactory.h +++ b/modules/ripple_app/node/ripple_MdbBackendFactory.h @@ -25,7 +25,10 @@ public: static MdbBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_app/node/ripple_NodeObject.cpp 
b/modules/ripple_app/node/ripple_NodeObject.cpp index b3de59ed95..1d3f282762 100644 --- a/modules/ripple_app/node/ripple_NodeObject.cpp +++ b/modules/ripple_app/node/ripple_NodeObject.cpp @@ -11,27 +11,27 @@ SETUP_LOG (NodeObject) NodeObject::NodeObject ( NodeObjectType type, LedgerIndex ledgerIndex, - Blob const& binaryDataToCopy, - uint256 const& hash) + Blob& data, + uint256 const& hash, + PrivateAccess) : mType (type) , mHash (hash) , mLedgerIndex (ledgerIndex) - , mData (binaryDataToCopy) { + // Take over the caller's buffer + mData.swap (data); } -NodeObject::NodeObject ( +NodeObject::Ptr NodeObject::createObject ( NodeObjectType type, LedgerIndex ledgerIndex, - void const* bufferToCopy, - int bytesInBuffer, - uint256 const& hash) - : mType (type) - , mHash (hash) - , mLedgerIndex (ledgerIndex) - , mData (static_cast (bufferToCopy), - static_cast (bufferToCopy) + bytesInBuffer) + Blob& data, + uint256 const & hash) { + // The boost::ref is important or + // else it will be passed by value! + return boost::make_shared ( + type, ledgerIndex, boost::ref (data), hash, PrivateAccess ()); } NodeObjectType NodeObject::getType () const @@ -54,14 +54,21 @@ Blob const& NodeObject::getData () const return mData; } -bool NodeObject::isCloneOf (NodeObject const& other) const +bool NodeObject::isCloneOf (NodeObject::Ptr const& other) const { - return - mType == other.mType && - mHash == other.mHash && - mLedgerIndex == other.mLedgerIndex && - mData == other.mData - ; + if (mType != other->mType) + return false; + + if (mHash != other->mHash) + return false; + + if (mLedgerIndex != other->mLedgerIndex) + return false; + + if (mData != other->mData) + return false; + + return true; } //------------------------------------------------------------------------------ @@ -70,7 +77,7 @@ class NodeObjectTests : public UnitTest { public: - NodeObjectTests () : UnitTest ("NodeObject") + NodeObjectTests () : UnitTest ("NodeObject", "ripple") { } diff --git a/modules/ripple_app/node/ripple_NodeObject.h b/modules/ripple_app/node/ripple_NodeObject.h index 0637b29426..2ead4b370c 100644 --- a/modules/ripple_app/node/ripple_NodeObject.h +++ b/modules/ripple_app/node/ripple_NodeObject.h @@ -42,27 +42,41 @@ public: */ typedef UnsignedInteger <32> Hash; + // Please use this one. For a reference use Ptr const& + typedef boost::shared_ptr Ptr; + + // These are DEPRECATED, type names are capitalized. typedef boost::shared_ptr pointer; typedef pointer const& ref; - /** Create from a vector of data. - - @note A copy of the data is created. - */ +private: + // This hack is used to make the constructor effectively private + // except for when we use it in the call to make_shared. + // There's no portable way to make make_shared<> a friend work. + struct PrivateAccess { }; +public: + // This constructor is private, use createObject instead. NodeObject (NodeObjectType type, LedgerIndex ledgerIndex, - Blob const & binaryDataToCopy, - uint256 const & hash); + Blob& data, + uint256 const& hash, + PrivateAccess); - /** Create from an area of memory. + /** Create an object from fields. - @note A copy of the data is created. + The caller's variable is modified during this call. The + underlying storage for the Blob is taken over by the NodeObject. + + @param type The type of object. + @param ledgerIndex The ledger in which this object appears. + @param data A buffer containing the payload. The caller's variable + is overwritten. + @param hash The 256-bit hash of the payload data. 
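+
+        A usage sketch (illustrative only, with a hypothetical payload
+        source):
+
+        @code
+        Blob data (getSerializedLedger ()); // hypothetical helper
+
+        NodeObject::Ptr object = NodeObject::createObject (
+            hotLEDGER, ledgerIndex, data, hash);
+
+        bassert (data.empty ()); // the buffer was taken over
+        @endcode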
*/ - NodeObject (NodeObjectType type, - LedgerIndex ledgerIndex, - void const * bufferToCopy, - int bytesInBuffer, - uint256 const & hash); + static Ptr createObject (NodeObjectType type, + LedgerIndex ledgerIndex, + Blob& data, + uint256 const& hash); /** Retrieve the type of this object. */ @@ -83,7 +97,22 @@ public: /** See if this object has the same data as another object. */ - bool isCloneOf (NodeObject const& other) const; + bool isCloneOf (NodeObject::Ptr const& other) const; + + /** Binary function that satisfies the strict-weak-ordering requirement. + + This compares the hashes of both objects and returns true if + the first hash is considered to go before the second. + + @see std::sort + */ + struct LessThan + { + inline bool operator() (NodeObject::Ptr const& lhs, NodeObject::Ptr const& rhs) const noexcept + { + return lhs->getHash () < rhs->getHash (); + } + }; private: NodeObjectType mType; diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index ef7e7a965e..1ac8ef95bc 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -4,49 +4,164 @@ */ //============================================================================== -// -// NodeStore::Backend -// - -NodeStore::Backend::Backend () - : mWriteGeneration(0) - , mWriteLoad(0) - , mWritePending(false) +NodeStore::DecodedBlob::DecodedBlob (void const* key, void const* value, int valueBytes) { - mWriteSet.reserve (bulkWriteBatchSize); + /* Data format: + + Bytes + + 0...3 LedgerIndex 32-bit big endian integer + 4...7 Unused? An unused copy of the LedgerIndex + 8 char One of NodeObjectType + 9...end The body of the object data + */ + + m_success = false; + m_key = key; + // VFALCO NOTE Ledger indexes should have started at 1 + m_ledgerIndex = LedgerIndex (-1); + m_objectType = hotUNKNOWN; + m_objectData = nullptr; + m_dataBytes = bmax (0, valueBytes - 9); + + if (valueBytes > 4) + { + LedgerIndex const* index = static_cast (value); + m_ledgerIndex = ByteOrder::swapIfLittleEndian (*index); + } + + // VFALCO NOTE What about bytes 4 through 7 inclusive? + + if (valueBytes > 8) + { + unsigned char const* byte = static_cast (value); + m_objectType = static_cast (byte [8]); + } + + if (valueBytes > 9) + { + m_objectData = static_cast (value) + 9; + + switch (m_objectType) + { + case hotUNKNOWN: + default: + break; + + case hotLEDGER: + case hotTRANSACTION: + case hotACCOUNT_NODE: + case hotTRANSACTION_NODE: + m_success = true; + break; + } + } } -bool NodeStore::Backend::store (NodeObject::ref object) +NodeObject::Ptr NodeStore::DecodedBlob::createObject () { - boost::mutex::scoped_lock sl (mWriteMutex); + bassert (m_success); + + NodeObject::Ptr object; + + if (m_success) + { + Blob data (m_dataBytes); + + memcpy (data.data (), m_objectData, m_dataBytes); + + object = NodeObject::createObject ( + m_objectType, m_ledgerIndex, data, uint256 (m_key)); + } + + return object; +} + +//------------------------------------------------------------------------------ + +void NodeStore::EncodedBlob::prepare (NodeObject::Ptr const& object) +{ + m_key = object->getHash ().begin (); + + // This is how many bytes we need in the flat data + m_size = object->getData ().size () + 9; + + m_data.ensureSize (m_size); + + // These sizes must be the same! 
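+    // (Bytes 0..3 and 4..7 of the header both receive the index; the
+    //  second word is the unused duplicate noted in DecodedBlob's format
+    //  description above.)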
+ static_bassert (sizeof (uint32) == sizeof (object->getIndex ())); + + { + uint32* buf = static_cast (m_data.getData ()); + + buf [0] = ByteOrder::swapIfLittleEndian (object->getIndex ()); + buf [1] = ByteOrder::swapIfLittleEndian (object->getIndex ()); + } + + { + unsigned char* buf = static_cast (m_data.getData ()); + + buf [8] = static_cast (object->getType ()); + + memcpy (&buf [9], object->getData ().data (), object->getData ().size ()); + } +} + +//============================================================================== + +NodeStore::BatchWriter::BatchWriter (Callback& callback, Scheduler& scheduler) + : m_callback (callback) + , m_scheduler (scheduler) + , mWriteGeneration (0) + , mWriteLoad (0) + , mWritePending (false) +{ + mWriteSet.reserve (batchWritePreallocationSize); +} + +NodeStore::BatchWriter::~BatchWriter () +{ + waitForWriting (); +} + +void NodeStore::BatchWriter::store (NodeObject::ref object) +{ + LockType::scoped_lock sl (mWriteMutex); + mWriteSet.push_back (object); - if (!mWritePending) + if (! mWritePending) { mWritePending = true; - // VFALCO TODO Eliminate this dependency on the Application object. - getApp().getJobQueue ().addJob ( - jtWRITE, - "NodeObject::store", - BIND_TYPE (&NodeStore::Backend::bulkWrite, this, P_1)); + m_scheduler.scheduleTask (this); } - return true; } -void NodeStore::Backend::bulkWrite (Job &) +int NodeStore::BatchWriter::getWriteLoad () +{ + LockType::scoped_lock sl (mWriteMutex); + + return std::max (mWriteLoad, static_cast (mWriteSet.size ())); +} + +void NodeStore::BatchWriter::performScheduledTask () +{ + writeBatch (); +} + +void NodeStore::BatchWriter::writeBatch () { int setSize = 0; - // VFALCO NOTE Use the canonical for(;;) instead. - // Or better, provide a proper terminating condition. - while (1) + for (;;) { std::vector< boost::shared_ptr > set; - set.reserve (bulkWriteBatchSize); + + set.reserve (batchWritePreallocationSize); { - boost::mutex::scoped_lock sl (mWriteMutex); + LockType::scoped_lock sl (mWriteMutex); mWriteSet.swap (set); assert (mWriteSet.empty ()); @@ -62,162 +177,43 @@ void NodeStore::Backend::bulkWrite (Job &) return; } + // VFALCO NOTE On the first trip through, mWriteLoad will be 0. + // This is probably not intended. Perhaps the order + // of calls isn't quite right + // mWriteLoad = std::max (setSize, static_cast (mWriteSet.size ())); + setSize = set.size (); } - bulkStore (set); + m_callback.writeBatch (set); } } -// VFALCO TODO This function should not be needed. Instead, the -// destructor should handle flushing of the bulk write buffer. -// -void NodeStore::Backend::waitWrite () +void NodeStore::BatchWriter::waitForWriting () { - boost::mutex::scoped_lock sl (mWriteMutex); + LockType::scoped_lock sl (mWriteMutex); int gen = mWriteGeneration; while (mWritePending && (mWriteGeneration == gen)) mWriteCondition.wait (sl); } -int NodeStore::Backend::getWriteLoad () +//============================================================================== + +class NodeStoreImp + : public NodeStore + , LeakChecked { - boost::mutex::scoped_lock sl (mWriteMutex); - - return std::max (mWriteLoad, static_cast (mWriteSet.size ())); -} - -//------------------------------------------------------------------------------ - -// -// NodeStore -// - -class NodeStoreImp : public NodeStore -{ -public: - /** Size of a key. - */ - enum - { - keyBytes = 32 - }; - - /** Parsed key/value blob into NodeObject components. - - This will extract the information required to construct - a NodeObject. 
It also does consistency checking and returns - the result, so it is possible to determine if the data - is corrupted without throwing an exception. Note all forms - of corruption are detected so further analysis will be - needed to eliminate false positives. - - This is the format in which a NodeObject is stored in the - persistent storage layer. - */ - struct DecodedBlob - { - /** Construct the decoded blob from raw data. - - The `success` member will indicate if the operation was succesful. - */ - DecodedBlob (void const* keyParam, void const* value, int valueBytes) - { - /* Data format: - - Bytes - - 0...3 LedgerIndex 32-bit big endian integer - 4...7 Unused? An unused copy of the LedgerIndex - 8 char One of NodeObjectType - 9...end The body of the object data - */ - - success = false; - key = keyParam; - // VFALCO NOTE Ledger indexes should have started at 1 - ledgerIndex = LedgerIndex (-1); - objectType = hotUNKNOWN; - objectData = nullptr; - dataBytes = bmax (0, valueBytes - 9); - - if (dataBytes > 4) - { - LedgerIndex const* index = static_cast (value); - ledgerIndex = ByteOrder::swapIfLittleEndian (*index); - } - - // VFALCO NOTE What about bytes 4 through 7 inclusive? - - if (dataBytes > 8) - { - unsigned char const* byte = static_cast (value); - objectType = static_cast (byte [8]); - } - - if (dataBytes > 9) - { - objectData = static_cast (value) + 9; - - switch (objectType) - { - case hotUNKNOWN: - default: - break; - - case hotLEDGER: - case hotTRANSACTION: - case hotACCOUNT_NODE: - case hotTRANSACTION_NODE: - success = true; - break; - } - } - } - - /** Create a NodeObject from this data. - */ - NodeObject::pointer createObject () - { - NodeObject::pointer object; - - if (success) - { - // VFALCO NOTE I dislke these shared pointers from boost - object = boost::make_shared ( - objectType, ledgerIndex, objectData, dataBytes, uint256 (key)); - } - - return object; - } - - bool success; - - void const* key; - LedgerIndex ledgerIndex; - NodeObjectType objectType; - unsigned char const* objectData; - int dataBytes; - }; - - //-------------------------------------------------------------------------- - - class EncodedBlob - { - HeapBlock data; - }; - public: NodeStoreImp (String backendParameters, String fastBackendParameters, - int cacheSize, - int cacheAge) - : m_backend (createBackend (backendParameters)) - , m_fastBackend (fastBackendParameters.isNotEmpty () ? createBackend (fastBackendParameters) - : nullptr) - , m_cache ("NodeStore", cacheSize, cacheAge) + Scheduler& scheduler) + : m_scheduler (scheduler) + , m_backend (createBackend (backendParameters, scheduler)) + , m_fastBackend (fastBackendParameters.isNotEmpty () + ? createBackend (fastBackendParameters, scheduler) : nullptr) + , m_cache ("NodeStore", 16384, 300) , m_negativeCache ("NoteStoreNegativeCache", 0, 120) { } @@ -227,84 +223,21 @@ public: // VFALCO NOTE This shouldn't be necessary, the backend can // just handle it in the destructor. 
// + /* m_backend->waitWrite (); if (m_fastBackend) m_fastBackend->waitWrite (); - } - - float getCacheHitRate () - { - return m_cache.getHitRate (); - } - - void tune (int size, int age) - { - m_cache.setTargetSize (size); - m_cache.setTargetAge (age); - } - - void sweep () - { - m_cache.sweep (); - m_negativeCache.sweep (); - } - - int getWriteLoad () - { - return m_backend->getWriteLoad (); - } - - bool store (NodeObjectType type, - uint32 index, - Blob const& data, - uint256 const& hash) - { - bool wasStored = false; - - bool const keyFoundAndObjectCached = m_cache.refreshIfPresent (hash); - - // VFALCO NOTE What happens if the key is found, but the object - // fell out of the cache? We will end up passing it - // to the backend anyway. - // - if (! keyFoundAndObjectCached) - { - - // VFALCO TODO Rename this to RIPPLE_NODESTORE_VERIFY_HASHES and make - // it be 1 or 0 instead of merely defined or undefined. - // - #ifdef PARANOID - assert (hash == Serializer::getSHA512Half (data)); - #endif - - NodeObject::pointer object = boost::make_shared (type, index, data, hash); - - // VFALCO NOTE What does it mean to canonicalize an object? - // - if (!m_cache.canonicalize (hash, object)) - { - m_backend->store (object); - - if (m_fastBackend) - m_fastBackend->store (object); - } - - m_negativeCache.del (hash); - - wasStored = true; - } - - return wasStored; + */ } //------------------------------------------------------------------------------ - NodeObject::pointer retrieve (uint256 const& hash) + NodeObject::Ptr fetch (uint256 const& hash) { // See if the object already exists in the cache // - NodeObject::pointer obj = m_cache.fetch (hash); + NodeObject::Ptr obj = m_cache.fetch (hash); if (obj == nullptr) { @@ -320,7 +253,7 @@ public: // if (m_fastBackend != nullptr) { - obj = retrieveInternal (m_fastBackend, hash); + obj = fetchInternal (m_fastBackend, hash); // If we found the object, avoid storing it again later. if (obj != nullptr) @@ -335,16 +268,14 @@ public: // { // Monitor this operation's load since it is expensive. - - // m_hooks->onRetrieveBegin () - + // // VFALCO TODO Why is this an autoptr? Why can't it just be a plain old object? // - LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve")); + // VFALCO NOTE Commented this out because it breaks the unit test! + // + //LoadEvent::autoptr event (getApp().getJobQueue ().getLoadEventAP (jtHO_READ, "HOS::retrieve")); - obj = retrieveInternal (m_backend, hash); - - // m_hooks->onRetrieveEnd () + obj = fetchInternal (m_backend, hash); } // If it's not in the main database, remember that so we @@ -389,47 +320,27 @@ public: return obj; } - NodeObject::pointer retrieveInternal (Backend* backend, uint256 const& hash) + NodeObject::Ptr fetchInternal (Backend* backend, uint256 const& hash) { - // VFALCO TODO Make this not allocate and free on each call - // - struct MyGetCallback : Backend::GetCallback + NodeObject::Ptr object; + + Backend::Status const status = backend->fetch (hash.begin (), &object); + + switch (status) { - void* getStorageForValue (size_t sizeInBytes) - { - bytes = sizeInBytes; - data.malloc (sizeInBytes); + case Backend::ok: + case Backend::notFound: + break; - return &data [0]; - } - - size_t bytes; - HeapBlock data; - }; - - NodeObject::pointer object; - - MyGetCallback cb; - Backend::Status const status = backend->get (hash.begin (), &cb); - - if (status == Backend::ok) - { - // Deserialize the payload into its components. 
+ case Backend::dataCorrupt: + // VFALCO TODO Deal with encountering corrupt data! // - DecodedBlob decoded (hash.begin (), cb.data.getData (), cb.bytes); + WriteLog (lsFATAL, NodeObject) << "Corrupt NodeObject #" << hash; + break; - if (decoded.success) - { - object = decoded.createObject (); - } - else - { - // Houston, we've had a problem. Data is likely corrupt. - - // VFALCO TODO Deal with encountering corrupt data! - - WriteLog (lsFATAL, NodeObject) << "Corrupt NodeObject #" << hash; - } + default: + WriteLog (lsWARNING, NodeObject) << "Unknown status=" << status; + break; } return object; @@ -437,42 +348,119 @@ public: //------------------------------------------------------------------------------ - void importVisitor ( - std::vector & objects, - NodeObject::pointer object) + void store (NodeObjectType type, + uint32 index, + Blob& data, + uint256 const& hash) { - if (objects.size() >= bulkWriteBatchSize) + bool const keyFoundAndObjectCached = m_cache.refreshIfPresent (hash); + + // VFALCO NOTE What happens if the key is found, but the object + // fell out of the cache? We will end up passing it + // to the backend anyway. + // + if (! keyFoundAndObjectCached) { - m_backend->bulkStore (objects); - objects.clear (); - objects.reserve (bulkWriteBatchSize); + // VFALCO TODO Rename this to RIPPLE_VERIFY_NODEOBJECT_KEYS and make + // it be 1 or 0 instead of merely defined or undefined. + // + #if RIPPLE_VERIFY_NODEOBJECT_KEYS + assert (hash == Serializer::getSHA512Half (data)); + #endif + + NodeObject::Ptr object = NodeObject::createObject ( + type, index, data, hash); + + if (!m_cache.canonicalize (hash, object)) + { + m_backend->store (object); + + if (m_fastBackend) + m_fastBackend->store (object); + } + + m_negativeCache.del (hash); } - - objects.push_back (object); } - int import (String sourceBackendParameters) + //------------------------------------------------------------------------------ + + float getCacheHitRate () { - ScopedPointer srcBackend (createBackend (sourceBackendParameters)); + return m_cache.getHitRate (); + } + + void tune (int size, int age) + { + m_cache.setTargetSize (size); + m_cache.setTargetAge (age); + } + + void sweep () + { + m_cache.sweep (); + m_negativeCache.sweep (); + } + + int getWriteLoad () + { + return m_backend->getWriteLoad (); + } + + //------------------------------------------------------------------------------ + + void import (String sourceBackendParameters) + { + class ImportVisitCallback : public Backend::VisitCallback + { + public: + explicit ImportVisitCallback (Backend& backend) + : m_backend (backend) + { + m_objects.reserve (batchWritePreallocationSize); + } + + ~ImportVisitCallback () + { + if (! 
m_objects.empty ()) + m_backend.storeBatch (m_objects); + } + + void visitObject (NodeObject::Ptr const& object) + { + if (m_objects.size () >= batchWritePreallocationSize) + { + m_backend.storeBatch (m_objects); + + m_objects.clear (); + m_objects.reserve (batchWritePreallocationSize); + } + + m_objects.push_back (object); + } + + private: + Backend& m_backend; + Batch m_objects; + }; + + //-------------------------------------------------------------------------- + + ScopedPointer srcBackend (createBackend (sourceBackendParameters, m_scheduler)); WriteLog (lsWARNING, NodeObject) << - "Node import from '" << srcBackend->getDataBaseName() << "' to '" - << m_backend->getDataBaseName() << "'."; + "Node import from '" << srcBackend->getName() << "' to '" + << m_backend->getName() << "'."; - std::vector objects; + ImportVisitCallback callback (*m_backend); - objects.reserve (bulkWriteBatchSize); - - srcBackend->visitAll (BIND_TYPE (&NodeStoreImp::importVisitor, this, boost::ref (objects), P_1)); - - if (!objects.empty ()) - m_backend->bulkStore (objects); - - return 0; + srcBackend->visitAll (callback); } - NodeStore::Backend* createBackend (String const& parameters) + //------------------------------------------------------------------------------ + + static NodeStore::Backend* createBackend (String const& parameters, Scheduler& scheduler) { Backend* backend = nullptr; @@ -486,7 +474,7 @@ public: for (int i = 0; i < s_factories.size (); ++i) { - if (s_factories [i]->getName () == type) + if (s_factories [i]->getName ().compareIgnoreCase (type) == 0) { factory = s_factories [i]; break; @@ -495,16 +483,16 @@ public: if (factory != nullptr) { - backend = factory->createInstance (keyBytes, keyValues); + backend = factory->createInstance (keyBytes, keyValues, scheduler); } else { - throw std::runtime_error ("unkown backend type"); + Throw (std::runtime_error ("unknown backend type")); } } else { - throw std::runtime_error ("missing backend type"); + Throw (std::runtime_error ("missing backend type")); } return backend; @@ -515,10 +503,12 @@ public: s_factories.add (&factory); } + //------------------------------------------------------------------------------ + private: static Array s_factories; - RecycledObjectPool m_blobPool; + Scheduler& m_scheduler; // Persistent key/value storage. 
ScopedPointer m_backend; @@ -542,72 +532,597 @@ void NodeStore::addBackendFactory (BackendFactory& factory) NodeStore* NodeStore::New (String backendParameters, String fastBackendParameters, - int cacheSize, - int cacheAge) + Scheduler& scheduler) { return new NodeStoreImp (backendParameters, fastBackendParameters, - cacheSize, - cacheAge); + scheduler); } -//------------------------------------------------------------------------------ +//============================================================================== -class NodeStoreTests : public UnitTest +// Some common code for the unit tests +// +class NodeStoreUnitTest : public UnitTest { public: + // Tunable parameters + // enum { maxPayloadBytes = 1000, - - numObjects = 1000 + numObjectsToTest = 1000 }; - NodeStoreTests () : UnitTest ("NodeStore") - { - } + // Shorthand type names + // + typedef NodeStore::Backend Backend; + typedef NodeStore::Batch Batch; - // Create a pseudo-random object - static NodeObject* createNodeObject (int index, int64 seedValue, HeapBlock & payloadBuffer) + // Immediately performs the task + struct TestScheduler : NodeStore::Scheduler { - Random r (seedValue + index); - - NodeObjectType type; - switch (r.nextInt (4)) + void scheduleTask (Task* task) { - case 0: type = hotLEDGER; break; - case 1: type = hotTRANSACTION; break; - case 2: type = hotACCOUNT_NODE; break; - case 3: type = hotTRANSACTION_NODE; break; - default: - type = hotUNKNOWN; - break; - }; + task->performScheduledTask (); + } + }; - LedgerIndex ledgerIndex = 1 + r.nextInt (1024 * 1024); + // Creates predictable objects + class PredictableObjectFactory + { + public: + explicit PredictableObjectFactory (int64 seedValue) + : m_seedValue (seedValue) + { + } - uint256 hash; - r.nextBlob (hash.begin (), hash.size ()); + NodeObject::Ptr createObject (int index) + { + Random r (m_seedValue + index); - int payloadBytes = 1 + r.nextInt (maxPayloadBytes); - r.nextBlob (payloadBuffer.getData (), payloadBytes); + NodeObjectType type; + switch (r.nextInt (4)) + { + case 0: type = hotLEDGER; break; + case 1: type = hotTRANSACTION; break; + case 2: type = hotACCOUNT_NODE; break; + case 3: type = hotTRANSACTION_NODE; break; + default: + type = hotUNKNOWN; + break; + }; - return new NodeObject (type, ledgerIndex, payloadBuffer.getData (), payloadBytes, hash); + LedgerIndex ledgerIndex = 1 + r.nextInt (1024 * 1024); + + uint256 hash; + r.nextBlob (hash.begin (), hash.size ()); + + int const payloadBytes = 1 + r.nextInt (maxPayloadBytes); + + Blob data (payloadBytes); + + r.nextBlob (data.data (), payloadBytes); + + return NodeObject::createObject (type, ledgerIndex, data, hash); + } + + private: + int64 const m_seedValue; + }; + +public: + NodeStoreUnitTest (String name, UnitTest::When when = UnitTest::runAlways) + : UnitTest (name, "ripple", when) + { } - void runTest () + // Create a predictable batch of objects + static void createPredictableBatch (Batch& batch, int startingIndex, int numObjects, int64 seedValue) { - beginTest ("create"); + batch.reserve (numObjects); - int64 const seedValue = 50; - - HeapBlock payloadBuffer (maxPayloadBytes); + PredictableObjectFactory factory (seedValue); for (int i = 0; i < numObjects; ++i) + batch.push_back (factory.createObject (startingIndex + i)); + } + + // Compare two batches for equality + static bool areBatchesEqual (Batch const& lhs, Batch const& rhs) + { + bool result = true; + + if (lhs.size () == rhs.size ()) { - ScopedPointer object (createNodeObject (i, seedValue, payloadBuffer)); + for (int i = 0; i < 
lhs.size (); ++i) + { + if (! lhs [i]->isCloneOf (rhs [i])) + { + result = false; + break; + } + } + } + else + { + result = false; + } + + return result; + } + + // Store a batch in a backend + void storeBatch (Backend& backend, Batch const& batch) + { + for (int i = 0; i < batch.size (); ++i) + { + backend.store (batch [i]); + } + } + + // Get a copy of a batch in a backend + void fetchCopyOfBatch (Backend& backend, Batch* pCopy, Batch const& batch) + { + pCopy->clear (); + pCopy->reserve (batch.size ()); + + for (int i = 0; i < batch.size (); ++i) + { + NodeObject::Ptr object; + + Backend::Status const status = backend.fetch ( + batch [i]->getHash ().cbegin (), &object); + + expect (status == Backend::ok, "Should be ok"); + + if (status == Backend::ok) + { + expect (object != nullptr, "Should not be null"); + + pCopy->push_back (object); + } + } + } + + // Store all objects in a batch + static void storeBatch (NodeStore& db, NodeStore::Batch const& batch) + { + for (int i = 0; i < batch.size (); ++i) + { + NodeObject::Ptr const object (batch [i]); + + Blob data (object->getData ()); + + db.store (object->getType (), + object->getIndex (), + data, + object->getHash ()); + } + } + + // Fetch all the hashes in one batch, into another batch. + static void fetchCopyOfBatch (NodeStore& db, + NodeStore::Batch* pCopy, + NodeStore::Batch const& batch) + { + pCopy->clear (); + pCopy->reserve (batch.size ()); + + for (int i = 0; i < batch.size (); ++i) + { + NodeObject::Ptr object = db.fetch (batch [i]->getHash ()); + + if (object != nullptr) + pCopy->push_back (object); } } }; +//------------------------------------------------------------------------------ + +// Tests predictable batches, and NodeObject blob encoding +// +class NodeStoreBasicsTests : public NodeStoreUnitTest +{ +public: + typedef NodeStore::EncodedBlob EncodedBlob; + typedef NodeStore::DecodedBlob DecodedBlob; + + NodeStoreBasicsTests () : NodeStoreUnitTest ("NodeStoreBasics") + { + } + + // Make sure predictable object generation works! + void testBatches (int64 const seedValue) + { + beginTest ("batch"); + + Batch batch1; + createPredictableBatch (batch1, 0, numObjectsToTest, seedValue); + + Batch batch2; + createPredictableBatch (batch2, 0, numObjectsToTest, seedValue); + + expect (areBatchesEqual (batch1, batch2), "Should be equal"); + + Batch batch3; + createPredictableBatch (batch3, 1, numObjectsToTest, seedValue); + + expect (! 
areBatchesEqual (batch1, batch3), "Should not be equal");
+    }
+
+    // Checks encoding/decoding blobs
+    void testBlobs (int64 const seedValue)
+    {
+        beginTest ("encoding");
+
+        Batch batch;
+        createPredictableBatch (batch, 0, numObjectsToTest, seedValue);
+
+        EncodedBlob encoded;
+        for (int i = 0; i < batch.size (); ++i)
+        {
+            encoded.prepare (batch [i]);
+
+            DecodedBlob decoded (encoded.getKey (), encoded.getData (), encoded.getSize ());
+
+            expect (decoded.wasOk (), "Should be ok");
+
+            if (decoded.wasOk ())
+            {
+                NodeObject::Ptr const object (decoded.createObject ());
+
+                expect (batch [i]->isCloneOf (object), "Should be clones");
+            }
+        }
+    }
+
+    void runTest ()
+    {
+        int64 const seedValue = 50;
+
+        testBatches (seedValue);
+
+        testBlobs (seedValue);
+    }
+};
+
+static NodeStoreBasicsTests nodeStoreBasicsTests;
+
+//------------------------------------------------------------------------------
+
+// Tests the NodeStore::Backend interface
+//
+class NodeStoreBackendTests : public NodeStoreUnitTest
+{
+public:
+    NodeStoreBackendTests () : NodeStoreUnitTest ("NodeStoreBackend")
+    {
+    }
+
+    //--------------------------------------------------------------------------
+
+    void testBackend (String type, int64 const seedValue)
+    {
+        beginTest (String ("NodeStore::Backend type=") + type);
+
+        String params;
+        params << "type=" << type
+               << "|path=" << File::createTempFile ("unittest").getFullPathName ();
+
+        // Create a batch
+        NodeStore::Batch batch;
+        createPredictableBatch (batch, 0, numObjectsToTest, seedValue);
+        //createPredictableBatch (batch, 0, 10, seedValue);
+
+        {
+            // Open the backend
+            ScopedPointer <Backend> backend (
+                NodeStoreImp::createBackend (params, m_scheduler));
+
+            // Write the batch
+            storeBatch (*backend, batch);
+
+            {
+                // Read it back in
+                NodeStore::Batch copy;
+                fetchCopyOfBatch (*backend, &copy, batch);
+                expect (areBatchesEqual (batch, copy), "Should be equal");
+            }
+
+            {
+                // Reorder and read the copy again
+                NodeStore::Batch copy;
+                UnitTestUtilities::repeatableShuffle (batch.size (), batch, seedValue);
+                fetchCopyOfBatch (*backend, &copy, batch);
+                expect (areBatchesEqual (batch, copy), "Should be equal");
+            }
+        }
+
+        {
+            // Re-open the backend
+            ScopedPointer <Backend> backend (
+                NodeStoreImp::createBackend (params, m_scheduler));
+
+            // Read it back in
+            NodeStore::Batch copy;
+            fetchCopyOfBatch (*backend, &copy, batch);
+            // Canonicalize the source and destination batches
+            std::sort (batch.begin (), batch.end (), NodeObject::LessThan ());
+            std::sort (copy.begin (), copy.end (), NodeObject::LessThan ());
+            expect (areBatchesEqual (batch, copy), "Should be equal");
+        }
+    }
+
+    void runTest ()
+    {
+        int const seedValue = 50;
+
+        testBackend ("keyvadb", seedValue);
+
+        testBackend ("leveldb", seedValue);
+
+        testBackend ("sqlite", seedValue);
+
+        #if RIPPLE_HYPERLEVELDB_AVAILABLE
+        testBackend ("hyperleveldb", seedValue);
+        #endif
+
+        #if RIPPLE_MDB_AVAILABLE
+        testBackend ("mdb", seedValue);
+        #endif
+    }
+
+private:
+    TestScheduler m_scheduler;
+};
+
+static NodeStoreBackendTests nodeStoreBackendTests;
+
+//------------------------------------------------------------------------------
+
+class NodeStoreTimingTests : public NodeStoreUnitTest
+{
+public:
+    enum
+    {
+        numObjectsToTest = 10000
+    };
+
+    NodeStoreTimingTests ()
+        : NodeStoreUnitTest ("NodeStoreTiming", UnitTest::runManual)
+    {
+    }
+
+    class Stopwatch
+    {
+    public:
+        Stopwatch ()
+        {
+        }
+
+        void start ()
+        {
+            m_startTime = Time::getHighResolutionTicks ();
+        }
+
+        double getElapsed ()
+        {
+            int64 const now =
Time::getHighResolutionTicks();
+
+            return Time::highResolutionTicksToSeconds (now - m_startTime);
+        }
+
+    private:
+        int64 m_startTime;
+    };
+
+    void testBackend (String type, int64 const seedValue)
+    {
+        String s;
+        s << "Testing backend '" << type << "' performance";
+        beginTest (s);
+
+        String params;
+        params << "type=" << type
+               << "|path=" << File::createTempFile ("unittest").getFullPathName ();
+
+        // Create batches
+        NodeStore::Batch batch1;
+        createPredictableBatch (batch1, 0, numObjectsToTest, seedValue);
+        NodeStore::Batch batch2;
+        createPredictableBatch (batch2, 0, numObjectsToTest, seedValue);
+
+        // Open the backend
+        ScopedPointer <Backend> backend (
+            NodeStoreImp::createBackend (params, m_scheduler));
+
+        Stopwatch t;
+
+        // Individual write batch test
+        t.start ();
+        storeBatch (*backend, batch1);
+        s = "";
+        s << " Single write: " << String (t.getElapsed (), 2) << " seconds";
+        logMessage (s);
+
+        // Bulk write batch test
+        t.start ();
+        backend->storeBatch (batch2);
+        s = "";
+        s << " Batch write: " << String (t.getElapsed (), 2) << " seconds";
+        logMessage (s);
+
+        // Read test
+        Batch copy;
+        t.start ();
+        fetchCopyOfBatch (*backend, &copy, batch1);
+        fetchCopyOfBatch (*backend, &copy, batch2);
+        s = "";
+        s << " Batch read: " << String (t.getElapsed (), 2) << " seconds";
+        logMessage (s);
+    }
+
+    void runTest ()
+    {
+        int const seedValue = 50;
+
+        testBackend ("keyvadb", seedValue);
+
+#if 0
+        testBackend ("leveldb", seedValue);
+
+        testBackend ("sqlite", seedValue);
+
+        #if RIPPLE_HYPERLEVELDB_AVAILABLE
+        testBackend ("hyperleveldb", seedValue);
+        #endif
+
+        #if RIPPLE_MDB_AVAILABLE
+        testBackend ("mdb", seedValue);
+        #endif
+#endif
+    }
+
+private:
+    TestScheduler m_scheduler;
+};
+
+//------------------------------------------------------------------------------
+
+class NodeStoreTests : public NodeStoreUnitTest
+{
+public:
+    NodeStoreTests () : NodeStoreUnitTest ("NodeStore")
+    {
+    }
+
+    void testImport (String destBackendType, String srcBackendType, int64 seedValue)
+    {
+        String srcParams;
+        srcParams << "type=" << srcBackendType
+                  << "|path=" << File::createTempFile ("unittest").getFullPathName ();
+
+        // Create a batch
+        NodeStore::Batch batch;
+        createPredictableBatch (batch, 0, numObjectsToTest, seedValue);
+
+        // Write to source db
+        {
+            ScopedPointer <NodeStore> src (NodeStore::New (srcParams, "", m_scheduler));
+
+            storeBatch (*src, batch);
+        }
+
+        String destParams;
+        destParams << "type=" << destBackendType
+                   << "|path=" << File::createTempFile ("unittest").getFullPathName ();
+
+        ScopedPointer <NodeStore> dest (NodeStore::New (
+            destParams, "", m_scheduler));
+
+        beginTest (String ("import into '") + destBackendType + "' from '" + srcBackendType + "'");
+
+        // Do the import
+        dest->import (srcParams);
+
+        // Get the results of the import
+        NodeStore::Batch copy;
+        fetchCopyOfBatch (*dest, &copy, batch);
+
+        // Canonicalize the source and destination batches
+        std::sort (batch.begin (), batch.end (), NodeObject::LessThan ());
+        std::sort (copy.begin (), copy.end (), NodeObject::LessThan ());
+        expect (areBatchesEqual (batch, copy), "Should be equal");
+
+    }
+
+    void testBackend (String type, int64 const seedValue)
+    {
+        beginTest (String ("NodeStore backend type=") + type);
+
+        String params;
+        params << "type=" << type
+               << "|path=" << File::createTempFile ("unittest").getFullPathName ();
+
+        // Create a batch
+        NodeStore::Batch batch;
+        createPredictableBatch (batch, 0, numObjectsToTest, seedValue);
+
+        {
+            // Open the database
+            ScopedPointer <NodeStore> db (NodeStore::New (params, "", m_scheduler));
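+
+            // (The empty second argument means no ephemeral 'fast'
+            // backend is configured here; NodeStoreImp only creates one
+            // when fastBackendParameters is non-empty.)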
// Write the batch
+            storeBatch (*db, batch);
+
+            {
+                // Read it back in
+                NodeStore::Batch copy;
+                fetchCopyOfBatch (*db, &copy, batch);
+                expect (areBatchesEqual (batch, copy), "Should be equal");
+            }
+
+            {
+                // Reorder and read the copy again
+                NodeStore::Batch copy;
+                UnitTestUtilities::repeatableShuffle (batch.size (), batch, seedValue);
+                fetchCopyOfBatch (*db, &copy, batch);
+                expect (areBatchesEqual (batch, copy), "Should be equal");
+            }
+        }
+
+        {
+            // Re-open the database
+            ScopedPointer <NodeStore> db (NodeStore::New (params, "", m_scheduler));
+
+            // Read it back in
+            NodeStore::Batch copy;
+            fetchCopyOfBatch (*db, &copy, batch);
+            // Canonicalize the source and destination batches
+            std::sort (batch.begin (), batch.end (), NodeObject::LessThan ());
+            std::sort (copy.begin (), copy.end (), NodeObject::LessThan ());
+            expect (areBatchesEqual (batch, copy), "Should be equal");
+        }
+    }
+
+public:
+    void runTest ()
+    {
+        int64 const seedValue = 50;
+
+        //
+        // Backend tests
+        //
+
+        testBackend ("keyvadb", seedValue);
+
+        testBackend ("leveldb", seedValue);
+
+        testBackend ("sqlite", seedValue);
+
+        #if RIPPLE_HYPERLEVELDB_AVAILABLE
+        testBackend ("hyperleveldb", seedValue);
+        #endif
+
+        #if RIPPLE_MDB_AVAILABLE
+        testBackend ("mdb", seedValue);
+        #endif
+
+        //
+        // Import tests
+        //
+
+        //testImport ("leveldb", "keyvadb", seedValue);
+//testImport ("sqlite", "leveldb", seedValue);
+        testImport ("leveldb", "sqlite", seedValue);
+    }
+
+private:
+    TestScheduler m_scheduler;
+};
+
 static NodeStoreTests nodeStoreTests;
+
+static NodeStoreTimingTests nodeStoreTimingTests;
diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h
index 60c3ff3a4a..dc91bd98c2 100644
--- a/modules/ripple_app/node/ripple_NodeStore.h
+++ b/modules/ripple_app/node/ripple_NodeStore.h
@@ -8,31 +8,196 @@
 #define RIPPLE_NODESTORE_H_INCLUDED
 
 /** Persistency layer for NodeObject
+
+    A Node is a ledger object which is uniquely identified by a key,
+    which is the 256-bit hash of the body of the node. The payload is
+    a variable length block of serialized data.
+
+    All ledger data is stored as node objects and as such, needs to
+    be persisted between launches. Furthermore, since the set of
+    node objects will in general be larger than the amount of available
+    memory, purged node objects which are later accessed must be retrieved
+    from the node store.
*/
-class NodeStore : LeakChecked <NodeStore>
+class NodeStore
 {
 public:
     enum
     {
-        /** This is the largest number of key/value pairs we
-            will write during a bulk write.
-        */
-        // VFALCO TODO Make this a tunable parameter in the key value pairs
-        bulkWriteBatchSize = 128
-
         /** Size of the fixed keys, in bytes.
+
+            We use a 256-bit hash for the keys.
+
+            @see NodeObject
         */
-        ,keyBytes = 32 // 256 bit hash
+        keyBytes = 32,
+
+        // This is only used to pre-allocate the array for
+        // batch objects and does not affect the amount written.
+        //
+        batchWritePreallocationSize = 128
     };
 
-    /** Interface to inform callers of cetain activities.
+    typedef std::vector <NodeObject::Ptr> Batch;
+
+    //--------------------------------------------------------------------------
+
+    /** Parsed key/value blob into NodeObject components.
+
+        This will extract the information required to construct
+        a NodeObject. It also does consistency checking and returns
+        the result, so it is possible to determine if the data
+        is corrupted without throwing an exception. Note that not all
+        forms of corruption are detected, so further analysis will be
+        needed to eliminate false positives.
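+
+        A minimal usage sketch (illustrative only; key, value and
+        valueBytes stand for a record previously read from a backend):
+
+        @code
+        DecodedBlob decoded (key, value, valueBytes);
+
+        if (decoded.wasOk ())
+            NodeObject::Ptr object = decoded.createObject ();
+        @endcode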
+ + @note This is the format in which a NodeObject is stored in the + persistent storage layer. */ - class Hooks + class DecodedBlob { - virtual void onRetrieveBegin () { } - virtual void onRetrieveEnd () { } + public: + /** Construct the decoded blob from raw data. + */ + DecodedBlob (void const* key, void const* value, int valueBytes); + + /** Determine if the decoding was successful. + */ + bool wasOk () const noexcept { return m_success; } + + /** Create a NodeObject from this data. + */ + NodeObject::Ptr createObject (); + + private: + bool m_success; + + void const* m_key; + LedgerIndex m_ledgerIndex; + NodeObjectType m_objectType; + unsigned char const* m_objectData; + int m_dataBytes; }; + //-------------------------------------------------------------------------- + + /** Utility for producing flattened node objects. + + These get recycled to prevent many small allocations. + + @note This is the format in which a NodeObject is stored in the + persistent storage layer. + */ + struct EncodedBlob + { + typedef RecycledObjectPool Pool; + + void prepare (NodeObject::Ptr const& object); + + void const* getKey () const noexcept { return m_key; } + + size_t getSize () const noexcept { return m_size; } + + void const* getData () const noexcept { return m_data.getData (); } + + private: + void const* m_key; + MemoryBlock m_data; + size_t m_size; + }; + + //-------------------------------------------------------------------------- + + /** Provides the asynchronous scheduling feature. + */ + class Scheduler + { + public: + /** Derived classes perform scheduled tasks. + */ + struct Task + { + virtual ~Task () { } + + /** Performs the task. + + The call may take place on a foreign thread. + */ + virtual void performScheduledTask () = 0; + }; + + /** Schedules a task. + + Depending on the implementation, this could happen + immediately or get deferred. + */ + virtual void scheduleTask (Task* task) = 0; + }; + + //-------------------------------------------------------------------------- + + /** A helper to assist with batch writing. + + The batch writes are performed with a scheduled task. + + @see Scheduler + */ + // VFALCO NOTE I'm not entirely happy having placed this here, + // because whoever needs to use NodeStore certainly doesn't + // need to see the implementation details of BatchWriter. + // + class BatchWriter : private Scheduler::Task + { + public: + /** This callback does the actual writing. + */ + struct Callback + { + virtual void writeBatch (Batch const& batch) = 0; + }; + + /** Create a batch writer. + */ + BatchWriter (Callback& callback, Scheduler& scheduler); + + /** Destroy a batch writer. + + Anything pending in the batch is written out before this returns. + */ + ~BatchWriter (); + + /** Store the object. + + This will add to the batch and initiate a scheduled task to + write the batch out. + */ + void store (NodeObject::ref object); + + /** Get an estimate of the amount of writing I/O pending. + */ + int getWriteLoad (); + + private: + void performScheduledTask (); + void writeBatch (); + void waitForWriting (); + + private: + typedef boost::recursive_mutex LockType; + typedef boost::condition_variable_any CondvarType; + + Callback& m_callback; + Scheduler& m_scheduler; + LockType mWriteMutex; + CondvarType mWriteCondition; + int mWriteGeneration; + int mWriteLoad; + bool mWritePending; + Batch mWriteSet; + }; + + //-------------------------------------------------------------------------- + /** Back end used for the store. 
A Backend implements a persistent key/value storage system. @@ -51,92 +216,76 @@ public: unknown }; - Backend (); + /** Destroy the backend. + All open files are closed and flushed. If there are batched + writes or other tasks scheduled, they will be completed before + this call returns. + */ virtual ~Backend () { } - /** Provides storage for retrieved objects. + /** Get the human-readable name of this backend. + + This is used for diagnostic output. */ - struct GetCallback - { - /** Get storage for an object. + virtual std::string getName() = 0; - @param sizeInBytes The number of bytes needed to store the value. - - @return A pointer to a buffer large enough to hold all the bytes. - */ - virtual void* getStorageForValue (size_t sizeInBytes) = 0; - }; - - /** Retrieve a single object. + /** Fetch a single object. If the object is not found or an error is encountered, the result will indicate the condition. + @note This will be called concurrently. + @param key A pointer to the key data. - @param callback The callback used to obtain storage for the value. + @param pObject [out] The created object if successful. @return The result of the operation. */ - virtual Status get (void const* key, GetCallback* callback) { return notFound; } - - - + virtual Status fetch (void const* key, NodeObject::Ptr* pObject) = 0; /** Store a single object. + + Depending on the implementation this may happen immediately + or deferred using a scheduled task. + + @note This will be called concurrently. + + @param object The object to store. */ - // VFALCO TODO Why should the Backend know or care about NodeObject? - // It should just deal with a fixed key and raw data. - // - virtual bool store (NodeObject::ref); - //virtual bool put (void const* key, void const* value, int valueBytes) { return false; } + virtual void store (NodeObject::Ptr const& object) = 0; - /** Retrieve an individual object. + /** Store a group of objects. + + @note This function will not be called concurrently with + itself or @ref store. */ - virtual NodeObject::pointer retrieve (uint256 const &hash) = 0; + virtual void storeBatch (Batch const& batch) = 0; - // Visit every object in the database - // This function will only be called during an import operation - // - // VFALCO TODO Replace FUNCTION_TYPE with a beast lift. - // - virtual void visitAll (FUNCTION_TYPE ) = 0; + /** Callback for iterating through objects. - private: - friend class NodeStoreImp; + @see visitAll + */ + struct VisitCallback + { + virtual void visitObject (NodeObject::Ptr const& object) = 0; + }; - // VFALCO TODO Put this bulk writing logic into a separate class. - // NOTE Why are these virtual? - void bulkWrite (Job &); - void waitWrite (); - int getWriteLoad (); + /** Visit every object in the database + + This is usually called during import. - private: - virtual std::string getDataBaseName() = 0; + @see import + */ + virtual void visitAll (VisitCallback& callback) = 0; - // Store a group of objects - // This function will only be called from a single thread - // VFALCO NOTE It looks like NodeStore throws this into the job queue? - virtual bool bulkStore (const std::vector< NodeObject::pointer >&) = 0; - - protected: - // VFALCO TODO Put this bulk writing logic into a separate class. - boost::mutex mWriteMutex; - boost::condition_variable mWriteCondition; - int mWriteGeneration; - int mWriteLoad; - bool mWritePending; - std::vector > mWriteSet; + /** Estimate the number of write operations pending. 
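+
+        For example, a backend that performs all of its writes
+        immediately can simply return zero, as the null backend does.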
+    */
+    virtual int getWriteLoad () = 0;
 };
 
-public:
-    // Helper functions for the backend
-    class BackendHelper
-    {
-    public:
-    };
+    //--------------------------------------------------------------------------
 
-public:
     /** Factory to produce backends.
     */
     class BackendFactory
     {
@@ -152,50 +301,89 @@ public:
 
            @param keyBytes The fixed number of bytes per key.
            @param keyValues A set of key/value configuration pairs.
+           @param scheduler The scheduler to use for running tasks.
 
            @return A pointer to the Backend object.
        */
-       virtual Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues) = 0;
+       virtual Backend* createInstance (size_t keyBytes,
+                                        StringPairArray const& keyValues,
+                                        Scheduler& scheduler) = 0;
    };
 
-public:
+    //--------------------------------------------------------------------------
+
    /** Construct a node store.
 
-        parameters has the format:
+        Parameter strings have the format:
 
        <key>=<value>['|'<key>=<value>]
 
-        The key "type" must exist, it defines the backend. For example
-        "type=LevelDB|path=/mnt/ephemeral"
+        The key "type" must exist; it defines the choice of backend.
+        For example
+        `type=LevelDB|path=/mnt/ephemeral`
+
+        @param backendParameters The parameter string for the persistent backend.
+        @param fastBackendParameters The parameter string for the ephemeral backend.
+        @param scheduler The scheduler to use for performing asynchronous tasks.
+
+        @return A pointer to the created object.
    */
-    // VFALCO NOTE Is cacheSize in bytes? objects? KB?
-    //             Is cacheAge in minutes? seconds?
-    //             These should be in the parameters.
-    //
    static NodeStore* New (String backendParameters,
                           String fastBackendParameters,
-                           int cacheSize,
-                           int cacheAge);
+                           Scheduler& scheduler);
+
+    /** Destroy the node store.
+
+        All pending operations are completed, pending writes flushed,
+        and files closed before this returns.
+    */
+    virtual ~NodeStore () { }
 
    /** Add the specified backend factory to the list of available factories.
 
        The names of available factories are compared against the "type"
        value in the parameter list on construction.
+
+        @param factory The factory to add.
    */
    static void addBackendFactory (BackendFactory& factory);
 
+    /** Fetch an object.
+
+        If the object is not in the database, or failed to load
+        correctly, nullptr is returned.
+
+        @note This can be called concurrently.
+
+        @param hash The key of the object to retrieve.
+
+        @return The object, or nullptr if it couldn't be retrieved.
+    */
+    virtual NodeObject::pointer fetch (uint256 const& hash) = 0;
+
+    /** Store the object.
+
+        The caller's Blob parameter is overwritten.
+
+        @param type The type of object.
+        @param ledgerIndex The ledger in which the object appears.
+        @param data The payload of the object. The caller's
+                    variable is overwritten.
+        @param hash The 256-bit hash of the payload data.
+    */
+    virtual void store (NodeObjectType type,
+                        uint32 ledgerIndex,
+                        Blob& data,
+                        uint256 const& hash) = 0;
+
    // VFALCO TODO Document this.
    virtual float getCacheHitRate () = 0;
 
-    // VFALCO TODO Document this.
-    virtual bool store (NodeObjectType type, uint32 index, Blob const& data,
-                        uint256 const& hash) = 0;
-
-    // VFALCO TODO Document this.
-    //        TODO Replace uint256 with void*
-    //
-    virtual NodeObject::pointer retrieve (uint256 const& hash) = 0;
-
    // VFALCO TODO Document this.
    //        TODO Document the parameter meanings.
    virtual void tune (int size, int age) = 0;
 
@@ -203,13 +391,14 @@ public:
    // VFALCO TODO Document this.
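    // (As implemented in NodeStoreImp earlier in this patch, sweep ()
    // releases expired entries from the object cache and the negative
    // cache.)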
virtual void sweep () = 0; - // VFALCO TODO Document this. - // What are the units of the return value? + /** Retrieve the estimated number of pending write operations. + + This is used for diagnostics. + */ virtual int getWriteLoad () = 0; // VFALCO TODO Document this. - // NOTE What's the return value? - virtual int import (String sourceBackendParameters) = 0; + virtual void import (String sourceBackendParameters) = 0; }; #endif diff --git a/modules/ripple_app/node/ripple_NullBackendFactory.cpp b/modules/ripple_app/node/ripple_NullBackendFactory.cpp index 79607fa923..6a3b000c75 100644 --- a/modules/ripple_app/node/ripple_NullBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_NullBackendFactory.cpp @@ -15,28 +15,31 @@ public: { } - std::string getDataBaseName() + std::string getName() { return std::string (); } - bool store (NodeObject::ref obj) + Status fetch (void const*, NodeObject::Ptr*) + { + return notFound; + } + + void store (NodeObject::ref object) + { + } + + void storeBatch (NodeStore::Batch const& batch) { - return false; } - bool bulkStore (const std::vector< NodeObject::pointer >& objs) + void visitAll (VisitCallback& callback) { - return false; } - NodeObject::pointer retrieve (uint256 const& hash) - { - return NodeObject::pointer (); - } - - void visitAll (FUNCTION_TYPE func) + int getWriteLoad () { + return 0; } }; @@ -62,7 +65,10 @@ String NullBackendFactory::getName () const return "none"; } -NodeStore::Backend* NullBackendFactory::createInstance (size_t, StringPairArray const&) +NodeStore::Backend* NullBackendFactory::createInstance ( + size_t, + StringPairArray const&, + NodeStore::Scheduler&) { return new NullBackendFactory::Backend; } diff --git a/modules/ripple_app/node/ripple_NullBackendFactory.h b/modules/ripple_app/node/ripple_NullBackendFactory.h index 2284fed2d2..a68c1838ea 100644 --- a/modules/ripple_app/node/ripple_NullBackendFactory.h +++ b/modules/ripple_app/node/ripple_NullBackendFactory.h @@ -23,7 +23,10 @@ public: static NullBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp b/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp index aa4d4096dc..7b4a7a9dc4 100644 --- a/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_SqliteBackendFactory.cpp @@ -11,23 +11,25 @@ static const char* s_nodeStoreDBInit [] = "PRAGMA journal_size_limit=1582080;", #if (ULONG_MAX > UINT_MAX) && !defined (NO_SQLITE_MMAP) - "PRAGMA mmap_size=171798691840;", + "PRAGMA mmap_size=171798691840;", #endif "BEGIN TRANSACTION;", "CREATE TABLE CommittedObjects ( \ - Hash CHARACTER(64) PRIMARY KEY, \ - ObjType CHAR(1) NOT NULL, \ - LedgerIndex BIGINT UNSIGNED, \ - Object BLOB \ - );", + Hash CHARACTER(64) PRIMARY KEY, \ + ObjType CHAR(1) NOT NULL, \ + LedgerIndex BIGINT UNSIGNED, \ + Object BLOB \ + );", "END TRANSACTION;" }; static int s_nodeStoreDBCount = NUMBER (s_nodeStoreDBInit); +//------------------------------------------------------------------------------ + class SqliteBackendFactory::Backend : public NodeStore::Backend { public: @@ -42,92 +44,137 @@ public: // s << "PRAGMA cache_size=-" << String (theConfig.getSize(siHashNodeDBCache) * 1024); m_db->getDB()->executeSQL (s.toStdString ().c_str ()); - - //m_db->getDB()->executeSQL 
(boost::str (boost::format ("PRAGMA cache_size=-%d;") % - // (theConfig.getSize(siHashNodeDBCache) * 1024))); } ~Backend() { - delete m_db; } - std::string getDataBaseName() + std::string getName() { return m_name; } - bool bulkStore (const std::vector< NodeObject::pointer >& objects) + //-------------------------------------------------------------------------- + + Status fetch (void const* key, NodeObject::Ptr* pObject) { - ScopedLock sl(m_db->getDBLock()); - static SqliteStatement pStB(m_db->getDB()->getSqliteDB(), "BEGIN TRANSACTION;"); - static SqliteStatement pStE(m_db->getDB()->getSqliteDB(), "END TRANSACTION;"); - static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), + Status result = ok; + + pObject->reset (); + + { + ScopedLock sl (m_db->getDBLock()); + + uint256 const hash (key); + + static SqliteStatement pSt (m_db->getDB()->getSqliteDB(), + "SELECT ObjType,LedgerIndex,Object FROM CommittedObjects WHERE Hash = ?;"); + + pSt.bind (1, hash.GetHex()); + + if (pSt.isRow (pSt.step())) + { + // VFALCO NOTE This is unfortunately needed, + // the DatabaseCon creates the blob? + Blob data (pSt.getBlob (2)); + *pObject = NodeObject::createObject ( + getTypeFromString (pSt.peekString (0)), + pSt.getUInt32 (1), + data, + hash); + } + else + { + result = notFound; + } + + pSt.reset(); + } + + return result; + } + + void store (NodeObject::ref object) + { + NodeStore::Batch batch; + + batch.push_back (object); + + storeBatch (batch); + } + + void storeBatch (NodeStore::Batch const& batch) + { + // VFALCO TODO Rewrite this to use Beast::db + + ScopedLock sl (m_db->getDBLock()); + + static SqliteStatement pStB (m_db->getDB()->getSqliteDB(), "BEGIN TRANSACTION;"); + static SqliteStatement pStE (m_db->getDB()->getSqliteDB(), "END TRANSACTION;"); + static SqliteStatement pSt (m_db->getDB()->getSqliteDB(), "INSERT OR IGNORE INTO CommittedObjects " "(Hash,ObjType,LedgerIndex,Object) VALUES (?, ?, ?, ?);"); pStB.step(); pStB.reset(); - BOOST_FOREACH(NodeObject::ref object, objects) + BOOST_FOREACH (NodeObject::Ptr const& object, batch) { - bind(pSt, object); + doBind (pSt, object); + pSt.step(); pSt.reset(); } pStE.step(); pStE.reset(); - - return true; - } - NodeObject::pointer retrieve(uint256 const& hash) + void visitAll (VisitCallback& callback) { - NodeObject::pointer ret; + // No lock needed as per the visitAll() API - { - ScopedLock sl(m_db->getDBLock()); - static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), - "SELECT ObjType,LedgerIndex,Object FROM CommittedObjects WHERE Hash = ?;"); - - pSt.bind(1, hash.GetHex()); - - if (pSt.isRow(pSt.step())) - ret = boost::make_shared(getType(pSt.peekString(0)), pSt.getUInt32(1), pSt.getBlob(2), hash); - - pSt.reset(); - } - - return ret; - } - - void visitAll(FUNCTION_TYPE func) - { uint256 hash; static SqliteStatement pSt(m_db->getDB()->getSqliteDB(), "SELECT ObjType,LedgerIndex,Object,Hash FROM CommittedObjects;"); - while (pSt.isRow(pSt.step())) + while (pSt.isRow (pSt.step())) { hash.SetHexExact(pSt.getString(3)); - func(boost::make_shared(getType(pSt.peekString(0)), pSt.getUInt32(1), pSt.getBlob(2), hash)); + + // VFALCO NOTE This is unfortunately needed, + // the DatabaseCon creates the blob? 
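+            // (A plausible reading: NodeObject::createObject takes the
+            // Blob by non-const reference and may consume it, so a named
+            // copy is made here rather than passing a temporary.)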
+ Blob data (pSt.getBlob (2)); + NodeObject::Ptr const object (NodeObject::createObject ( + getTypeFromString (pSt.peekString (0)), + pSt.getUInt32 (1), + data, + hash)); + + callback.visitObject (object); } - pSt.reset(); + pSt.reset (); } - void bind(SqliteStatement& statement, NodeObject::ref object) + int getWriteLoad () + { + return 0; + } + + //-------------------------------------------------------------------------- + + void doBind (SqliteStatement& statement, NodeObject::ref object) { char const* type; switch (object->getType()) { - case hotLEDGER: type = "L"; break; + case hotLEDGER: type = "L"; break; case hotTRANSACTION: type = "T"; break; - case hotACCOUNT_NODE: type = "A"; break; - case hotTRANSACTION_NODE: type = "N"; break; + case hotACCOUNT_NODE: type = "A"; break; + case hotTRANSACTION_NODE: type = "N"; break; default: type = "U"; } @@ -137,20 +184,21 @@ public: statement.bindStatic(4, object->getData()); } - NodeObjectType getType(std::string const& type) + NodeObjectType getTypeFromString (std::string const& s) { - NodeObjectType htype = hotUNKNOWN; - if (!type.empty()) + NodeObjectType type = hotUNKNOWN; + + if (!s.empty ()) { - switch (type[0]) + switch (s [0]) { - case 'L': htype = hotLEDGER; break; - case 'T': htype = hotTRANSACTION; break; - case 'A': htype = hotACCOUNT_NODE; break; - case 'N': htype = hotTRANSACTION_NODE; break; + case 'L': type = hotLEDGER; break; + case 'T': type = hotTRANSACTION; break; + case 'A': type = hotACCOUNT_NODE; break; + case 'N': type = hotTRANSACTION_NODE; break; } } - return htype; + return type; } private: @@ -181,7 +229,10 @@ String SqliteBackendFactory::getName () const return "Sqlite"; } -NodeStore::Backend* SqliteBackendFactory::createInstance (size_t keyBytes, StringPairArray const& keyValues) +NodeStore::Backend* SqliteBackendFactory::createInstance ( + size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler) { return new Backend (keyBytes, keyValues ["path"].toStdString ()); } diff --git a/modules/ripple_app/node/ripple_SqliteBackendFactory.h b/modules/ripple_app/node/ripple_SqliteBackendFactory.h index dfb10b1bd7..828588fd74 100644 --- a/modules/ripple_app/node/ripple_SqliteBackendFactory.h +++ b/modules/ripple_app/node/ripple_SqliteBackendFactory.h @@ -21,7 +21,10 @@ public: static SqliteBackendFactory& getInstance (); String getName () const; - NodeStore::Backend* createInstance (size_t keyBytes, StringPairArray const& keyValues); + + NodeStore::Backend* createInstance (size_t keyBytes, + StringPairArray const& keyValues, + NodeStore::Scheduler& scheduler); }; #endif diff --git a/modules/ripple_basics/types/ripple_UInt256.h b/modules/ripple_basics/types/ripple_UInt256.h index 9605dfc59b..4790fea7f3 100644 --- a/modules/ripple_basics/types/ripple_UInt256.h +++ b/modules/ripple_basics/types/ripple_UInt256.h @@ -365,14 +365,24 @@ public: return reinterpret_cast (pn + WIDTH); } - const unsigned char* begin () const + unsigned char const* cbegin () const noexcept { - return reinterpret_cast (pn); + return reinterpret_cast (pn); } - const unsigned char* end () const + unsigned char const* cend () const noexcept { - return reinterpret_cast (pn + WIDTH); + return reinterpret_cast (pn + WIDTH); + } + + const unsigned char* begin () const noexcept + { + return cbegin (); + } + + const unsigned char* end () const noexcept + { + return cend (); } unsigned int size () const diff --git a/modules/ripple_core/validator/ripple_Validators.cpp b/modules/ripple_core/validator/ripple_Validators.cpp index 
e722d0b624..60eaed5279 100644
--- a/modules/ripple_core/validator/ripple_Validators.cpp
+++ b/modules/ripple_core/validator/ripple_Validators.cpp
@@ -402,7 +402,7 @@ private:
 class ValidatorListTests : public UnitTest
 {
 public:
-    ValidatorListTests () : UnitTest ("ValidatorList")
+    ValidatorListTests () : UnitTest ("ValidatorList", "ripple")
     {
     }
 
diff --git a/rippled-example.cfg b/rippled-example.cfg
index 8f760c3e68..2c89e205ae 100644
--- a/rippled-example.cfg
+++ b/rippled-example.cfg
@@ -222,23 +222,33 @@
 #   Examples: RASH BUSH MILK LOOK BAD BRIM AVID GAFF BAIT ROT POD LOVE
 #             shfArahZT9Q9ckTf3s1psJ7C7qzVN
 #
+#
+#
 # [node_db]
 # [temp_db]
+#
 #   Set the choice of databases for storing Node objects.
+#
 #   Format (without spaces):
 #       <key>'='<value> [ '|' <key>'='<value> ]...
+#
 #   Examples:
 #       type=HyperLevelDB|path=db/hashnode
-#   Choices for 'type':
+#
+#   Choices for 'type' (not case-sensitive)
 #       HyperLevelDB    Use an improved version of LevelDB (preferred)
 #       LevelDB         Use Google's LevelDB database (deprecated)
 #       MDB             Use MDB
 #       none            Use no backend
 #       KeyvaDB         Use OpenCoin's KeyvaDB (experimental)
+#       SQLite          Use SQLite
+#
 #   Required keys:
 #       path            Location to store the database (all types)
+#
 #   Optional keys:
-#       ...
+#       (none yet)
+#
 #   Notes
 #       The 'node_db' entry configures the primary, persistent storage.
 #       The 'temp_db' configures a look-aside cache for high volume storage
diff --git a/src/cpp/ripple/NetworkOPs.cpp b/src/cpp/ripple/NetworkOPs.cpp
index 6b08e8bb65..438d8cb9a7 100644
--- a/src/cpp/ripple/NetworkOPs.cpp
+++ b/src/cpp/ripple/NetworkOPs.cpp
@@ -48,8 +48,7 @@ void NetworkOPs::processNetTimer ()
     // VFALCO NOTE This is for diagnosing a crash on exit
     Application& app (getApp ());
     ILoadManager& mgr (app.getLoadManager ());
-
-    getApp().getLoadManager ().resetDeadlockDetector ();
+    mgr.resetDeadlockDetector ();
 
     std::size_t const numPeers = getApp().getPeers ().getPeerVector ().size ();
 
diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp
index 1b7c31c0f8..42307dc853 100644
--- a/src/cpp/ripple/ripple_Application.cpp
+++ b/src/cpp/ripple/ripple_Application.cpp
@@ -16,6 +16,7 @@ class ApplicationImp
     : public Application
     , public SharedSingleton <ApplicationImp>
     , public Validators::Listener
+    , public NodeStore::Scheduler
     , LeakChecked <ApplicationImp>
 {
 public:
@@ -46,15 +47,14 @@ public:
     , mNetOps (new NetworkOPs (&mLedgerMaster))
     , m_rpcServerHandler (*mNetOps)
     , mTempNodeCache ("NodeCache", 16384, 90)
-    , m_nodeStore (NodeStore::New (
-        theConfig.NODE_DB,
-        theConfig.FASTNODE_DB,
-        16384,
-        300))
     , mSLECache ("LedgerEntryCache", 4096, 120)
     , mSNTPClient (mAuxService)
     , mJobQueue (mIOService)
     // VFALCO New stuff
+    , m_nodeStore (NodeStore::New (
+        theConfig.NODE_DB,
+        theConfig.FASTNODE_DB,
+        *this))
     , m_validators (Validators::New (this))
     , mFeatures (IFeatures::New (2 * 7 * 24 * 60 * 60, 200)) // two weeks, 200/256
     , mFeeVote (IFeeVote::New (10, 50 * SYSTEM_CURRENCY_PARTS, 12.5 * SYSTEM_CURRENCY_PARTS))
@@ -92,11 +92,28 @@ public:
         delete mWalletDB;
     }
 
+    //--------------------------------------------------------------------------
+
+    static void callScheduledTask (NodeStore::Scheduler::Task* task, Job&)
+    {
+        task->performScheduledTask ();
+    }
+
+    void scheduleTask (NodeStore::Scheduler::Task* task)
+    {
+        getJobQueue ().addJob (
+            jtWRITE,
+            "NodeObject::store",
+            BIND_TYPE (&ApplicationImp::callScheduledTask, task, P_1));
+    }
+
+    //--------------------------------------------------------------------------
+
     LocalCredentials& getLocalCredentials ()
     {
         return m_localCredentials ;
     }
-
+
     NetworkOPs& getOPs ()
     {
         return
*mNetOps; @@ -106,62 +123,62 @@ public: { return mIOService; } - + LedgerMaster& getLedgerMaster () { return mLedgerMaster; } - + InboundLedgers& getInboundLedgers () { return m_inboundLedgers; } - + TransactionMaster& getMasterTransaction () { return mMasterTransaction; } - + NodeCache& getTempNodeCache () { return mTempNodeCache; } - + NodeStore& getNodeStore () { return *m_nodeStore; } - + JobQueue& getJobQueue () { return mJobQueue; } - + MasterLockType& getMasterLock () { return mMasterLock; } - + ILoadManager& getLoadManager () { return *m_loadManager; } - + TXQueue& getTxnQueue () { return mTxnQueue; } - + PeerDoor& getPeerDoor () { return *mPeerDoor; } - + OrderBookDB& getOrderBookDB () { return mOrderBookDB; } - + SLECache& getSLECache () { return mSLECache; @@ -176,37 +193,37 @@ public: { return *mFeatures; } - + ILoadFeeTrack& getFeeTrack () { return *mFeeTrack; } - + IFeeVote& getFeeVote () { return *mFeeVote; } - + IHashRouter& getHashRouter () { return *mHashRouter; } - + IValidations& getValidations () { return *mValidations; } - + UniqueNodeList& getUNL () { return *mUNL; } - + IProofOfWorkFactory& getProofOfWorkFactory () { return *mProofOfWorkFactory; } - + IPeers& getPeers () { return *mPeers; @@ -272,7 +289,6 @@ private: ScopedPointer mNetOps; RPCServerHandler m_rpcServerHandler; NodeCache mTempNodeCache; - ScopedPointer m_nodeStore; SLECache mSLECache; SNTPClient mSNTPClient; JobQueue mJobQueue; @@ -280,16 +296,17 @@ private: OrderBookDB mOrderBookDB; // VFALCO Clean stuff - beast::ScopedPointer m_validators; - beast::ScopedPointer mFeatures; - beast::ScopedPointer mFeeVote; - beast::ScopedPointer mFeeTrack; - beast::ScopedPointer mHashRouter; - beast::ScopedPointer mValidations; - beast::ScopedPointer mUNL; - beast::ScopedPointer mProofOfWorkFactory; - beast::ScopedPointer mPeers; - beast::ScopedPointer m_loadManager; + ScopedPointer m_nodeStore; + ScopedPointer m_validators; + ScopedPointer mFeatures; + ScopedPointer mFeeVote; + ScopedPointer mFeeTrack; + ScopedPointer mHashRouter; + ScopedPointer mValidations; + ScopedPointer mUNL; + ScopedPointer mProofOfWorkFactory; + ScopedPointer mPeers; + ScopedPointer m_loadManager; // VFALCO End Clean stuff DatabaseCon* mRpcDB; @@ -382,7 +399,7 @@ void ApplicationImp::setup () if (!theConfig.DEBUG_LOGFILE.empty ()) { - // Let BEAST_DEBUG messages go to the file but only WARNING or higher to regular output (unless verbose) + // Let debug messages go to the file but only WARNING or higher to regular output (unless verbose) Log::setLogFile (theConfig.DEBUG_LOGFILE); if (Log::getMinSeverity () > lsDEBUG) @@ -596,7 +613,7 @@ void ApplicationImp::run () // VFALCO NOTE This seems unnecessary. 
If we properly refactor the load // manager then the deadlock detector can just always be "armed" // - getApp().getLoadManager ().activateDeadlockDetector (); + getApp().getLoadManager ().activateDeadlockDetector (); } mIOService.run (); // This blocks @@ -964,7 +981,7 @@ void ApplicationImp::updateTables () } if (!theConfig.DB_IMPORT.empty()) - getApp().getNodeStore().import(theConfig.DB_IMPORT); + getApp().getNodeStore().import(theConfig.DB_IMPORT); } //------------------------------------------------------------------------------ diff --git a/src/cpp/ripple/ripple_Application.h b/src/cpp/ripple/ripple_Application.h index 605674d4d1..59a2f62a2b 100644 --- a/src/cpp/ripple/ripple_Application.h +++ b/src/cpp/ripple/ripple_Application.h @@ -65,12 +65,12 @@ public: char const* getFileName () const noexcept { - return m_fileName; + return m_fileName.get (); } int getLineNumber () const noexcept { - return m_lineNumber; + return m_lineNumber.get (); } private: @@ -78,19 +78,19 @@ public: void setOwner (char const* fileName, int lineNumber) { - m_fileName = fileName; - m_lineNumber = lineNumber; + m_fileName.set (fileName); + m_lineNumber.set (lineNumber); } void resetOwner () { - m_fileName = ""; - m_lineNumber = 0; + m_fileName.set (""); + m_lineNumber.set (0); } boost::recursive_mutex m_mutex; - char const* m_fileName; - int m_lineNumber; + Atomic m_fileName; + Atomic m_lineNumber; }; class ScopedLockType diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index 6e0a0919aa..8f79353ee5 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -156,15 +156,6 @@ static void runBeastUnitTests (std::string const& individualTest = "") { tr.runTest (individualTest.c_str ()); } - - // Report - for (int i = 0; i < tr.getNumResults (); ++i) - { - UnitTests::TestResult const& r (*tr.getResult (i)); - - for (int j = 0; j < r.messages.size (); ++j) - Log::out () << r.messages [j].toStdString (); - } } //------------------------------------------------------------------------------ @@ -257,16 +248,16 @@ int rippleMain (int argc, char** argv) p.add ("parameters", -1); // These must be added before the Application object is created + NodeStore::addBackendFactory (KeyvaDBBackendFactory::getInstance ()); + NodeStore::addBackendFactory (LevelDBBackendFactory::getInstance ()); + NodeStore::addBackendFactory (NullBackendFactory::getInstance ()); + NodeStore::addBackendFactory (SqliteBackendFactory::getInstance ()); #if RIPPLE_HYPERLEVELDB_AVAILABLE NodeStore::addBackendFactory (HyperLevelDBBackendFactory::getInstance ()); #endif - NodeStore::addBackendFactory (KeyvaDBBackendFactory::getInstance ()); - NodeStore::addBackendFactory (LevelDBBackendFactory::getInstance ()); #if RIPPLE_MDB_AVAILABLE NodeStore::addBackendFactory (MdbBackendFactory::getInstance ()); #endif - NodeStore::addBackendFactory (NullBackendFactory::getInstance ()); - NodeStore::addBackendFactory (SqliteBackendFactory::getInstance ()); if (! 
RandomNumbers::getInstance ().initialize ()) { diff --git a/src/cpp/ripple/ripple_Peer.cpp b/src/cpp/ripple/ripple_Peer.cpp index f6d33e00c8..4e4b9a3322 100644 --- a/src/cpp/ripple/ripple_Peer.cpp +++ b/src/cpp/ripple/ripple_Peer.cpp @@ -1554,7 +1554,7 @@ void PeerImp::recvGetObjectByHash (const boost::shared_ptrgetNodeHash () != hash) { @@ -213,7 +213,7 @@ SHAMapTreeNode::pointer SHAMap::getNode (const SHAMapNode& id, uint256 const& ha WriteLog (lsFATAL, SHAMap) << "ID: " << id; WriteLog (lsFATAL, SHAMap) << "TgtHash " << hash; WriteLog (lsFATAL, SHAMap) << "NodHash " << node->getNodeHash (); - throw std::runtime_error ("invalid node"); + Throw (std::runtime_error ("invalid node")); } #endif @@ -230,7 +230,7 @@ SHAMapTreeNode* SHAMap::getNodePointer (const SHAMapNode& id, uint256 const& has SHAMapTreeNode* ret = getNodePointerNT (id, hash); if (!ret) - throw SHAMapMissingNode (mType, id, hash); + Throw (SHAMapMissingNode (mType, id, hash)); return ret; } @@ -251,7 +251,7 @@ SHAMapTreeNode* SHAMap::getNodePointer (const SHAMapNode& id, uint256 const& has SHAMapTreeNode* ret = getNodePointerNT (id, hash, filter); if (!ret) - throw SHAMapMissingNode (mType, id, hash); + Throw (SHAMapMissingNode (mType, id, hash)); return ret; } @@ -493,7 +493,7 @@ SHAMapItem::pointer SHAMap::peekNextItem (uint256 const& id, SHAMapTreeNode::TNT firstNode = firstBelow (firstNode); if (!firstNode || firstNode->isInner ()) - throw std::runtime_error ("missing/corrupt node"); + Throw (std::runtime_error ("missing/corrupt node")); type = firstNode->getType (); return firstNode->peekItem (); @@ -531,7 +531,7 @@ SHAMapItem::pointer SHAMap::peekPrevItem (uint256 const& id) SHAMapTreeNode* item = firstBelow (node.get ()); if (!item) - throw std::runtime_error ("missing node"); + Throw (std::runtime_error ("missing node")); return item->peekItem (); } @@ -597,7 +597,7 @@ bool SHAMap::delItem (uint256 const& id) std::stack stack = getStack (id, true); if (stack.empty ()) - throw std::runtime_error ("missing node"); + Throw (std::runtime_error ("missing node")); SHAMapTreeNode::pointer leaf = stack.top (); stack.pop (); @@ -678,7 +678,7 @@ bool SHAMap::addGiveItem (SHAMapItem::ref item, bool isTransaction, bool hasMeta std::stack stack = getStack (tag, true); if (stack.empty ()) - throw std::runtime_error ("missing node"); + Throw (std::runtime_error ("missing node")); SHAMapTreeNode::pointer node = stack.top (); stack.pop (); @@ -703,7 +703,7 @@ bool SHAMap::addGiveItem (SHAMapItem::ref item, bool isTransaction, bool hasMeta WriteLog (lsFATAL, SHAMap) << "NewNode: " << *newNode; dump (); assert (false); - throw std::runtime_error ("invalid inner node"); + Throw (std::runtime_error ("invalid inner node")); } trackNewNode (newNode); @@ -776,7 +776,7 @@ bool SHAMap::updateGiveItem (SHAMapItem::ref item, bool isTransaction, bool hasM std::stack stack = getStack (tag, true); if (stack.empty ()) - throw std::runtime_error ("missing node"); + Throw (std::runtime_error ("missing node")); SHAMapTreeNode::pointer node = stack.top (); stack.pop (); @@ -810,7 +810,7 @@ SHAMapTreeNode::pointer SHAMap::fetchNodeExternal (const SHAMapNode& id, uint256 SHAMapTreeNode::pointer ret = fetchNodeExternalNT (id, hash); if (!ret) - throw SHAMapMissingNode (mType, id, hash); + Throw (SHAMapMissingNode (mType, id, hash)); return ret; } @@ -825,8 +825,7 @@ SHAMapTreeNode::pointer SHAMap::fetchNodeExternalNT (const SHAMapNode& id, uint2 // These are for diagnosing a crash on exit Application& app (getApp ()); NodeStore& nodeStore 
(app.getNodeStore ()); - - NodeObject::pointer obj (getApp().getNodeStore ().retrieve (hash)); + NodeObject::pointer obj (nodeStore.fetch (hash)); if (!obj) { @@ -889,8 +888,11 @@ bool SHAMap::fetchRoot (uint256 const& hash, SHAMapSyncFilter* filter) } SHAMapTreeNode::pointer newRoot = fetchNodeExternalNT(SHAMapNode(), hash); + if (newRoot) + { root = newRoot; + } else { Blob nodeData; @@ -939,7 +941,7 @@ int SHAMap::flushDirty (DirtyMap& map, int maxNodes, NodeObjectType t, uint32 se #endif - getApp().getNodeStore ().store (t, seq, s.peekData (), it->second->getNodeHash ()); + getApp().getNodeStore ().store (t, seq, s.modData (), it->second->getNodeHash ()); if (flushed++ >= maxNodes) return flushed; diff --git a/src/cpp/ripple/ripple_SHAMapNode.cpp b/src/cpp/ripple/ripple_SHAMapNode.cpp index dcd8c0b3dc..86062eb56e 100644 --- a/src/cpp/ripple/ripple_SHAMapNode.cpp +++ b/src/cpp/ripple/ripple_SHAMapNode.cpp @@ -128,7 +128,7 @@ SHAMapNode SHAMapNode::getChildNodeID (int m) const // Which branch would contain the specified hash int SHAMapNode::selectBranch (uint256 const& hash) const { -#ifdef PARANOID +#if RIPPLE_VERIFY_NODEOBJECT_KEYS if (mDepth >= 64) { diff --git a/src/cpp/ripple/ripple_SHAMapSync.cpp b/src/cpp/ripple/ripple_SHAMapSync.cpp index 941f3b4ee8..d9579600fe 100644 --- a/src/cpp/ripple/ripple_SHAMapSync.cpp +++ b/src/cpp/ripple/ripple_SHAMapSync.cpp @@ -243,7 +243,7 @@ SHAMapAddNode SHAMap::addRootNode (Blob const& rootNode, SHANodeFormat format, { Serializer s; root->addRaw (s, snfPREFIX); - filter->gotNode (false, *root, root->getNodeHash (), s.peekData (), root->getType ()); + filter->gotNode (false, *root, root->getNodeHash (), s.modData (), root->getType ()); } return SHAMapAddNode::useful (); @@ -281,7 +281,7 @@ SHAMapAddNode SHAMap::addRootNode (uint256 const& hash, Blob const& rootNode, SH { Serializer s; root->addRaw (s, snfPREFIX); - filter->gotNode (false, *root, root->getNodeHash (), s.peekData (), root->getType ()); + filter->gotNode (false, *root, root->getNodeHash (), s.modData (), root->getType ()); } return SHAMapAddNode::useful (); @@ -345,7 +345,7 @@ SHAMapAddNode SHAMap::addKnownNode (const SHAMapNode& node, Blob const& rawNode, { Serializer s; newNode->addRaw (s, snfPREFIX); - filter->gotNode (false, node, iNode->getChildHash (branch), s.peekData (), newNode->getType ()); + filter->gotNode (false, node, iNode->getChildHash (branch), s.modData (), newNode->getType ()); } mTNByID[node] = newNode; diff --git a/src/cpp/ripple/ripple_SHAMapSyncFilter.h b/src/cpp/ripple/ripple_SHAMapSyncFilter.h index 1b2ec89b4b..f4c83d6181 100644 --- a/src/cpp/ripple/ripple_SHAMapSyncFilter.h +++ b/src/cpp/ripple/ripple_SHAMapSyncFilter.h @@ -12,29 +12,18 @@ class SHAMapSyncFilter { public: - SHAMapSyncFilter () - { - } - - virtual ~SHAMapSyncFilter () - { - } + virtual ~SHAMapSyncFilter () { } + // Note that the nodeData is overwritten by this call virtual void gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, - SHAMapTreeNode::TNType type) - { - } + Blob& nodeData, + SHAMapTreeNode::TNType type) = 0; virtual bool haveNode (SHAMapNode const& id, uint256 const& nodeHash, - Blob& nodeData) - { - return false; - } + Blob& nodeData) = 0; }; #endif -// vim:ts=4 diff --git a/src/cpp/ripple/ripple_SHAMapSyncFilters.cpp b/src/cpp/ripple/ripple_SHAMapSyncFilters.cpp index db451f56c2..157a903f2c 100644 --- a/src/cpp/ripple/ripple_SHAMapSyncFilters.cpp +++ b/src/cpp/ripple/ripple_SHAMapSyncFilters.cpp @@ -9,7 +9,7 @@ 
ConsensusTransSetSF::ConsensusTransSetSF () } void ConsensusTransSetSF::gotNode (bool fromFilter, const SHAMapNode& id, uint256 const& nodeHash, - Blob const& nodeData, SHAMapTreeNode::TNType type) + Blob& nodeData, SHAMapTreeNode::TNType type) { if (fromFilter) return; @@ -70,7 +70,7 @@ AccountStateSF::AccountStateSF (uint32 ledgerSeq) void AccountStateSF::gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, + Blob& nodeData, SHAMapTreeNode::TNType) { getApp().getNodeStore ().store (hotACCOUNT_NODE, mLedgerSeq, nodeData, nodeHash); @@ -93,7 +93,7 @@ TransactionStateSF::TransactionStateSF (uint32 ledgerSeq) void TransactionStateSF::gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, + Blob& nodeData, SHAMapTreeNode::TNType type) { getApp().getNodeStore ().store ( diff --git a/src/cpp/ripple/ripple_SHAMapSyncFilters.h b/src/cpp/ripple/ripple_SHAMapSyncFilters.h index d41593bb72..0bf834b8b9 100644 --- a/src/cpp/ripple/ripple_SHAMapSyncFilters.h +++ b/src/cpp/ripple/ripple_SHAMapSyncFilters.h @@ -17,10 +17,11 @@ class ConsensusTransSetSF : public SHAMapSyncFilter public: ConsensusTransSetSF (); + // Note that the nodeData is overwritten by this call void gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, + Blob& nodeData, SHAMapTreeNode::TNType); bool haveNode (SHAMapNode const& id, @@ -35,10 +36,11 @@ class AccountStateSF : public SHAMapSyncFilter public: explicit AccountStateSF (uint32 ledgerSeq); + // Note that the nodeData is overwritten by this call void gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, + Blob& nodeData, SHAMapTreeNode::TNType); bool haveNode (SHAMapNode const& id, @@ -56,10 +58,11 @@ class TransactionStateSF : public SHAMapSyncFilter public: explicit TransactionStateSF (uint32 ledgerSeq); + // Note that the nodeData is overwritten by this call void gotNode (bool fromFilter, SHAMapNode const& id, uint256 const& nodeHash, - Blob const& nodeData, + Blob& nodeData, SHAMapTreeNode::TNType); bool haveNode (SHAMapNode const& id, diff --git a/src/cpp/ripple/ripple_SHAMapTreeNode.cpp b/src/cpp/ripple/ripple_SHAMapTreeNode.cpp index c406808f0a..1d244f64ee 100644 --- a/src/cpp/ripple/ripple_SHAMapTreeNode.cpp +++ b/src/cpp/ripple/ripple_SHAMapTreeNode.cpp @@ -207,7 +207,7 @@ SHAMapTreeNode::SHAMapTreeNode (const SHAMapNode& id, Blob const& rawNode, uint3 if (hashValid) { mHash = hash; -#ifdef PARANOID +#if RIPPLE_VERIFY_NODEOBJECT_KEYS updateHash (); assert (mHash == hash); #endif @@ -225,7 +225,7 @@ bool SHAMapTreeNode::updateHash () if (mIsBranch != 0) { nh = Serializer::getPrefixHash (HashPrefix::innerNode, reinterpret_cast (mHashes), sizeof (mHashes)); -#ifdef PARANOID +#if RIPPLE_VERIFY_NODEOBJECT_KEYS Serializer s; s.add32 (HashPrefix::innerNode); From f9fd3f1b060a6d72d694f4ecf10905b9a1f6863d Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 15:01:36 -0700 Subject: [PATCH 31/50] Clean up RandomAccessFile --- .../files/beast_RandomAccessFile.cpp | 84 ++++--------------- .../beast_core/files/beast_RandomAccessFile.h | 52 +----------- 2 files changed, 17 insertions(+), 119 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp index 8ca6843491..28c029cd8b 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp +++ 
b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.cpp @@ -17,12 +17,9 @@ */ //============================================================================== -RandomAccessFile::RandomAccessFile (int bufferSizeToUse) noexcept +RandomAccessFile::RandomAccessFile () noexcept : fileHandle (nullptr) , currentPosition (0) - , bufferSize (bufferSizeToUse) - , bytesInBuffer (0) - , writeBuffer (bmax (bufferSizeToUse, 16)) // enforce minimum size of 16 { } @@ -42,7 +39,6 @@ void RandomAccessFile::close () { if (isOpen ()) { - flushBuffer (); nativeFlush (); nativeClose (); } @@ -52,8 +48,6 @@ Result RandomAccessFile::setPosition (FileOffset newPosition) { if (newPosition != currentPosition) { - flushBuffer (); - // VFALCO NOTE I dislike return from the middle but // Result::ok() is showing up in the profile // @@ -76,37 +70,10 @@ Result RandomAccessFile::write (const void* data, ByteCount numBytes, ByteCount* ByteCount amountWritten = 0; - if (bytesInBuffer + numBytes < bufferSize) - { - memcpy (writeBuffer + bytesInBuffer, data, numBytes); - bytesInBuffer += numBytes; - currentPosition += numBytes; - } - else - { - result = flushBuffer (); + result = nativeWrite (data, numBytes, &amountWritten); - if (result.wasOk ()) - { - if (numBytes < bufferSize) - { - bassert (bytesInBuffer == 0); - - memcpy (writeBuffer + bytesInBuffer, data, numBytes); - bytesInBuffer += numBytes; - currentPosition += numBytes; - } - else - { - ByteCount bytesWritten; - - result = nativeWrite (data, numBytes, &bytesWritten); - - if (result.wasOk ()) - currentPosition += bytesWritten; - } - } - } + if (result.wasOk ()) + currentPosition += amountWritten; if (pActualAmount != nullptr) *pActualAmount = amountWritten; @@ -126,27 +93,7 @@ Result RandomAccessFile::truncate () Result RandomAccessFile::flush () { - Result result = flushBuffer (); - - if (result.wasOk ()) - result = nativeFlush (); - - return result; -} - -Result RandomAccessFile::flushBuffer () -{ - bassert (isOpen ()); - - Result result (Result::ok ()); - - if (bytesInBuffer > 0) - { - result = nativeWrite (writeBuffer, bytesInBuffer); - bytesInBuffer = 0; - } - - return result; + return nativeFlush (); } //------------------------------------------------------------------------------ @@ -158,6 +105,11 @@ public: { } + enum + { + maxPayload = 8192 + }; + /* For this test we will create a file which consists of a fixed number of variable length records. Each record is numbered sequentially starting at 0. To calculate the position of each record we first build @@ -260,15 +212,13 @@ public: } // Perform the test at the given buffer size. 
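A minimal usage sketch of the now-unbuffered interface, ahead of the revised test below (the path and payload here are hypothetical, and ByteCount is assumed to be the class's size typedef; with the internal buffer gone, each call maps directly onto the native file primitives):

    RandomAccessFile file;
    Result result = file.open (File::createTempFile ("example"),
                               RandomAccessFile::readWrite);

    if (result.wasOk ())
    {
        char const payload [] = "payload";
        ByteCount bytesWritten = 0;

        // Goes straight to nativeWrite (); there is no buffer to flush.
        result = file.write (payload, sizeof (payload), &bytesWritten);

        if (result.wasOk ())
            result = file.flush (); // Now simply forwards to nativeFlush ().

        file.close ();
    }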
- void testFile (int const numRecords, int const bufferSize) + void testFile (int const numRecords) { using namespace UnitTestUtilities; int const seedValue = 50; - beginTest (String ("numRecords=") + String (numRecords) + ", bufferSize=" + String (bufferSize)); - - int const maxPayload = bmax (1000, bufferSize * 2); + beginTest (String ("numRecords=") + String (numRecords)); // Calculate the path File const path (File::createTempFile ("RandomAccessFile")); @@ -281,7 +231,7 @@ { // Create the file - RandomAccessFile file (bufferSize); + RandomAccessFile file; result = file.open (path, RandomAccessFile::readWrite); expect (result.wasOk (), "Should be ok"); @@ -300,7 +250,7 @@ if (result.wasOk ()) { // Re-open the file in read only mode - RandomAccessFile file (bufferSize); + RandomAccessFile file; result = file.open (path, RandomAccessFile::readOnly); expect (result.wasOk (), "Should be ok"); @@ -313,11 +263,7 @@ void runTest () { - int const numRecords = 1000; - - testFile (numRecords, 0); - testFile (numRecords, 1000); - testFile (numRecords, 10000); + testFile (10000); } private: diff --git a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h index f2eaca1ccf..2b7c9505c6 100644 --- a/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h +++ b/Subtrees/beast/modules/beast_core/files/beast_RandomAccessFile.h @@ -3,10 +3,6 @@ This file is part of Beast: https://github.com/vinniefalco/Beast Copyright 2013, Vinnie Falco - Portions of this file are from JUCE. - Copyright (c) 2013 - Raw Material Software Ltd. - Please visit http://www.juce.com - Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. @@ -34,10 +30,6 @@ provided for obtaining an input or output stream which will work with the file. - Writes are batched using an internal buffer. The buffer is flushed when - it fills, the current position is manually changed, or the file - is closed. It is also possible to explicitly flush the buffer. - @note All files are opened in binary mode. No text newline conversions are performed. @@ -76,7 +68,7 @@ public: @see open, isOpen */ - explicit RandomAccessFile (int bufferSizeToUse = 16384) noexcept; + RandomAccessFile () noexcept; /** Destroy the file object. @@ -182,12 +174,11 @@ public: //============================================================================== private: - Result flushBuffer (); - // Some of these methods are implemented natively on // the corresponding platform.
// // See beast_posix_SharedCode.h and beast_win32_Files.cpp + // Result nativeOpen (File const& path, Mode mode); void nativeClose (); Result nativeSetPosition (FileOffset newPosition); @@ -200,45 +191,6 @@ private: File file; void* fileHandle; FileOffset currentPosition; - ByteCount const bufferSize; - ByteCount bytesInBuffer; - HeapBlock writeBuffer; -}; - -class BEAST_API RandomAccessFileInputStream : public InputStream -{ -public: - explicit RandomAccessFileInputStream (RandomAccessFile& file) : m_file (file) { } - - int64 getTotalLength() { return m_file.getFile ().getSize (); } - bool isExhausted() { return getPosition () == getTotalLength (); } - int read (void* destBuffer, int maxBytesToRead) - { - size_t actualBytes = 0; - m_file.read (destBuffer, maxBytesToRead, &actualBytes); - return actualBytes; - } - - int64 getPosition() { return m_file.getPosition (); } - bool setPosition (int64 newPosition) { return m_file.setPosition (newPosition); } - void skipNextBytes (int64 numBytesToSkip) { m_file.setPosition (getPosition () + numBytesToSkip); } - -private: - RandomAccessFile& m_file; -}; - -class BEAST_API RandomAccessFileOutputStream : public OutputStream -{ -public: - explicit RandomAccessFileOutputStream (RandomAccessFile& file) : m_file (file) { } - - void flush() { m_file.flush (); } - int64 getPosition() { return m_file.getPosition (); } - bool setPosition (int64 newPosition) { return m_file.setPosition (newPosition); } - bool write (const void* dataToWrite, size_t numberOfBytes) { return m_file.write (dataToWrite, numberOfBytes); } - -private: - RandomAccessFile& m_file; }; #endif From cb22f63c081024c99ae3ce63a68eef5e2e638a2b Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 15:01:44 -0700 Subject: [PATCH 32/50] Tune up KeyvaDB --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 249 ++++++++++-------- modules/ripple_app/node/ripple_KeyvaDB.h | 4 +- .../node/ripple_KeyvaDBBackendFactory.cpp | 4 +- 3 files changed, 138 insertions(+), 119 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 6b5d68f395..df4fc947dc 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -29,7 +29,7 @@ private: typedef int32 KeyIndex; // Size of a value. 
- typedef int32 ByteSize; + typedef size_t ByteSize; private: enum @@ -44,12 +44,6 @@ private: // Accessed by multiple threads struct State { - State () - : keyFile (16384) // buffer size - , valFile (16384) // buffer size - { - } - RandomAccessFile keyFile; RandomAccessFile valFile; KeyIndex newKeyIndex; @@ -89,12 +83,17 @@ private: public: KeyvaDBImp (int keyBytes, + int keyBlockDepth, File keyPath, - File valPath, - bool filesAreTemporary) + File valPath) : m_keyBytes (keyBytes) - , m_keyRecordBytes (getKeyRecordBytes ()) - , m_filesAreTemporary (filesAreTemporary) + , m_keyBlockDepth (keyBlockDepth) + , m_keyRecordBytes ( + sizeof (FileOffset) + + sizeof (ByteSize) + + sizeof (KeyIndex) + + sizeof (KeyIndex) + + keyBytes) , m_keyStorage (keyBytes) { SharedState::WriteAccess state (m_state); @@ -105,11 +104,20 @@ public: if (fileSize == 0) { + // VFALCO TODO Better error handling here // initialize the key file - RandomAccessFileOutputStream stream (state->keyFile); - stream.setPosition (keyFileHeaderBytes - 1); - stream.writeByte (0); - stream.flush (); + Result result = state->keyFile.setPosition (keyFileHeaderBytes - 1); + if (result.wasOk ()) + { + char byte = 0; + + result = state->keyFile.write (&byte, 1); + + if (result.wasOk ()) + { + state->keyFile.flush (); + } + } } state->newKeyIndex = 1 + (state->keyFile.getFile ().getSize () - keyFileHeaderBytes) / m_keyRecordBytes; @@ -124,44 +132,10 @@ public: SharedState::WriteAccess state (m_state); flushInternal (state); - - // Delete the database files if requested. - // - if (m_filesAreTemporary) - { - { - File const path = state->keyFile.getFile (); - state->keyFile.close (); - path.deleteFile (); - } - - { - File const path = state->valFile.getFile (); - state->valFile.close (); - path.deleteFile (); - } - } } //-------------------------------------------------------------------------- - // Returns the number of physical bytes in a key record. - // This is specific to the format of the data. - // - int getKeyRecordBytes () const noexcept - { - int bytes = 0; - - bytes += sizeof (FileOffset); // valFileOffset - bytes += sizeof (ByteSize); // valSize - bytes += sizeof (KeyIndex); // leftIndex - bytes += sizeof (KeyIndex); // rightIndex - - bytes += m_keyBytes; - - return bytes; - } - FileOffset calcKeyRecordOffset (KeyIndex keyIndex) { bassert (keyIndex > 0); @@ -180,25 +154,42 @@ public: { FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); - RandomAccessFileInputStream stream (state->keyFile); + Result result = state->keyFile.setPosition (byteOffset); - bool const success = stream.setPosition (byteOffset); - - if (success) + if (result.wasOk ()) { - // This defines the file format! - keyRecord->valFileOffset = stream.readInt64BigEndian (); - keyRecord->valSize = stream.readIntBigEndian (); - keyRecord->leftIndex = stream.readIntBigEndian (); - keyRecord->rightIndex = stream.readIntBigEndian (); + MemoryBlock data (m_keyRecordBytes); + + size_t bytesRead; + + result = state->keyFile.read (data.getData (), m_keyRecordBytes, &bytesRead); - // Grab the key - stream.read (keyRecord->key, m_keyBytes); + if (result.wasOk ()) + { + if (bytesRead == m_keyRecordBytes) + { + MemoryInputStream stream (data, false); + + // This defines the file format! 
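// A sketch of the on-disk layout implied by the reads below, given the
// typedefs above (FileOffset == int64, KeyIndex == int32):
//
//     valFileOffset    8 bytes, big-endian
//     valSize          4 bytes, big-endian
//     leftIndex        4 bytes, big-endian
//     rightIndex       4 bytes, big-endian
//     key              m_keyBytes bytes
//
// Note that readIntBigEndian () is a fixed 4-byte read while ByteSize is
// now size_t, so on a 64-bit target sizeof (ByteSize) no longer matches
// the serialized width; patch 34 below pins ByteSize back to uint32.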
+ keyRecord->valFileOffset = stream.readInt64BigEndian (); + keyRecord->valSize = stream.readIntBigEndian (); + keyRecord->leftIndex = stream.readIntBigEndian (); + keyRecord->rightIndex = stream.readIntBigEndian (); + + // Grab the key + stream.read (keyRecord->key, m_keyBytes); + } + else + { + result = Result::fail ("KeyvaDB: amountRead != m_keyRecordBytes"); + } + } } - else + + if (! result.wasOk ()) { String s; - s << "KeyvaDB: Seek failed in " << state->keyFile.getFile ().getFileName (); + s << "KeyvaDB readKeyRecord failed in " << state->keyFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -211,15 +202,15 @@ public: { FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); - RandomAccessFileOutputStream stream (state->keyFile); + int const bytes = includingKey ? m_keyRecordBytes : m_keyRecordBytes - m_keyBytes; - bool const success = stream.setPosition (byteOffset); + // VFALCO TODO Recycle this buffer + MemoryBlock data (bytes); - if (success) { + MemoryOutputStream stream (data, false); + // This defines the file format! - // VFALCO TODO Make OutputStream return the bool errors here - // stream.writeInt64BigEndian (keyRecord.valFileOffset); stream.writeIntBigEndian (keyRecord.valSize); stream.writeIntBigEndian (keyRecord.leftIndex); @@ -227,23 +218,30 @@ public: // Write the key if (includingKey) - { - bool const success = stream.write (keyRecord.key, m_keyBytes); + stream.write (keyRecord.key, m_keyBytes); + } - if (! success) + Result result = state->keyFile.setPosition (byteOffset); + + if (result.wasOk ()) + { + size_t bytesWritten; + + result = state->keyFile.write (data.getData (), bytes, &bytesWritten); + + if (result.wasOk ()) + { + if (bytesWritten != bytes) { - String s; - s << "KeyvaDB: Write failed in " << state->keyFile.getFile ().getFileName (); - Throw (std::runtime_error (s.toStdString ())); + result = Result::fail ("KeyvaDB: bytesWritten != bytes"); } } - - //stream.flush (); } - else + + if (!result.wasOk ()) { String s; - s << "KeyvaDB: Seek failed in " << state->keyFile.getFile ().getFileName (); + s << "KeyvaDB: writeKeyRecord failed in " << state->keyFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -252,29 +250,31 @@ public: // VFALCO TODO return a Result void writeValue (void const* const value, ByteSize valueBytes, SharedState::WriteAccess& state) { - RandomAccessFileOutputStream stream (state->valFile); + Result result = state->valFile.setPosition (state->valFileSize); - bool const success = stream.setPosition (state->valFileSize); - - if (success) + if (result.wasOk ()) { - bool const success = stream.write (value, static_cast (valueBytes)); + size_t bytesWritten; - if (! success) + result = state->valFile.write (value, valueBytes, &bytesWritten); + + if (result.wasOk ()) { - String s; - s << "KeyvaDB: Write failed in " << state->valFile.getFile ().getFileName (); - Throw (std::runtime_error (s.toStdString ())); + if (bytesWritten == valueBytes) + { + state->valFileSize += valueBytes; + } + else + { + result = Result::fail ("KeyvaDB: bytesWritten != valueBytes"); + } } - - state->valFileSize += valueBytes; - - //stream.flush (); } - else + + if (! 
result.wasOk ()) { String s; - s << "KeyvaDB: Seek failed in " << state->valFile.getFile ().getFileName (); + s << "KeyvaDB: writeValue failed in " << state->valFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -363,23 +363,27 @@ public: { void* const destStorage = callback->getStorageForValue (findResult.keyRecord.valSize); - RandomAccessFileInputStream stream (state->valFile); + Result result = state->valFile.setPosition (findResult.keyRecord.valFileOffset); - bool const success = stream.setPosition (findResult.keyRecord.valFileOffset); - - if (! success) + if (result.wasOk ()) { - String s; - s << "KeyvaDB: Seek failed in " << state->valFile.getFile ().getFileName (); - Throw (std::runtime_error (s.toStdString ())); + size_t bytesRead; + + result = state->valFile.read (destStorage, findResult.keyRecord.valSize, &bytesRead); + + if (result.wasOk ()) + { + if (bytesRead != findResult.keyRecord.valSize) + { + result = Result::fail ("KeyvaDB: bytesRead != valSize"); + } + } } - int const bytesRead = stream.read (destStorage, findResult.keyRecord.valSize); - - if (bytesRead != findResult.keyRecord.valSize) + if (! result.wasOk ()) { String s; - s << "KeyvaDB: Couldn't read a value from " << state->valFile.getFile ().getFileName (); + s << "KeyvaDB: get in " << state->valFile.getFile ().getFileName (); Throw (std::runtime_error (s.toStdString ())); } } @@ -515,15 +519,15 @@ private: private: int const m_keyBytes; + int const m_keyBlockDepth; int const m_keyRecordBytes; - bool const m_filesAreTemporary; SharedState m_state; HeapBlock m_keyStorage; }; -KeyvaDB* KeyvaDB::New (int keyBytes, File keyPath, File valPath, bool filesAreTemporary) +KeyvaDB* KeyvaDB::New (int keyBytes, int keyBlockDepth, File keyPath, File valPath) { - return new KeyvaDBImp (keyBytes, keyPath, valPath, filesAreTemporary); + return new KeyvaDBImp (keyBytes, keyBlockDepth, keyPath, valPath); } //------------------------------------------------------------------------------ @@ -559,7 +563,24 @@ public: } }; - template + KeyvaDB* createDB (unsigned int keyBytes, File const& path) + { + File const keyPath = path.withFileExtension (".key"); + File const valPath = path.withFileExtension (".val"); + + return KeyvaDB::New (keyBytes, 1, keyPath, valPath); + } + + void deleteDBFiles (File const& path) + { + File const keyPath = path.withFileExtension (".key"); + File const valPath = path.withFileExtension (".val"); + + keyPath.deleteFile (); + valPath.deleteFile (); + } + + template void testKeySize (unsigned int const maxItems) { using namespace UnitTestUtilities; @@ -569,17 +590,16 @@ public: int64 const seedValue = 50; String s; - s << "keyBytes=" << String (KeyBytes) << ", maxItems=" << String (maxItems); + + s << "keyBytes=" << String (uint64(KeyBytes)) << ", maxItems=" << String (maxItems); beginTest (s); // Set up the key and value files - File const tempFile (File::createTempFile ("")); - File const keyPath = tempFile.withFileExtension (".key"); - File const valPath = tempFile.withFileExtension (".val"); + File const path (File::createTempFile ("")); { // open the db - ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, false)); + ScopedPointer db (createDB (KeyBytes, path)); Payload payload (maxPayloadBytes); Payload check (maxPayloadBytes); @@ -634,7 +654,7 @@ public: { // Re-open the database and confirm the data - ScopedPointer db (KeyvaDB::New (KeyBytes, keyPath, valPath, false)); + ScopedPointer db (createDB (KeyBytes, path)); Payload payload (maxPayloadBytes); Payload 
check (maxPayloadBytes); @@ -654,8 +674,7 @@ public: } } - keyPath.deleteFile (); - valPath.deleteFile (); + deleteDBFiles (path); } void runTest () diff --git a/modules/ripple_app/node/ripple_KeyvaDB.h b/modules/ripple_app/node/ripple_KeyvaDB.h index 7c36c5051c..a58a469829 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.h +++ b/modules/ripple_app/node/ripple_KeyvaDB.h @@ -19,9 +19,9 @@ public: }; static KeyvaDB* New (int keyBytes, + int keyBlockDepth, File keyPath, - File valPath, - bool filesAreTemporary); + File valPath); virtual ~KeyvaDB () { } diff --git a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp index 676fc7ecb1..8b08c87d41 100644 --- a/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDBBackendFactory.cpp @@ -19,9 +19,9 @@ public: , m_path (keyValues ["path"]) , m_db (KeyvaDB::New ( keyBytes, + 3, File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("key"), - File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("val"), - false)) + File::getCurrentWorkingDirectory().getChildFile (m_path).withFileExtension ("val"))) { } From 2f929373c3d6cc7b80d9c35abddb1ec7e38d08cb Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 19:11:12 -0700 Subject: [PATCH 33/50] Add template I/O for streams --- .../beast_core/streams/beast_InputStream.cpp | 90 +++++++++++++++++++ .../beast_core/streams/beast_InputStream.h | 61 ++++++++++++- .../beast_core/streams/beast_OutputStream.cpp | 87 +++++++++++++++++- .../beast_core/streams/beast_OutputStream.h | 23 +++++ 4 files changed, 258 insertions(+), 3 deletions(-) diff --git a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.cpp index ebefef3171..ac47e96991 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.cpp @@ -65,6 +65,8 @@ short InputStream::readShortBigEndian() int InputStream::readInt() { + static_bassert (sizeof (int) == 4); + char temp[4]; if (read (temp, 4) == 4) @@ -73,6 +75,16 @@ int InputStream::readInt() return 0; } +int32 InputStream::readInt32() +{ + char temp[4]; + + if (read (temp, 4) == 4) + return (int32) ByteOrder::littleEndianInt (temp); + + return 0; +} + int InputStream::readIntBigEndian() { char temp[4]; @@ -83,6 +95,16 @@ int InputStream::readIntBigEndian() return 0; } +int32 InputStream::readInt32BigEndian() +{ + char temp[4]; + + if (read (temp, 4) == 4) + return (int32) ByteOrder::bigEndianInt (temp); + + return 0; +} + int InputStream::readCompressedInt() { const uint8 sizeByte = (uint8) readByte(); @@ -229,3 +251,71 @@ void InputStream::skipNextBytes (int64 numBytesToSkip) numBytesToSkip -= read (temp, (int) bmin (numBytesToSkip, (int64) skipBufferSize)); } } + +//------------------------------------------------------------------------------ + +// Unfortunately, putting these in the header causes duplicate +// definition linker errors, even with the inline keyword! 
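// A brief usage sketch of the template read API these specializations
// implement (the stream object and values are hypothetical; any concrete
// InputStream, such as a MemoryInputStream, works the same way):
//
//     int32 count = stream.readTypeBigEndian <int32> ();
//     uint64 offset;
//     stream.readTypeBigEndianInto <uint64> (&offset);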
+ +template <> +char InputStream::readType () { return readByte (); } + +template <> +short InputStream::readType () { return readShort (); } + +template <> +int32 InputStream::readType () { return readInt32 (); } + +template <> +int64 InputStream::readType () { return readInt64 (); } + +template <> +unsigned char InputStream::readType () { return static_cast (readByte ()); } + +template <> +unsigned short InputStream::readType () { return static_cast (readShort ()); } + +template <> +uint32 InputStream::readType () { return static_cast (readInt32 ()); } + +template <> +uint64 InputStream::readType () { return static_cast (readInt64 ()); } + +template <> +float InputStream::readType () { return readFloat (); } + +template <> +double InputStream::readType () { return readDouble (); } + +//------------------------------------------------------------------------------ + +template <> +char InputStream::readTypeBigEndian () { return readByte (); } + +template <> +short InputStream::readTypeBigEndian () { return readShortBigEndian (); } + +template <> +int32 InputStream::readTypeBigEndian () { return readInt32BigEndian (); } + +template <> +int64 InputStream::readTypeBigEndian () { return readInt64BigEndian (); } + +template <> +unsigned char InputStream::readTypeBigEndian () { return static_cast (readByte ()); } + +template <> +unsigned short InputStream::readTypeBigEndian () { return static_cast (readShortBigEndian ()); } + +template <> +uint32 InputStream::readTypeBigEndian () { return static_cast (readInt32BigEndian ()); } + +template <> +uint64 InputStream::readTypeBigEndian () { return static_cast (readInt64BigEndian ()); } + +template <> +float InputStream::readTypeBigEndian () { return readFloatBigEndian (); } + +template <> +double InputStream::readTypeBigEndian () { return readDoubleBigEndian (); } + diff --git a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h index 7d7e643234..30df5ac567 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h @@ -92,7 +92,7 @@ public: /** Reads a boolean from the stream. - The bool is encoded as a single byte - 1 for true, 0 for false. + The bool is encoded as a single byte - 0 for false, nonzero for true. If the stream is exhausted, this will return false. @@ -111,6 +111,10 @@ public: */ virtual short readShort(); + // VFALCO TODO Implement these functions + //virtual int16 readInt16 (); + //virtual uint16 readUInt16 (); + /** Reads two bytes from the stream as a little-endian 16-bit value. If the next two bytes read are byte1 and byte2, this returns @@ -131,6 +135,13 @@ public: @see OutputStream::writeInt, readIntBigEndian */ + virtual int32 readInt32(); + + // VFALCO TODO Implement these functions + //virtual int16 readInt16BigEndian (); + //virtual uint16 readUInt16BigEndian (); + + // DEPRECATED, assumes sizeof(int) == 4! virtual int readInt(); /** Reads four bytes from the stream as a big-endian 32-bit value. @@ -142,6 +153,9 @@ public: @see OutputStream::writeIntBigEndian, readInt */ + virtual int32 readInt32BigEndian(); + + // DEPRECATED, assumes sizeof(int) == 4! virtual int readIntBigEndian(); /** Reads eight bytes from the stream as a little-endian 64-bit value. @@ -216,6 +230,49 @@ public: */ virtual int readCompressedInt(); + /** Reads a type using a template specialization. + + This is useful when doing template meta-programming. 
+ */ + template + T readType (); + + /** Reads a type using a template specialization. + + The variable is passed as a parameter so that the template type + can be deduced. + + This is useful when doing template meta-programming. + */ + template + void readTypeInto (T* p) + { + *p = readType (); + } + + /** Reads a type from a big endian stream using a template specialization. + + The raw encoding of the type is read from the stream as a big-endian value + where applicable. + + This is useful when doing template meta-programming. + */ + template + T readTypeBigEndian (); + + /** Reads a type using a template specialization. + + The variable is passed as a parameter so that the template type + can be deduced. + + This is useful when doing template meta-programming. + */ + template + void readTypeBigEndianInto (T* p) + { + *p = readTypeBigEndian (); + } + //============================================================================== /** Reads a UTF-8 string from the stream, up to the next linefeed or carriage return. @@ -289,4 +346,4 @@ protected: InputStream() noexcept {} }; -#endif // BEAST_INPUTSTREAM_BEASTHEADER +#endif diff --git a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp index c1ac44c04c..614b32e1db 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.cpp @@ -93,14 +93,32 @@ bool OutputStream::writeShortBigEndian (short value) return write (&v, 2); } +bool OutputStream::writeInt32 (int32 value) +{ + static_bassert (sizeof (int32) == 4); + + const unsigned int v = ByteOrder::swapIfBigEndian ((uint32) value); + return write (&v, 4); +} + bool OutputStream::writeInt (int value) { + static_bassert (sizeof (int) == 4); + const unsigned int v = ByteOrder::swapIfBigEndian ((unsigned int) value); return write (&v, 4); } +bool OutputStream::writeInt32BigEndian (int value) +{ + static_bassert (sizeof (int32) == 4); + const uint32 v = ByteOrder::swapIfLittleEndian ((uint32) value); + return write (&v, 4); +} + bool OutputStream::writeIntBigEndian (int value) { + static_bassert (sizeof (int) == 4); const unsigned int v = ByteOrder::swapIfLittleEndian ((unsigned int) value); return write (&v, 4); } @@ -328,4 +346,71 @@ BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, InputSt BEAST_API OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const NewLine&) { return stream << stream.getNewLineString(); -} \ No newline at end of file +} + +//------------------------------------------------------------------------------ + +// Unfortunately, putting these in the header causes duplicate +// definition linker errors, even with the inline keyword! 
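// The write-side counterpart, using the same hypothetical stream:
//
//     stream.writeTypeBigEndian <int32> (count);
//     stream.writeTypeBigEndian <uint64> (offset);
//
// Patch 34 below uses exactly this API to serialize KeyvaDB key records.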
+ +template <> +BEAST_API bool OutputStream::writeType (char v) { return writeByte (v); } + +template <> +BEAST_API bool OutputStream::writeType (short v) { return writeShort (v); } + +template <> +BEAST_API bool OutputStream::writeType (int32 v) { return writeInt32 (v); } + +template <> +BEAST_API bool OutputStream::writeType (int64 v) { return writeInt64 (v); } + +template <> +BEAST_API bool OutputStream::writeType (unsigned char v) { return writeByte (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeType (unsigned short v) { return writeShort (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeType (uint32 v) { return writeInt32 (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeType (uint64 v) { return writeInt64 (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeType (float v) { return writeFloat (v); } + +template <> +BEAST_API bool OutputStream::writeType (double v) { return writeDouble (v); } + +//------------------------------------------------------------------------------ + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (char v) { return writeByte (v); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (short v) { return writeShortBigEndian (v); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (int32 v) { return writeInt32BigEndian (v); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (int64 v) { return writeInt64BigEndian (v); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (unsigned char v) { return writeByte (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (unsigned short v) { return writeShortBigEndian (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (uint32 v) { return writeInt32BigEndian (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (uint64 v) { return writeInt64BigEndian (static_cast (v)); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (float v) { return writeFloatBigEndian (v); } + +template <> +BEAST_API bool OutputStream::writeTypeBigEndian (double v) { return writeDoubleBigEndian (v); } diff --git a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h index 0528f0fcac..b536c48a57 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_OutputStream.h @@ -120,12 +120,18 @@ public: @returns false if the write operation fails for some reason @see InputStream::readInt */ + virtual bool writeInt32 (int32 value); + + // DEPRECATED, assumes sizeof (int) == 4! virtual bool writeInt (int value); /** Writes a 32-bit integer to the stream in a big-endian byte order. @returns false if the write operation fails for some reason @see InputStream::readIntBigEndian */ + virtual bool writeInt32BigEndian (int32 value); + + // DEPRECATED, assumes sizeof (int) == 4! virtual bool writeIntBigEndian (int value); /** Writes a 64-bit integer to the stream in a little-endian byte order. @@ -168,6 +174,23 @@ public: */ virtual bool writeDoubleBigEndian (double value); + /** Write a type using a template specialization. + + This is useful when doing template meta-programming. + */ + template + bool writeType (T value); + + /** Write a type using a template specialization. 
+ + The raw encoding of the type is written to the stream as a big-endian value + where applicable. + + This is useful when doing template meta-programming. + */ + template + bool writeTypeBigEndian (T value); + /** Writes a byte to the output stream a given number of times. @returns false if the write operation fails for some reason */ From 4fcbbe60723ccbc11c1656e83a79a39372ed5497 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sat, 20 Jul 2013 19:18:29 -0700 Subject: [PATCH 34/50] Fix KeyvaDB for sizeof(size_t) --- modules/ripple_app/node/ripple_KeyvaDB.cpp | 69 ++++++++++++++------ modules/ripple_app/node/ripple_NodeStore.cpp | 4 +- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index df4fc947dc..3f46a994cd 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -29,7 +29,7 @@ private: typedef int32 KeyIndex; // Size of a value. - typedef size_t ByteSize; + typedef uint32 ByteSize; private: enum @@ -110,7 +110,7 @@ public: if (result.wasOk ()) { char byte = 0; - + result = state->keyFile.write (&byte, 1); if (result.wasOk ()) @@ -159,9 +159,9 @@ public: if (result.wasOk ()) { MemoryBlock data (m_keyRecordBytes); - + size_t bytesRead; - + result = state->keyFile.read (data.getData (), m_keyRecordBytes, &bytesRead); if (result.wasOk ()) @@ -171,10 +171,10 @@ public: MemoryInputStream stream (data, false); // This defines the file format! - keyRecord->valFileOffset = stream.readInt64BigEndian (); - keyRecord->valSize = stream.readIntBigEndian (); - keyRecord->leftIndex = stream.readIntBigEndian (); - keyRecord->rightIndex = stream.readIntBigEndian (); + stream.readTypeBigEndianInto (&keyRecord->valFileOffset); + stream.readTypeBigEndianInto (&keyRecord->valSize); + stream.readTypeBigEndianInto (&keyRecord->leftIndex); + stream.readTypeBigEndianInto (&keyRecord->rightIndex); // Grab the key stream.read (keyRecord->key, m_keyBytes); @@ -211,10 +211,10 @@ public: MemoryOutputStream stream (data, false); // This defines the file format! - stream.writeInt64BigEndian (keyRecord.valFileOffset); - stream.writeIntBigEndian (keyRecord.valSize); - stream.writeIntBigEndian (keyRecord.leftIndex); - stream.writeIntBigEndian (keyRecord.rightIndex); + stream.writeTypeBigEndian (keyRecord.valFileOffset); + stream.writeTypeBigEndian (keyRecord.valSize); + stream.writeTypeBigEndian (keyRecord.leftIndex); + stream.writeTypeBigEndian (keyRecord.rightIndex); // Write the key if (includingKey) @@ -226,7 +226,7 @@ public: if (result.wasOk ()) { size_t bytesWritten; - + result = state->keyFile.write (data.getData (), bytes, &bytesWritten); if (result.wasOk ()) @@ -237,7 +237,7 @@ public: } } } - + if (!result.wasOk ()) { String s; @@ -270,7 +270,7 @@ public: } } } - + if (! 
result.wasOk ()) { String s; @@ -394,6 +394,27 @@ public: //-------------------------------------------------------------------------- + // Compares two key records for equality + bool areKeyRecordsEqual (KeyRecord const& lhs, KeyRecord const& rhs) + { + return lhs.leftIndex == rhs.leftIndex && + lhs.rightIndex == rhs.rightIndex && + lhs.valFileOffset == rhs.valFileOffset && + lhs.valSize == rhs.valSize && + (memcmp (lhs.key, rhs.key, m_keyBytes) == 0); + } + + // Makes sure a key record matches disk + void checkKeyRecord (KeyRecord const& keyRecord, KeyIndex keyIndex, SharedState::WriteAccess& state) + { + MemoryBlock keyStorage (m_keyBytes); + KeyRecord checkRecord (keyStorage.getData ()); + readKeyRecord (&checkRecord, keyIndex, state); + + bassert (areKeyRecordsEqual (checkRecord, keyRecord)); + } + + // Write a key value pair. Does nothing if the key exists. void put (void const* key, void const* value, int valueBytes) { bassert (valueBytes > 0); @@ -425,6 +446,8 @@ public: } writeKeyRecord (findResult.keyRecord, findResult.keyIndex, state, false); + + //checkKeyRecord (findResult.keyRecord, findResult.keyIndex, state); } // Write the new key @@ -437,6 +460,8 @@ public: memcpy (findResult.keyRecord.key, key, m_keyBytes); writeKeyRecord (findResult.keyRecord, state->newKeyIndex, state, true); + + //checkKeyRecord (findResult.keyRecord, findResult.keyIndex, state); } // Key file has grown by one. @@ -645,9 +670,12 @@ public: expect (found, "Should be found"); - payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); + if (found) + { + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); - expect (payload == cb.payload, "Should be equal"); + expect (payload == cb.payload, "Should be equal"); + } } } } @@ -668,9 +696,12 @@ public: expect (found, "Should be found"); - payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); + if (found) + { + payload.repeatableRandomFill (1, maxPayloadBytes, keyIndex + seedValue); - expect (payload == cb.payload, "Should be equal"); + expect (payload == cb.payload, "Should be equal"); + } } } diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 1ac8ef95bc..4082c5f5c5 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -887,7 +887,7 @@ class NodeStoreTimingTests : public NodeStoreUnitTest public: enum { - numObjectsToTest = 10000 + numObjectsToTest = 50000 }; NodeStoreTimingTests () @@ -970,7 +970,7 @@ public: testBackend ("keyvadb", seedValue); -#if 0 +#if 1 testBackend ("leveldb", seedValue); testBackend ("sqlite", seedValue); From 1d0795095bf0a0ec647f6ebeb05c9bbe01f66aff Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sun, 21 Jul 2013 12:50:58 -0700 Subject: [PATCH 35/50] Fix MDB backend for directory creation --- .../node/ripple_MdbBackendFactory.cpp | 62 ++++++++++++------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/modules/ripple_app/node/ripple_MdbBackendFactory.cpp b/modules/ripple_app/node/ripple_MdbBackendFactory.cpp index 4be1def928..c454380f8f 100644 --- a/modules/ripple_app/node/ripple_MdbBackendFactory.cpp +++ b/modules/ripple_app/node/ripple_MdbBackendFactory.cpp @@ -26,41 +26,55 @@ public: { String path (keyValues ["path"]); - m_name = path.toStdString(); - if (path.isEmpty ()) Throw (std::runtime_error ("Missing path in MDB backend")); - int error = 0; + m_basePath = path.toStdString(); - error = mdb_env_create (&m_env); + // Regarding the path 
supplied to mdb_env_open: + // This directory must already exist and be writable. + // + File dir (File::getCurrentWorkingDirectory().getChildFile (path)); + Result result = dir.createDirectory (); - error = mdb_env_create (&m_env); + if (result.wasOk ()) + { + int error = mdb_env_create (&m_env); - if (error == 0) // Should use the size of the file plus the free space on the disk - error = mdb_env_set_mapsize (m_env, 512L * 1024L * 1024L * 1024L); + // Should use the size of the file plus the free space on the disk + if (error == 0) + error = mdb_env_set_mapsize (m_env, 512L * 1024L * 1024L * 1024L); - if (error == 0) - error = mdb_env_open ( - m_env, - m_name.c_str (), - MDB_NOTLS, - 0664); + if (error == 0) + error = mdb_env_open ( + m_env, + m_basePath.c_str (), + MDB_NOTLS, + 0664); - MDB_txn* txn; + MDB_txn* txn; - if (error == 0) - error = mdb_txn_begin (m_env, NULL, 0, &txn); + if (error == 0) + error = mdb_txn_begin (m_env, NULL, 0, &txn); - if (error == 0) - error = mdb_dbi_open (txn, NULL, 0, &m_dbi); + if (error == 0) + error = mdb_dbi_open (txn, NULL, 0, &m_dbi); - if (error == 0) - error = mdb_txn_commit (txn); + if (error == 0) + error = mdb_txn_commit (txn); - if (error != 0) + if (error != 0) + { + String s; + s << "Error #" << error << " creating mdb environment"; + Throw (std::runtime_error (s.toStdString ())); + } + } + else { String s; - s << "Error #" << error << " creating mdb environment"; - Throw (std::runtime_error (s.toStdString ())); + s << "MDB Backend failed to create directory, " << result.getErrorMessage (); + Throw (std::runtime_error (s.toStdString().c_str())); } } @@ -75,7 +89,7 @@ public: std::string getName() { - return m_name; + return m_basePath; } //-------------------------------------------------------------------------- @@ -227,7 +241,7 @@ private: NodeStore::Scheduler& m_scheduler; NodeStore::BatchWriter m_batch; NodeStore::EncodedBlob::Pool m_blobPool; - std::string m_name; + std::string m_basePath; MDB_env* m_env; MDB_dbi m_dbi; }; From 6781d9da0e6ac27b08b9d260129124fe4c588e7d Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sun, 21 Jul 2013 13:06:31 -0700 Subject: [PATCH 36/50] Add identity import tests to NodeStoreTests --- Subtrees/beast/TODO.txt | 2 + TODO.txt | 1 + modules/ripple_app/node/ripple_KeyvaDB.cpp | 287 +++++++++++++------ modules/ripple_app/node/ripple_NodeStore.cpp | 49 ++-- modules/ripple_app/node/ripple_NodeStore.h | 27 +- 5 files changed, 247 insertions(+), 119 deletions(-) diff --git a/Subtrees/beast/TODO.txt b/Subtrees/beast/TODO.txt index ae7183418e..e4a9e8f073 100644 --- a/Subtrees/beast/TODO.txt +++ b/Subtrees/beast/TODO.txt @@ -2,6 +2,8 @@ BEAST TODO -------------------------------------------------------------------------------- +- Specialize UnsignedInteger<> for performance in the storage format + - Macro for acquiring a ScopedLock that records file and line.
- Rename HeapBlock routines to not conflict with _CRTDBG_MAP_ALLOC macros diff --git a/TODO.txt b/TODO.txt index 3fb99e9139..db50945f86 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,6 +3,7 @@ RIPPLE TODO -------------------------------------------------------------------------------- Vinnie's Short List (Changes day to day) +- Give mdb a proper spot in Subtrees/ - Finish writing the NodeStore unit tests - Finish converting backends to new API - Memory NodeStore::Backend for unit tests diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/modules/ripple_app/node/ripple_KeyvaDB.cpp index 3f46a994cd..8b8e4652dc 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/modules/ripple_app/node/ripple_KeyvaDB.cpp @@ -26,37 +26,89 @@ private: typedef int64 FileOffset; // Index of a key. + // + // The value is broken up into two parts. The key block index, + // and a 1 based index within the keyblock corresponding to the + // internal key number. + // typedef int32 KeyIndex; + typedef int32 KeyBlockIndex; // Size of a value. typedef uint32 ByteSize; private: + // returns the number of keys in a key block with the specified depth + static int calcKeysAtDepth (int depth) + { + return (1U << depth) - 1; + } + + // returns the number of bytes in a key record + static int calcKeyRecordBytes (int keyBytes) + { + // This depends on the format of a serialized key record + return + sizeof (FileOffset) + + sizeof (ByteSize) + + sizeof (KeyIndex) + + sizeof (KeyIndex) + + keyBytes + ; + } + + // returns the number of bytes in a key block + static int calcKeyBlockBytes (int depth, int keyBytes) + { + return calcKeysAtDepth (depth) * calcKeyRecordBytes (keyBytes); + } + +public: + enum + { + currentVersion = 1 + }; + + + //-------------------------------------------------------------------------- + + struct KeyAddress + { + // 1 based key block number + uint32 blockNumber; + + // 1 based key index within the block, breadth-first left to right + uint32 keyNumber; + }; + enum { // The size of the fixed area at the beginning of the key file. // This is used to store some housekeeping information like the // key size and version number. // - keyFileHeaderBytes = 1024 + masterHeaderBytes = 1000 }; - // Accessed by multiple threads - struct State + // The master record is at the beginning of the key file + struct MasterRecord { - RandomAccessFile keyFile; - RandomAccessFile valFile; - KeyIndex newKeyIndex; - FileOffset valFileSize; + // version number, starting from 1 + int32 version; - bool hasKeys () const noexcept + KeyBlockIndex nextKeyBlockIndex; + + void write (OutputStream& stream) { - return newKeyIndex > 1; + stream.writeTypeBigEndian (version); + } + + void read (InputStream& stream) + { + stream.readTypeBigEndianInto (&version); } }; - typedef SharedData SharedState; - // Key records are indexed starting at one. struct KeyRecord { @@ -81,19 +133,110 @@ private: void* const key; }; -public: + //-------------------------------------------------------------------------- + + // A complete keyblock. The contents of the memory for the key block + // are identical to the format on disk. Therefore it is necessary to + // use the serialization routines to extract or update the key records. 
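// A worked example of the helpers above, using keyBytes == 32 (the
// NodeObject key size used elsewhere in this series) and depth == 3
// (the value the KeyvaDB backend factory passes):
//
//     calcKeysAtDepth (3)       == (1 << 3) - 1       == 7 keys
//     calcKeyRecordBytes (32)   == 8 + 4 + 4 + 4 + 32 == 52 bytes
//     calcKeyBlockBytes (3, 32) == 7 * 52             == 364 bytes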
+ // + class KeyBlock + { + public: + KeyBlock (int depth, int keyBytes) + : m_depth (depth) + , m_keyBytes (keyBytes) + , m_storage (calcKeyBlockBytes (depth, keyBytes)) + { + } + + void read (InputStream& stream) + { + stream.read (m_storage.getData (), calcKeyBlockBytes (m_depth, m_keyBytes)); + } + + void write (OutputStream& stream) + { + stream.write (m_storage.getData (), calcKeyBlockBytes (m_depth, m_keyBytes)); + } + + void readKeyRecord (KeyRecord* keyRecord, int keyIndex) + { + bassert (keyIndex >=1 && keyIndex <= calcKeysAtDepth (m_depth)); + + size_t const byteOffset = (keyIndex - 1) * calcKeyRecordBytes (m_keyBytes); + + MemoryInputStream stream ( + addBytesToPointer (m_storage.getData (), byteOffset), + calcKeyRecordBytes (m_keyBytes), + false); + + stream.readTypeBigEndianInto (&keyRecord->valFileOffset); + stream.readTypeBigEndianInto (&keyRecord->valSize); + stream.readTypeBigEndianInto (&keyRecord->leftIndex); + stream.readTypeBigEndianInto (&keyRecord->rightIndex); + stream.read (keyRecord->key, m_keyBytes); + } + + void writeKeyRecord (KeyRecord const& keyRecord, int keyIndex) + { + bassert (keyIndex >=1 && keyIndex <= calcKeysAtDepth (m_depth)); + +#if 0 + size_t const byteOffset = (keyIndex - 1) * calcKeyRecordBytes (m_keyBytes); + + MemoryOutputStream stream ( + addBytesToPointer (m_storage.getData (), byteOffset), + calcKeyRecordBytes (m_keyBytes)); + + stream.writeTypeBigEndian (keyRecord.valFileOffset); + stream.writeTypeBigEndian (keyRecord.valSize); + stream.writeTypeBigEndian (keyRecord.leftIndex); + stream.writeTypeBigEndian (keyRecord.rightIndex); + stream.write (keyRecord.key, m_keyBytes); +#endif + } + + private: + int const m_depth; + int const m_keyBytes; + MemoryBlock m_storage; + }; + + //-------------------------------------------------------------------------- + + // Concurrent data + // + struct State + { + RandomAccessFile keyFile; + RandomAccessFile valFile; + MasterRecord masterRecord; + KeyIndex newKeyIndex; + FileOffset valFileSize; + + bool hasKeys () const noexcept + { + return newKeyIndex > 1; + } + }; + + typedef SharedData SharedState; + + //-------------------------------------------------------------------------- + + int const m_keyBytes; + int const m_keyBlockDepth; + SharedState m_state; + HeapBlock m_keyStorage; + + //-------------------------------------------------------------------------- + KeyvaDBImp (int keyBytes, int keyBlockDepth, File keyPath, File valPath) : m_keyBytes (keyBytes) , m_keyBlockDepth (keyBlockDepth) - , m_keyRecordBytes ( - sizeof (FileOffset) + - sizeof (ByteSize) + - sizeof (KeyIndex) + - sizeof (KeyIndex) + - keyBytes) , m_keyStorage (keyBytes) { SharedState::WriteAccess state (m_state); @@ -106,7 +249,7 @@ public: { // VFALCO TODO Better error handling here // initialize the key file - Result result = state->keyFile.setPosition (keyFileHeaderBytes - 1); + Result result = state->keyFile.setPosition (masterHeaderBytes - 1); if (result.wasOk ()) { char byte = 0; @@ -120,7 +263,8 @@ public: } } - state->newKeyIndex = 1 + (state->keyFile.getFile ().getSize () - keyFileHeaderBytes) / m_keyRecordBytes; + state->newKeyIndex = 1 + (state->keyFile.getFile ().getSize () - masterHeaderBytes) + / calcKeyRecordBytes (m_keyBytes); openFile (&state->valFile, valPath); @@ -134,13 +278,47 @@ public: flushInternal (state); } + // Open a file for reading and writing. + // Creates the file if it doesn't exist. 
+ static void openFile (RandomAccessFile* file, File path) + { + Result const result = file->open (path, RandomAccessFile::readWrite); + + if (! result) + { + String s; + s << "KeyvaDB: Couldn't open " << path.getFileName () << " for writing."; + Throw (std::runtime_error (s.toStdString ())); + } + } + + //-------------------------------------------------------------------------- + + Result createMasterRecord (SharedState::WriteAccess& state) + { + MemoryBlock buffer (masterHeaderBytes, true); + + Result result = state->keyFile.setPosition (0); + + if (result.wasOk ()) + { + MasterRecord mr; + + mr.version = 1; + + result = state->keyFile.write (buffer.getData (), buffer.getSize ()); + } + + return result; + } + //-------------------------------------------------------------------------- FileOffset calcKeyRecordOffset (KeyIndex keyIndex) { bassert (keyIndex > 0); - FileOffset const byteOffset = keyFileHeaderBytes + (keyIndex - 1) * m_keyRecordBytes; + FileOffset const byteOffset = masterHeaderBytes + (keyIndex - 1) * calcKeyRecordBytes (m_keyBytes); return byteOffset; } @@ -158,15 +336,15 @@ public: if (result.wasOk ()) { - MemoryBlock data (m_keyRecordBytes); + MemoryBlock data (calcKeyRecordBytes (m_keyBytes)); size_t bytesRead; - result = state->keyFile.read (data.getData (), m_keyRecordBytes, &bytesRead); + result = state->keyFile.read (data.getData (), calcKeyRecordBytes (m_keyBytes), &bytesRead); if (result.wasOk ()) { - if (bytesRead == m_keyRecordBytes) + if (bytesRead == calcKeyRecordBytes (m_keyBytes)) { MemoryInputStream stream (data, false); @@ -181,7 +359,7 @@ public: } else { - result = Result::fail ("KeyvaDB: amountRead != m_keyRecordBytes"); + result = Result::fail ("KeyvaDB: amountRead != calcKeyRecordBytes()"); } } } @@ -202,7 +380,7 @@ public: { FileOffset const byteOffset = calcKeyRecordOffset (keyIndex); - int const bytes = includingKey ? m_keyRecordBytes : m_keyRecordBytes - m_keyBytes; + int const bytes = calcKeyRecordBytes (m_keyBytes) - (includingKey ? 0 : m_keyBytes); // VFALCO TODO Recycle this buffer MemoryBlock data (bytes); @@ -290,6 +468,7 @@ public: int compare; // result of the last comparison KeyIndex keyIndex; // index we looked at last + //KeyBlock keyBlock; // KeyBlock we looked at last KeyRecord keyRecord; // KeyRecord we looked at last }; @@ -394,26 +573,6 @@ public: //-------------------------------------------------------------------------- - // Compares two key records for equality - bool areKeyRecordsEqual (KeyRecord const& lhs, KeyRecord const& rhs) - { - return lhs.leftIndex == rhs.leftIndex && - lhs.rightIndex == rhs.rightIndex && - lhs.valFileOffset == rhs.valFileOffset && - lhs.valSize == rhs.valSize && - (memcmp (lhs.key, rhs.key, m_keyBytes) == 0); - } - - // Makes sure a key record matches disk - void checkKeyRecord (KeyRecord const& keyRecord, KeyIndex keyIndex, SharedState::WriteAccess& state) - { - MemoryBlock keyStorage (m_keyBytes); - KeyRecord checkRecord (keyStorage.getData ()); - readKeyRecord (&checkRecord, keyIndex, state); - - bassert (areKeyRecordsEqual (checkRecord, keyRecord)); - } - // Write a key value pair. Does nothing if the key exists. 
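// A usage sketch for the method below (the caller, key, and value names
// are hypothetical; the key must point at exactly m_keyBytes bytes):
//
//     db->put (hash.begin (), blob.data (), blob.size ());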
void put (void const* key, void const* value, int valueBytes) { @@ -446,8 +605,6 @@ public: } writeKeyRecord (findResult.keyRecord, findResult.keyIndex, state, false); - - //checkKeyRecord (findResult.keyRecord, findResult.keyIndex, state); } // Write the new key @@ -460,8 +617,6 @@ public: memcpy (findResult.keyRecord.key, key, m_keyBytes); writeKeyRecord (findResult.keyRecord, state->newKeyIndex, state, true); - - //checkKeyRecord (findResult.keyRecord, findResult.keyIndex, state); } // Key file has grown by one. @@ -472,12 +627,8 @@ public: } else { - // Do nothing - /* - String s; - s << "KeyvaDB: Attempt to write a duplicate key!"; - Throw (std::runtime_error (s.toStdString ())); - */ + // Key already exists, do nothing. + // We could check to make sure the payloads are the same. } } else @@ -524,30 +675,6 @@ public: state->keyFile.flush (); state->valFile.flush (); } - - //-------------------------------------------------------------------------- - -private: - // Open a file for reading and writing. - // Creates the file if it doesn't exist. - static void openFile (RandomAccessFile* file, File path) - { - Result const result = file->open (path, RandomAccessFile::readWrite); - - if (! result) - { - String s; - s << "KeyvaDB: Couldn't open " << path.getFileName () << " for writing."; - Throw (std::runtime_error (s.toStdString ())); - } - } - -private: - int const m_keyBytes; - int const m_keyBlockDepth; - int const m_keyRecordBytes; - SharedState m_state; - HeapBlock m_keyStorage; }; KeyvaDB* KeyvaDB::New (int keyBytes, int keyBlockDepth, File keyPath, File valPath) diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 4082c5f5c5..d202273f67 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -220,15 +220,6 @@ public: ~NodeStoreImp () { - // VFALCO NOTE This shouldn't be necessary, the backend can - // just handle it in the destructor. - // - /* - m_backend->waitWrite (); - - if (m_fastBackend) - m_fastBackend->waitWrite (); - */ } //------------------------------------------------------------------------------ @@ -361,13 +352,9 @@ public: // if (! keyFoundAndObjectCached) { - - // VFALCO TODO Rename this to RIPPLE_VERIFY_NODEOBJECT_KEYS and make - // it be 1 or 0 instead of merely defined or undefined. 
- // - #if RIPPLE_VERIFY_NODEOBJECT_KEYS + #if RIPPLE_VERIFY_NODEOBJECT_KEYS assert (hash == Serializer::getSHA512Half (data)); - #endif + #endif NodeObject::Ptr object = NodeObject::createObject ( type, index, data, hash); @@ -887,7 +874,7 @@ class NodeStoreTimingTests : public NodeStoreUnitTest public: enum { - numObjectsToTest = 50000 + numObjectsToTest = 20000 }; NodeStoreTimingTests () @@ -970,11 +957,8 @@ public: testBackend ("keyvadb", seedValue); -#if 1 testBackend ("leveldb", seedValue); - testBackend ("sqlite", seedValue); - #if RIPPLE_HYPERLEVELDB_AVAILABLE testBackend ("hyperleveldb", seedValue); #endif @@ -982,7 +966,8 @@ public: #if RIPPLE_MDB_AVAILABLE testBackend ("mdb", seedValue); #endif -#endif + + testBackend ("sqlite", seedValue); } private: @@ -1102,21 +1087,31 @@ public: testBackend ("sqlite", seedValue); - #if RIPPLE_HYPERLEVELDB_AVAILABLE + #if RIPPLE_HYPERLEVELDB_AVAILABLE testBackend ("hyperleveldb", seedValue); - #endif + #endif - #if RIPPLE_MDB_AVAILABLE + #if RIPPLE_MDB_AVAILABLE testBackend ("mdb", seedValue); - #endif + #endif // // Import tests // - //testImport ("leveldb", "keyvadb", seedValue); -//testImport ("sqlite", "leveldb", seedValue); - testImport ("leveldb", "sqlite", seedValue); + //testImport ("keyvadb", "keyvadb", seedValue); + + testImport ("leveldb", "leveldb", seedValue); + + #if RIPPLE_HYPERLEVELDB_AVAILABLE + testImport ("hyperleveldb", "hyperleveldb", seedValue); + #endif + + #if RIPPLE_MDB_AVAILABLE + testImport ("mdb", "mdb", seedValue); + #endif + + testImport ("sqlite", "sqlite", seedValue); } private: diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h index dc91bd98c2..3f3d5240cc 100644 --- a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -44,12 +44,11 @@ public: /** Parsed key/value blob into NodeObject components. - This will extract the information required to construct - a NodeObject. It also does consistency checking and returns - the result, so it is possible to determine if the data - is corrupted without throwing an exception. Note all forms - of corruption are detected so further analysis will be - needed to eliminate false positives. + This will extract the information required to construct a NodeObject. + It also does consistency checking and returns the result, so it is + possible to determine if the data is corrupted without throwing an + exception. Not all forms of corruption are detected so further analysis + will be needed to eliminate false positives. @note This is the format in which a NodeObject is stored in the persistent storage layer. @@ -352,9 +351,9 @@ public: /** Fetch an object. - If the object is known to be not in the database, not - in the database, or failed to load correctly, nullptr is - returned. + If the object is known to be not in the database, isn't found in + the database during the fetch, or failed to load correctly during + the fetch, `nullptr` is returned. @note This can be called concurrently. @@ -381,6 +380,13 @@ public: Blob& data, uint256 const& hash) = 0; + /** Import objects from another database. + + The other NodeStore database is constructed using the specified + backend parameters. + */ + virtual void import (String sourceBackendParameters) = 0; + // VFALCO TODO Document this. virtual float getCacheHitRate () = 0; @@ -396,9 +402,6 @@ public: This is used for diagnostics. */ virtual int getWriteLoad () = 0; - - // VFALCO TODO Document this. 
-    virtual void import (String sourceBackendParameters) = 0;
 };

 #endif

From 8be6ba63cb1d522a2982f311c5e26db9096f1629 Mon Sep 17 00:00:00 2001
From: Vinnie Falco
Date: Sun, 21 Jul 2013 14:40:41 -0700
Subject: [PATCH 37/50] Tidy up some annotations

---
 modules/ripple_app/node/ripple_NodeObject.h  |  11 ++
 modules/ripple_app/node/ripple_NodeStore.cpp |   2 +-
 modules/ripple_app/node/ripple_NodeStore.h   | 166 ++++++++++++-------
 3 files changed, 115 insertions(+), 64 deletions(-)

diff --git a/modules/ripple_app/node/ripple_NodeObject.h b/modules/ripple_app/node/ripple_NodeObject.h
index 2ead4b370c..7bbf7dd584 100644
--- a/modules/ripple_app/node/ripple_NodeObject.h
+++ b/modules/ripple_app/node/ripple_NodeObject.h
@@ -34,6 +34,17 @@ class NodeObject : public CountedObject <NodeObject>
 public:
     static char const* getCountedObjectName () { return "NodeObject"; }

+    enum
+    {
+        /** Size of the fixed keys, in bytes.
+
+            We use a 256-bit hash for the keys.
+
+            @see NodeObject
+        */
+        keyBytes = 32,
+    };
+
     /** The type used to hold the hash.

         The hahes are fixed size, SHA256.
diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp
index d202273f67..47f660a9fb 100644
--- a/modules/ripple_app/node/ripple_NodeStore.cpp
+++ b/modules/ripple_app/node/ripple_NodeStore.cpp
@@ -470,7 +470,7 @@ public:

         if (factory != nullptr)
         {
-            backend = factory->createInstance (keyBytes, keyValues, scheduler);
+            backend = factory->createInstance (NodeObject::keyBytes, keyValues, scheduler);
         }
         else
         {
diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h
index 3f3d5240cc..0202f60178 100644
--- a/modules/ripple_app/node/ripple_NodeStore.h
+++ b/modules/ripple_app/node/ripple_NodeStore.h
@@ -7,31 +7,69 @@
 #ifndef RIPPLE_NODESTORE_H_INCLUDED
 #define RIPPLE_NODESTORE_H_INCLUDED

+// Javadoc comments are added to all public classes, member functions,
+// type definitions, data types, and global variables (whose use we
+// should minimize).
+//
+// A Javadoc comment is introduced with an extra asterisk following the
+// beginning of a normal C++ style comment, or by using a triple forward slash.
+//
+// Structure of a Javadoc comment:
+
+/** Brief one line description.
+
+    A more detailed description, which may be broken up into multiple
+    paragraphs. Doxygen commands are prefixed with the at-sign '@'. For
+    example, here's a formatted code snippet:
+
+    @code
+
+    int main (int argc, char** argv)
+    {
+        return 0;
+    }
+
+    @endcode
+
+    You can also add a note, or document an invariant:
+
+    @note This appears as its own note.
+
+    @invariant This must not be called while holding the lock.
+
+    When documenting functions, you can use these Doxygen commands
+    to annotate the parameters, return value, and template types.
+
+    @param argc The number of arguments to the program.
+    @param argv An array of argc strings, one for each argument.
+
+    @return The return value of the program, passed to the enclosing process.
+*/
+
+/** Functions can be documented with just the brief description, like this */
+
+/// Here's the alternate form of a brief description.
+
+//------------------------------------------------------------------------------
+
 /** Persistency layer for NodeObject

-    A Node is a ledger object which is uniquely identified by a key,
-    which is the 256-bit hash of the body of the node. The payload is
-    a variable length block of serialized data.
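The NodeObject::keyBytes constant introduced earlier in this patch lets
fixed-key buffers be sized from one published value instead of a bare 32,
and factories receive the same value, as the createInstance hunk above
shows. A small illustration; the struct is made up for the example:

    // Hypothetical fixed-size key holder sized from the published constant.
    struct FixedKey
    {
        unsigned char bytes [NodeObject::keyBytes]; // 32 bytes: a 256-bit hash
    };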
+ A Node is a ledger object which is uniquely identified by a key, which is + the 256-bit hash of the body of the node. The payload is a variable length + block of serialized data. - All ledger data is stored as node objects and as such, needs to - be persisted between launches. Furthermore, since the set of - node objects will in general be larger than the amount of available - memory, purged node objects which are later accessed must be retrieved - from the node store. + All ledger data is stored as node objects and as such, needs to be persisted + between launches. Furthermore, since the set of node objects will in + general be larger than the amount of available memory, purged node objects + which are later accessed must be retrieved from the node store. + + @see NodeObject */ class NodeStore { public: enum { - /** Size of the fixed keys, in bytes. - - We use a 256-bit hash for the keys. - - @see NodeObject - */ - keyBytes = 32, - // This is only used to pre-allocate the array for // batch objects and does not affect the amount written. // @@ -44,28 +82,24 @@ public: /** Parsed key/value blob into NodeObject components. - This will extract the information required to construct a NodeObject. - It also does consistency checking and returns the result, so it is - possible to determine if the data is corrupted without throwing an - exception. Not all forms of corruption are detected so further analysis - will be needed to eliminate false positives. + This will extract the information required to construct a NodeObject. It + also does consistency checking and returns the result, so it is possible + to determine if the data is corrupted without throwing an exception. Not + all forms of corruption are detected so further analysis will be needed + to eliminate false negatives. - @note This is the format in which a NodeObject is stored in the - persistent storage layer. + @note This defines the database format of a NodeObject! */ class DecodedBlob { public: - /** Construct the decoded blob from raw data. - */ + /** Construct the decoded blob from raw data. */ DecodedBlob (void const* key, void const* value, int valueBytes); - /** Determine if the decoding was successful. - */ + /** Determine if the decoding was successful. */ bool wasOk () const noexcept { return m_success; } - /** Create a NodeObject from this data. - */ + /** Create a NodeObject from this data. */ NodeObject::Ptr createObject (); private: @@ -84,8 +118,7 @@ public: These get recycled to prevent many small allocations. - @note This is the format in which a NodeObject is stored in the - persistent storage layer. + @note This defines the database format of a NodeObject! */ struct EncodedBlob { @@ -107,13 +140,16 @@ public: //-------------------------------------------------------------------------- - /** Provides the asynchronous scheduling feature. + /** Provides optional asynchronous scheduling for backends. + + For improved performance, a backend has the option of performing writes + in batches. These writes can be scheduled using the provided scheduler + object. */ class Scheduler { public: - /** Derived classes perform scheduled tasks. - */ + /** Derived classes perform scheduled tasks. */ struct Task { virtual ~Task () { } @@ -135,9 +171,11 @@ public: //-------------------------------------------------------------------------- - /** A helper to assist with batch writing. + /** Helps with batch writing. - The batch writes are performed with a scheduled task. + The batch writes are performed with a scheduled task. 
Use of the
+        class is not required. A backend can implement its own write batching,
+        or skip write batching if doing so yields a performance benefit.

         @see Scheduler
     */
@@ -148,15 +186,13 @@ public:
     class BatchWriter : private Scheduler::Task
     {
     public:
-        /** This callback does the actual writing.
-        */
+        /** This callback does the actual writing. */
         struct Callback
         {
             virtual void writeBatch (Batch const& batch) = 0;
         };

-        /** Create a batch writer.
-        */
+        /** Create a batch writer. */
         BatchWriter (Callback& callback, Scheduler& scheduler);

         /** Destroy a batch writer.
@@ -170,10 +206,9 @@ public:
             This will add to the batch and initiate a scheduled task to
             write the batch out.
         */
-        void store (NodeObject::ref object);
+        void store (NodeObject::Ptr const& object);

-        /** Get an estimate of the amount of writing I/O pending.
-        */
+        /** Get an estimate of the amount of writing I/O pending. */
         int getWriteLoad ();

     private:
@@ -197,16 +232,19 @@ public:

     //--------------------------------------------------------------------------

-    /** Back end used for the store.
+    /** A backend used for the store.

-        A Backend implements a persistent key/value storage system.
-        Keys sizes are all fixed within the same database.
+        The NodeStore uses a swappable backend so that other database systems
+        can be tried. Different databases may offer various features such
+        as improved performance, fault-tolerant or distributed storage, or
+        all in-memory operation.
+
+        A given instance of a backend is fixed to a particular key size.
     */
     class Backend
     {
     public:
-        /** Return codes from operations.
-        */
+        /** Return codes from operations. */
         enum Status
         {
             ok,
@@ -217,9 +255,9 @@

         /** Destroy the backend.

-            All open files are closed and flushed. If there are batched
-            writes or other tasks scheduled, they will be completed before
-            this call returns.
+            All open files are closed and flushed. If there are batched writes
+            or other tasks scheduled, they will be completed before this call
+            returns.
         */
         virtual ~Backend () { }

@@ -274,12 +312,14 @@ public:

             This is usually called during import.

+            @note This routine will not be called concurrently with itself
+                  or other methods.
+
             @see import
         */
         virtual void visitAll (VisitCallback& callback) = 0;

-        /** Estimate the number of write operations pending.
-        */
+        /** Estimate the number of write operations pending. */
         virtual int getWriteLoad () = 0;
     };

     //--------------------------------------------------------------------------

     /** Factory to produce backends.
     */
     class BackendFactory
     {
     public:
         virtual ~BackendFactory () { }

-        /** Retrieve the name of this factory.
-        */
+        /** Retrieve the name of this factory. */
         virtual String getName () const = 0;

         /** Create an instance of this factory's backend.
@@ -351,9 +390,9 @@ public:

     /** Fetch an object.

-        If the object is known to be not in the database, isn't found in
-        the database during the fetch, or failed to load correctly during
-        the fetch, `nullptr` is returned.
+        If the object is known to be not in the database, isn't found in the
+        database during the fetch, or failed to load correctly during the fetch,
+        `nullptr` is returned.

         @note This can be called concurrently.

@@ -387,6 +426,12 @@ public:
     */
     virtual void import (String sourceBackendParameters) = 0;

+    /** Retrieve the estimated number of pending write operations.
+
+        This is used for diagnostics.
+    */
+    virtual int getWriteLoad () = 0;
+
     // VFALCO TODO Document this.
     virtual float getCacheHitRate () = 0;

@@ -397,11 +442,6 @@ public:
     // VFALCO TODO Document this.
     virtual void sweep () = 0;

-    /** Retrieve the estimated number of pending write operations.
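Tying the declarations above together: a backend that opts into batch writing
owns a BatchWriter, forwards stores to it, and receives the flushed batch back
through the Callback interface. A partial sketch; the class name is invented
and the remaining Backend pure virtuals are omitted:

    // Hypothetical fragment showing the intended BatchWriter wiring.
    class ExampleBackend
        : public NodeStore::Backend
        , private NodeStore::BatchWriter::Callback
    {
    public:
        explicit ExampleBackend (NodeStore::Scheduler& scheduler)
            : m_batch (*this, scheduler)
        {
        }

        // Queue the object; the scheduler later drives writeBatch ().
        void store (NodeObject::Ptr const& object)
        {
            m_batch.store (object);
        }

        int getWriteLoad ()
        {
            return m_batch.getWriteLoad ();
        }

    private:
        // 'Batch' is the container type named by the Callback interface.
        void writeBatch (Batch const& batch)
        {
            // Persist every object in 'batch' to the underlying database.
        }

        NodeStore::BatchWriter m_batch;
    };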
- - This is used for diagnostics. - */ - virtual int getWriteLoad () = 0; }; #endif From d90ab063b3a57e9c7710eb62107ce871ba666d79 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sun, 21 Jul 2013 14:50:41 -0700 Subject: [PATCH 38/50] Squashed 'Subtrees/mdb/' content from commit de72c08 git-subtree-dir: Subtrees/mdb git-subtree-split: de72c08660fca1597be0bb0b6536c122ae324645 --- libraries/liblmdb/.gitignore | 16 + libraries/liblmdb/COPYRIGHT | 20 + libraries/liblmdb/Doxyfile | 1631 +++++++ libraries/liblmdb/LICENSE | Bin 0 -> 2214 bytes libraries/liblmdb/Makefile | 88 + libraries/liblmdb/lmdb.h | 1331 ++++++ libraries/liblmdb/mdb.c | 8112 ++++++++++++++++++++++++++++++++ libraries/liblmdb/mdb_copy.1 | 28 + libraries/liblmdb/mdb_copy.c | 66 + libraries/liblmdb/mdb_stat.1 | 59 + libraries/liblmdb/mdb_stat.c | 248 + libraries/liblmdb/midl.c | 348 ++ libraries/liblmdb/midl.h | 177 + libraries/liblmdb/mtest.c | 176 + libraries/liblmdb/mtest2.c | 117 + libraries/liblmdb/mtest3.c | 127 + libraries/liblmdb/mtest4.c | 161 + libraries/liblmdb/mtest5.c | 129 + libraries/liblmdb/mtest6.c | 131 + libraries/liblmdb/sample-bdb.c | 71 + libraries/liblmdb/sample-mdb.c | 60 + 21 files changed, 13096 insertions(+) create mode 100644 libraries/liblmdb/.gitignore create mode 100644 libraries/liblmdb/COPYRIGHT create mode 100644 libraries/liblmdb/Doxyfile create mode 100644 libraries/liblmdb/LICENSE create mode 100644 libraries/liblmdb/Makefile create mode 100644 libraries/liblmdb/lmdb.h create mode 100644 libraries/liblmdb/mdb.c create mode 100644 libraries/liblmdb/mdb_copy.1 create mode 100644 libraries/liblmdb/mdb_copy.c create mode 100644 libraries/liblmdb/mdb_stat.1 create mode 100644 libraries/liblmdb/mdb_stat.c create mode 100644 libraries/liblmdb/midl.c create mode 100644 libraries/liblmdb/midl.h create mode 100644 libraries/liblmdb/mtest.c create mode 100644 libraries/liblmdb/mtest2.c create mode 100644 libraries/liblmdb/mtest3.c create mode 100644 libraries/liblmdb/mtest4.c create mode 100644 libraries/liblmdb/mtest5.c create mode 100644 libraries/liblmdb/mtest6.c create mode 100644 libraries/liblmdb/sample-bdb.c create mode 100644 libraries/liblmdb/sample-mdb.c diff --git a/libraries/liblmdb/.gitignore b/libraries/liblmdb/.gitignore new file mode 100644 index 0000000000..0d493fe188 --- /dev/null +++ b/libraries/liblmdb/.gitignore @@ -0,0 +1,16 @@ +mtest +mtest[23456] +testdb +mdb_copy +mdb_stat +*.[ao] +*.so +*[~#] +*.bak +*.orig +*.rej +core +core.* +valgrind.* +man/ +html/ diff --git a/libraries/liblmdb/COPYRIGHT b/libraries/liblmdb/COPYRIGHT new file mode 100644 index 0000000000..4482816cf5 --- /dev/null +++ b/libraries/liblmdb/COPYRIGHT @@ -0,0 +1,20 @@ +Copyright 2011-2013 Howard Chu, Symas Corp. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted only as authorized by the OpenLDAP +Public License. + +A copy of this license is available in the file LICENSE in the +top-level directory of the distribution or, alternatively, at +. + +OpenLDAP is a registered trademark of the OpenLDAP Foundation. + +Individual files and/or contributed packages may be copyright by +other parties and/or subject to additional restrictions. + +This work also contains materials derived from public sources. + +Additional information about OpenLDAP can be obtained at +. 
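The vendored library is driven through a small C API declared in lmdb.h; the
bundled sample-mdb.c in the file list above exercises it. A minimal
put-and-commit round trip sketched against that API, with error checking
abbreviated and the environment path invented for the example:

    #include "lmdb.h"

    int main ()
    {
        MDB_env* env;
        mdb_env_create (&env);
        mdb_env_open (env, "./testdb", 0, 0664); // directory must already exist

        MDB_txn* txn;
        mdb_txn_begin (env, 0, 0, &txn);

        MDB_dbi dbi;
        mdb_dbi_open (txn, 0, 0, &dbi); // the unnamed (default) database

        char keyBuf [] = "key";
        char valBuf [] = "value";
        MDB_val key = { sizeof (keyBuf), keyBuf };
        MDB_val val = { sizeof (valBuf), valBuf };

        mdb_put (txn, dbi, &key, &val, 0);
        mdb_txn_commit (txn); // data is durable once this returns

        mdb_env_close (env);
        return 0;
    }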
diff --git a/libraries/liblmdb/Doxyfile b/libraries/liblmdb/Doxyfile new file mode 100644 index 0000000000..3fd0365c7d --- /dev/null +++ b/libraries/liblmdb/Doxyfile @@ -0,0 +1,1631 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = MDB + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. 
+# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. 
+ +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +INLINE_GROUPED_CLASSES = YES +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = YES + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penality. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will rougly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command , where is the value of +# the FILE_VERSION_FILTER tag, and is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. The create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. 
If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = lmdb.h midl.h mdb.c midl.c + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. 
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command , where +# is the value of the INPUT_FILTER tag, and is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. 
+ +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. 
Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. 
+ +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. 
+# +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. 
Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvances is that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. 
This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. 
+ +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = DEBUG=2 __GNUC__=1 + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. 
+ +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans.ttf + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. 
+ +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). 
+ +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/libraries/liblmdb/LICENSE b/libraries/liblmdb/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..05ad7571e448b9d83ead5d4691274d9484574714 GIT binary patch literal 2214 zcmZ`*%W|AJ6y579Ze^3Wit*&Jn8{>CgW9bs3_S?!^e%KGyH&%Z2Z3Gw`<$zOx}2;~ zq5C}da4yBxP_{E&5{FA#PV2^QD6t#UIU|Vf%;0R_(dX;G1N!tAh128YM*95m;};=v zQ(HF&QSkA$hA zSj4pu#U9^F$d{k=vzWSyzc$;TF7d%-P z1uxbwRn?WT5ZhHIi#KlvPBCLc&?7kw-0?-b2TlgPstL}x| zw07qI3G_JE`uiprrC;$g`{jLQ~wFhvoejtLS-q1 zX-ulnBFfStSF^Ina!o&egc`=9xV_hoT)?=E5QZLRgoea&5}yvXiT z$!9K+FHgA6BpM56iQHEQY+{wms6gQKaAGikJ9Vv;S8hr6^xi@ROa#)Zd{lyXSz^DSxCCcvRHE`TIP2ulKPuSGC&BcQp*4t ziqI!U2*9hqvF)tX$^)&^Lgsn7WJ+AoBD)8Yxrjoz#-0P7DV{}HzT%*afge~v_X`Px zK#O#bYsg?|uqi6Ug&Cuq<;6vhl**e#-N-ZoHvu_ExmQ|VVZI8L-5@0RumaJL6RbMS z0#)sg7kmf47(S^vh4CF?fT(_O0({YGUn;-SWwbcJeGrPSYR%!JgmO;fwjUtq5WcfG zZ~F$HuiHyEbeYzMw@?lJ6IBg48m;4va8$i#uP{AJUfYi>Hilf)m|(xVj@nq8s)3vE zDtf6FERVdI+|F!xHDPBBypK9Wv`FIjlVj}x^$R;Tkc7XHy`wSD^f_3IzJ&FMF~ z9Pr9~?)#b|JH7-W0xVdSovi|pH$;yi%FcODI+!{$JeeAAm%OGu46NJS1kX*Jei+QF XO<|8yW-&kf9. + * + * @par Derived From: + * This code is derived from btree.c written by Martin Hedenfalk. 
+ * + * Copyright (c) 2009, 2010 Martin Hedenfalk + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _LMDB_H_ +#define _LMDB_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +typedef int mdb_mode_t; +#else +typedef mode_t mdb_mode_t; +#endif + +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + +/** @defgroup mdb MDB API + * @{ + * @brief OpenLDAP Lightning Memory-Mapped Database Manager + */ +/** @defgroup Version Version Macros + * @{ + */ +/** Library major version */ +#define MDB_VERSION_MAJOR 0 +/** Library minor version */ +#define MDB_VERSION_MINOR 9 +/** Library patch version */ +#define MDB_VERSION_PATCH 7 + +/** Combine args a,b,c into a single integer for easy version comparisons */ +#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) + +/** The full library version as a single integer */ +#define MDB_VERSION_FULL \ + MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) + +/** The release date of this library version */ +#define MDB_VERSION_DATE "January 10, 2013" + +/** A stringifier for the version info */ +#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")" + +/** A helper for the stringifier macro */ +#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d) + +/** The full library version as a C string */ +#define MDB_VERSION_STRING \ + MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE) +/** @} */ + +/** @brief Opaque structure for a database environment. + * + * A DB environment supports multiple databases, all residing in the same + * shared-memory map. + */ +typedef struct MDB_env MDB_env; + +/** @brief Opaque structure for a transaction handle. + * + * All database operations require a transaction handle. Transactions may be + * read-only or read-write. + */ +typedef struct MDB_txn MDB_txn; + +/** @brief A handle for an individual database in the DB environment. */ +typedef unsigned int MDB_dbi; + +/** @brief Opaque structure for navigating through a database */ +typedef struct MDB_cursor MDB_cursor; + +/** @brief Generic structure used for passing keys and data in and out + * of the database. + * + * Key sizes must be between 1 and the liblmdb build-time constant + * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The + * same applies to data sizes in databases with the #MDB_DUPSORT flag. + * Other data items can in theory be from 0 to 0xffffffff bytes long. + * + * Values returned from the database are valid only until a subsequent + * update operation, or the end of the transaction. 
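
As a concrete reading of the ownership rule above, a lookup sketch (illustrative only: mdb_get() is declared later in this header, txn and dbi come from an open transaction, and <stdlib.h>/<string.h> are assumed):

    MDB_val key, data;
    key.mv_size = sizeof("greeting") - 1;      /* key length, no terminator */
    key.mv_data = (void *)"greeting";
    if (mdb_get(txn, dbi, &key, &data) == MDB_SUCCESS) {
        /* data.mv_data points into the map and is only valid until the next
         * update or the end of the transaction, so copy it to keep it. */
        char *copy = malloc(data.mv_size);
        if (copy != NULL)
            memcpy(copy, data.mv_data, data.mv_size);
    }
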
+ */ +typedef struct MDB_val { + size_t mv_size; /**< size of the data item */ + void *mv_data; /**< address of the data item */ +} MDB_val; + +/** @brief A callback function used to compare two keys in a database */ +typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); + +/** @brief A callback function used to relocate a position-dependent data item + * in a fixed-address database. + * + * The \b newptr gives the item's desired address in + * the memory map, and \b oldptr gives its previous address. The item's actual + * data resides at the address in \b item. This callback is expected to walk + * through the fields of the record in \b item and modify any + * values based at the \b oldptr address to be relative to the \b newptr address. + * @param[in,out] item The item that is to be relocated. + * @param[in] oldptr The previous address. + * @param[in] newptr The new address to relocate to. + * @param[in] relctx An application-provided context, set by #mdb_set_relctx(). + * @todo This feature is currently unimplemented. + */ +typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); + +/** @defgroup mdb_env Environment Flags + * + * Values do not overlap Database Flags. + * @{ + */ + /** mmap at a fixed address (experimental) */ +#define MDB_FIXEDMAP 0x01 + /** no environment directory */ +#define MDB_NOSUBDIR 0x4000 + /** don't fsync after commit */ +#define MDB_NOSYNC 0x10000 + /** read only */ +#define MDB_RDONLY 0x20000 + /** don't fsync metapage after commit */ +#define MDB_NOMETASYNC 0x40000 + /** use writable mmap */ +#define MDB_WRITEMAP 0x80000 + /** use asynchronous msync when MDB_WRITEMAP is used */ +#define MDB_MAPASYNC 0x100000 + /** tie reader locktable slots to #MDB_txn objects instead of to threads */ +#define MDB_NOTLS 0x200000 +/** @} */ + +/** @defgroup mdb_dbi_open Database Flags + * + * Values do not overlap Environment Flags. + * @{ + */ + /** use reverse string keys */ +#define MDB_REVERSEKEY 0x02 + /** use sorted duplicates */ +#define MDB_DUPSORT 0x04 + /** numeric keys in native byte order. + * The keys must all be of the same size. */ +#define MDB_INTEGERKEY 0x08 + /** with #MDB_DUPSORT, sorted dup items have fixed size */ +#define MDB_DUPFIXED 0x10 + /** with #MDB_DUPSORT, dups are numeric in native byte order */ +#define MDB_INTEGERDUP 0x20 + /** with #MDB_DUPSORT, use reverse string dups */ +#define MDB_REVERSEDUP 0x40 + /** create DB if not already existing */ +#define MDB_CREATE 0x40000 +/** @} */ + +/** @defgroup mdb_put Write Flags + * @{ + */ +/** For put: Don't write if the key already exists. */ +#define MDB_NOOVERWRITE 0x10 +/** Only for #MDB_DUPSORT
+ * For put: don't write if the key and data pair already exist.
+ * For mdb_cursor_del: remove all duplicate data items. + */ +#define MDB_NODUPDATA 0x20 +/** For mdb_cursor_put: overwrite the current key/data pair */ +#define MDB_CURRENT 0x40 +/** For put: Just reserve space for data, don't copy it. Return a + * pointer to the reserved space. + */ +#define MDB_RESERVE 0x10000 +/** Data is being appended, don't split full pages. */ +#define MDB_APPEND 0x20000 +/** Duplicate data is being appended, don't split full pages. */ +#define MDB_APPENDDUP 0x40000 +/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ +#define MDB_MULTIPLE 0x80000 +/* @} */ + +/** @brief Cursor Get operations. + * + * This is the set of all operations for retrieving data + * using a cursor. + */ +typedef enum MDB_cursor_op { + MDB_FIRST, /**< Position at first key/data item */ + MDB_FIRST_DUP, /**< Position at first data item of current key. + Only for #MDB_DUPSORT */ + MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ + MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ + MDB_GET_CURRENT, /**< Return key/data at current cursor position */ + MDB_GET_MULTIPLE, /**< Return all the duplicate data items at the current + cursor position. Only for #MDB_DUPFIXED */ + MDB_LAST, /**< Position at last key/data item */ + MDB_LAST_DUP, /**< Position at last data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT, /**< Position at next data item */ + MDB_NEXT_DUP, /**< Position at next data item of current key. + Only for #MDB_DUPSORT */ + MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next + cursor position. Only for #MDB_DUPFIXED */ + MDB_NEXT_NODUP, /**< Position at first data item of next key */ + MDB_PREV, /**< Position at previous data item */ + MDB_PREV_DUP, /**< Position at previous data item of current key. + Only for #MDB_DUPSORT */ + MDB_PREV_NODUP, /**< Position at last data item of previous key */ + MDB_SET, /**< Position at specified key */ + MDB_SET_KEY, /**< Position at specified key, return key + data */ + MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. 
*/ +} MDB_cursor_op; + +/** @defgroup errors Return Codes + * + * BerkeleyDB uses -30800 to -30999, we'll go under them + * @{ + */ + /** Successful result */ +#define MDB_SUCCESS 0 + /** key/data pair already exists */ +#define MDB_KEYEXIST (-30799) + /** key/data pair not found (EOF) */ +#define MDB_NOTFOUND (-30798) + /** Requested page not found - this usually indicates corruption */ +#define MDB_PAGE_NOTFOUND (-30797) + /** Located page was wrong type */ +#define MDB_CORRUPTED (-30796) + /** Update of meta page failed, probably I/O error */ +#define MDB_PANIC (-30795) + /** Environment version mismatch */ +#define MDB_VERSION_MISMATCH (-30794) + /** File is not a valid MDB file */ +#define MDB_INVALID (-30793) + /** Environment mapsize reached */ +#define MDB_MAP_FULL (-30792) + /** Environment maxdbs reached */ +#define MDB_DBS_FULL (-30791) + /** Environment maxreaders reached */ +#define MDB_READERS_FULL (-30790) + /** Too many TLS keys in use - Windows only */ +#define MDB_TLS_FULL (-30789) + /** Txn has too many dirty pages */ +#define MDB_TXN_FULL (-30788) + /** Cursor stack too deep - internal error */ +#define MDB_CURSOR_FULL (-30787) + /** Page has not enough space - internal error */ +#define MDB_PAGE_FULL (-30786) + /** Database contents grew beyond environment mapsize */ +#define MDB_MAP_RESIZED (-30785) + /** Database flags changed or would change */ +#define MDB_INCOMPATIBLE (-30784) + /** Invalid reuse of reader locktable slot */ +#define MDB_BAD_RSLOT (-30783) +#define MDB_LAST_ERRCODE MDB_BAD_RSLOT +/** @} */ + +/** @brief Statistics for a database in the environment */ +typedef struct MDB_stat { + unsigned int ms_psize; /**< Size of a database page. + This is currently the same for all databases. */ + unsigned int ms_depth; /**< Depth (height) of the B-tree */ + size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + size_t ms_leaf_pages; /**< Number of leaf pages */ + size_t ms_overflow_pages; /**< Number of overflow pages */ + size_t ms_entries; /**< Number of data items */ +} MDB_stat; + +/** @brief Information about the environment */ +typedef struct MDB_envinfo { + void *me_mapaddr; /**< Address of map, if fixed */ + size_t me_mapsize; /**< Size of the data memory map */ + size_t me_last_pgno; /**< ID of the last used page */ + size_t me_last_txnid; /**< ID of the last committed transaction */ + unsigned int me_maxreaders; /**< max reader slots in the environment */ + unsigned int me_numreaders; /**< max reader slots used in the environment */ +} MDB_envinfo; + + /** @brief Return the mdb library version information. + * + * @param[out] major if non-NULL, the library major version number is copied here + * @param[out] minor if non-NULL, the library minor version number is copied here + * @param[out] patch if non-NULL, the library patch version number is copied here + * @retval "version string" The library version as a string + */ +char *mdb_version(int *major, int *minor, int *patch); + + /** @brief Return a string describing a given error code. + * + * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) + * function. If the error code is greater than or equal to 0, then the string + * returned by the system function strerror(3) is returned. If the error code + * is less than 0, an error string corresponding to the MDB library error is + * returned. See @ref errors for a list of MDB-specific error codes. 
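
Together with the version macros defined above, these two calls make a cheap startup sanity check (a sketch; assumes <stdio.h>):

    int major, minor, patch;
    mdb_version(&major, &minor, &patch);
    if (MDB_VERINT(major, minor, patch) != MDB_VERSION_FULL)
        fprintf(stderr, "built against %s, linked with %d.%d.%d\n",
                MDB_VERSION_STRING, major, minor, patch);
    fprintf(stderr, "MDB_NOTFOUND reads as: %s\n", mdb_strerror(MDB_NOTFOUND));
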
+ * @param[in] err The error code + * @retval "error message" The description of the error + */ +char *mdb_strerror(int err); + + /** @brief Create an MDB environment handle. + * + * This function allocates memory for a #MDB_env structure. To release + * the allocated memory and discard the handle, call #mdb_env_close(). + * Before the handle may be used, it must be opened using #mdb_env_open(). + * Various other options may also need to be set before opening the handle, + * e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(), + * depending on usage requirements. + * @param[out] env The address where the new handle will be stored + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_create(MDB_env **env); + + /** @brief Open an environment handle. + * + * If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] path The directory in which the database files reside. This + * directory must already exist and be writable. + * @param[in] flags Special options for this environment. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + * Flags set by mdb_env_set_flags() are also used. + *
    + *
  • #MDB_FIXEDMAP + * use a fixed address for the mmap region. This flag must be specified + * when creating the environment, and is stored persistently in the environment. + * If successful, the memory map will always reside at the same virtual address + * and pointers used to reference data items in the database will be constant + * across multiple invocations. This option may not always work, depending on + * how the operating system has allocated memory to shared libraries and other uses. + * The feature is highly experimental. + *
  • #MDB_NOSUBDIR + * By default, MDB creates its environment in a directory whose + * pathname is given in \b path, and creates its data and lock files + * under that directory. With this option, \b path is used as-is for + * the database main data file. The database lock file is the \b path + * with "-lock" appended. + *
  • #MDB_RDONLY + * Open the environment in read-only mode. No write operations will be + * allowed. MDB will still modify the lock file - except on read-only + * filesystems, where MDB does not use locks. + *
  • #MDB_WRITEMAP + * Use a writeable memory map unless MDB_RDONLY is set. This is faster + * and uses fewer mallocs, but loses protection from application bugs + * like wild pointer writes and other bad updates into the database. + * Incompatible with nested transactions. + *
  • #MDB_NOMETASYNC + * Flush system buffers to disk only once per transaction, omit the + * metadata flush. Defer that until the system flushes files to disk, + * or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization + * maintains database integrity, but a system crash may undo the last + * committed transaction. I.e. it preserves the ACI (atomicity, + * consistency, isolation) but not D (durability) database property. + * This flag may be changed at any time using #mdb_env_set_flags(). + *
  • #MDB_NOSYNC + * Don't flush system buffers to disk when committing a transaction. + * This optimization means a system crash can corrupt the database or + * lose the last transactions if buffers are not yet flushed to disk. + * The risk is governed by how often the system flushes dirty buffers + * to disk and how often #mdb_env_sync() is called. However, if the + * filesystem preserves write order and the #MDB_WRITEMAP flag is not + * used, transactions exhibit ACI (atomicity, consistency, isolation) + * properties and only lose D (durability). I.e. database integrity + * is maintained, but a system crash may undo the final transactions. + * Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no + * hint for when to write transactions to disk, unless #mdb_env_sync() + * is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable. + * This flag may be changed at any time using #mdb_env_set_flags(). + *
  • #MDB_MAPASYNC + * When using #MDB_WRITEMAP, use asynchronous flushes to disk. + * As with #MDB_NOSYNC, a system crash can then corrupt the + * database or lose the last transactions. Calling #mdb_env_sync() + * ensures on-disk database integrity until next commit. + * This flag may be changed at any time using #mdb_env_set_flags(). + *
  • #MDB_NOTLS
+ * Don't use Thread-Local Storage. Tie reader locktable slots to
+ * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
+ * the slot reserved for the #MDB_txn object. A thread may use parallel
+ * read-only transactions. A read-only transaction may span threads if
+ * the user synchronizes its use. Applications that multiplex many
+ * user threads over individual OS threads need this option. Such an
+ * application must also serialize the write transactions in an OS
+ * thread, since MDB's write locking is unaware of the user threads.
+ *
+ * @param[in] mode The UNIX permissions to set on created files. This parameter + * is ignored on Windows. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • #MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the + * version that created the database environment. + *
  • #MDB_INVALID - the environment file headers are corrupted. + *
  • ENOENT - the directory specified by the path parameter doesn't exist. + *
  • EACCES - the user didn't have permission to access the environment files. + *
  • EAGAIN - the environment was locked by another process. + *
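
Taken together, a typical open sequence looks like this (a sketch, not part of the header: the directory "./testdb" must already exist, and error handling is abbreviated):

    MDB_env *env;
    int rc = mdb_env_create(&env);
    if (rc == 0)
        rc = mdb_env_open(env, "./testdb", 0 /* flags */, 0664 /* mode */);
    if (rc != 0) {
        fprintf(stderr, "mdb_env_open: %s\n", mdb_strerror(rc));
        mdb_env_close(env);   /* required even when mdb_env_open() fails */
    }
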
+ */ +int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode); + + /** @brief Copy an MDB environment to the specified path. + * + * This function may be used to make a backup of an existing environment. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] path The directory in which the copy will reside. This + * directory must already exist and be writable but must otherwise be + * empty. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copy(MDB_env *env, const char *path); + + /** @brief Copy an MDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + + /** @brief Return statistics about the MDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + */ +int mdb_env_stat(MDB_env *env, MDB_stat *stat); + + /** @brief Return information about the MDB environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] stat The address of an #MDB_envinfo structure + * where the information will be copied + */ +int mdb_env_info(MDB_env *env, MDB_envinfo *stat); + + /** @brief Flush the data buffers to disk. + * + * Data is always written to disk when #mdb_txn_commit() is called, + * but the operating system may keep it buffered. MDB always flushes + * the OS buffers upon commit as well, unless the environment was + * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] force If non-zero, force a synchronous flush. Otherwise + * if the environment has the #MDB_NOSYNC flag set the flushes + * will be omitted, and with #MDB_MAPASYNC they will be asynchronous. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
  • EIO - an error occurred during synchronization. + *
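
With #MDB_NOSYNC in effect, durability is reclaimed on demand (sketch; env is an open environment):

    /* The environment was opened with MDB_NOSYNC, so commits skip the
     * flush; force one explicitly at a safe point. */
    int rc = mdb_env_sync(env, 1 /* force */);
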
+ */ +int mdb_env_sync(MDB_env *env, int force); + + /** @brief Close the environment and release the memory map. + * + * Only a single thread may call this function. All transactions, databases, + * and cursors must already be closed before calling this function. Attempts to + * use any such handles after calling this function will cause a SIGSEGV. + * The environment handle will be freed and must not be used again after this call. + * @param[in] env An environment handle returned by #mdb_env_create() + */ +void mdb_env_close(MDB_env *env); + + /** @brief Set environment flags. + * + * This may be used to set some flags in addition to those from + * #mdb_env_open(), or to unset these flags. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] flags The flags to change, bitwise OR'ed together + * @param[in] onoff A non-zero value sets the flags, zero clears them. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
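
For example, the durability/speed trade-off can be relaxed for a bulk load and restored afterwards (sketch; env is an open environment):

    mdb_env_set_flags(env, MDB_NOSYNC, 1);   /* set: commits skip the flush */
    /* ... bulk load ... */
    mdb_env_set_flags(env, MDB_NOSYNC, 0);   /* clear: restore durability */
    mdb_env_sync(env, 1);                    /* flush what the load skipped */
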
+ */ +int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff); + + /** @brief Get environment flags. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] flags The address of an integer to store the flags + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
+ */ +int mdb_env_get_flags(MDB_env *env, unsigned int *flags); + + /** @brief Return the path that was used in #mdb_env_open(). + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] path Address of a string pointer to contain the path. This + * is the actual string in the environment, not a copy. It should not be + * altered in any way. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
+ */ +int mdb_env_get_path(MDB_env *env, const char **path); + + /** @brief Set the size of the memory map to use for this environment. + * + * The size should be a multiple of the OS page size. The default is + * 10485760 bytes. The size of the memory map is also the maximum size + * of the database. The value should be chosen as large as possible, + * to accommodate future growth of the database. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * The size may be changed by closing and reopening the environment. + * Any attempt to set a size smaller than the space already consumed + * by the environment will be silently changed to the current size of the used space. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] size The size in bytes + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified, or the environment is already open. + *
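
Because the call must precede mdb_env_open(), sizing is normally done right after creation (sketch; the 1 GiB figure is an arbitrary illustration):

    MDB_env *env;
    mdb_env_create(&env);
    mdb_env_set_mapsize(env, 1024UL * 1024UL * 1024UL);  /* map = max DB size */
    mdb_env_open(env, "./testdb", 0, 0664);
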
+
+ */
+int mdb_env_set_mapsize(MDB_env *env, size_t size);
+
+ /** @brief Set the maximum number of threads/reader slots for the environment.
+ *
+ * This defines the number of slots in the lock table that is used to track readers in the
+ * environment. The default is 126.
+ * Starting a read-only transaction normally ties a lock table slot to the
+ * current thread until the environment closes or the thread exits. If
+ * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the
+ * MDB_txn object until it or the #MDB_env object is destroyed.
+ * This function may only be called after #mdb_env_create() and before #mdb_env_open().
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[in] readers The maximum number of reader lock table slots
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ *
    + *
  • EINVAL - an invalid parameter was specified, or the environment is already open. + *
+ */ +int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers); + + /** @brief Get the maximum number of threads/reader slots for the environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] readers Address of an integer to store the number of readers + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
+ */ +int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); + + /** @brief Set the maximum number of named databases for the environment. + * + * This function is only needed if multiple databases will be used in the + * environment. Simpler applications that use the environment as a single + * unnamed database can ignore this option. + * This function may only be called after #mdb_env_create() and before #mdb_env_open(). + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbs The maximum number of databases + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified, or the environment is already open. + *
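
Like the map size, both limits are fixed before the environment opens (sketch):

    MDB_env *env;
    mdb_env_create(&env);
    mdb_env_set_maxreaders(env, 126);   /* the documented default */
    mdb_env_set_maxdbs(env, 4);         /* room for four named databases */
    mdb_env_open(env, "./testdb", 0, 0664);
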
+ */ +int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); + + /** @brief Create a transaction for use with the environment. + * + * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). + * @note A transaction and its cursors must only be used by a single + * thread, and a thread may only have a single transaction at a time. + * If #MDB_NOTLS is in use, this does not apply to read-only transactions. + * @note Cursors may not span transactions. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] parent If this parameter is non-NULL, the new transaction + * will be a nested transaction, with the transaction indicated by \b parent + * as its parent. Transactions may be nested to any level. A parent + * transaction may not issue any other operations besides mdb_txn_begin, + * mdb_txn_abort, or mdb_txn_commit while it has active child transactions. + * @param[in] flags Special options for this transaction. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + *
    + *
  • #MDB_RDONLY + * This transaction will not perform any write operations. + *
+ * @param[out] txn Address where the new #MDB_txn handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • #MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + *
  • #MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's + * mapsize and the environment must be shut down. + *
  • #MDB_READERS_FULL - a read-only transaction was requested and + * the reader lock table is full. See #mdb_env_set_maxreaders(). + *
  • ENOMEM - out of memory. + *
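
A write transaction bracketed by commit/abort then looks like this (sketch: mdb_put() is declared later in this header, and key/data are MDB_val pairs prepared as shown earlier):

    MDB_txn *txn;
    int rc = mdb_txn_begin(env, NULL /* no parent */, 0, &txn);
    if (rc == 0) {
        rc = mdb_put(txn, dbi, &key, &data, 0);
        if (rc == 0)
            rc = mdb_txn_commit(txn);   /* frees the handle on return */
        else
            mdb_txn_abort(txn);         /* frees the handle too */
    }
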
+ */ +int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn); + + /** @brief Commit all the operations of a transaction into the database. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • EINVAL - an invalid parameter was specified. + *
  • ENOSPC - no more disk space. + *
  • EIO - a low-level I/O error occurred while writing. + *
  • ENOMEM - out of memory. + *
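
Nested write transactions follow the same pattern; aborting a child discards only its own updates (sketch):

    MDB_txn *parent, *child;
    mdb_txn_begin(env, NULL, 0, &parent);
    mdb_txn_begin(env, parent, 0, &child);
    /* ... speculative updates inside child ... */
    mdb_txn_abort(child);      /* drop them; parent stays usable */
    mdb_txn_commit(parent);
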
+ */ +int mdb_txn_commit(MDB_txn *txn); + + /** @brief Abandon all the operations of the transaction instead of saving them. + * + * The transaction handle is freed. It and its cursors must not be used + * again after this call, except with #mdb_cursor_renew(). + * @note Earlier documentation incorrectly said all cursors would be freed. + * Only write-transactions free cursors. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_abort(MDB_txn *txn); + + /** @brief Reset a read-only transaction. + * + * Abort the transaction like #mdb_txn_abort(), but keep the transaction + * handle. #mdb_txn_renew() may reuse the handle. This saves allocation + * overhead if the process will start a new read-only transaction soon, + * and also locking overhead if #MDB_NOTLS is in use. The reader table + * lock is released, but the table slot stays tied to its thread or + * #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free + * its lock table slot if MDB_NOTLS is in use. + * Cursors opened within the transaction must not be used + * again after this call, except with #mdb_cursor_renew(). + * Reader locks generally don't interfere with writers, but they keep old + * versions of database pages allocated. Thus they prevent the old pages + * from being reused when writers commit new data, and so under heavy load + * the database size may grow much more rapidly than otherwise. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +void mdb_txn_reset(MDB_txn *txn); + + /** @brief Renew a read-only transaction. + * + * This acquires a new reader lock for a transaction handle that had been + * released by #mdb_txn_reset(). It must be called before a reset transaction + * may be used again. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
    + *
  • #MDB_PANIC - a fatal error occurred earlier and the environment + * must be shut down. + *
  • EINVAL - an invalid parameter was specified. + *
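
The reset/renew pair yields a reusable read-only handle (sketch):

    MDB_txn *rtxn;
    mdb_txn_begin(env, NULL, MDB_RDONLY, &rtxn);
    /* ... reads ... */
    mdb_txn_reset(rtxn);    /* release the reader lock, keep the handle */
    /* ... later ... */
    mdb_txn_renew(rtxn);    /* reacquire the lock before using it again */
    /* ... more reads ... */
    mdb_txn_abort(rtxn);    /* finally discard the handle */
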
+ */ +int mdb_txn_renew(MDB_txn *txn); + +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_open(txn,name,flags,dbi) mdb_dbi_open(txn,name,flags,dbi) +/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */ +#define mdb_close(env,dbi) mdb_dbi_close(env,dbi) + + /** @brief Open a database in the environment. + * + * A database handle denotes the name and parameters of a database, + * independently of whether such a database exists. + * The database handle may be discarded by calling #mdb_dbi_close(). + * The old database handle is returned if the database was already open. + * The handle must only be closed once. + * The database handle will be private to the current transaction until + * the transaction is successfully committed. If the transaction is + * aborted the handle will be closed automatically. + * After a successful commit the + * handle will reside in the shared environment, and may be used + * by other transactions. This function must not be called from + * multiple concurrent transactions. A transaction that uses this function + * must finish (either commit or abort) before any other transaction may + * use this function. + * + * To use named databases (with name != NULL), #mdb_env_set_maxdbs() + * must be called before opening the environment. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] name The name of the database to open. If only a single + * database is needed in the environment, this value may be NULL. + * @param[in] flags Special options for this database. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + *
    + *
+ * <ul>
+ *	<li>#MDB_REVERSEKEY
+ *		Keys are strings to be compared in reverse order, from the end
+ *		of the strings to the beginning. By default, keys are treated as strings and
+ *		compared from beginning to end.
+ *	<li>#MDB_DUPSORT
+ *		Duplicate keys may be used in the database. (Or, from another perspective,
+ *		keys may have multiple data items, stored in sorted order.) By default
+ *		keys must be unique and may have only a single data item.
+ *	<li>#MDB_INTEGERKEY
+ *		Keys are binary integers in native byte order. Setting this option
+ *		requires all keys to be the same size, typically sizeof(int)
+ *		or sizeof(size_t).
+ *	<li>#MDB_DUPFIXED
+ *		This flag may only be used in combination with #MDB_DUPSORT. This option
+ *		tells the library that the data items for this database are all the same
+ *		size, which allows further optimizations in storage and retrieval. When
+ *		all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE
+ *		cursor operations may be used to retrieve multiple items at once.
+ *	<li>#MDB_INTEGERDUP
+ *		This option specifies that duplicate data items are also integers, and
+ *		should be sorted as such.
+ *	<li>#MDB_REVERSEDUP
+ *		This option specifies that duplicate data items should be compared as
+ *		strings in reverse order.
+ *	<li>#MDB_CREATE
+ *		Create the named database if it doesn't exist. This option is not
+ *		allowed in a read-only transaction or a read-only environment.
+ * </ul>
+ * @param[out] dbi Address where the new #MDB_dbi handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>#MDB_NOTFOUND - the specified database doesn't exist in the environment
+ *		and #MDB_CREATE was not specified.
+ *	<li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs().
+ * </ul>
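+ *
+ * A typical open sequence (illustrative; assumes mdb_env_set_maxdbs()
+ * was called before the environment was opened):
+ *
+ *	MDB_dbi dbi;
+ *	rc = mdb_txn_begin(env, NULL, 0, &txn);
+ *	rc = mdb_dbi_open(txn, "mydb", MDB_CREATE, &dbi);
+ *	rc = mdb_txn_commit(txn);	// handle now visible to other txns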
+ */ +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi); + + /** @brief Retrieve statistics for a database. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] stat The address of an #MDB_stat structure + * where the statistics will be copied + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
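+ *
+ * For example (illustrative):
+ *
+ *	MDB_stat st;
+ *	rc = mdb_stat(txn, dbi, &st);
+ *	if (rc == 0)
+ *		printf("%zu entries, depth %u\n", st.ms_entries, st.ms_depth);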
+ */ +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); + + /** @brief Retrieve the DB flags for a database handle. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] flags Address where the flags will be returned. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_dbi_flags(MDB_env *env, MDB_dbi dbi, unsigned int *flags); + + /** @brief Close a database handle. + * + * This call is not mutex protected. Handles should only be closed by + * a single thread, and only if no other threads are going to reference + * the database handle or one of its cursors any further. Do not close + * a handle if an existing transaction has modified its database. + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi); + + /** @brief Delete a database and/or free all its pages. + * + * If the \b del parameter is 1, the DB handle will be closed + * and the DB will be deleted. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] del 1 to delete the DB from the environment, + * 0 to just free its pages. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del); + + /** @brief Set a custom key comparison function for a database. + * + * The comparison function is called whenever it is necessary to compare a + * key specified by the application with a key currently stored in the database. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating + * before longer keys. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
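+ *
+ * A comparator sketch (illustrative; for real integer keys
+ * #MDB_INTEGERKEY is usually the better choice):
+ *
+ *	static int ulongcmp(const MDB_val *a, const MDB_val *b)
+ *	{
+ *		unsigned long x = *(unsigned long *)a->mv_data;
+ *		unsigned long y = *(unsigned long *)b->mv_data;
+ *		return (x > y) - (x < y);
+ *	}
+ *	// ... right after mdb_dbi_open():
+ *	rc = mdb_set_compare(txn, dbi, ulongcmp);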
+ */ +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a custom data comparison function for a #MDB_DUPSORT database. + * + * This comparison function is called whenever it is necessary to compare a data + * item specified by the application with a data item currently stored in the database. + * This function only takes effect if the database was opened with the #MDB_DUPSORT + * flag. + * If no comparison function is specified, and no special key flags were specified + * with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating + * before longer items. + * @warning This function must be called before any data access functions are used, + * otherwise data corruption may occur. The same comparison function must be used by every + * program accessing the database, every time the database is used. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] cmp A #MDB_cmp_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */ +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); + + /** @brief Set a relocation function for a #MDB_FIXEDMAP database. + * + * @todo The relocation function is called whenever it is necessary to move the data + * of an item to a different position in the database (e.g. through tree + * balancing operations, shifts as a result of adds or deletes, etc.). It is + * intended to allow address/position-dependent data items to be stored in + * a database in an environment opened with the #MDB_FIXEDMAP option. + * Currently the relocation feature is unimplemented and setting + * this function has no effect. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] rel A #MDB_rel_func function + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */ +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel); + + /** @brief Set a context pointer for a #MDB_FIXEDMAP database's relocation function. + * + * See #mdb_set_relfunc and #MDB_rel_func for more details. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] ctx An arbitrary pointer for whatever the application needs. + * It will be passed to the callback function set by #mdb_set_relfunc + * as its \b relctx parameter whenever the callback is invoked. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */ +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx); + + /** @brief Get items from a database. + * + * This function retrieves key/data pairs from the database. The address + * and length of the data associated with the specified \b key are returned + * in the structure to which \b data refers. + * If the database supports duplicate keys (#MDB_DUPSORT) then the + * first data item for the key will be returned. Retrieval of other + * items requires the use of #mdb_cursor_get(). + * + * @note The memory pointed to by the returned values is owned by the + * database. The caller need not dispose of the memory, and may not + * modify it in any way. For values returned in a read-only transaction + * any modification attempts will cause a SIGSEGV. + * @note Values returned from the database are valid only until a + * subsequent update operation, or the end of the transaction. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to search for in the database + * @param[out] data The data corresponding to the key + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>#MDB_NOTFOUND - the key was not in the database.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
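+ *
+ * For example (illustrative; ikey is assumed to hold the key value):
+ *
+ *	MDB_val key, data;
+ *	key.mv_size = sizeof(ikey);
+ *	key.mv_data = &ikey;
+ *	rc = mdb_get(txn, dbi, &key, &data);
+ *	// on success data.mv_data points into the map; read-only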
+ */ +int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Store items into a database. + * + * This function stores key/data pairs in the database. The default behavior + * is to enter the new key/data pair, replacing any previously existing key + * if duplicates are disallowed, or adding a duplicate data item if + * duplicates are allowed (#MDB_DUPSORT). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to store in the database + * @param[in,out] data The data to store + * @param[in] flags Special options for this operation. This parameter + * must be set to 0 or by bitwise OR'ing together one or more of the + * values described here. + *
+ * <ul>
+ *	<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
+ *		already appear in the database. This flag may only be specified
+ *		if the database was opened with #MDB_DUPSORT. The function will
+ *		return #MDB_KEYEXIST if the key/data pair already appears in the
+ *		database.
+ *	<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
+ *		does not already appear in the database. The function will return
+ *		#MDB_KEYEXIST if the key already appears in the database, even if
+ *		the database supports duplicates (#MDB_DUPSORT). The \b data
+ *		parameter will be set to point to the existing item.
+ *	<li>#MDB_RESERVE - reserve space for data of the given size, but
+ *		don't copy the given data. Instead, return a pointer to the
+ *		reserved space, which the caller can fill in later - before
+ *		the next update operation or the transaction ends. This saves
+ *		an extra memcpy if the data is being generated later.
+ *	<li>#MDB_APPEND - append the given key/data pair to the end of the
+ *		database. No key comparisons are performed. This option allows
+ *		fast bulk loading when keys are already known to be in the
+ *		correct order. Loading unsorted keys with this flag will cause
+ *		data corruption.
+ *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
+ * </ul>
+ * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
+ *	<li>#MDB_TXN_FULL - the transaction has too many dirty pages.
+ *	<li>EACCES - an attempt was made to write in a read-only transaction.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
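+ *
+ * For example (illustrative), storing a string under an integer key:
+ *
+ *	MDB_val key, data;
+ *	key.mv_size = sizeof(ikey);	key.mv_data = &ikey;
+ *	data.mv_size = strlen(sval) + 1;	data.mv_data = sval;
+ *	rc = mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE);
+ *	// on MDB_KEYEXIST, data now points at the existing item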
+ */ +int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete items from a database. + * + * This function removes key/data pairs from the database. + * If the database does not support sorted duplicate data items + * (#MDB_DUPSORT) the data parameter is ignored. + * If the database supports sorted duplicates and the data parameter + * is NULL, all of the duplicate data items for the key will be + * deleted. Otherwise, if the data parameter is non-NULL + * only the matching data item will be deleted. + * This function will return #MDB_NOTFOUND if the specified key/data + * pair is not in the database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] key The key to delete from the database + * @param[in] data The data to delete + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EACCES - an attempt was made to write in a read-only transaction.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
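+ *
+ * For example (illustrative), removing a key together with all of its
+ * duplicates in a #MDB_DUPSORT database:
+ *
+ *	rc = mdb_del(txn, dbi, &key, NULL);	// NULL data: drop all dups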
+ */ +int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); + + /** @brief Create a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * A cursor cannot be used when its database handle is closed. Nor + * when its transaction has ended, except with #mdb_cursor_renew(). + * It can be discarded with #mdb_cursor_close(). + * A cursor in a write-transaction can be closed before its transaction + * ends, and will otherwise be closed when its transaction ends. + * A cursor in a read-only transaction must be closed explicitly, before + * or after its transaction ends. It can be reused with + * #mdb_cursor_renew() before finally closing it. + * @note Earlier documentation said that cursors in every transaction + * were closed when the transaction committed or aborted. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] cursor Address where the new #MDB_cursor handle will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
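+ *
+ * For example (illustrative):
+ *
+ *	MDB_cursor *mc;
+ *	rc = mdb_cursor_open(txn, dbi, &mc);
+ *	// ... iterate or update through mc ...
+ *	mdb_cursor_close(mc);	// mandatory for read-only txns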
+ */ +int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor); + + /** @brief Close a cursor handle. + * + * The cursor handle will be freed and must not be used again after this call. + * Its transaction must still be live if it is a write-transaction. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +void mdb_cursor_close(MDB_cursor *cursor); + + /** @brief Renew a cursor handle. + * + * A cursor is associated with a specific transaction and database. + * Cursors that are only used in read-only + * transactions may be re-used, to avoid unnecessary malloc/free overhead. + * The cursor may be associated with a new read-only transaction, and + * referencing the same database handle as it was created with. + * This may be done whether the previous transaction is live or dead. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
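+ *
+ * A reuse sketch (illustrative), pairing with #mdb_txn_renew():
+ *
+ *	mdb_txn_reset(rtxn);
+ *	// ... later ...
+ *	rc = mdb_txn_renew(rtxn);
+ *	rc = mdb_cursor_renew(rtxn, mc);	// rebind instead of reopening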
+ */ +int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor); + + /** @brief Return the cursor's transaction handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_txn *mdb_cursor_txn(MDB_cursor *cursor); + + /** @brief Return the cursor's database handle. + * + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + */ +MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor); + + /** @brief Retrieve by cursor. + * + * This function retrieves key/data pairs from the database. The address and length + * of the key are returned in the object to which \b key refers (except for the + * case of the #MDB_SET option, in which the \b key object is unchanged), and + * the address and length of the data are returned in the object to which \b data + * refers. + * See #mdb_get() for restrictions on using the output values. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in,out] key The key for a retrieved item + * @param[in,out] data The data of a retrieved item + * @param[in] op A cursor operation #MDB_cursor_op + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>#MDB_NOTFOUND - no matching key found.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
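+ *
+ * A full-scan sketch (illustrative):
+ *
+ *	MDB_val key, data;
+ *	while ((rc = mdb_cursor_get(mc, &key, &data, MDB_NEXT)) == 0) {
+ *		// ... visit key.mv_data / data.mv_data ...
+ *	}
+ *	// rc == MDB_NOTFOUND when the scan is exhausted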
+ */ +int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + MDB_cursor_op op); + + /** @brief Store by cursor. + * + * This function stores key/data pairs into the database. + * If the function fails for any reason, the state of the cursor will be + * unchanged. If the function succeeds and an item is inserted into the + * database, the cursor is always positioned to refer to the newly inserted item. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] key The key operated on. + * @param[in] data The data operated on. + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + *
+ * <ul>
+ *	<li>#MDB_CURRENT - overwrite the data of the key/data pair to which
+ *		the cursor refers with the specified data item. The \b key
+ *		parameter is ignored.
+ *	<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
+ *		already appear in the database. This flag may only be specified
+ *		if the database was opened with #MDB_DUPSORT. The function will
+ *		return #MDB_KEYEXIST if the key/data pair already appears in the
+ *		database.
+ *	<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
+ *		does not already appear in the database. The function will return
+ *		#MDB_KEYEXIST if the key already appears in the database, even if
+ *		the database supports duplicates (#MDB_DUPSORT).
+ *	<li>#MDB_RESERVE - reserve space for data of the given size, but
+ *		don't copy the given data. Instead, return a pointer to the
+ *		reserved space, which the caller can fill in later. This saves
+ *		an extra memcpy if the data is being generated later.
+ *	<li>#MDB_APPEND - append the given key/data pair to the end of the
+ *		database. No key comparisons are performed. This option allows
+ *		fast bulk loading when keys are already known to be in the
+ *		correct order. Loading unsorted keys with this flag will cause
+ *		data corruption.
+ *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
+ *	<li>#MDB_MULTIPLE - store multiple contiguous data elements in a
+ *		single request. This flag may only be specified if the database
+ *		was opened with #MDB_DUPFIXED. The \b data argument must be an
+ *		array of two MDB_vals. The mv_size of the first MDB_val must be
+ *		the size of a single data element. The mv_data of the first MDB_val
+ *		must point to the beginning of the array of contiguous data elements.
+ *		The mv_size of the second MDB_val must be the count of the number
+ *		of data elements to store. On return this field will be set to
+ *		the count of the number of elements actually written. The mv_data
+ *		of the second MDB_val is unused.
+ * </ul>
+ * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
+ *	<li>#MDB_TXN_FULL - the transaction has too many dirty pages.
+ *	<li>EACCES - an attempt was made to modify a read-only database.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
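+ *
+ * A #MDB_MULTIPLE sketch (illustrative), bulk-loading 4-byte items
+ * into a #MDB_DUPFIXED database:
+ *
+ *	uint32_t items[100];
+ *	MDB_val mv[2];
+ *	mv[0].mv_size = sizeof(items[0]);	// size of one element
+ *	mv[0].mv_data = items;			// start of the array
+ *	mv[1].mv_size = 100;			// count in; written count out
+ *	rc = mdb_cursor_put(mc, &key, mv, MDB_MULTIPLE);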
+ */ +int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + unsigned int flags); + + /** @brief Delete current key/data pair + * + * This function deletes the key/data pair to which the cursor refers. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[in] flags Options for this operation. This parameter + * must be set to 0 or one of the values described here. + *
+ * <ul>
+ *	<li>#MDB_NODUPDATA - delete all of the data items for the current key.
+ *		This flag may only be specified if the database was opened with #MDB_DUPSORT.
+ * </ul>
+ * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EACCES - an attempt was made to modify a read-only database.
+ *	<li>EINVAL - an invalid parameter was specified.
+ * </ul>
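+ *
+ * For example (illustrative), deleting the current key and all of its
+ * duplicate data items in one call:
+ *
+ *	rc = mdb_cursor_del(mc, MDB_NODUPDATA);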
+ */ +int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags); + + /** @brief Return count of duplicates for current key. + * + * This call is only valid on databases that support sorted duplicate + * data items #MDB_DUPSORT. + * @param[in] cursor A cursor handle returned by #mdb_cursor_open() + * @param[out] countp Address where the count will be stored + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + *
+ * <ul>
+ *	<li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
+ * </ul>
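+ *
+ * For example (illustrative):
+ *
+ *	size_t ndup;
+ *	rc = mdb_cursor_count(mc, &ndup);	// dups under the current key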
+ */ +int mdb_cursor_count(MDB_cursor *cursor, size_t *countp); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two data items were keys in the + * specified database. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief Compare two data items according to a particular database. + * + * This returns a comparison as if the two items were data items of + * the specified database. The database must have the #MDB_DUPSORT flag. + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[in] a The first item to compare + * @param[in] b The second item to compare + * @return < 0 if a < b, 0 if a == b, > 0 if a > b + */ +int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief A callback function used to print a message from the library. + * + * @param[in] msg The string to be printed. + * @param[in] ctx An arbitrary context pointer for the callback. + * @return < 0 on failure, 0 on success. + */ +typedef int (MDB_msg_func)(const char *msg, void *ctx); + + /** @brief Dump the entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] func A #MDB_msg_func function + * @param[in] ctx Anything the message function needs + * @return < 0 on failure, 0 on success. + */ +int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); + + /** @brief Check for stale entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] dead Number of stale slots that were cleared + * @return 0 on success, non-zero on failure. + */ +int mdb_reader_check(MDB_env *env, int *dead); +/** @} */ + +#ifdef __cplusplus +} +#endif +#endif /* _LMDB_H_ */ diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c new file mode 100644 index 0000000000..4d686007ba --- /dev/null +++ b/libraries/liblmdb/mdb.c @@ -0,0 +1,8112 @@ +/** @file mdb.c + * @brief memory-mapped database library + * + * A Btree-based database management library modeled loosely on the + * BerkeleyDB API, but much simplified. + */ +/* + * Copyright 2011-2013 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + * + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#include +#ifdef HAVE_SYS_FILE_H +#include +#endif +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) +#include +#include /* defines BYTE_ORDER on HPUX and Solaris */ +#endif + +#if defined(__APPLE__) || defined (BSD) +# define MDB_USE_POSIX_SEM 1 +# define MDB_FDATASYNC fsync +#elif defined(ANDROID) +# define MDB_FDATASYNC fsync +#endif + +#ifndef _WIN32 +#include +#ifdef MDB_USE_POSIX_SEM +#include +#endif +#endif + +#ifdef USE_VALGRIND +#include +#define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z) +#define VGMEMP_ALLOC(h,a,s) VALGRIND_MEMPOOL_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) VALGRIND_MEMPOOL_FREE(h,a) +#define VGMEMP_DESTROY(h) VALGRIND_DESTROY_MEMPOOL(h) +#define VGMEMP_DEFINED(a,s) VALGRIND_MAKE_MEM_DEFINED(a,s) +#else +#define VGMEMP_CREATE(h,r,z) +#define VGMEMP_ALLOC(h,a,s) +#define VGMEMP_FREE(h,a) +#define VGMEMP_DESTROY(h) +#define VGMEMP_DEFINED(a,s) +#endif + +#ifndef BYTE_ORDER +# if (defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)) +/* Solaris just defines one or the other */ +# define LITTLE_ENDIAN 1234 +# define BIG_ENDIAN 4321 +# ifdef _LITTLE_ENDIAN +# define BYTE_ORDER LITTLE_ENDIAN +# else +# define BYTE_ORDER BIG_ENDIAN +# endif +# else +# define BYTE_ORDER __BYTE_ORDER +# endif +#endif + +#ifndef LITTLE_ENDIAN +#define LITTLE_ENDIAN __LITTLE_ENDIAN +#endif +#ifndef BIG_ENDIAN +#define BIG_ENDIAN __BIG_ENDIAN +#endif + +#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) +#define MISALIGNED_OK 1 +#endif + +#include "lmdb.h" +#include "midl.h" + +#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN) +# error "Unknown or unsupported endianness (BYTE_ORDER)" +#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF +# error "Two's complement, reasonably sized integer types, please" +#endif + +/** @defgroup internal MDB Internals + * @{ + */ +/** @defgroup compat Windows Compatibility Macros + * A bunch of macros to minimize the amount of platform-specific ifdefs + * needed throughout the rest of the code. When the features this library + * needs are similar enough to POSIX to be hidden in a one-or-two line + * replacement, this macro approach is used. + * @{ + */ +#ifdef _WIN32 +#define pthread_t DWORD +#define pthread_mutex_t HANDLE +#define pthread_key_t DWORD +#define pthread_self() GetCurrentThreadId() +#define pthread_key_create(x,y) \ + ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0) +#define pthread_key_delete(x) TlsFree(x) +#define pthread_getspecific(x) TlsGetValue(x) +#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 
0 : ErrCode()) +#define pthread_mutex_unlock(x) ReleaseMutex(x) +#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) +#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) +#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex) +#define getpid() GetCurrentProcessId() +#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) +#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) +#define ErrCode() GetLastError() +#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} +#define close(fd) (CloseHandle(fd) ? 0 : -1) +#define munmap(ptr,len) UnmapViewOfFile(ptr) +#else + +#ifdef MDB_USE_POSIX_SEM + +#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex) +#define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) +#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex) +#define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) + +static int +mdb_sem_wait(sem_t *sem) +{ + int rc; + while ((rc = sem_wait(sem)) && (rc = errno) == EINTR) ; + return rc; +} + +#else + /** Lock the reader mutex. + */ +#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) + /** Unlock the reader mutex. + */ +#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex) + + /** Lock the writer mutex. + * Only a single write transaction is allowed at a time. Other writers + * will block waiting for this mutex. + */ +#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex) + /** Unlock the writer mutex. + */ +#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) +#endif /* MDB_USE_POSIX_SEM */ + + /** Get the error code for the last failed system function. + */ +#define ErrCode() errno + + /** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#define HANDLE int + + /** A value for an invalid file handle. + * Mainly used to initialize file variables and signify that they are + * unused. + */ +#define INVALID_HANDLE_VALUE (-1) + + /** Get the size of a memory page for the system. + * This is the basic size that the platform's memory manager uses, and is + * fundamental to the use of memory-mapped files. + */ +#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) +#endif + +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) +#define MNAME_LEN 32 +#else +#define MNAME_LEN (sizeof(pthread_mutex_t)) +#endif + +/** @} */ + +#ifndef _WIN32 +/** A flag for opening a file and requesting synchronous data writes. + * This is only used when writing a meta page. It's not strictly needed; + * we could just do a normal write and then immediately perform a flush. + * But if this flag is available it saves us an extra system call. + * + * @note If O_DSYNC is undefined but exists in /usr/include, + * preferably set some compiler flag to get the definition. + * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC. + */ +#ifndef MDB_DSYNC +# define MDB_DSYNC O_DSYNC +#endif +#endif + +/** Function for flushing the data of a file. Define this to fsync + * if fdatasync() is not supported. + */ +#ifndef MDB_FDATASYNC +# define MDB_FDATASYNC fdatasync +#endif + +#ifndef MDB_MSYNC +# define MDB_MSYNC(addr,len,flags) msync(addr,len,flags) +#endif + +#ifndef MS_SYNC +#define MS_SYNC 1 +#endif + +#ifndef MS_ASYNC +#define MS_ASYNC 0 +#endif + + /** A page number in the database. 
+ * Note that 64 bit page numbers are overkill, since pages themselves + * already represent 12-13 bits of addressable memory, and the OS will + * always limit applications to a maximum of 63 bits of address space. + * + * @note In the #MDB_node structure, we only store 48 bits of this value, + * which thus limits us to only 60 bits of addressable data. + */ +typedef MDB_ID pgno_t; + + /** A transaction ID. + * See struct MDB_txn.mt_txnid for details. + */ +typedef MDB_ID txnid_t; + +/** @defgroup debug Debug Macros + * @{ + */ +#ifndef MDB_DEBUG + /** Enable debug output. + * Set this to 1 for copious tracing. Set to 2 to add dumps of all IDLs + * read from and written to the database (used for free space management). + */ +#define MDB_DEBUG 0 +#endif + +#if !(__STDC_VERSION__ >= 199901L || defined(__GNUC__)) +# undef MDB_DEBUG +# define MDB_DEBUG 0 +# define DPRINTF (void) /* Vararg macros may be unsupported */ +#elif MDB_DEBUG +static int mdb_debug; +static txnid_t mdb_debug_start; + + /** Print a debug message with printf formatting. */ +# define DPRINTF(fmt, ...) /**< Requires 2 or more args */ \ + ((void) ((mdb_debug) && \ + fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__))) +#else +# define DPRINTF(fmt, ...) ((void) 0) +# define MDB_DEBUG_SKIP +#endif + /** Print a debug string. + * The string is printed literally, with no format processing. + */ +#define DPUTS(arg) DPRINTF("%s", arg) +/** @} */ + + /** A default memory page size. + * The actual size is platform-dependent, but we use this for + * boot-strapping. We probably should not be using this any more. + * The #GET_PAGESIZE() macro is used to get the actual size. + * + * Note that we don't currently support Huge pages. On Linux, + * regular data files cannot use Huge pages, and in general + * Huge pages aren't actually pageable. We rely on the OS + * demand-pager to read our data and page it out when memory + * pressure from other processes is high. So until OSs have + * actual paging support for Huge pages, they're not viable. + */ +#define MDB_PAGESIZE 4096 + + /** The minimum number of keys required in a database page. + * Setting this to a larger value will place a smaller bound on the + * maximum size of a data item. Data items larger than this size will + * be pushed into overflow pages instead of being stored directly in + * the B-tree node. This value used to default to 4. With a page size + * of 4096 bytes that meant that any item larger than 1024 bytes would + * go into an overflow page. That also meant that on average 2-3KB of + * each overflow page was wasted space. The value cannot be lower than + * 2 because then there would no longer be a tree structure. With this + * value, items larger than 2KB will go into overflow pages, and on + * average only 1KB will be wasted. + */ +#define MDB_MINKEYS 2 + + /** A stamp that identifies a file as an MDB file. + * There's nothing special about this value other than that it is easily + * recognizable, and it will reflect any byte order mismatches. + */ +#define MDB_MAGIC 0xBEEFC0DE + + /** The version number for a database's datafile format. */ +#define MDB_DATA_VERSION 1 + /** The version number for a database's lockfile format. */ +#define MDB_LOCK_VERSION 1 + + /** @brief The maximum size of a key in the database. + * + * The library rejects bigger keys, and cannot deal with records + * with bigger keys stored by a library with bigger max keysize. + * + * We require that keys all fit onto a regular page. 
This limit + * could be raised a bit further if needed; to something just + * under #MDB_PAGESIZE / #MDB_MINKEYS. + * + * Note that data items in an #MDB_DUPSORT database are actually keys + * of a subDB, so they're also limited to this size. + */ +#ifndef MDB_MAXKEYSIZE +#define MDB_MAXKEYSIZE 511 +#endif + + /** @brief The maximum size of a data item. + * + * We only store a 32 bit value for node sizes. + */ +#define MAXDATASIZE 0xffffffffUL + +#if MDB_DEBUG + /** A key buffer. + * @ingroup debug + * This is used for printing a hex dump of a key's contents. + */ +#define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)] + /** Display a key in hex. + * @ingroup debug + * Invoke a function to display a key in hex. + */ +#define DKEY(x) mdb_dkey(x, kbuf) +#else +#define DKBUF typedef int dummy_kbuf /* so we can put ';' after */ +#define DKEY(x) 0 +#endif + + /** An invalid page number. + * Mainly used to denote an empty tree. + */ +#define P_INVALID (~(pgno_t)0) + + /** Test if the flags \b f are set in a flag word \b w. */ +#define F_ISSET(w, f) (((w) & (f)) == (f)) + + /** Used for offsets within a single page. + * Since memory pages are typically 4 or 8KB in size, 12-13 bits, + * this is plenty. + */ +typedef uint16_t indx_t; + + /** Default size of memory map. + * This is certainly too small for any actual applications. Apps should always set + * the size explicitly using #mdb_env_set_mapsize(). + */ +#define DEFAULT_MAPSIZE 1048576 + +/** @defgroup readers Reader Lock Table + * Readers don't acquire any locks for their data access. Instead, they + * simply record their transaction ID in the reader table. The reader + * mutex is needed just to find an empty slot in the reader table. The + * slot's address is saved in thread-specific data so that subsequent read + * transactions started by the same thread need no further locking to proceed. + * + * If #MDB_NOTLS is set, the slot address is not saved in thread-specific data. + * + * No reader table is used if the database is on a read-only filesystem. + * + * Since the database uses multi-version concurrency control, readers don't + * actually need any locking. This table is used to keep track of which + * readers are using data from which old transactions, so that we'll know + * when a particular old transaction is no longer in use. Old transactions + * that have discarded any data pages can then have those pages reclaimed + * for use by a later write transaction. + * + * The lock table is constructed such that reader slots are aligned with the + * processor's cache line size. Any slot is only ever used by one thread. + * This alignment guarantees that there will be no contention or cache + * thrashing as threads update their own slot info, and also eliminates + * any need for locking when accessing a slot. + * + * A writer thread will scan every slot in the table to determine the oldest + * outstanding reader transaction. Any freed pages older than this will be + * reclaimed by the writer. The writer doesn't use any locks when scanning + * this table. This means that there's no guarantee that the writer will + * see the most up-to-date reader info, but that's not required for correct + * operation - all we need is to know the upper bound on the oldest reader, + * we don't care at all about the newest reader. So the only consequence of + * reading stale information here is that old pages might hang around a + * while longer before being reclaimed. 
That's actually good anyway, because + * the longer we delay reclaiming old pages, the more likely it is that a + * string of contiguous pages can be found after coalescing old pages from + * many old transactions together. + * @{ + */ + /** Number of slots in the reader table. + * This value was chosen somewhat arbitrarily. 126 readers plus a + * couple mutexes fit exactly into 8KB on my development machine. + * Applications should set the table size using #mdb_env_set_maxreaders(). + */ +#define DEFAULT_READERS 126 + + /** The size of a CPU cache line in bytes. We want our lock structures + * aligned to this size to avoid false cache line sharing in the + * lock table. + * This value works for most CPUs. For Itanium this should be 128. + */ +#ifndef CACHELINE +#define CACHELINE 64 +#endif + + /** The information we store in a single slot of the reader table. + * In addition to a transaction ID, we also record the process and + * thread ID that owns a slot, so that we can detect stale information, + * e.g. threads or processes that went away without cleaning up. + * @note We currently don't check for stale records. We simply re-init + * the table when we know that we're the only process opening the + * lock file. + */ +typedef struct MDB_rxbody { + /** Current Transaction ID when this transaction began, or (txnid_t)-1. + * Multiple readers that start at the same time will probably have the + * same ID here. Again, it's not important to exclude them from + * anything; all we need to know is which version of the DB they + * started from so we can avoid overwriting any data used in that + * particular version. + */ + txnid_t mrb_txnid; + /** The process ID of the process owning this reader txn. */ + pid_t mrb_pid; + /** The thread ID of the thread owning this txn. */ + pthread_t mrb_tid; +} MDB_rxbody; + + /** The actual reader record, with cacheline padding. */ +typedef struct MDB_reader { + union { + MDB_rxbody mrx; + /** shorthand for mrb_txnid */ +#define mr_txnid mru.mrx.mrb_txnid +#define mr_pid mru.mrx.mrb_pid +#define mr_tid mru.mrx.mrb_tid + /** cache line alignment */ + char pad[(sizeof(MDB_rxbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mru; +} MDB_reader; + + /** The header for the reader table. + * The table resides in a memory-mapped file. (This is a different file + * than is used for the main database.) + * + * For POSIX the actual mutexes reside in the shared memory of this + * mapped file. On Windows, mutexes are named objects allocated by the + * kernel; we store the mutex names in this mapped file so that other + * processes can grab them. This same approach is also used on + * MacOSX/Darwin (using named semaphores) since MacOSX doesn't support + * process-shared POSIX mutexes. For these cases where a named object + * is used, the object name is derived from a 64 bit FNV hash of the + * environment pathname. As such, naming collisions are extremely + * unlikely. If a collision occurs, the results are unpredictable. + */ +typedef struct MDB_txbody { + /** Stamp identifying this as an MDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mtb_magic; + /** Version number of this lock file. Must be set to #MDB_LOCK_VERSION. */ + uint32_t mtb_version; +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) + char mtb_rmname[MNAME_LEN]; +#else + /** Mutex protecting access to this table. + * This is the reader lock that #LOCK_MUTEX_R acquires. + */ + pthread_mutex_t mtb_mutex; +#endif + /** The ID of the last transaction committed to the database. 
+ * This is recorded here only for convenience; the value can always + * be determined by reading the main database meta pages. + */ + txnid_t mtb_txnid; + /** The number of slots that have been used in the reader table. + * This always records the maximum count, it is not decremented + * when readers release their slots. + */ + unsigned mtb_numreaders; +} MDB_txbody; + + /** The actual reader table definition. */ +typedef struct MDB_txninfo { + union { + MDB_txbody mtb; +#define mti_magic mt1.mtb.mtb_magic +#define mti_version mt1.mtb.mtb_version +#define mti_mutex mt1.mtb.mtb_mutex +#define mti_rmname mt1.mtb.mtb_rmname +#define mti_txnid mt1.mtb.mtb_txnid +#define mti_numreaders mt1.mtb.mtb_numreaders + char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; + } mt1; + union { +#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) + char mt2_wmname[MNAME_LEN]; +#define mti_wmname mt2.mt2_wmname +#else + pthread_mutex_t mt2_wmutex; +#define mti_wmutex mt2.mt2_wmutex +#endif + char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; + } mt2; + MDB_reader mti_readers[1]; +} MDB_txninfo; +/** @} */ + +/** Common header for all page types. + * Overflow records occupy a number of contiguous pages with no + * headers on any page after the first. + */ +typedef struct MDB_page { +#define mp_pgno mp_p.p_pgno +#define mp_next mp_p.p_next + union { + pgno_t p_pgno; /**< page number */ + void * p_next; /**< for in-memory list of freed structs */ + } mp_p; + uint16_t mp_pad; +/** @defgroup mdb_page Page Flags + * @ingroup internal + * Flags for the page headers. + * @{ + */ +#define P_BRANCH 0x01 /**< branch page */ +#define P_LEAF 0x02 /**< leaf page */ +#define P_OVERFLOW 0x04 /**< overflow page */ +#define P_META 0x08 /**< meta page */ +#define P_DIRTY 0x10 /**< dirty page */ +#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ +#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */ +#define P_KEEP 0x8000 /**< leave this page alone during spill */ +/** @} */ + uint16_t mp_flags; /**< @ref mdb_page */ +#define mp_lower mp_pb.pb.pb_lower +#define mp_upper mp_pb.pb.pb_upper +#define mp_pages mp_pb.pb_pages + union { + struct { + indx_t pb_lower; /**< lower bound of free space */ + indx_t pb_upper; /**< upper bound of free space */ + } pb; + uint32_t pb_pages; /**< number of overflow pages */ + } mp_pb; + indx_t mp_ptrs[1]; /**< dynamic size */ +} MDB_page; + + /** Size of the page header, excluding dynamic data at the end */ +#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) + + /** Address of first usable data byte in a page, after the header */ +#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) + + /** Number of nodes on a page */ +#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) + + /** The amount of space remaining in the page */ +#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) + + /** The percentage of space used in the page, in tenths of a percent. */ +#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ + ((env)->me_psize - PAGEHDRSZ)) + /** The minimum page fill factor, in tenths of a percent. + * Pages emptier than this are candidates for merging. 
+ */ +#define FILL_THRESHOLD 250 + + /** Test if a page is a leaf page */ +#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) + /** Test if a page is a LEAF2 page */ +#define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2) + /** Test if a page is a branch page */ +#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) + /** Test if a page is an overflow page */ +#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) + /** Test if a page is a sub page */ +#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) + + /** The number of overflow pages needed to store the given size. */ +#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) + + /** Header for a single key/data pair within a page. + * We guarantee 2-byte alignment for nodes. + */ +typedef struct MDB_node { + /** lo and hi are used for data size on leaf nodes and for + * child pgno on branch nodes. On 64 bit platforms, flags + * is also used for pgno. (Branch nodes have no flags). + * They are in host byte order in case that lets some + * accesses be optimized into a 32-bit word access. + */ +#define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN] +#define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */ + unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */ +/** @defgroup mdb_node Node Flags + * @ingroup internal + * Flags for node headers. + * @{ + */ +#define F_BIGDATA 0x01 /**< data put on overflow page */ +#define F_SUBDATA 0x02 /**< data is a sub-database */ +#define F_DUPDATA 0x04 /**< data has duplicates */ + +/** valid flags for #mdb_node_add() */ +#define NODE_ADD_FLAGS (F_DUPDATA|F_SUBDATA|MDB_RESERVE|MDB_APPEND) + +/** @} */ + unsigned short mn_flags; /**< @ref mdb_node */ + unsigned short mn_ksize; /**< key size */ + char mn_data[1]; /**< key and data are appended here */ +} MDB_node; + + /** Size of the node header, excluding dynamic data at the end */ +#define NODESIZE offsetof(MDB_node, mn_data) + + /** Bit position of top word in page number, for shifting mn_flags */ +#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) + + /** Size of a node in a branch page with a given key. + * This is just the node header plus the key, there is no data. + */ +#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) + + /** Size of a node in a leaf page with a given key and data. + * This is node header plus key plus data size. + */ +#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) + + /** Address of node \b i in page \b p */ +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) + + /** Address of the key for the node */ +#define NODEKEY(node) (void *)((node)->mn_data) + + /** Address of the data for a node */ +#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) + + /** Get the page number pointed to by a branch node */ +#define NODEPGNO(node) \ + ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \ + (PGNO_TOPWORD ? 
((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0)) + /** Set the page number in a branch node */ +#define SETPGNO(node,pgno) do { \ + (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ + if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0) + + /** Get the size of the data in a leaf node */ +#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) + /** Set the size of the data for a leaf node */ +#define SETDSZ(node,size) do { \ + (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) + /** The size of a key in a node */ +#define NODEKSZ(node) ((node)->mn_ksize) + + /** Copy a page number from src to dst */ +#ifdef MISALIGNED_OK +#define COPY_PGNO(dst,src) dst = src +#else +#if SIZE_MAX > 4294967295UL +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d++ = *s++; \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#else +#define COPY_PGNO(dst,src) do { \ + unsigned short *s, *d; \ + s = (unsigned short *)&(src); \ + d = (unsigned short *)&(dst); \ + *d++ = *s++; \ + *d = *s; \ +} while (0) +#endif +#endif + /** The address of a key in a LEAF2 page. + * LEAF2 pages are used for #MDB_DUPFIXED sorted-duplicate sub-DBs. + * There are no node headers, keys are stored contiguously. + */ +#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) + + /** Set the \b node's key into \b key, if requested. */ +#define MDB_GET_KEY(node, key) { if ((key) != NULL) { \ + (key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } } + + /** Information about a single database in the environment. */ +typedef struct MDB_db { + uint32_t md_pad; /**< also ksize for LEAF2 pages */ + uint16_t md_flags; /**< @ref mdb_dbi_open */ + uint16_t md_depth; /**< depth of this tree */ + pgno_t md_branch_pages; /**< number of internal pages */ + pgno_t md_leaf_pages; /**< number of leaf pages */ + pgno_t md_overflow_pages; /**< number of overflow pages */ + size_t md_entries; /**< number of data items */ + pgno_t md_root; /**< the root page of this tree */ +} MDB_db; + + /** mdb_dbi_open flags */ +#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ +#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) +#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ + MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) + + /** Handle for the DB used to track free pages. */ +#define FREE_DBI 0 + /** Handle for the default DB. */ +#define MAIN_DBI 1 + + /** Meta page content. */ +typedef struct MDB_meta { + /** Stamp identifying this as an MDB file. It must be set + * to #MDB_MAGIC. */ + uint32_t mm_magic; + /** Version number of this lock file. Must be set to #MDB_DATA_VERSION. */ + uint32_t mm_version; + void *mm_address; /**< address for fixed mapping */ + size_t mm_mapsize; /**< size of mmap region */ + MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */ + /** The size of pages used in this DB */ +#define mm_psize mm_dbs[0].md_pad + /** Any persistent environment flags. @ref mdb_env */ +#define mm_flags mm_dbs[0].md_flags + pgno_t mm_last_pg; /**< last used page in file */ + txnid_t mm_txnid; /**< txnid that committed this page */ +} MDB_meta; + + /** Buffer for a stack-allocated dirty page. + * The members define size and alignment, and silence type + * aliasing warnings. They are not used directly; that could + * mean incorrectly using several union members in parallel. 
+ */ +typedef union MDB_pagebuf { + char mb_raw[MDB_PAGESIZE]; + MDB_page mb_page; + struct { + char mm_pad[PAGEHDRSZ]; + MDB_meta mm_meta; + } mb_metabuf; +} MDB_pagebuf; + + /** Auxiliary DB info. + * The information here is mostly static/read-only. There is + * only a single copy of this record in the environment. + */ +typedef struct MDB_dbx { + MDB_val md_name; /**< name of the database */ + MDB_cmp_func *md_cmp; /**< function for comparing keys */ + MDB_cmp_func *md_dcmp; /**< function for comparing data items */ + MDB_rel_func *md_rel; /**< user relocate function */ + void *md_relctx; /**< user-provided context for md_rel */ +} MDB_dbx; + + /** A database transaction. + * Every operation requires a transaction handle. + */ +struct MDB_txn { + MDB_txn *mt_parent; /**< parent of a nested txn */ + MDB_txn *mt_child; /**< nested txn under this txn */ + pgno_t mt_next_pgno; /**< next unallocated page */ + /** The ID of this transaction. IDs are integers incrementing from 1. + * Only committed write transactions increment the ID. If a transaction + * aborts, the ID may be re-used by the next writer. + */ + txnid_t mt_txnid; + MDB_env *mt_env; /**< the DB environment */ + /** The list of pages that became unused during this transaction. + */ + MDB_IDL mt_free_pgs; + /** The list of dirty pages we temporarily wrote to disk + * because the dirty list was full. + */ + MDB_IDL mt_spill_pgs; + union { + MDB_ID2L dirty_list; /**< for write txns: modified pages */ + MDB_reader *reader; /**< this thread's reader table slot or NULL */ + } mt_u; + /** Array of records for each DB known in the environment. */ + MDB_dbx *mt_dbxs; + /** Array of MDB_db records for each known DB */ + MDB_db *mt_dbs; +/** @defgroup mt_dbflag Transaction DB Flags + * @ingroup internal + * @{ + */ +#define DB_DIRTY 0x01 /**< DB was written in this txn */ +#define DB_STALE 0x02 /**< DB record is older than txnID */ +#define DB_NEW 0x04 /**< DB handle opened in this txn */ +#define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ +/** @} */ + /** In write txns, array of cursors for each DB */ + MDB_cursor **mt_cursors; + /** Array of flags for each DB */ + unsigned char *mt_dbflags; + /** Number of DB records in use. This number only ever increments; + * we don't decrement it when individual DB handles are closed. + */ + MDB_dbi mt_numdbs; + +/** @defgroup mdb_txn Transaction Flags + * @ingroup internal + * @{ + */ +#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ +#define MDB_TXN_ERROR 0x02 /**< an error has occurred */ +#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ +#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */ +/** @} */ + unsigned int mt_flags; /**< @ref mdb_txn */ + /** dirty_list maxsize - # of allocated pages allowed, including in parent txns */ + unsigned int mt_dirty_room; + /** Tracks which of the two meta pages was used at the start + * of this transaction. + */ + unsigned int mt_toggle; +}; + +/** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. + * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to + * raise this on a 64 bit machine. 
+ */ +#define CURSOR_STACK 32 + +struct MDB_xcursor; + + /** Cursors are used for all DB operations */ +struct MDB_cursor { + /** Next cursor on this DB in this txn */ + MDB_cursor *mc_next; + /** Backup of the original cursor if this cursor is a shadow */ + MDB_cursor *mc_backup; + /** Context used for databases with #MDB_DUPSORT, otherwise NULL */ + struct MDB_xcursor *mc_xcursor; + /** The transaction that owns this cursor */ + MDB_txn *mc_txn; + /** The database handle this cursor operates on */ + MDB_dbi mc_dbi; + /** The database record for this cursor */ + MDB_db *mc_db; + /** The database auxiliary record for this cursor */ + MDB_dbx *mc_dbx; + /** The @ref mt_dbflag for this database */ + unsigned char *mc_dbflag; + unsigned short mc_snum; /**< number of pushed pages */ + unsigned short mc_top; /**< index of top page, normally mc_snum-1 */ +/** @defgroup mdb_cursor Cursor Flags + * @ingroup internal + * Cursor state flags. + * @{ + */ +#define C_INITIALIZED 0x01 /**< cursor has been initialized and is valid */ +#define C_EOF 0x02 /**< No more data */ +#define C_SUB 0x04 /**< Cursor is a sub-cursor */ +#define C_SPLITTING 0x20 /**< Cursor is in page_split */ +#define C_UNTRACK 0x40 /**< Un-track cursor when closing */ +/** @} */ + unsigned int mc_flags; /**< @ref mdb_cursor */ + MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ + indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ +}; + + /** Context for sorted-dup records. + * We could have gone to a fully recursive design, with arbitrarily + * deep nesting of sub-databases. But for now we only handle these + * levels - main DB, optional sub-DB, sorted-duplicate DB. + */ +typedef struct MDB_xcursor { + /** A sub-cursor for traversing the Dup DB */ + MDB_cursor mx_cursor; + /** The database record for this Dup DB */ + MDB_db mx_db; + /** The auxiliary DB record for this Dup DB */ + MDB_dbx mx_dbx; + /** The @ref mt_dbflag for this Dup DB */ + unsigned char mx_dbflag; +} MDB_xcursor; + + /** State of FreeDB old pages, stored in the MDB_env */ +typedef struct MDB_pgstate { + pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ + txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */ +} MDB_pgstate; + + /** The database environment. */ +struct MDB_env { + HANDLE me_fd; /**< The main data file */ + HANDLE me_lfd; /**< The lock file */ + HANDLE me_mfd; /**< just for writing the meta pages */ + /** Failed to update the meta page. Probably an I/O error. */ +#define MDB_FATAL_ERROR 0x80000000U + /** Some fields are initialized. 
*/ +#define MDB_ENV_ACTIVE 0x20000000U + /** me_txkey is set */ +#define MDB_ENV_TXKEY 0x10000000U + /** Have liveness lock in reader table */ +#define MDB_LIVE_READER 0x08000000U + uint32_t me_flags; /**< @ref mdb_env */ + unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ + unsigned int me_maxreaders; /**< size of the reader table */ + unsigned int me_numreaders; /**< max numreaders set by this env */ + MDB_dbi me_numdbs; /**< number of DBs opened */ + MDB_dbi me_maxdbs; /**< size of the DB table */ + pid_t me_pid; /**< process ID of this env */ + char *me_path; /**< path to the DB files */ + char *me_map; /**< the memory map of the data file */ + MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */ + MDB_meta *me_metas[2]; /**< pointers to the two meta pages */ + MDB_txn *me_txn; /**< current write transaction */ + size_t me_mapsize; /**< size of the data memory map */ + off_t me_size; /**< current file size */ + pgno_t me_maxpg; /**< me_mapsize / me_psize */ + MDB_dbx *me_dbxs; /**< array of static DB info */ + uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ + pthread_key_t me_txkey; /**< thread-key for readers */ + MDB_pgstate me_pgstate; /**< state of old pages from freeDB */ +# define me_pglast me_pgstate.mf_pglast +# define me_pghead me_pgstate.mf_pghead + MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ + /** IDL of pages that became unused in a write txn */ + MDB_IDL me_free_pgs; + /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ + MDB_ID2L me_dirty_list; + /** Max number of freelist items that can fit in a single overflow page */ + int me_maxfree_1pg; + /** Max size of a node on a page */ + unsigned int me_nodemax; +#ifdef _WIN32 + int me_pidquery; /**< Used in OpenProcess */ + HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ + HANDLE me_wmutex; +#elif defined(MDB_USE_POSIX_SEM) + sem_t *me_rmutex; /* Shared mutexes are not supported */ + sem_t *me_wmutex; +#endif +}; + + /** Nested transaction */ +typedef struct MDB_ntxn { + MDB_txn mnt_txn; /* the transaction */ + MDB_pgstate mnt_pgstate; /* parent transaction's saved freestate */ +} MDB_ntxn; + + /** max number of pages to commit in one writev() call */ +#define MDB_COMMIT_PAGES 64 +#if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES +#undef MDB_COMMIT_PAGES +#define MDB_COMMIT_PAGES IOV_MAX +#endif + + /* max bytes to write in one call */ +#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) + +static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); +static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); +static int mdb_page_touch(MDB_cursor *mc); + +static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); +static int mdb_page_search_root(MDB_cursor *mc, + MDB_val *key, int modify); +#define MDB_PS_MODIFY 1 +#define MDB_PS_ROOTONLY 2 +static int mdb_page_search(MDB_cursor *mc, + MDB_val *key, int flags); +static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); + +#define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ +static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, + pgno_t newpgno, unsigned int nflags); + +static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); +static int mdb_env_pick_meta(const MDB_env *env); +static int mdb_env_write_meta(MDB_txn *txn); +#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM)) /* Drop unused excl arg */ +# define mdb_env_close0(env, excl) mdb_env_close1(env) +#endif +static 
void mdb_env_close0(MDB_env *env, int excl); + +static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); +static int mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags); +static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize); +static void mdb_node_shrink(MDB_page *mp, indx_t indx); +static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst); +static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); +static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); +static size_t mdb_branch_size(MDB_env *env, MDB_val *key); + +static int mdb_rebalance(MDB_cursor *mc); +static int mdb_update_key(MDB_cursor *mc, MDB_val *key); + +static void mdb_cursor_pop(MDB_cursor *mc); +static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp); + +static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf); +static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); +static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, + int *exactp); +static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); +static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); + +static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); +static void mdb_xcursor_init0(MDB_cursor *mc); +static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); + +static int mdb_drop0(MDB_cursor *mc, int subs); +static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); + +/** @cond */ +static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; +/** @endcond */ + +#ifdef _WIN32 +static SECURITY_DESCRIPTOR mdb_null_sd; +static SECURITY_ATTRIBUTES mdb_all_sa; +static int mdb_sec_inited; +#endif + +/** Return the library version info. 
 */
+char *
+mdb_version(int *major, int *minor, int *patch)
+{
+	if (major) *major = MDB_VERSION_MAJOR;
+	if (minor) *minor = MDB_VERSION_MINOR;
+	if (patch) *patch = MDB_VERSION_PATCH;
+	return MDB_VERSION_STRING;
+}
+
+/** Table of descriptions for MDB @ref errors */
+static char *const mdb_errstr[] = {
+	"MDB_KEYEXIST: Key/data pair already exists",
+	"MDB_NOTFOUND: No matching key/data pair found",
+	"MDB_PAGE_NOTFOUND: Requested page not found",
+	"MDB_CORRUPTED: Located page was wrong type",
+	"MDB_PANIC: Update of meta page failed",
+	"MDB_VERSION_MISMATCH: Database environment version mismatch",
+	"MDB_INVALID: File is not an MDB file",
+	"MDB_MAP_FULL: Environment mapsize limit reached",
+	"MDB_DBS_FULL: Environment maxdbs limit reached",
+	"MDB_READERS_FULL: Environment maxreaders limit reached",
+	"MDB_TLS_FULL: Thread-local storage keys full - too many environments open",
+	"MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big",
+	"MDB_CURSOR_FULL: Internal error - cursor stack limit reached",
+	"MDB_PAGE_FULL: Internal error - page has no more space",
+	"MDB_MAP_RESIZED: Database contents grew beyond environment mapsize",
+	"MDB_INCOMPATIBLE: Database flags changed or would change",
+	"MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
+};
+
+char *
+mdb_strerror(int err)
+{
+	int i;
+	if (!err)
+		return ("Successful return: 0");
+
+	if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) {
+		i = err - MDB_KEYEXIST;
+		return mdb_errstr[i];
+	}
+
+	return strerror(err);
+}
+
+#if MDB_DEBUG
+/** Display a key in hexadecimal and return the address of the result.
+ * @param[in] key the key to display
+ * @param[in] buf the buffer to write into. Should always be #DKBUF.
+ * @return The key in hexadecimal form.
+ */
+char *
+mdb_dkey(MDB_val *key, char *buf)
+{
+	char *ptr = buf;
+	unsigned char *c;
+	unsigned int i;
+
+	if (!key)
+		return "";
+	c = key->mv_data;
+
+	if (key->mv_size > MDB_MAXKEYSIZE)
+		return "MDB_MAXKEYSIZE";
+	/* may want to make this a dynamic check: if the key is mostly
+	 * printable characters, print it as-is instead of converting to hex.
+	 */
+#if 1
+	buf[0] = '\0';
+	for (i=0; i<key->mv_size; i++)
+		ptr += sprintf(ptr, "%02x", *c++);
+#else
+	sprintf(buf, "%.*s", key->mv_size, key->mv_data);
+#endif
+	return buf;
+}
+
+/** Display all the keys in the page. */
+void
+mdb_page_list(MDB_page *mp)
+{
+	MDB_node *node;
+	unsigned int i, nkeys, nsize;
+	MDB_val key;
+	DKBUF;
+
+	nkeys = NUMKEYS(mp);
+	fprintf(stderr, "Page %zu numkeys %d\n", mp->mp_pgno, nkeys);
+	for (i=0; i<nkeys; i++) {
+		node = NODEPTR(mp, i);
+		key.mv_size = node->mn_ksize;
+		key.mv_data = node->mn_data;
+		nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
+		if (IS_BRANCH(mp)) {
+			fprintf(stderr, "key %d: page %zu, %s\n", i, NODEPGNO(node),
+				DKEY(&key));
+		} else {
+			if (F_ISSET(node->mn_flags, F_BIGDATA))
+				nsize += sizeof(pgno_t);
+			else
+				nsize += NODEDSZ(node);
+			fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
+		}
+	}
+}
+
+void
+mdb_cursor_chk(MDB_cursor *mc)
+{
+	unsigned int i;
+	MDB_node *node;
+	MDB_page *mp;
+
+	if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return;
+	for (i=0; i<mc->mc_top; i++) {
+		mp = mc->mc_pg[i];
+		node = NODEPTR(mp, mc->mc_ki[i]);
+		if (NODEPGNO(node) != mc->mc_pg[i+1]->mp_pgno)
+			printf("oops!\n");
+	}
+	if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i]))
+		printf("ack!\n");
+}
+#endif
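The error table above is indexed by (err - MDB_KEYEXIST), so its order must match the MDB_* error codes declared in lmdb.h. From the caller's side the convention is simply: test for MDB_SUCCESS and hand anything else to mdb_strerror(). A minimal sketch using only the public API (the check_rc helper is illustrative, not part of LMDB):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lmdb.h"

    /* Illustrative helper: report any MDB_* or system error code and exit. */
    static void check_rc(int rc, const char *what)
    {
        if (rc != MDB_SUCCESS) {
            fprintf(stderr, "%s: %s\n", what, mdb_strerror(rc));
            exit(EXIT_FAILURE);
        }
    }

    int main(void)
    {
        int major, minor, patch;
        printf("linked against %s\n", mdb_version(&major, &minor, &patch));
        return 0;
    }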
+#if MDB_DEBUG > 2
+/** Count all the pages in each DB and in the freelist
+ * and make sure it matches the actual number of pages
+ * being used.
+ */
+static void mdb_audit(MDB_txn *txn)
+{
+	MDB_cursor mc;
+	MDB_val key, data;
+	MDB_ID freecount, count;
+	MDB_dbi i;
+	int rc;
+
+	freecount = 0;
+	mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
+	while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
+		freecount += *(MDB_ID *)data.mv_data;
+
+	count = 0;
+	for (i = 0; i<txn->mt_numdbs; i++) {
+		MDB_xcursor mx;
+		mdb_cursor_init(&mc, txn, i, &mx);
+		if (txn->mt_dbs[i].md_root == P_INVALID)
+			continue;
+		count += txn->mt_dbs[i].md_branch_pages +
+			txn->mt_dbs[i].md_leaf_pages +
+			txn->mt_dbs[i].md_overflow_pages;
+		if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
+			mdb_page_search(&mc, NULL, 0);
+			do {
+				unsigned j;
+				MDB_page *mp;
+				mp = mc.mc_pg[mc.mc_top];
+				for (j=0; j<NUMKEYS(mp); j++) {
+					MDB_node *leaf = NODEPTR(mp, j);
+					if (leaf->mn_flags & F_SUBDATA) {
+						MDB_db db;
+						memcpy(&db, NODEDATA(leaf), sizeof(db));
+						count += db.md_branch_pages + db.md_leaf_pages +
+							db.md_overflow_pages;
+					}
+				}
+			}
+			while (mdb_cursor_sibling(&mc, 1) == 0);
+		}
+	}
+	if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
+		fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n",
+			txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno);
+	}
+}
+#endif
+
+int
+mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
+{
+	return txn->mt_dbxs[dbi].md_cmp(a, b);
+}
+
+int
+mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
+{
+	return txn->mt_dbxs[dbi].md_dcmp(a, b);
+}
+
+/** Allocate a page.
+ * Re-use old malloc'd pages first for singletons, otherwise just malloc.
+ */
+static MDB_page *
+mdb_page_malloc(MDB_txn *txn, unsigned num)
+{
+	MDB_env *env = txn->mt_env;
+	MDB_page *ret = env->me_dpages;
+	size_t sz = env->me_psize;
+	if (num == 1) {
+		if (ret) {
+			VGMEMP_ALLOC(env, ret, sz);
+			VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
+			env->me_dpages = ret->mp_next;
+			return ret;
+		}
+	} else {
+		sz *= num;
+	}
+	if ((ret = malloc(sz)) != NULL) {
+		VGMEMP_ALLOC(env, ret, sz);
+	}
+	return ret;
+}
+
+/** Free a single page.
+ * Saves single pages to a list, for future reuse.
+ * (This is not used for multi-page overflow pages.)
+ */
+static void
+mdb_page_free(MDB_env *env, MDB_page *mp)
+{
+	mp->mp_next = env->me_dpages;
+	VGMEMP_FREE(env, mp);
+	env->me_dpages = mp;
+}
+
+/* Free a dirty page */
+static void
+mdb_dpage_free(MDB_env *env, MDB_page *dp)
+{
+	if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) {
+		mdb_page_free(env, dp);
+	} else {
+		/* large pages just get freed directly */
+		VGMEMP_FREE(env, dp);
+		free(dp);
+	}
+}
+
+/** Return all dirty pages to dpage list */
+static void
+mdb_dlist_free(MDB_txn *txn)
+{
+	MDB_env *env = txn->mt_env;
+	MDB_ID2L dl = txn->mt_u.dirty_list;
+	unsigned i, n = dl[0].mid;
+
+	for (i = 1; i <= n; i++) {
+		mdb_dpage_free(env, dl[i].mptr);
+	}
+	dl[0].mid = 0;
+}
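mdb_dlist_free() above is what makes mdb_txn_abort() inexpensive: single dirty pages go back on the environment's me_dpages reuse list rather than to free(). At the API level the only visible effect is that an aborted write leaves no trace. A minimal sketch, assuming env and dbi were opened elsewhere:

    /* An aborted write transaction is invisible to later readers. */
    static void abort_demo(MDB_env *env, MDB_dbi dbi)
    {
        MDB_txn *txn;
        MDB_val key = { 2, "k" }, data = { 2, "v" };

        if (mdb_txn_begin(env, NULL, 0, &txn) != MDB_SUCCESS)
            return;
        (void)mdb_put(txn, dbi, &key, &data, 0);  /* dirties pages */
        mdb_txn_abort(txn);                       /* pages recycled, not freed */

        if (mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) != MDB_SUCCESS)
            return;
        /* expect MDB_NOTFOUND: the aborted put never happened */
        (void)mdb_get(txn, dbi, &key, &data);
        mdb_txn_abort(txn);
    }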
+/* Set or clear P_KEEP in non-overflow, non-sub pages in known cursors.
+ * When clearing, only consider backup cursors (from parent txns) since
+ * other P_KEEP flags have already been cleared.
+ * @param[in] mc A cursor handle for the current operation.
+ * @param[in] pflags Flags of the pages to update:
+ * P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it.
+ */
+static void
+mdb_cursorpages_mark(MDB_cursor *mc, unsigned pflags)
+{
+	MDB_txn *txn = mc->mc_txn;
+	MDB_cursor *m2, *m3;
+	MDB_xcursor *mx;
+	unsigned i, j;
+
+	if (mc->mc_flags & C_UNTRACK)
+		mc = NULL;	/* will find mc in mt_cursors */
+	for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) {
+		for (; mc; mc=mc->mc_next) {
+			m2 = pflags == P_DIRTY ? mc : mc->mc_backup;
+			for (; m2; m2 = m2->mc_backup) {
+				for (m3=m2; m3->mc_flags & C_INITIALIZED; m3=&mx->mx_cursor) {
+					for (j=0; j<m3->mc_snum; j++)
+						if ((m3->mc_pg[j]->mp_flags & (P_SUBP|P_DIRTY|P_KEEP))
+							== pflags)
+							m3->mc_pg[j]->mp_flags ^= P_KEEP;
+					if (!(m3->mc_db->md_flags & MDB_DUPSORT))
+						break;
+					/* Cursor backups have mx malloced at the end of m2 */
+					mx = (m3 == mc ? m3->mc_xcursor : (MDB_xcursor *)(m3+1));
+				}
+			}
+		}
+		if (i == 0)
+			break;
+	}
+}
+
+static int mdb_page_flush(MDB_txn *txn);
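The spill logic documented below lowers how often a writer sees MDB_TXN_FULL, but the dirty-page budget is still finite. The portable way to bulk-load is therefore to bound each transaction's size and commit in batches; a sketch under that assumption (load_batch and its parameters are illustrative, not part of LMDB):

    /* Bulk-load in bounded batches so no single txn outgrows its
     * dirty-page budget (env/dbi assumed open, arrays caller-supplied). */
    static int load_batch(MDB_env *env, MDB_dbi dbi,
        MDB_val *keys, MDB_val *vals, size_t n, size_t batch)
    {
        MDB_txn *txn;
        size_t i, j;
        int rc;

        for (i = 0; i < n; i += batch) {
            size_t end = (i + batch < n) ? i + batch : n;
            if ((rc = mdb_txn_begin(env, NULL, 0, &txn)) != MDB_SUCCESS)
                return rc;
            for (j = i; j < end; j++) {
                if ((rc = mdb_put(txn, dbi, &keys[j], &vals[j], 0))) {
                    mdb_txn_abort(txn);  /* e.g. MDB_TXN_FULL: batch too big */
                    return rc;
                }
            }
            if ((rc = mdb_txn_commit(txn)) != MDB_SUCCESS)
                return rc;
        }
        return MDB_SUCCESS;
    }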
+/** Spill pages from the dirty list back to disk.
+ * This is intended to prevent running into #MDB_TXN_FULL situations,
+ * but note that they may still occur in a few cases:
+ *	1) pages in #MDB_DUPSORT sub-DBs are never spilled, so if there
+ *	 are too many of these dirtied in one txn, the txn may still get
+ *	 too full.
+ *	2) child txns may run out of space if their parents dirtied a
+ *	 lot of pages and never spilled them. TODO: we probably should do
+ *	 a preemptive spill during #mdb_txn_begin() of a child txn, if
+ *	 the parent's dirty_room is below a given threshold.
+ *	3) our estimate of the txn size could be too small. At the
+ *	 moment this seems unlikely.
+ *
+ * Otherwise, if not using nested txns, it is expected that apps will
+ * not run into #MDB_TXN_FULL any more. The pages are flushed to disk
+ * the same way as for a txn commit, e.g. their P_DIRTY flag is cleared.
+ * If the txn never references them again, they can be left alone.
+ * If the txn only reads them, they can be used without any fuss.
+ * If the txn writes them again, they can be dirtied immediately without
+ * going thru all of the work of #mdb_page_touch(). Such references are
+ * handled by #mdb_page_unspill().
+ *
+ * Also note, we never spill DB root pages, nor pages of active cursors,
+ * because we'll need these back again soon anyway. And in nested txns,
+ * we can't spill a page in a child txn if it was already spilled in a
+ * parent txn. That would alter the parent txns' data even though
+ * the child hasn't committed yet, and we'd have no way to undo it if
+ * the child aborted.
+ *
+ * @param[in] m0 cursor A cursor handle identifying the transaction and
+ *	database for which we are checking space.
+ * @param[in] key For a put operation, the key being stored.
+ * @param[in] data For a put operation, the data being stored.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data)
+{
+	MDB_txn *txn = m0->mc_txn;
+	MDB_page *dp;
+	MDB_ID2L dl = txn->mt_u.dirty_list;
+	unsigned int i, j;
+	int rc;
+
+	if (m0->mc_flags & C_SUB)
+		return MDB_SUCCESS;
+
+	/* Estimate how much space this op will take */
+	i = m0->mc_db->md_depth;
+	/* Named DBs also dirty the main DB */
+	if (m0->mc_dbi > MAIN_DBI)
+		i += txn->mt_dbs[MAIN_DBI].md_depth;
+	/* For puts, roughly factor in the key+data size */
+	if (key)
+		i += (LEAFSIZE(key, data) + txn->mt_env->me_psize) / txn->mt_env->me_psize;
+	i += i;	/* double it for good measure */
+
+	if (txn->mt_dirty_room > i)
+		return MDB_SUCCESS;
+
+	if (!txn->mt_spill_pgs) {
+		txn->mt_spill_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX);
+		if (!txn->mt_spill_pgs)
+			return ENOMEM;
+	}
+
+	/* Mark all the dirty root pages we want to preserve */
+	for (i=0; i<txn->mt_numdbs; i++) {
+		if (txn->mt_dbflags[i] & DB_DIRTY) {
+			j = mdb_mid2l_search(dl, txn->mt_dbs[i].md_root);
+			if (j <= dl[0].mid) {
+				dp = dl[j].mptr;
+				dp->mp_flags |= P_KEEP;
+			}
+		}
+	}
+
+	/* Preserve pages used by cursors */
+	mdb_cursorpages_mark(m0, P_DIRTY);
+
+	/* Save the page IDs of all the pages we're flushing */
+	for (i=1; i<=dl[0].mid; i++) {
+		dp = dl[i].mptr;
+		if (dp->mp_flags & P_KEEP)
+			continue;
+		/* Can't spill twice, make sure it's not already in a parent's
+		 * spill list.
+		 */
+		if (txn->mt_parent) {
+			MDB_txn *tx2;
+			for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) {
+				if (tx2->mt_spill_pgs) {
+					j = mdb_midl_search(tx2->mt_spill_pgs, dl[i].mid);
+					if (j <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[j] == dl[i].mid) {
+						dp->mp_flags |= P_KEEP;
+						break;
+					}
+				}
+			}
+			if (tx2)
+				continue;
+		}
+		if ((rc = mdb_midl_append(&txn->mt_spill_pgs, dl[i].mid)))
+			return rc;
+	}
+	mdb_midl_sort(txn->mt_spill_pgs);
+
+	rc = mdb_page_flush(txn);
+
+	mdb_cursorpages_mark(m0, P_DIRTY|P_KEEP);
+
+	if (rc == 0) {
+		if (txn->mt_parent) {
+			MDB_txn *tx2;
+			txn->mt_dirty_room = txn->mt_parent->mt_dirty_room - dl[0].mid;
+			/* dirty pages that are dirty in an ancestor don't
+			 * count against this txn's dirty_room.
+			 */
+			for (i=1; i<=dl[0].mid; i++) {
+				pgno_t pgno = dl[i].mid;
+				for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) {
+					j = mdb_mid2l_search(tx2->mt_u.dirty_list, pgno);
+					if (j <= tx2->mt_u.dirty_list[0].mid &&
+						tx2->mt_u.dirty_list[j].mid == pgno) {
+						txn->mt_dirty_room++;
+						break;
+					}
+				}
+			}
+		} else {
+			txn->mt_dirty_room = MDB_IDL_UM_MAX - dl[0].mid;
+		}
+		txn->mt_flags |= MDB_TXN_SPILLS;
+	}
+	return rc;
+}
+
+/** Find oldest txnid still referenced. Expects txn->mt_txnid > 0. */
+static txnid_t
+mdb_find_oldest(MDB_txn *txn)
+{
+	int i;
+	txnid_t mr, oldest = txn->mt_txnid - 1;
+	MDB_reader *r = txn->mt_env->me_txns->mti_readers;
+	for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) {
+		if (r[i].mr_pid) {
+			mr = r[i].mr_txnid;
+			if (oldest > mr)
+				oldest = mr;
+		}
+	}
+	return oldest;
+}
+
+/** Add a page to the txn's dirty list */
+static void
+mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
+{
+	MDB_ID2 mid;
+	int (*insert)(MDB_ID2L, MDB_ID2 *);
+
+	if (txn->mt_env->me_flags & MDB_WRITEMAP) {
+		insert = mdb_mid2l_append;
+	} else {
+		insert = mdb_mid2l_insert;
+	}
+	mid.mid = mp->mp_pgno;
+	mid.mptr = mp;
+	insert(txn->mt_u.dirty_list, &mid);
+	txn->mt_dirty_room--;
+}
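mdb_find_oldest() above is also why a long-lived read transaction makes the data file grow: pages freed after the oldest reader's snapshot can never be handed out by the allocator below. Readers that sit idle should release their snapshot with mdb_txn_reset() and re-arm it with mdb_txn_renew(), which keeps the reader slot but stops pinning pages. A sketch (use_snapshot and wait_for_work are hypothetical placeholders):

    extern void use_snapshot(MDB_txn *txn);  /* hypothetical read work */
    extern void wait_for_work(void);         /* hypothetical idle wait */

    static void reader_loop(MDB_env *env)
    {
        MDB_txn *txn;
        if (mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) != MDB_SUCCESS)
            return;
        for (;;) {
            use_snapshot(txn);    /* mdb_get()/cursor reads */
            mdb_txn_reset(txn);   /* drop snapshot, keep reader slot */
            wait_for_work();
            if (mdb_txn_renew(txn) != MDB_SUCCESS)
                break;            /* re-acquire a fresh snapshot */
        }
        mdb_txn_abort(txn);
    }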
+/** Allocate pages for writing.
+ * If there are free pages available from older transactions, they
+ * will be re-used first. Otherwise a new page will be allocated.
+ * @param[in] mc cursor A cursor handle identifying the transaction and
+ *	database for which we are allocating.
+ * @param[in] num the number of pages to allocate.
+ * @param[out] mp Address of the allocated page(s). Requests for multiple pages
+ *	will always be satisfied by a single contiguous chunk of memory.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
+{
+#ifdef MDB_PARANOID	/* Seems like we can ignore this now */
+	/* Get at most <Max_retries> more freeDB records once me_pghead
+	 * has enough pages. If not enough, use new pages from the map.
+	 * If <Paranoid> and mc is updating the freeDB, only get new
+	 * records if me_pghead is empty. Then the freelist cannot play
+	 * catch-up with itself by growing while trying to save it.
+	 */
+	enum { Paranoid = 1, Max_retries = 500 };
+#else
+	enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
+#endif
+	int rc, n2 = num-1, retry = Max_retries;
+	MDB_txn *txn = mc->mc_txn;
+	MDB_env *env = txn->mt_env;
+	pgno_t pgno, *mop = env->me_pghead;
+	unsigned i, j, k, mop_len = mop ? mop[0] : 0;
+	MDB_page *np;
+	txnid_t oldest = 0, last;
+	MDB_cursor_op op;
+	MDB_cursor m2;
+
+	*mp = NULL;
+
+	/* If our dirty list is already full, we can't do anything */
+	if (txn->mt_dirty_room == 0)
+		return MDB_TXN_FULL;
+
+	for (op = MDB_FIRST;; op = MDB_NEXT) {
+		MDB_val key, data;
+		MDB_node *leaf;
+		pgno_t *idl, old_id, new_id;
+
+		/* Seek a big enough contiguous page range. Prefer
+		 * pages at the tail, just truncating the list.
+		 */
+		if (mop_len >= (unsigned)num) {
+			i = mop_len;
+			do {
+				pgno = mop[i];
+				if (mop[i-n2] == pgno+n2)
+					goto search_done;
+			} while (--i >= (unsigned)num);
+			if (Max_retries < INT_MAX && --retry < 0)
+				break;
+		}
+
+		if (op == MDB_FIRST) {	/* 1st iteration */
+			/* Prepare to fetch more and coalesce */
+			oldest = mdb_find_oldest(txn);
+			last = env->me_pglast;
+			mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
+			if (last) {
+				op = MDB_SET_RANGE;
+				key.mv_data = &last; /* will look up last+1 */
+				key.mv_size = sizeof(last);
+			}
+			if (Paranoid && mc->mc_dbi == FREE_DBI)
+				retry = -1;
+		}
+		if (Paranoid && retry < 0 && mop_len)
+			break;
+
+		last++;
+		/* Do not fetch more if the record will be too recent */
+		if (oldest <= last)
+			break;
+		rc = mdb_cursor_get(&m2, &key, NULL, op);
+		if (rc) {
+			if (rc == MDB_NOTFOUND)
+				break;
+			return rc;
+		}
+		last = *(txnid_t*)key.mv_data;
+		if (oldest <= last)
+			break;
+		np = m2.mc_pg[m2.mc_top];
+		leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
+		if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS)
+			return rc;
+
+		idl = (MDB_ID *) data.mv_data;
+		i = idl[0];
+		if (!mop) {
+			if (!(env->me_pghead = mop = mdb_midl_alloc(i)))
+				return ENOMEM;
+		} else {
+			if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0)
+				return rc;
+			mop = env->me_pghead;
+		}
+		env->me_pglast = last;
+#if MDB_DEBUG > 1
+		DPRINTF("IDL read txn %zu root %zu num %u",
+			last, txn->mt_dbs[FREE_DBI].md_root, i);
+		for (k = i; k; k--)
+			DPRINTF("IDL %zu", idl[k]);
+#endif
+		/* Merge in descending sorted order */
+		j = mop_len;
+		k = mop_len += i;
+		mop[0] = (pgno_t)-1;
+		old_id = mop[j];
+		while (i) {
+			new_id = idl[i--];
+			for (; old_id < new_id; old_id = mop[--j])
+				mop[k--] = old_id;
+			mop[k--] = new_id;
+		}
+		mop[0] = mop_len;
+	}
+
+	/* Use new pages from the map when nothing suitable in the freeDB */
+	i = 0;
+	pgno = txn->mt_next_pgno;
+	if (pgno + num >= env->me_maxpg) {
+		DPUTS("DB size maxed out");
+		return MDB_MAP_FULL;
+	}
+
+search_done:
+	if (env->me_flags & MDB_WRITEMAP) {
+ np = (MDB_page *)(env->me_map + env->me_psize * pgno); + } else { + if (!(np = mdb_page_malloc(txn, num))) + return ENOMEM; + } + if (i) { + mop[0] = mop_len -= num; + /* Move any stragglers down */ + for (j = i-num; j < mop_len; ) + mop[++j] = mop[++i]; + } else { + txn->mt_next_pgno = pgno + num; + } + np->mp_pgno = pgno; + mdb_page_dirty(txn, np); + *mp = np; + + return MDB_SUCCESS; +} + +/** Copy the used portions of a non-overflow page. + * @param[in] dst page to copy into + * @param[in] src page to copy from + * @param[in] psize size of a page + */ +static void +mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) +{ + enum { Align = sizeof(pgno_t) }; + indx_t upper = src->mp_upper, lower = src->mp_lower, unused = upper-lower; + + /* If page isn't full, just copy the used portion. Adjust + * alignment so memcpy may copy words instead of bytes. + */ + if ((unused &= -Align) && !IS_LEAF2(src)) { + upper &= -Align; + memcpy(dst, src, (lower + (Align-1)) & -Align); + memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), + psize - upper); + } else { + memcpy(dst, src, psize - unused); + } +} + +/** Pull a page off the txn's spill list, if present. + * If a page being referenced was spilled to disk in this txn, bring + * it back and make it dirty/writable again. + * @param[in] tx0 the transaction handle. + * @param[in] mp the page being referenced. + * @param[out] ret the writable page, if any. ret is unchanged if + * mp wasn't spilled. + */ +static int +mdb_page_unspill(MDB_txn *tx0, MDB_page *mp, MDB_page **ret) +{ + MDB_env *env = tx0->mt_env; + MDB_txn *txn; + unsigned x; + pgno_t pgno = mp->mp_pgno; + + for (txn = tx0; txn; txn=txn->mt_parent) { + if (!txn->mt_spill_pgs) + continue; + x = mdb_midl_search(txn->mt_spill_pgs, pgno); + if (x <= txn->mt_spill_pgs[0] && txn->mt_spill_pgs[x] == pgno) { + MDB_page *np; + int num; + if (IS_OVERFLOW(mp)) + num = mp->mp_pages; + else + num = 1; + if (env->me_flags & MDB_WRITEMAP) { + np = mp; + } else { + np = mdb_page_malloc(txn, num); + if (!np) + return ENOMEM; + if (num > 1) + memcpy(np, mp, num * env->me_psize); + else + mdb_page_copy(np, mp, env->me_psize); + } + if (txn == tx0) { + /* If in current txn, this page is no longer spilled */ + for (; x < txn->mt_spill_pgs[0]; x++) + txn->mt_spill_pgs[x] = txn->mt_spill_pgs[x+1]; + txn->mt_spill_pgs[0]--; + } /* otherwise, if belonging to a parent txn, the + * page remains spilled until child commits + */ + + if (txn->mt_parent) { + MDB_txn *tx2; + /* If this page is also in a parent's dirty list, then + * it's already accounted in dirty_room, and we need to + * cancel out the decrement that mdb_page_dirty does. + */ + for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) { + x = mdb_mid2l_search(tx2->mt_u.dirty_list, pgno); + if (x <= tx2->mt_u.dirty_list[0].mid && + tx2->mt_u.dirty_list[x].mid == pgno) { + txn->mt_dirty_room++; + break; + } + } + } + mdb_page_dirty(tx0, np); + np->mp_flags |= P_DIRTY; + *ret = np; + break; + } + } + return MDB_SUCCESS; +} + +/** Touch a page: make it dirty and re-insert into tree with updated pgno. + * @param[in] mc cursor pointing to the page to be touched + * @return 0 on success, non-zero on failure. 
+ */ +static int +mdb_page_touch(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top], *np; + MDB_txn *txn = mc->mc_txn; + MDB_cursor *m2, *m3; + MDB_dbi dbi; + pgno_t pgno; + int rc; + + if (!F_ISSET(mp->mp_flags, P_DIRTY)) { + if (txn->mt_flags & MDB_TXN_SPILLS) { + np = NULL; + rc = mdb_page_unspill(txn, mp, &np); + if (rc) + return rc; + if (np) + goto done; + } + if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) || + (rc = mdb_page_alloc(mc, 1, &np))) + return rc; + pgno = np->mp_pgno; + DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi,mp->mp_pgno,pgno); + assert(mp->mp_pgno != pgno); + mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* Update the parent page, if any, to point to the new page */ + if (mc->mc_top) { + MDB_page *parent = mc->mc_pg[mc->mc_top-1]; + MDB_node *node = NODEPTR(parent, mc->mc_ki[mc->mc_top-1]); + SETPGNO(node, pgno); + } else { + mc->mc_db->md_root = pgno; + } + } else if (txn->mt_parent && !IS_SUBP(mp)) { + MDB_ID2 mid, *dl = txn->mt_u.dirty_list; + pgno = mp->mp_pgno; + /* If txn has a parent, make sure the page is in our + * dirty list. + */ + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + if (mp != dl[x].mptr) { /* bad cursor? */ + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + return MDB_CORRUPTED; + } + return 0; + } + } + assert(dl[0].mid < MDB_IDL_UM_MAX); + /* No - copy it */ + np = mdb_page_malloc(txn, 1); + if (!np) + return ENOMEM; + mid.mid = pgno; + mid.mptr = np; + mdb_mid2l_insert(dl, &mid); + } else { + return 0; + } + + mdb_page_copy(np, mp, txn->mt_env->me_psize); + np->mp_pgno = pgno; + np->mp_flags |= P_DIRTY; + +done: + /* Adjust cursors pointing to mp */ + mc->mc_pg[mc->mc_top] = np; + dbi = mc->mc_dbi; + if (mc->mc_flags & C_SUB) { + dbi--; + for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + m3 = &m2->mc_xcursor->mx_cursor; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[mc->mc_top] == mp) + m3->mc_pg[mc->mc_top] = np; + } + } else { + for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mp) { + m2->mc_pg[mc->mc_top] = np; + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + { + MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]); + if (!(leaf->mn_flags & F_SUBDATA)) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + } + } + return 0; +} + +int +mdb_env_sync(MDB_env *env, int force) +{ + int rc = 0; + if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { + if (env->me_flags & MDB_WRITEMAP) { + int flags = ((env->me_flags & MDB_MAPASYNC) && !force) + ? 
MS_ASYNC : MS_SYNC; + if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) + rc = ErrCode(); +#ifdef _WIN32 + else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); +#endif + } else { + if (MDB_FDATASYNC(env->me_fd)) + rc = ErrCode(); + } + } + return rc; +} + +/** Back up parent txn's cursors, then grab the originals for tracking */ +static int +mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) +{ + MDB_cursor *mc, *bk; + MDB_xcursor *mx; + size_t size; + int i; + + for (i = src->mt_numdbs; --i >= 0; ) { + if ((mc = src->mt_cursors[i]) != NULL) { + size = sizeof(MDB_cursor); + if (mc->mc_xcursor) + size += sizeof(MDB_xcursor); + for (; mc; mc = bk->mc_next) { + bk = malloc(size); + if (!bk) + return ENOMEM; + *bk = *mc; + mc->mc_backup = bk; + mc->mc_db = &dst->mt_dbs[i]; + /* Kill pointers into src - and dst to reduce abuse: The + * user may not use mc until dst ends. Otherwise we'd... + */ + mc->mc_txn = NULL; /* ...set this to dst */ + mc->mc_dbflag = NULL; /* ...and &dst->mt_dbflags[i] */ + if ((mx = mc->mc_xcursor) != NULL) { + *(MDB_xcursor *)(bk+1) = *mx; + mx->mx_cursor.mc_txn = NULL; /* ...and dst. */ + } + mc->mc_next = dst->mt_cursors[i]; + dst->mt_cursors[i] = mc; + } + } + } + return MDB_SUCCESS; +} + +/** Close this write txn's cursors, give parent txn's cursors back to parent. + * @param[in] txn the transaction handle. + * @param[in] merge true to keep changes to parent cursors, false to revert. + * @return 0 on success, non-zero on failure. + */ +static void +mdb_cursors_close(MDB_txn *txn, unsigned merge) +{ + MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; + MDB_xcursor *mx; + int i; + + for (i = txn->mt_numdbs; --i >= 0; ) { + for (mc = cursors[i]; mc; mc = next) { + next = mc->mc_next; + if ((bk = mc->mc_backup) != NULL) { + if (merge) { + /* Commit changes to parent txn */ + mc->mc_next = bk->mc_next; + mc->mc_backup = bk->mc_backup; + mc->mc_txn = bk->mc_txn; + mc->mc_db = bk->mc_db; + mc->mc_dbflag = bk->mc_dbflag; + if ((mx = mc->mc_xcursor) != NULL) + mx->mx_cursor.mc_txn = bk->mc_txn; + } else { + /* Abort nested txn */ + *mc = *bk; + if ((mx = mc->mc_xcursor) != NULL) + *mx = *(MDB_xcursor *)(bk+1); + } + mc = bk; + } + free(mc); + } + cursors[i] = NULL; + } +} + +#ifdef MDB_DEBUG_SKIP +#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) +#endif +static void +mdb_txn_reset0(MDB_txn *txn, const char *act); + +#ifdef _WIN32 +enum Pidlock_op { + Pidset, Pidcheck +}; +#else +enum Pidlock_op { + Pidset = F_SETLK, Pidcheck = F_GETLK +}; +#endif + +/** Set or check a pid lock. Set returns 0 on success. + * Check returns 0 if lock exists (meaning the process is alive). + * + * On Windows Pidset is a no-op, we merely check for the existence + * of the process with the given pid. On POSIX we use a single byte + * lock on the lockfile, set at an offset equal to the pid. 
 */
+static int
+mdb_reader_pid(MDB_env *env, enum Pidlock_op op, pid_t pid)
+{
+#ifdef _WIN32
+	HANDLE h;
+	int ver, query;
+	switch(op) {
+	case Pidset:
+		break;
+	case Pidcheck:
+		h = OpenProcess(env->me_pidquery, FALSE, pid);
+		if (!h)
+			return GetLastError();
+		CloseHandle(h);
+		break;
+	}
+	return 0;
+#else
+	int rc;
+	struct flock lock_info;
+	memset((void *)&lock_info, 0, sizeof(lock_info));
+	lock_info.l_type = F_WRLCK;
+	lock_info.l_whence = SEEK_SET;
+	lock_info.l_start = pid;
+	lock_info.l_len = 1;
+	while ((rc = fcntl(env->me_lfd, op, &lock_info)) &&
+		(rc = ErrCode()) == EINTR) ;
+	if (op == F_GETLK && rc == 0 && lock_info.l_type == F_UNLCK)
+		rc = -1;
+	return rc;
+#endif
+}
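mdb_txn_renew0() below claims a slot in the shared reader table the first time a thread starts a read-only transaction, and caches it in thread-local storage so later transactions in the same thread reuse it. Application code never touches the table directly; each thread simply begins its own transaction. A sketch, assuming the environment was opened elsewhere with a large enough mdb_env_set_maxreaders():

    /* Per-thread read pattern; the reader-table slot is claimed on the
     * first mdb_txn_begin() in each thread. */
    static void *reader_thread(void *arg)
    {
        MDB_env *env = arg;
        MDB_txn *txn;
        /* Fails with MDB_READERS_FULL once the reader table is exhausted */
        if (mdb_txn_begin(env, NULL, MDB_RDONLY, &txn) == MDB_SUCCESS) {
            /* ... mdb_get()/mdb_cursor_open() against this snapshot ... */
            mdb_txn_abort(txn);
        }
        return NULL;
    }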
+/** Common code for #mdb_txn_begin() and #mdb_txn_renew().
+ * @param[in] txn the transaction handle to initialize
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_txn_renew0(MDB_txn *txn)
+{
+	MDB_env *env = txn->mt_env;
+	unsigned int i;
+	uint16_t x;
+	int rc, new_notls = 0;
+
+	/* Setup db info */
+	txn->mt_numdbs = env->me_numdbs;
+	txn->mt_dbxs = env->me_dbxs;	/* mostly static anyway */
+
+	if (txn->mt_flags & MDB_TXN_RDONLY) {
+		if (!env->me_txns) {
+			i = mdb_env_pick_meta(env);
+			txn->mt_txnid = env->me_metas[i]->mm_txnid;
+			txn->mt_u.reader = NULL;
+		} else {
+			MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader :
+				pthread_getspecific(env->me_txkey);
+			if (r) {
+				if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
+					return MDB_BAD_RSLOT;
+			} else {
+				pid_t pid = env->me_pid;
+				pthread_t tid = pthread_self();
+
+				LOCK_MUTEX_R(env);
+				if (!(env->me_flags & MDB_LIVE_READER)) {
+					rc = mdb_reader_pid(env, Pidset, pid);
+					if (rc) {
+						UNLOCK_MUTEX_R(env);
+						return rc;
+					}
+					env->me_flags |= MDB_LIVE_READER;
+				}
+
+				for (i=0; i<env->me_txns->mti_numreaders; i++)
+					if (env->me_txns->mti_readers[i].mr_pid == 0)
+						break;
+				if (i == env->me_maxreaders) {
+					UNLOCK_MUTEX_R(env);
+					return MDB_READERS_FULL;
+				}
+				env->me_txns->mti_readers[i].mr_pid = pid;
+				env->me_txns->mti_readers[i].mr_tid = tid;
+				if (i >= env->me_txns->mti_numreaders)
+					env->me_txns->mti_numreaders = i+1;
+				/* Save numreaders for un-mutexed mdb_env_close() */
+				env->me_numreaders = env->me_txns->mti_numreaders;
+				UNLOCK_MUTEX_R(env);
+				r = &env->me_txns->mti_readers[i];
+				new_notls = (env->me_flags & MDB_NOTLS);
+				if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
+					r->mr_pid = 0;
+					return rc;
+				}
+			}
+			txn->mt_txnid = r->mr_txnid = env->me_txns->mti_txnid;
+			txn->mt_u.reader = r;
+		}
+		txn->mt_toggle = txn->mt_txnid & 1;
+	} else {
+		LOCK_MUTEX_W(env);
+
+		txn->mt_txnid = env->me_txns->mti_txnid;
+		txn->mt_toggle = txn->mt_txnid & 1;
+		txn->mt_txnid++;
+#if MDB_DEBUG
+		if (txn->mt_txnid == mdb_debug_start)
+			mdb_debug = 1;
+#endif
+		txn->mt_dirty_room = MDB_IDL_UM_MAX;
+		txn->mt_u.dirty_list = env->me_dirty_list;
+		txn->mt_u.dirty_list[0].mid = 0;
+		txn->mt_free_pgs = env->me_free_pgs;
+		txn->mt_free_pgs[0] = 0;
+		txn->mt_spill_pgs = NULL;
+		env->me_txn = txn;
+	}
+
+	/* Copy the DB info and flags */
+	memcpy(txn->mt_dbs, env->me_metas[txn->mt_toggle]->mm_dbs, 2 * sizeof(MDB_db));
+
+	/* Moved to here to avoid a data race in read TXNs */
+	txn->mt_next_pgno = env->me_metas[txn->mt_toggle]->mm_last_pg+1;
+
+	for (i=2; i<txn->mt_numdbs; i++) {
+		x = env->me_dbflags[i];
+		txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
+		txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
+	}
+	txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
+
+	if (env->me_maxpg < txn->mt_next_pgno) {
+		mdb_txn_reset0(txn, "renew0-mapfail");
+		if (new_notls) {
+			txn->mt_u.reader->mr_pid = 0;
+			txn->mt_u.reader = NULL;
+		}
+		return MDB_MAP_RESIZED;
+	}
+
+	return MDB_SUCCESS;
+}
+
+int
+mdb_txn_renew(MDB_txn *txn)
+{
+	int rc;
+
+	if (!txn || txn->mt_dbxs)	/* A reset txn has mt_dbxs==NULL */
+		return EINVAL;
+
+	if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
+		DPUTS("environment had fatal error, must shutdown!");
+		return MDB_PANIC;
+	}
+
+	rc = mdb_txn_renew0(txn);
+	if (rc == MDB_SUCCESS) {
+		DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu",
+			txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+			(void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
+	}
+	return rc;
+}
+
+int
+mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
+{
+	MDB_txn *txn;
+	MDB_ntxn *ntxn;
+	int rc, size, tsize = sizeof(MDB_txn);
+
+	if (env->me_flags & MDB_FATAL_ERROR) {
+		DPUTS("environment had fatal error, must shutdown!");
+		return MDB_PANIC;
+	}
+	if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY))
+		return EACCES;
+	if (parent) {
+		/* Nested transactions: Max 1 child, write txns only, no writemap */
+		if (parent->mt_child ||
+			(flags & MDB_RDONLY) || (parent->mt_flags & MDB_TXN_RDONLY) ||
+			(env->me_flags & MDB_WRITEMAP))
+		{
+			return EINVAL;
+		}
+		tsize = sizeof(MDB_ntxn);
+	}
+	size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
+	if (!(flags & MDB_RDONLY))
+		size += env->me_maxdbs * sizeof(MDB_cursor *);
+
+	if ((txn = calloc(1, size)) == NULL) {
+		DPRINTF("calloc: %s", strerror(ErrCode()));
+		return ENOMEM;
+	}
+	txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
+	if (flags & MDB_RDONLY) {
+		txn->mt_flags |= MDB_TXN_RDONLY;
+		txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
+	} else {
+		txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
+		txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
+	}
+	txn->mt_env = env;
+
+	if (parent) {
+		unsigned int i;
+		txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
+		if (!txn->mt_u.dirty_list ||
+			!(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)))
+		{
+			free(txn->mt_u.dirty_list);
+			free(txn);
+			return ENOMEM;
+		}
+		txn->mt_txnid = parent->mt_txnid;
+		txn->mt_toggle = parent->mt_toggle;
+		txn->mt_dirty_room = parent->mt_dirty_room;
+		txn->mt_u.dirty_list[0].mid = 0;
+		txn->mt_spill_pgs = NULL;
+		txn->mt_next_pgno = parent->mt_next_pgno;
+		parent->mt_child = txn;
+		txn->mt_parent = parent;
+		txn->mt_numdbs = parent->mt_numdbs;
+		txn->mt_flags = parent->mt_flags;
+		txn->mt_dbxs = parent->mt_dbxs;
+		memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
+		/* Copy parent's mt_dbflags, but clear DB_NEW */
+		for (i=0; i<parent->mt_numdbs; i++)
+			txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~DB_NEW;
+		rc = 0;
+		ntxn = (MDB_ntxn *)txn;
+		ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */
+		if (env->me_pghead) {
+			size = MDB_IDL_SIZEOF(env->me_pghead);
+			env->me_pghead = mdb_midl_alloc(env->me_pghead[0]);
+			if (env->me_pghead)
+				memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size);
+			else
+				rc = ENOMEM;
+		}
+		if (!rc)
+			rc = mdb_cursor_shadow(parent, txn);
+		if (rc)
+			mdb_txn_reset0(txn, "beginchild-fail");
+	} else {
+		rc = mdb_txn_renew0(txn);
+	}
+	if (rc)
+		free(txn);
+	else {
+		*ret = txn;
+		DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu",
+			txn->mt_txnid, (txn->mt_flags &
MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); + } + + return rc; +} + +/** Export or close DBI handles opened in this txn. */ +static void +mdb_dbis_update(MDB_txn *txn, int keep) +{ + int i; + MDB_dbi n = txn->mt_numdbs; + MDB_env *env = txn->mt_env; + unsigned char *tdbflags = txn->mt_dbflags; + + for (i = n; --i >= 2;) { + if (tdbflags[i] & DB_NEW) { + if (keep) { + env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; + } else { + char *ptr = env->me_dbxs[i].md_name.mv_data; + env->me_dbxs[i].md_name.mv_data = NULL; + env->me_dbxs[i].md_name.mv_size = 0; + env->me_dbflags[i] = 0; + free(ptr); + } + } + } + if (keep && env->me_numdbs < n) + env->me_numdbs = n; +} + +/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). + * May be called twice for readonly txns: First reset it, then abort. + * @param[in] txn the transaction handle to reset + */ +static void +mdb_txn_reset0(MDB_txn *txn, const char *act) +{ + MDB_env *env = txn->mt_env; + + /* Close any DBI handles opened in this txn */ + mdb_dbis_update(txn, 0); + + DPRINTF("%s txn %zu%c %p on mdbenv %p, root page %zu", + act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', + (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + if (txn->mt_u.reader) { + txn->mt_u.reader->mr_txnid = (txnid_t)-1; + if (!(env->me_flags & MDB_NOTLS)) + txn->mt_u.reader = NULL; /* txn does not own reader */ + } + txn->mt_numdbs = 0; /* close nothing if called again */ + txn->mt_dbxs = NULL; /* mark txn as reset */ + } else { + mdb_cursors_close(txn, 0); + + if (!(env->me_flags & MDB_WRITEMAP)) { + mdb_dlist_free(txn); + } + mdb_midl_free(env->me_pghead); + + if (txn->mt_parent) { + txn->mt_parent->mt_child = NULL; + env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; + mdb_midl_free(txn->mt_free_pgs); + mdb_midl_free(txn->mt_spill_pgs); + free(txn->mt_u.dirty_list); + return; + } + + if (mdb_midl_shrink(&txn->mt_free_pgs)) + env->me_free_pgs = txn->mt_free_pgs; + env->me_pghead = NULL; + env->me_pglast = 0; + + env->me_txn = NULL; + /* The writer mutex was locked in mdb_txn_begin. */ + UNLOCK_MUTEX_W(env); + } +} + +void +mdb_txn_reset(MDB_txn *txn) +{ + if (txn == NULL) + return; + + /* This call is only valid for read-only txns */ + if (!(txn->mt_flags & MDB_TXN_RDONLY)) + return; + + mdb_txn_reset0(txn, "reset"); +} + +void +mdb_txn_abort(MDB_txn *txn) +{ + if (txn == NULL) + return; + + if (txn->mt_child) + mdb_txn_abort(txn->mt_child); + + mdb_txn_reset0(txn, "abort"); + /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ + if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) + txn->mt_u.reader->mr_pid = 0; + + free(txn); +} + +/** Save the freelist as of this transaction to the freeDB. + * This changes the freelist. Keep trying until it stabilizes. + */ +static int +mdb_freelist_save(MDB_txn *txn) +{ + /* env->me_pghead[] can grow and shrink during this call. + * env->me_pglast and txn->mt_free_pgs[] can only grow. + * Page numbers cannot disappear from txn->mt_free_pgs[]. 
+ */ + MDB_cursor mc; + MDB_env *env = txn->mt_env; + int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1; + txnid_t pglast = 0, head_id = 0; + pgno_t freecnt = 0, *free_pgs, *mop; + ssize_t head_room = 0, total_room = 0, mop_len; + + mdb_cursor_init(&mc, txn, FREE_DBI, NULL); + + if (env->me_pghead) { + /* Make sure first page of freeDB is touched and on freelist */ + rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + for (;;) { + /* Come back here after each Put() in case freelist changed */ + MDB_val key, data; + + /* If using records from freeDB which we have not yet + * deleted, delete them and any we reserved for me_pghead. + */ + while (pglast < env->me_pglast) { + rc = mdb_cursor_first(&mc, &key, NULL); + if (rc) + return rc; + pglast = head_id = *(txnid_t *)key.mv_data; + total_room = head_room = 0; + assert(pglast <= env->me_pglast); + rc = mdb_cursor_del(&mc, 0); + if (rc) + return rc; + } + + /* Save the IDL of pages freed by this txn, to a single record */ + if (freecnt < txn->mt_free_pgs[0]) { + if (!freecnt) { + /* Make sure last page of freeDB is touched and on freelist */ + key.mv_size = MDB_MAXKEYSIZE+1; + key.mv_data = NULL; + rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY); + if (rc && rc != MDB_NOTFOUND) + return rc; + } + free_pgs = txn->mt_free_pgs; + /* Write to last page of freeDB */ + key.mv_size = sizeof(txn->mt_txnid); + key.mv_data = &txn->mt_txnid; + do { + freecnt = free_pgs[0]; + data.mv_size = MDB_IDL_SIZEOF(free_pgs); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + /* Retry if mt_free_pgs[] grew during the Put() */ + free_pgs = txn->mt_free_pgs; + } while (freecnt < free_pgs[0]); + mdb_midl_sort(free_pgs); + memcpy(data.mv_data, free_pgs, data.mv_size); +#if MDB_DEBUG > 1 + { + unsigned int i = free_pgs[0]; + DPRINTF("IDL write txn %zu root %zu num %u", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); + for (; i; i--) + DPRINTF("IDL %zu", free_pgs[i]); + } +#endif + continue; + } + + mop = env->me_pghead; + mop_len = mop ? mop[0] : 0; + + /* Reserve records for me_pghead[]. Split it if multi-page, + * to avoid searching freeDB for a page range. Use keys in + * range [1,me_pglast]: Smaller than txnid of oldest reader. 
+ */ + if (total_room >= mop_len) { + if (total_room == mop_len || --more < 0) + break; + } else if (head_room >= maxfree_1pg && head_id > 1) { + /* Keep current record (overflow page), add a new one */ + head_id--; + head_room = 0; + } + /* (Re)write {key = head_id, IDL length = head_room} */ + total_room -= head_room; + head_room = mop_len - total_room; + if (head_room > maxfree_1pg && head_id > 1) { + /* Overflow multi-page for part of me_pghead */ + head_room /= head_id; /* amortize page sizes */ + head_room += maxfree_1pg - head_room % (maxfree_1pg + 1); + } else if (head_room < 0) { + /* Rare case, not bothering to delete this record */ + head_room = 0; + } + key.mv_size = sizeof(head_id); + key.mv_data = &head_id; + data.mv_size = (head_room + 1) * sizeof(pgno_t); + rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); + if (rc) + return rc; + *(MDB_ID *)data.mv_data = 0; /* IDL is initially empty */ + total_room += head_room; + } + + /* Fill in the reserved, touched me_pghead records */ + rc = MDB_SUCCESS; + if (mop_len) { + MDB_val key, data; + + mop += mop_len; + rc = mdb_cursor_first(&mc, &key, &data); + for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) { + unsigned flags = MDB_CURRENT; + txnid_t id = *(txnid_t *)key.mv_data; + ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1; + MDB_ID save; + + assert(len >= 0 && id <= env->me_pglast); + key.mv_data = &id; + if (len > mop_len) { + len = mop_len; + data.mv_size = (len + 1) * sizeof(MDB_ID); + flags = 0; + } + data.mv_data = mop -= len; + save = mop[0]; + mop[0] = len; + rc = mdb_cursor_put(&mc, &key, &data, flags); + mop[0] = save; + if (rc || !(mop_len -= len)) + break; + } + } + return rc; +} + +/** Flush dirty pages to the map, after clearing their dirty flag. + */ +static int +mdb_page_flush(MDB_txn *txn) +{ + MDB_env *env = txn->mt_env; + MDB_ID2L dl = txn->mt_u.dirty_list; + unsigned psize = env->me_psize, j; + int i, pagecount = dl[0].mid, rc; + size_t size = 0, pos = 0; + pgno_t pgno = 0; + MDB_page *dp = NULL; +#ifdef _WIN32 + OVERLAPPED ov; +#else + struct iovec iov[MDB_COMMIT_PAGES]; + ssize_t wpos = 0, wsize = 0, wres; + size_t next_pos = 1; /* impossible pos, so pos != next_pos */ + int n = 0; +#endif + + j = 0; + if (env->me_flags & MDB_WRITEMAP) { + /* Clear dirty flags */ + for (i = pagecount; i; i--) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & P_KEEP) { + dp->mp_flags ^= P_KEEP; + dl[++j] = dl[i]; + continue; + } + dp->mp_flags &= ~P_DIRTY; + } + dl[0].mid = j; + return MDB_SUCCESS; + } + + /* Write the pages */ + for (i = 1;; i++) { + if (i <= pagecount) { + dp = dl[i].mptr; + /* Don't flush this page yet */ + if (dp->mp_flags & P_KEEP) { + dp->mp_flags ^= P_KEEP; + dl[i].mid = 0; + continue; + } + pgno = dl[i].mid; + /* clear dirty flag */ + dp->mp_flags &= ~P_DIRTY; + pos = pgno * psize; + size = psize; + if (IS_OVERFLOW(dp)) size *= dp->mp_pages; + } +#ifdef _WIN32 + else break; + + /* Windows actually supports scatter/gather I/O, but only on + * unbuffered file handles. Since we're relying on the OS page + * cache for all our data, that's self-defeating. So we just + * write pages one at a time. We use the ov structure to set + * the write offset, to at least save the overhead of a Seek + * system call. 
+ */ + DPRINTF("committing page %zu", pgno); + memset(&ov, 0, sizeof(ov)); + ov.Offset = pos & 0xffffffff; + ov.OffsetHigh = pos >> 16 >> 16; + if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) { + rc = ErrCode(); + DPRINTF("WriteFile: %d", rc); + return rc; + } +#else + /* Write up to MDB_COMMIT_PAGES dirty pages at a time. */ + if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) { + if (n) { + /* Write previous page(s) */ +#ifdef MDB_USE_PWRITEV + wres = pwritev(env->me_fd, iov, n, wpos); +#else + if (n == 1) { + wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos); + } else { + if (lseek(env->me_fd, wpos, SEEK_SET) == -1) { + rc = ErrCode(); + DPRINTF("lseek: %s", strerror(rc)); + return rc; + } + wres = writev(env->me_fd, iov, n); + } +#endif + if (wres != wsize) { + if (wres < 0) { + rc = ErrCode(); + DPRINTF("Write error: %s", strerror(rc)); + } else { + rc = EIO; /* TODO: Use which error code? */ + DPUTS("short write, filesystem full?"); + } + return rc; + } + n = 0; + } + if (i > pagecount) + break; + wpos = pos; + wsize = 0; + } + DPRINTF("committing page %zu", pgno); + next_pos = pos + size; + iov[n].iov_len = size; + iov[n].iov_base = (char *)dp; + wsize += size; + n++; +#endif /* _WIN32 */ + } + + j = 0; + for (i=1; i<=pagecount; i++) { + dp = dl[i].mptr; + /* This is a page we skipped above */ + if (!dl[i].mid) { + dl[++j] = dl[i]; + dl[j].mid = dp->mp_pgno; + continue; + } + mdb_dpage_free(env, dp); + } + dl[0].mid = j; + + return MDB_SUCCESS; +} + +int +mdb_txn_commit(MDB_txn *txn) +{ + int rc; + unsigned int i; + MDB_env *env; + + assert(txn != NULL); + assert(txn->mt_env != NULL); + + if (txn->mt_child) { + rc = mdb_txn_commit(txn->mt_child); + txn->mt_child = NULL; + if (rc) + goto fail; + } + + env = txn->mt_env; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + mdb_dbis_update(txn, 1); + txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */ + mdb_txn_abort(txn); + return MDB_SUCCESS; + } + + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("error flag is set, can't commit"); + if (txn->mt_parent) + txn->mt_parent->mt_flags |= MDB_TXN_ERROR; + rc = EINVAL; + goto fail; + } + + if (txn->mt_parent) { + MDB_txn *parent = txn->mt_parent; + unsigned x, y, len; + MDB_ID2L dst, src; + + /* Append our free list to parent's */ + rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs); + if (rc) + goto fail; + mdb_midl_free(txn->mt_free_pgs); + + parent->mt_next_pgno = txn->mt_next_pgno; + parent->mt_flags = txn->mt_flags; + + /* Merge our cursors into parent's and close them */ + mdb_cursors_close(txn, 1); + + /* Update parent's DB table. 
 */
+		memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
+		parent->mt_numdbs = txn->mt_numdbs;
+		parent->mt_dbflags[0] = txn->mt_dbflags[0];
+		parent->mt_dbflags[1] = txn->mt_dbflags[1];
+		for (i=2; i<txn->mt_numdbs; i++) {
+			/* preserve parent's DB_NEW status */
+			x = parent->mt_dbflags[i] & DB_NEW;
+			parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
+		}
+
+		dst = parent->mt_u.dirty_list;
+		src = txn->mt_u.dirty_list;
+		/* Remove anything in our dirty list from parent's spill list */
+		if (parent->mt_spill_pgs) {
+			x = parent->mt_spill_pgs[0];
+			len = x;
+			/* zero out our dirty pages in parent spill list */
+			for (i=1; i<=src[0].mid; i++) {
+				if (src[i].mid < parent->mt_spill_pgs[x])
+					continue;
+				if (src[i].mid > parent->mt_spill_pgs[x]) {
+					if (x <= 1)
+						break;
+					x--;
+					continue;
+				}
+				parent->mt_spill_pgs[x] = 0;
+				len--;
+			}
+			/* OK, we had a few hits, squash zeros from the spill list */
+			if (len < parent->mt_spill_pgs[0]) {
+				x=1;
+				for (y=1; y<=parent->mt_spill_pgs[0]; y++) {
+					if (parent->mt_spill_pgs[y]) {
+						if (y != x) {
+							parent->mt_spill_pgs[x] = parent->mt_spill_pgs[y];
+						}
+						x++;
+					}
+				}
+				parent->mt_spill_pgs[0] = len;
+			}
+		}
+		/* Find len = length of merging our dirty list with parent's */
+		x = dst[0].mid;
+		dst[0].mid = 0;		/* simplify loops */
+		if (parent->mt_parent) {
+			len = x + src[0].mid;
+			y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
+			for (i = x; y && i; y--) {
+				pgno_t yp = src[y].mid;
+				while (yp < dst[i].mid)
+					i--;
+				if (yp == dst[i].mid) {
+					i--;
+					len--;
+				}
+			}
+		} else { /* Simplify the above for single-ancestor case */
+			len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
+		}
+		/* Merge our dirty list with parent's */
+		y = src[0].mid;
+		for (i = len; y; dst[i--] = src[y--]) {
+			pgno_t yp = src[y].mid;
+			while (yp < dst[x].mid)
+				dst[i--] = dst[x--];
+			if (yp == dst[x].mid)
+				free(dst[x--].mptr);
+		}
+		assert(i == x);
+		dst[0].mid = len;
+		free(txn->mt_u.dirty_list);
+		parent->mt_dirty_room = txn->mt_dirty_room;
+		if (txn->mt_spill_pgs) {
+			if (parent->mt_spill_pgs) {
+				mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs);
+				mdb_midl_free(txn->mt_spill_pgs);
+				mdb_midl_sort(parent->mt_spill_pgs);
+			} else {
+				parent->mt_spill_pgs = txn->mt_spill_pgs;
+			}
+		}
+
+		parent->mt_child = NULL;
+		mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
+		free(txn);
+		return MDB_SUCCESS;
+	}
+
+	if (txn != env->me_txn) {
+		DPUTS("attempt to commit unknown transaction");
+		rc = EINVAL;
+		goto fail;
+	}
+
+	mdb_cursors_close(txn, 0);
+
+	if (!txn->mt_u.dirty_list[0].mid && !(txn->mt_flags & MDB_TXN_DIRTY))
+		goto done;
+
+	DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu",
+		txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root);
+
+	/* Update DB root pointers */
+	if (txn->mt_numdbs > 2) {
+		MDB_cursor mc;
+		MDB_dbi i;
+		MDB_val data;
+		data.mv_size = sizeof(MDB_db);
+
+		mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
+		for (i = 2; i < txn->mt_numdbs; i++) {
+			if (txn->mt_dbflags[i] & DB_DIRTY) {
+				data.mv_data = &txn->mt_dbs[i];
+				rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
+				if (rc)
+					goto fail;
+			}
+		}
+	}
+
+	rc = mdb_freelist_save(txn);
+	if (rc)
+		goto fail;
+
+	mdb_midl_free(env->me_pghead);
+	env->me_pghead = NULL;
+	if (mdb_midl_shrink(&txn->mt_free_pgs))
+		env->me_free_pgs = txn->mt_free_pgs;
+
+#if MDB_DEBUG > 2
+	mdb_audit(txn);
+#endif
+
+	if ((rc = mdb_page_flush(txn)) ||
+		(rc = mdb_env_sync(env, 0)) ||
+		(rc = mdb_env_write_meta(txn)))
+		goto fail;
+
+done:
+	env->me_pglast = 0;
+	env->me_txn = NULL;
+	mdb_dbis_update(txn, 1);
+
+	UNLOCK_MUTEX_W(env);
+	free(txn);
+
+	return MDB_SUCCESS;
+
+fail:
+	mdb_txn_abort(txn);
+	return rc;
+}
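mdb_txn_commit() above is where all the real write work happens: dirty pages are flushed, the freelist is saved back to the freeDB, and only then is the new meta page published. The caller-visible contract stays small; a sketch of the normal write path (env/dbi assumed open, store is an illustrative name):

    static int store(MDB_env *env, MDB_dbi dbi, MDB_val *key, MDB_val *val)
    {
        MDB_txn *txn;
        int rc = mdb_txn_begin(env, NULL, 0, &txn);
        if (rc != MDB_SUCCESS)
            return rc;
        rc = mdb_put(txn, dbi, key, val, 0);
        if (rc != MDB_SUCCESS) {
            mdb_txn_abort(txn);
            return rc;
        }
        /* flush dirty pages + save freelist + write meta, as above */
        return mdb_txn_commit(txn);
    }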
+/** Read the environment parameters of a DB environment before
+ * mapping it into memory.
+ * @param[in] env the environment handle
+ * @param[out] meta address of where to store the meta information
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_env_read_header(MDB_env *env, MDB_meta *meta)
+{
+	MDB_pagebuf pbuf;
+	MDB_page *p;
+	MDB_meta *m;
+	int i, rc, off;
+
+	/* We don't know the page size yet, so use a minimum value.
+	 * Read both meta pages so we can use the latest one.
+	 */
+
+	for (i=off=0; i<2; i++, off = meta->mm_psize) {
+#ifdef _WIN32
+		DWORD len;
+		OVERLAPPED ov;
+		memset(&ov, 0, sizeof(ov));
+		ov.Offset = off;
+		rc = ReadFile(env->me_fd,&pbuf,MDB_PAGESIZE,&len,&ov) ? (int)len : -1;
+		if (rc == -1 && ErrCode() == ERROR_HANDLE_EOF)
+			rc = 0;
+#else
+		rc = pread(env->me_fd, &pbuf, MDB_PAGESIZE, off);
+#endif
+		if (rc != MDB_PAGESIZE) {
+			if (rc == 0 && off == 0)
+				return ENOENT;
+			rc = rc < 0 ? (int) ErrCode() : MDB_INVALID;
+			DPRINTF("read: %s", mdb_strerror(rc));
+			return rc;
+		}
+
+		p = (MDB_page *)&pbuf;
+
+		if (!F_ISSET(p->mp_flags, P_META)) {
+			DPRINTF("page %zu not a meta page", p->mp_pgno);
+			return MDB_INVALID;
+		}
+
+		m = METADATA(p);
+		if (m->mm_magic != MDB_MAGIC) {
+			DPUTS("meta has invalid magic");
+			return MDB_INVALID;
+		}
+
+		if (m->mm_version != MDB_DATA_VERSION) {
+			DPRINTF("database is version %u, expected version %u",
+				m->mm_version, MDB_DATA_VERSION);
+			return MDB_VERSION_MISMATCH;
+		}
+
+		if (off == 0 || m->mm_txnid > meta->mm_txnid)
+			*meta = *m;
+	}
+	return 0;
+}
+
+/** Write the environment parameters of a freshly created DB environment.
+ * @param[in] env the environment handle
+ * @param[out] meta address of where to store the meta information
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
+{
+	MDB_page *p, *q;
+	int rc;
+	unsigned int psize;
+
+	DPUTS("writing new meta page");
+
+	GET_PAGESIZE(psize);
+
+	meta->mm_magic = MDB_MAGIC;
+	meta->mm_version = MDB_DATA_VERSION;
+	meta->mm_mapsize = env->me_mapsize;
+	meta->mm_psize = psize;
+	meta->mm_last_pg = 1;
+	meta->mm_flags = env->me_flags & 0xffff;
+	meta->mm_flags |= MDB_INTEGERKEY;
+	meta->mm_dbs[0].md_root = P_INVALID;
+	meta->mm_dbs[1].md_root = P_INVALID;
+
+	p = calloc(2, psize);
+	if (!p)
+		return ENOMEM;
+	p->mp_pgno = 0;
+	p->mp_flags = P_META;
+	*(MDB_meta *)METADATA(p) = *meta;
+
+	q = (MDB_page *)((char *)p + psize);
+	q->mp_pgno = 1;
+	q->mp_flags = P_META;
+	*(MDB_meta *)METADATA(q) = *meta;
+
+#ifdef _WIN32
+	{
+		DWORD len;
+		OVERLAPPED ov;
+		memset(&ov, 0, sizeof(ov));
+		rc = WriteFile(env->me_fd, p, psize * 2, &len, &ov);
+		rc = rc ? (len == psize * 2 ? MDB_SUCCESS : EIO) : ErrCode();
+	}
+#else
+	rc = pwrite(env->me_fd, p, psize * 2, 0);
+	rc = (rc == (int)psize * 2) ? MDB_SUCCESS : rc < 0 ? ErrCode() : EIO;
+#endif
+	free(p);
+	return rc;
+}
+
+/** Update the environment info to commit a transaction.
+ * @param[in] txn the transaction that's being committed
+ * @return 0 on success, non-zero on failure.
+ */ +static int +mdb_env_write_meta(MDB_txn *txn) +{ + MDB_env *env; + MDB_meta meta, metab, *mp; + off_t off; + int rc, len, toggle; + char *ptr; + HANDLE mfd; +#ifdef _WIN32 + OVERLAPPED ov; +#else + int r2; +#endif + + assert(txn != NULL); + assert(txn->mt_env != NULL); + + toggle = !txn->mt_toggle; + DPRINTF("writing meta page %d for root page %zu", + toggle, txn->mt_dbs[MAIN_DBI].md_root); + + env = txn->mt_env; + mp = env->me_metas[toggle]; + + if (env->me_flags & MDB_WRITEMAP) { + /* Persist any increases of mapsize config */ + if (env->me_mapsize > mp->mm_mapsize) + mp->mm_mapsize = env->me_mapsize; + mp->mm_dbs[0] = txn->mt_dbs[0]; + mp->mm_dbs[1] = txn->mt_dbs[1]; + mp->mm_last_pg = txn->mt_next_pgno - 1; + mp->mm_txnid = txn->mt_txnid; + if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { + rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; + ptr = env->me_map; + if (toggle) + ptr += env->me_psize; + if (MDB_MSYNC(ptr, env->me_psize, rc)) { + rc = ErrCode(); + goto fail; + } + } + goto done; + } + metab.mm_txnid = env->me_metas[toggle]->mm_txnid; + metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; + + ptr = (char *)&meta; + if (env->me_mapsize > mp->mm_mapsize) { + /* Persist any increases of mapsize config */ + meta.mm_mapsize = env->me_mapsize; + off = offsetof(MDB_meta, mm_mapsize); + } else { + off = offsetof(MDB_meta, mm_dbs[0].md_depth); + } + len = sizeof(MDB_meta) - off; + + ptr += off; + meta.mm_dbs[0] = txn->mt_dbs[0]; + meta.mm_dbs[1] = txn->mt_dbs[1]; + meta.mm_last_pg = txn->mt_next_pgno - 1; + meta.mm_txnid = txn->mt_txnid; + + if (toggle) + off += env->me_psize; + off += PAGEHDRSZ; + + /* Write to the SYNC fd */ + mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? + env->me_fd : env->me_mfd; +#ifdef _WIN32 + { + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) + rc = -1; + } +#else + rc = pwrite(mfd, ptr, len, off); +#endif + if (rc != len) { + rc = rc < 0 ? ErrCode() : EIO; + DPUTS("write failed, disk error?"); + /* On a failure, the pagecache still contains the new data. + * Write some old data back, to prevent it from being used. + * Use the non-SYNC fd; we know it will fail anyway. + */ + meta.mm_last_pg = metab.mm_last_pg; + meta.mm_txnid = metab.mm_txnid; +#ifdef _WIN32 + memset(&ov, 0, sizeof(ov)); + ov.Offset = off; + WriteFile(env->me_fd, ptr, len, NULL, &ov); +#else + r2 = pwrite(env->me_fd, ptr, len, off); +#endif +fail: + env->me_flags |= MDB_FATAL_ERROR; + return rc; + } +done: + /* Memory ordering issues are irrelevant; since the entire writer + * is wrapped by wmutex, all of these changes will become visible + * after the wmutex is unlocked. Since the DB is multi-version, + * readers will get consistent data regardless of how fresh or + * how stale their view of these values is. + */ + env->me_txns->mti_txnid = txn->mt_txnid; + + return MDB_SUCCESS; +} + +/** Check both meta pages to see which one is newer. + * @param[in] env the environment handle + * @return meta toggle (0 or 1). 
+ */ +static int +mdb_env_pick_meta(const MDB_env *env) +{ + return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); +} + +int +mdb_env_create(MDB_env **env) +{ + MDB_env *e; + + e = calloc(1, sizeof(MDB_env)); + if (!e) + return ENOMEM; + + e->me_maxreaders = DEFAULT_READERS; + e->me_maxdbs = e->me_numdbs = 2; + e->me_fd = INVALID_HANDLE_VALUE; + e->me_lfd = INVALID_HANDLE_VALUE; + e->me_mfd = INVALID_HANDLE_VALUE; +#ifdef MDB_USE_POSIX_SEM + e->me_rmutex = SEM_FAILED; + e->me_wmutex = SEM_FAILED; +#endif + e->me_pid = getpid(); + VGMEMP_CREATE(e,0,0); + *env = e; + return MDB_SUCCESS; +} + +int +mdb_env_set_mapsize(MDB_env *env, size_t size) +{ + if (env->me_map) + return EINVAL; + env->me_mapsize = size; + if (env->me_psize) + env->me_maxpg = env->me_mapsize / env->me_psize; + return MDB_SUCCESS; +} + +int +mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) +{ + if (env->me_map) + return EINVAL; + env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */ + return MDB_SUCCESS; +} + +int +mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) +{ + if (env->me_map || readers < 1) + return EINVAL; + env->me_maxreaders = readers; + return MDB_SUCCESS; +} + +int +mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) +{ + if (!env || !readers) + return EINVAL; + *readers = env->me_maxreaders; + return MDB_SUCCESS; +} + +/** Further setup required for opening an MDB environment + */ +static int +mdb_env_open2(MDB_env *env) +{ + unsigned int flags = env->me_flags; + int i, newenv = 0; + MDB_meta meta; + MDB_page *p; +#ifndef _WIN32 + int prot; +#endif + + memset(&meta, 0, sizeof(meta)); + + if ((i = mdb_env_read_header(env, &meta)) != 0) { + if (i != ENOENT) + return i; + DPUTS("new mdbenv"); + newenv = 1; + } + + /* Was a mapsize configured? */ + if (!env->me_mapsize) { + /* If this is a new environment, take the default, + * else use the size recorded in the existing env. + */ + env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize; + } else if (env->me_mapsize < meta.mm_mapsize) { + /* If the configured size is smaller, make sure it's + * still big enough. Silently round up to minimum if not. + */ + size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; + if (env->me_mapsize < minsize) + env->me_mapsize = minsize; + } + +#ifdef _WIN32 + { + int rc; + HANDLE mh; + LONG sizelo, sizehi; + sizelo = env->me_mapsize & 0xffffffff; + sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */ + + /* See if we should use QueryLimited */ + rc = GetVersion(); + if ((rc & 0xff) > 5) + env->me_pidquery = PROCESS_QUERY_LIMITED_INFORMATION; + else + env->me_pidquery = PROCESS_QUERY_INFORMATION; + + /* Windows won't create mappings for zero length files. + * Just allocate the maxsize right now. + */ + if (newenv) { + if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + || !SetEndOfFile(env->me_fd) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) + return ErrCode(); + } + mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? + PAGE_READWRITE : PAGE_READONLY, + sizehi, sizelo, NULL); + if (!mh) + return ErrCode(); + env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? + FILE_MAP_WRITE : FILE_MAP_READ, + 0, 0, env->me_mapsize, meta.mm_address); + rc = env->me_map ? 
0 : ErrCode();
+		CloseHandle(mh);
+		if (rc)
+			return rc;
+	}
+#else
+	i = MAP_SHARED;
+	prot = PROT_READ;
+	if (flags & MDB_WRITEMAP) {
+		prot |= PROT_WRITE;
+		if (ftruncate(env->me_fd, env->me_mapsize) < 0)
+			return ErrCode();
+	}
+	env->me_map = mmap(meta.mm_address, env->me_mapsize, prot, i,
+		env->me_fd, 0);
+	if (env->me_map == MAP_FAILED) {
+		env->me_map = NULL;
+		return ErrCode();
+	}
+	/* Turn off readahead. It's harmful when the DB is larger than RAM. */
+#ifdef MADV_RANDOM
+	madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
+#else
+#ifdef POSIX_MADV_RANDOM
+	posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
+#endif /* POSIX_MADV_RANDOM */
+#endif /* MADV_RANDOM */
+#endif /* _WIN32 */
+
+	if (newenv) {
+		if (flags & MDB_FIXEDMAP)
+			meta.mm_address = env->me_map;
+		i = mdb_env_init_meta(env, &meta);
+		if (i != MDB_SUCCESS) {
+			return i;
+		}
+	} else if (meta.mm_address && env->me_map != meta.mm_address) {
+		/* Can happen because the address argument to mmap() is just a
+		 * hint. mmap() can pick another, e.g. if the range is in use.
+		 * The MAP_FIXED flag would prevent that, but then mmap could
+		 * instead unmap existing pages to make room for the new map.
+		 */
+		return EBUSY;	/* TODO: Make a new MDB_* error code? */
+	}
+	env->me_psize = meta.mm_psize;
+	env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
+	env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
+
+	env->me_maxpg = env->me_mapsize / env->me_psize;
+
+	p = (MDB_page *)env->me_map;
+	env->me_metas[0] = METADATA(p);
+	env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + meta.mm_psize);
+
+#if MDB_DEBUG
+	{
+		int toggle = mdb_env_pick_meta(env);
+		MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI];
+
+		DPRINTF("opened database version %u, pagesize %u",
+			env->me_metas[0]->mm_version, env->me_psize);
+		DPRINTF("using meta page %d", toggle);
+		DPRINTF("depth: %u", db->md_depth);
+		DPRINTF("entries: %zu", db->md_entries);
+		DPRINTF("branch pages: %zu", db->md_branch_pages);
+		DPRINTF("leaf pages: %zu", db->md_leaf_pages);
+		DPRINTF("overflow pages: %zu", db->md_overflow_pages);
+		DPRINTF("root: %zu", db->md_root);
+	}
+#endif
+
+	return MDB_SUCCESS;
+}
+
+
+/** Release a reader thread's slot in the reader lock table.
+ * This function is called automatically when a thread exits.
+ * @param[in] ptr This points to the slot in the reader lock table.
+ */
+static void
+mdb_env_reader_dest(void *ptr)
+{
+	MDB_reader *reader = ptr;
+
+	reader->mr_pid = 0;
+}
+
+#ifdef _WIN32
+/** Junk for arranging thread-specific callbacks on Windows. This is
+ * necessarily platform and compiler-specific. Windows supports up
+ * to 1088 keys. Let's assume nobody opens more than 64 environments
+ * in a single process, for now. They can override this if needed.
+ */
+#ifndef MAX_TLS_KEYS
+#define MAX_TLS_KEYS 64
+#endif
+static pthread_key_t mdb_tls_keys[MAX_TLS_KEYS];
+static int mdb_tls_nkeys;
+
+static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr)
+{
+	int i;
+	switch(reason) {
+	case DLL_PROCESS_ATTACH: break;
+	case DLL_THREAD_ATTACH: break;
+	case DLL_THREAD_DETACH:
+		for (i=0; i<mdb_tls_nkeys; i++) {
+			MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
+			if (r) {
+				mdb_env_reader_dest(r);
+				pthread_setspecific(mdb_tls_keys[i], NULL);
+			}
+		}
+		break;
+	case DLL_PROCESS_DETACH: break;
+	}
+}
+#ifdef _MSC_VER
+#ifdef _WIN64
+#pragma const_seg(".CRT$XLB")
+extern const PIMAGE_TLS_CALLBACK mdb_tls_cbp;
+const PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
+#pragma const_seg()
+#else	/* WIN32 */
+#pragma data_seg(".CRT$XLB")
+PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
+#pragma data_seg()
+#endif	/* WIN 32/64 */
+#else	/* _MSC_VER */
+PIMAGE_TLS_CALLBACK mdb_tls_cbp __attribute__((section (".CRT$XLB"))) = mdb_tls_callback;
+#endif	/* _MSC_VER */
+#endif	/* _WIN32 */
+
+/** Downgrade the exclusive lock on the region back to shared */
+static int
+mdb_env_share_locks(MDB_env *env, int *excl)
+{
+	int rc = 0, toggle = mdb_env_pick_meta(env);
+
+	env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid;
+
+#ifdef _WIN32
+	{
+		OVERLAPPED ov;
+		/* First acquire a shared lock. The Unlock will
+		 * then release the existing exclusive lock.
+ */
+		memset(&ov, 0, sizeof(ov));
+		if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) {
+			rc = ErrCode();
+		} else {
+			UnlockFile(env->me_lfd, 0, 0, 1, 0);
+			*excl = 0;
+		}
+	}
+#else
+	{
+		struct flock lock_info;
+		/* The shared lock replaces the existing lock */
+		memset((void *)&lock_info, 0, sizeof(lock_info));
+		lock_info.l_type = F_RDLCK;
+		lock_info.l_whence = SEEK_SET;
+		lock_info.l_start = 0;
+		lock_info.l_len = 1;
+		while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) &&
+				(rc = ErrCode()) == EINTR) ;
+		*excl = rc ? -1 : 0;	/* error may mean we lost the lock */
+	}
+#endif
+
+	return rc;
+}
+
+/** Try to get exclusive lock, otherwise shared.
+ *	Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
+ */
+static int
+mdb_env_excl_lock(MDB_env *env, int *excl)
+{
+	int rc = 0;
+#ifdef _WIN32
+	if (LockFile(env->me_lfd, 0, 0, 1, 0)) {
+		*excl = 1;
+	} else {
+		OVERLAPPED ov;
+		memset(&ov, 0, sizeof(ov));
+		if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) {
+			*excl = 0;
+		} else {
+			rc = ErrCode();
+		}
+	}
+#else
+	struct flock lock_info;
+	memset((void *)&lock_info, 0, sizeof(lock_info));
+	lock_info.l_type = F_WRLCK;
+	lock_info.l_whence = SEEK_SET;
+	lock_info.l_start = 0;
+	lock_info.l_len = 1;
+	while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) &&
+			(rc = ErrCode()) == EINTR) ;
+	if (!rc) {
+		*excl = 1;
+	} else
+# ifdef MDB_USE_POSIX_SEM
+	if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */
+# endif
+	{
+		lock_info.l_type = F_RDLCK;
+		while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) &&
+				(rc = ErrCode()) == EINTR) ;
+		if (rc == 0)
+			*excl = 0;
+	}
+#endif
+	return rc;
+}
+
+#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
+/*
+ * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code
+ *
+ * @(#) $Revision: 5.1 $
+ * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $
+ * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $
+ *
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ ***
+ *
+ * Please do not copyright this code. This code is in the public domain.
+ *
+ * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
+ * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ *
+ * By:
+ *	chongo <Landon Curt Noll> /\oo/\
+ *	http://www.isthe.com/chongo/
+ *
+ * Share and Enjoy! :-)
+ */
+
+typedef unsigned long long	mdb_hash_t;
+#define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL)
+
+/** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer
+ * @param[in] val	value to hash
+ * @param[in] hval	initial value for hash
+ * @return 64 bit hash
+ *
+ * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the
+ * hval arg on the first call.
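+ *
+ * For reference, the shift/add form in the body below is an expansion of
+ * multiplying by the 64 bit FNV prime (a sketch, not extra code):
+ *
+ *	hval ^= *s++;			xor in the next octet
+ *	hval *= 0x100000001b3ULL;	the prime, 2^40 + 2^8 + 0xb3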
+ */
+static mdb_hash_t
+mdb_hash_val(MDB_val *val, mdb_hash_t hval)
+{
+	unsigned char *s = (unsigned char *)val->mv_data;	/* unsigned string */
+	unsigned char *end = s + val->mv_size;
+	/*
+	 * FNV-1a hash each octet of the string
+	 */
+	while (s < end) {
+		/* xor the bottom with the current octet */
+		hval ^= (mdb_hash_t)*s++;
+
+		/* multiply by the 64 bit FNV magic prime mod 2^64 */
+		hval += (hval << 1) + (hval << 4) + (hval << 5) +
+			(hval << 7) + (hval << 8) + (hval << 40);
+	}
+	/* return our new hash value */
+	return hval;
+}
+
+/** Hash the value and output the hash in hex.
+ * @param[in] val	value to hash
+ * @param[out] hexbuf	an array of 17 chars to hold the hash
+ */
+static void
+mdb_hash_hex(MDB_val *val, char *hexbuf)
+{
+	int i;
+	mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT);
+	for (i=0; i<8; i++) {
+		hexbuf += sprintf(hexbuf, "%02x", (unsigned int)h & 0xff);
+		h >>= 8;
+	}
+}
+#endif
+
+/** Open and/or initialize the lock region for the environment.
+ * @param[in] env The MDB environment.
+ * @param[in] lpath The pathname of the file used for the lock region.
+ * @param[in] mode The Unix permissions for the file, if we create it.
+ * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
+{
+#ifdef _WIN32
+#	define MDB_ERRCODE_ROFS	ERROR_WRITE_PROTECT
+#else
+#	define MDB_ERRCODE_ROFS	EROFS
+#ifdef O_CLOEXEC	/* Linux: Open file and set FD_CLOEXEC atomically */
+#	define MDB_CLOEXEC	O_CLOEXEC
+#else
+	int fdflags;
+#	define MDB_CLOEXEC	0
+#endif
+#endif
+	int rc;
+	off_t size, rsize;
+
+#ifdef _WIN32
+	env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE,
+		FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS,
+		FILE_ATTRIBUTE_NORMAL, NULL);
+#else
+	env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode);
+#endif
+	if (env->me_lfd == INVALID_HANDLE_VALUE) {
+		rc = ErrCode();
+		if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) {
+			return MDB_SUCCESS;
+		}
+		goto fail_errno;
+	}
+#if ! ((MDB_CLOEXEC) || defined(_WIN32))
+	/* Lose record locks when exec*() */
+	if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0)
+			fcntl(env->me_lfd, F_SETFD, fdflags);
+#endif
+
+	if (!(env->me_flags & MDB_NOTLS)) {
+		rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest);
+		if (rc)
+			goto fail;
+		env->me_flags |= MDB_ENV_TXKEY;
+#ifdef _WIN32
+		/* Windows TLS callbacks need help finding their TLS info. */
+		if (mdb_tls_nkeys >= MAX_TLS_KEYS) {
+			rc = MDB_TLS_FULL;
+			goto fail;
+		}
+		mdb_tls_keys[mdb_tls_nkeys++] = env->me_txkey;
+#endif
+	}
+
+	/* Try to get exclusive lock. If we succeed, then
+	 * nobody is using the lock region and we should initialize it.
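+	 * A sketch of the lifecycle this sets up (no new code, just the flow
+	 * of the calls around it): the first opener wins the exclusive lock,
+	 * initializes the region, and later downgrades via
+	 * mdb_env_share_locks(); subsequent openers get *excl == 0 and only
+	 * validate the region's magic and version fields.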
+ */ + if ((rc = mdb_env_excl_lock(env, excl))) goto fail; + +#ifdef _WIN32 + size = GetFileSize(env->me_lfd, NULL); +#else + size = lseek(env->me_lfd, 0, SEEK_END); + if (size == -1) goto fail_errno; +#endif + rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); + if (size < rsize && *excl > 0) { +#ifdef _WIN32 + if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != rsize + || !SetEndOfFile(env->me_lfd)) + goto fail_errno; +#else + if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; +#endif + } else { + rsize = size; + size = rsize - sizeof(MDB_txninfo); + env->me_maxreaders = size/sizeof(MDB_reader) + 1; + } + { +#ifdef _WIN32 + HANDLE mh; + mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE, + 0, 0, NULL); + if (!mh) goto fail_errno; + env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL); + CloseHandle(mh); + if (!env->me_txns) goto fail_errno; +#else + void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, + env->me_lfd, 0); + if (m == MAP_FAILED) goto fail_errno; + env->me_txns = m; +#endif + } + if (*excl > 0) { +#ifdef _WIN32 + BY_HANDLE_FILE_INFORMATION stbuf; + struct { + DWORD volume; + DWORD nhigh; + DWORD nlow; + } idbuf; + MDB_val val; + char hexbuf[17]; + + if (!mdb_sec_inited) { + InitializeSecurityDescriptor(&mdb_null_sd, + SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(&mdb_null_sd, TRUE, 0, FALSE); + mdb_all_sa.nLength = sizeof(SECURITY_ATTRIBUTES); + mdb_all_sa.bInheritHandle = FALSE; + mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd; + mdb_sec_inited = 1; + } + if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno; + idbuf.volume = stbuf.dwVolumeSerialNumber; + idbuf.nhigh = stbuf.nFileIndexHigh; + idbuf.nlow = stbuf.nFileIndexLow; + val.mv_data = &idbuf; + val.mv_size = sizeof(idbuf); + mdb_hash_hex(&val, hexbuf); + sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", hexbuf); + sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf); + env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_POSIX_SEM) + struct stat stbuf; + struct { + dev_t dev; + ino_t ino; + } idbuf; + MDB_val val; + char hexbuf[17]; + + if (fstat(env->me_lfd, &stbuf)) goto fail_errno; + idbuf.dev = stbuf.st_dev; + idbuf.ino = stbuf.st_ino; + val.mv_data = &idbuf; + val.mv_size = sizeof(idbuf); + mdb_hash_hex(&val, hexbuf); + sprintf(env->me_txns->mti_rmname, "/MDBr%s", hexbuf); + sprintf(env->me_txns->mti_wmname, "/MDBw%s", hexbuf); + /* Clean up after a previous run, if needed: Try to + * remove both semaphores before doing anything else. 
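+	 * sem_unlink() on a name that no longer exists simply fails with
+	 * ENOENT, so the results can be ignored; the sem_open() calls below
+	 * with O_CREAT|O_EXCL must then create both semaphores fresh, each
+	 * with an initial count of 1, i.e. behaving as unlocked mutexes.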
+ */ + sem_unlink(env->me_txns->mti_rmname); + sem_unlink(env->me_txns->mti_wmname); + env->me_rmutex = sem_open(env->me_txns->mti_rmname, + O_CREAT|O_EXCL, mode, 1); + if (env->me_rmutex == SEM_FAILED) goto fail_errno; + env->me_wmutex = sem_open(env->me_txns->mti_wmname, + O_CREAT|O_EXCL, mode, 1); + if (env->me_wmutex == SEM_FAILED) goto fail_errno; +#else /* MDB_USE_POSIX_SEM */ + pthread_mutexattr_t mattr; + + if ((rc = pthread_mutexattr_init(&mattr)) + || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) + || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr)) + || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) + goto fail; + pthread_mutexattr_destroy(&mattr); +#endif /* _WIN32 || MDB_USE_POSIX_SEM */ + + env->me_txns->mti_version = MDB_LOCK_VERSION; + env->me_txns->mti_magic = MDB_MAGIC; + env->me_txns->mti_txnid = 0; + env->me_txns->mti_numreaders = 0; + + } else { + if (env->me_txns->mti_magic != MDB_MAGIC) { + DPUTS("lock region has invalid magic"); + rc = MDB_INVALID; + goto fail; + } + if (env->me_txns->mti_version != MDB_LOCK_VERSION) { + DPRINTF("lock region is version %u, expected version %u", + env->me_txns->mti_version, MDB_LOCK_VERSION); + rc = MDB_VERSION_MISMATCH; + goto fail; + } + rc = ErrCode(); + if (rc && rc != EACCES && rc != EAGAIN) { + goto fail; + } +#ifdef _WIN32 + env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); + if (!env->me_rmutex) goto fail_errno; + env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); + if (!env->me_wmutex) goto fail_errno; +#elif defined(MDB_USE_POSIX_SEM) + env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); + if (env->me_rmutex == SEM_FAILED) goto fail_errno; + env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); + if (env->me_wmutex == SEM_FAILED) goto fail_errno; +#endif + } + return MDB_SUCCESS; + +fail_errno: + rc = ErrCode(); +fail: + return rc; +} + + /** The name of the lock file in the DB environment */ +#define LOCKNAME "/lock.mdb" + /** The name of the data file in the DB environment */ +#define DATANAME "/data.mdb" + /** The suffix of the lock file when no subdir is used */ +#define LOCKSUFF "-lock" + /** Only a subset of the @ref mdb_env flags can be changed + * at runtime. Changing other flags requires closing the + * environment and re-opening it with the new flags. 
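+ *
+ * A sketch of the guard mdb_env_open() applies below: any requested bit
+ * outside these two masks is rejected before anything is opened,
+ *
+ *	if (flags & ~(CHANGEABLE|CHANGELESS))
+ *		return EINVAL;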
+ */ +#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC) +#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP|MDB_NOTLS) + +int +mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) +{ + int oflags, rc, len, excl = -1; + char *lpath, *dpath; + + if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) + return EINVAL; + + len = strlen(path); + if (flags & MDB_NOSUBDIR) { + rc = len + sizeof(LOCKSUFF) + len + 1; + } else { + rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); + } + lpath = malloc(rc); + if (!lpath) + return ENOMEM; + if (flags & MDB_NOSUBDIR) { + dpath = lpath + len + sizeof(LOCKSUFF); + sprintf(lpath, "%s" LOCKSUFF, path); + strcpy(dpath, path); + } else { + dpath = lpath + len + sizeof(LOCKNAME); + sprintf(lpath, "%s" LOCKNAME, path); + sprintf(dpath, "%s" DATANAME, path); + } + + rc = MDB_SUCCESS; + flags |= env->me_flags; + if (flags & MDB_RDONLY) { + /* silently ignore WRITEMAP when we're only getting read access */ + flags &= ~MDB_WRITEMAP; + } else { + if (!((env->me_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)) && + (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) + rc = ENOMEM; + } + env->me_flags = flags |= MDB_ENV_ACTIVE; + if (rc) + goto leave; + + env->me_path = strdup(path); + env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); + env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); + if (!(env->me_dbxs && env->me_path && env->me_dbflags)) { + rc = ENOMEM; + goto leave; + } + + rc = mdb_env_setup_locks(env, lpath, mode, &excl); + if (rc) + goto leave; + +#ifdef _WIN32 + if (F_ISSET(flags, MDB_RDONLY)) { + oflags = GENERIC_READ; + len = OPEN_EXISTING; + } else { + oflags = GENERIC_READ|GENERIC_WRITE; + len = OPEN_ALWAYS; + } + mode = FILE_ATTRIBUTE_NORMAL; + env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, + NULL, len, mode, NULL); +#else + if (F_ISSET(flags, MDB_RDONLY)) + oflags = O_RDONLY; + else + oflags = O_RDWR | O_CREAT; + + env->me_fd = open(dpath, oflags, mode); +#endif + if (env->me_fd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + + if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { + if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { + env->me_mfd = env->me_fd; + } else { + /* Synchronous fd for meta writes. Needed even with + * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. 
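+			 * The approach is simply to open the same data file a
+			 * second time with write-through semantics (O_DSYNC via
+			 * MDB_DSYNC here, FILE_FLAG_WRITE_THROUGH on Windows),
+			 * so meta page writes on me_mfd are durable without a
+			 * separate flush call.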
+ */
+#ifdef _WIN32
+			env->me_mfd = CreateFile(dpath, oflags,
+				FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len,
+				mode | FILE_FLAG_WRITE_THROUGH, NULL);
+#else
+			env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode);
+#endif
+			if (env->me_mfd == INVALID_HANDLE_VALUE) {
+				rc = ErrCode();
+				goto leave;
+			}
+		}
+		DPRINTF("opened dbenv %p", (void *) env);
+		if (excl > 0) {
+			rc = mdb_env_share_locks(env, &excl);
+		}
+	}
+
+leave:
+	if (rc) {
+		mdb_env_close0(env, excl);
+	}
+	free(lpath);
+	return rc;
+}
+
+/** Destroy resources from mdb_env_open(), clear our readers & DBIs */
+static void
+mdb_env_close0(MDB_env *env, int excl)
+{
+	int i;
+
+	if (!(env->me_flags & MDB_ENV_ACTIVE))
+		return;
+
+	/* Doing this here since me_dbxs may not exist during mdb_env_close */
+	for (i = env->me_maxdbs; --i > MAIN_DBI; )
+		free(env->me_dbxs[i].md_name.mv_data);
+
+	free(env->me_dbflags);
+	free(env->me_dbxs);
+	free(env->me_path);
+	free(env->me_dirty_list);
+	mdb_midl_free(env->me_free_pgs);
+
+	if (env->me_flags & MDB_ENV_TXKEY) {
+		pthread_key_delete(env->me_txkey);
+#ifdef _WIN32
+		/* Delete our key from the global list */
+		for (i=0; i<mdb_tls_nkeys; i++)
+			if (mdb_tls_keys[i] == env->me_txkey) {
+				mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1];
+				mdb_tls_nkeys--;
+				break;
+			}
+#endif
+	}
+
+	if (env->me_map) {
+		munmap(env->me_map, env->me_mapsize);
+	}
+	if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE)
+		(void) close(env->me_mfd);
+	if (env->me_fd != INVALID_HANDLE_VALUE)
+		(void) close(env->me_fd);
+	if (env->me_txns) {
+		pid_t pid = env->me_pid;
+		/* Clearing readers is done in this function because
+		 * me_txkey with its destructor must be disabled first.
+		 */
+		for (i = env->me_numreaders; --i >= 0; )
+			if (env->me_txns->mti_readers[i].mr_pid == pid)
+				env->me_txns->mti_readers[i].mr_pid = 0;
+#ifdef _WIN32
+		if (env->me_rmutex) {
+			CloseHandle(env->me_rmutex);
+			if (env->me_wmutex) CloseHandle(env->me_wmutex);
+		}
+		/* Windows automatically destroys the mutexes when
+		 * the last handle closes.
+		 */
+#elif defined(MDB_USE_POSIX_SEM)
+		if (env->me_rmutex != SEM_FAILED) {
+			sem_close(env->me_rmutex);
+			if (env->me_wmutex != SEM_FAILED)
+				sem_close(env->me_wmutex);
+			/* If we have the filelock: If we are the
+			 * only remaining user, clean up semaphores.
+			 */
+			if (excl == 0)
+				mdb_env_excl_lock(env, &excl);
+			if (excl > 0) {
+				sem_unlink(env->me_txns->mti_rmname);
+				sem_unlink(env->me_txns->mti_wmname);
+			}
+		}
+#endif
+		munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
+	}
+	if (env->me_lfd != INVALID_HANDLE_VALUE) {
+#ifdef _WIN32
+		if (excl >= 0) {
+			/* Unlock the lockfile. Windows would have unlocked it
+			 * after closing anyway, but not necessarily at once.
+			 */
+			UnlockFile(env->me_lfd, 0, 0, 1, 0);
+		}
+#endif
+		(void) close(env->me_lfd);
+	}
+
+	env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
+}
+
+int
+mdb_env_copyfd(MDB_env *env, HANDLE fd)
+{
+	MDB_txn *txn = NULL;
+	int rc;
+	size_t wsize;
+	char *ptr;
+
+	/* Do the lock/unlock of the reader mutex before starting the
+	 * write txn. Otherwise other read txns could block writers.
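+	 * In outline the copy below runs as: begin a read txn, take the
+	 * writer mutex just long enough to snapshot the two meta pages,
+	 * release it, then stream the remaining data pages to fd with no
+	 * lock held.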
+ */ + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) + return rc; + + if (env->me_txns) { + /* We must start the actual read txn after blocking writers */ + mdb_txn_reset0(txn, "reset-stage1"); + + /* Temporarily block writers until we snapshot the meta pages */ + LOCK_MUTEX_W(env); + + rc = mdb_txn_renew0(txn); + if (rc) { + UNLOCK_MUTEX_W(env); + goto leave; + } + } + + wsize = env->me_psize * 2; +#ifdef _WIN32 + { + DWORD len; + rc = WriteFile(fd, env->me_map, wsize, &len, NULL); + rc = rc ? (len == wsize ? MDB_SUCCESS : EIO) : ErrCode(); + } +#else + rc = write(fd, env->me_map, wsize); + rc = rc == (int)wsize ? MDB_SUCCESS : rc < 0 ? ErrCode() : EIO; +#endif + if (env->me_txns) + UNLOCK_MUTEX_W(env); + + if (rc) + goto leave; + + ptr = env->me_map + wsize; + wsize = txn->mt_next_pgno * env->me_psize - wsize; +#ifdef _WIN32 + while (wsize > 0) { + DWORD len, w2; + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + rc = WriteFile(fd, ptr, w2, &len, NULL); + rc = rc ? (len == w2 ? MDB_SUCCESS : EIO) : ErrCode(); + if (rc) break; + wsize -= w2; + ptr += w2; + } +#else + while (wsize > 0) { + size_t w2; + ssize_t wres; + if (wsize > MAX_WRITE) + w2 = MAX_WRITE; + else + w2 = wsize; + wres = write(fd, ptr, w2); + rc = wres == (ssize_t)w2 ? MDB_SUCCESS : wres < 0 ? ErrCode() : EIO; + if (rc) break; + wsize -= wres; + ptr += wres; + } +#endif + +leave: + mdb_txn_abort(txn); + return rc; +} + +int +mdb_env_copy(MDB_env *env, const char *path) +{ + int rc, len; + char *lpath; + HANDLE newfd = INVALID_HANDLE_VALUE; + + if (env->me_flags & MDB_NOSUBDIR) { + lpath = (char *)path; + } else { + len = strlen(path); + len += sizeof(DATANAME); + lpath = malloc(len); + if (!lpath) + return ENOMEM; + sprintf(lpath, "%s" DATANAME, path); + } + + /* The destination path must exist, but the destination file must not. + * We don't want the OS to cache the writes, since the source data is + * already in the OS cache. + */ +#ifdef _WIN32 + newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, + FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); +#else + newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL +#ifdef O_DIRECT + |O_DIRECT +#endif + , 0666); +#endif + if (newfd == INVALID_HANDLE_VALUE) { + rc = ErrCode(); + goto leave; + } + +#ifdef F_NOCACHE /* __APPLE__ */ + rc = fcntl(newfd, F_NOCACHE, 1); + if (rc) { + rc = ErrCode(); + goto leave; + } +#endif + + rc = mdb_env_copyfd(env, newfd); + +leave: + if (!(env->me_flags & MDB_NOSUBDIR)) + free(lpath); + if (newfd != INVALID_HANDLE_VALUE) + if (close(newfd) < 0 && rc == MDB_SUCCESS) + rc = ErrCode(); + + return rc; +} + +void +mdb_env_close(MDB_env *env) +{ + MDB_page *dp; + + if (env == NULL) + return; + + VGMEMP_DESTROY(env); + while ((dp = env->me_dpages) != NULL) { + VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next)); + env->me_dpages = dp->mp_next; + free(dp); + } + + mdb_env_close0(env, 0); + free(env); +} + +/** Compare two items pointing at aligned size_t's */ +static int +mdb_cmp_long(const MDB_val *a, const MDB_val *b) +{ + return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : + *(size_t *)a->mv_data > *(size_t *)b->mv_data; +} + +/** Compare two items pointing at aligned int's */ +static int +mdb_cmp_int(const MDB_val *a, const MDB_val *b) +{ + return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 : + *(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data; +} + +/** Compare two items pointing at ints of unknown alignment. + * Nodes and keys are guaranteed to be 2-byte aligned. 
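+ *
+ * Sketch of the idea: only 16-bit loads are safe here, and on a
+ * little-endian machine the most significant chunk of the integer is
+ * last, so the loop below compares unsigned shorts from the end, e.g.
+ * for 4-byte ints:
+ *
+ *	x = u[1] - c[1]; if (!x) x = u[0] - c[0];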
+ */ +static int +mdb_cmp_cint(const MDB_val *a, const MDB_val *b) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned short *u, *c; + int x; + + u = (unsigned short *) ((char *) a->mv_data + a->mv_size); + c = (unsigned short *) ((char *) b->mv_data + a->mv_size); + do { + x = *--u - *--c; + } while(!x && u > (unsigned short *)a->mv_data); + return x; +#else + return memcmp(a->mv_data, b->mv_data, a->mv_size); +#endif +} + +/** Compare two items lexically */ +static int +mdb_cmp_memn(const MDB_val *a, const MDB_val *b) +{ + int diff; + ssize_t len_diff; + unsigned int len; + + len = a->mv_size; + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + len = b->mv_size; + len_diff = 1; + } + + diff = memcmp(a->mv_data, b->mv_data, len); + return diff ? diff : len_diff<0 ? -1 : len_diff; +} + +/** Compare two items in reverse byte order */ +static int +mdb_cmp_memnr(const MDB_val *a, const MDB_val *b) +{ + const unsigned char *p1, *p2, *p1_lim; + ssize_t len_diff; + int diff; + + p1_lim = (const unsigned char *)a->mv_data; + p1 = (const unsigned char *)a->mv_data + a->mv_size; + p2 = (const unsigned char *)b->mv_data + b->mv_size; + + len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; + if (len_diff > 0) { + p1_lim += len_diff; + len_diff = 1; + } + + while (p1 > p1_lim) { + diff = *--p1 - *--p2; + if (diff) + return diff; + } + return len_diff<0 ? -1 : len_diff; +} + +/** Search for key within a page, using binary search. + * Returns the smallest entry larger or equal to the key. + * If exactp is non-null, stores whether the found entry was an exact match + * in *exactp (1 or 0). + * Updates the cursor index with the index of the found entry. + * If no entry larger or equal to the key is found, returns NULL. + */ +static MDB_node * +mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) +{ + unsigned int i = 0, nkeys; + int low, high; + int rc = 0; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NULL; + MDB_val nodekey; + MDB_cmp_func *cmp; + DKBUF; + + nkeys = NUMKEYS(mp); + +#if MDB_DEBUG + { + pgno_t pgno; + COPY_PGNO(pgno, mp->mp_pgno); + DPRINTF("searching %u keys in %s %spage %zu", + nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", + pgno); + } +#endif + + assert(nkeys > 0); + + low = IS_LEAF(mp) ? 0 : 1; + high = nkeys - 1; + cmp = mc->mc_dbx->md_cmp; + + /* Branch pages have no data, so if using integer keys, + * alignment is guaranteed. Use faster mdb_cmp_int. + */ + if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { + if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) + cmp = mdb_cmp_long; + else + cmp = mdb_cmp_int; + } + + if (IS_LEAF2(mp)) { + nodekey.mv_size = mc->mc_db->md_pad; + node = NODEPTR(mp, 0); /* fake */ + while (low <= high) { + i = (low + high) >> 1; + nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size); + rc = cmp(key, &nodekey); + DPRINTF("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc); + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } else { + while (low <= high) { + i = (low + high) >> 1; + + node = NODEPTR(mp, i); + nodekey.mv_size = NODEKSZ(node); + nodekey.mv_data = NODEKEY(node); + + rc = cmp(key, &nodekey); +#if MDB_DEBUG + if (IS_LEAF(mp)) + DPRINTF("found leaf index %u [%s], rc = %i", + i, DKEY(&nodekey), rc); + else + DPRINTF("found branch index %u [%s -> %zu], rc = %i", + i, DKEY(&nodekey), NODEPGNO(node), rc); +#endif + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + } + + if (rc > 0) { /* Found entry is less than the key. 
*/ + i++; /* Skip to get the smallest entry larger than key. */ + if (!IS_LEAF2(mp)) + node = NODEPTR(mp, i); + } + if (exactp) + *exactp = (rc == 0); + /* store the key index */ + mc->mc_ki[mc->mc_top] = i; + if (i >= nkeys) + /* There is no entry larger or equal to the key. */ + return NULL; + + /* nodeptr is fake for LEAF2 */ + return node; +} + +#if 0 +static void +mdb_cursor_adjust(MDB_cursor *mc, func) +{ + MDB_cursor *m2; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { + func(mc, m2); + } + } +} +#endif + +/** Pop a page off the top of the cursor's stack. */ +static void +mdb_cursor_pop(MDB_cursor *mc) +{ + if (mc->mc_snum) { +#ifndef MDB_DEBUG_SKIP + MDB_page *top = mc->mc_pg[mc->mc_top]; +#endif + mc->mc_snum--; + if (mc->mc_snum) + mc->mc_top--; + + DPRINTF("popped page %zu off db %u cursor %p", top->mp_pgno, + mc->mc_dbi, (void *) mc); + } +} + +/** Push a page onto the top of the cursor's stack. */ +static int +mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) +{ + DPRINTF("pushing page %zu on db %u cursor %p", mp->mp_pgno, + mc->mc_dbi, (void *) mc); + + if (mc->mc_snum >= CURSOR_STACK) { + assert(mc->mc_snum < CURSOR_STACK); + return MDB_CURSOR_FULL; + } + + mc->mc_top = mc->mc_snum++; + mc->mc_pg[mc->mc_top] = mp; + mc->mc_ki[mc->mc_top] = 0; + + return MDB_SUCCESS; +} + +/** Find the address of the page corresponding to a given page number. + * @param[in] txn the transaction for this access. + * @param[in] pgno the page number for the page to retrieve. + * @param[out] ret address of a pointer where the page's address will be stored. + * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) +{ + MDB_page *p = NULL; + int level; + + if (!((txn->mt_flags & MDB_TXN_RDONLY) | + (txn->mt_env->me_flags & MDB_WRITEMAP))) + { + MDB_txn *tx2 = txn; + level = 1; + do { + MDB_ID2L dl = tx2->mt_u.dirty_list; + unsigned x; + /* Spilled pages were dirtied in this txn and flushed + * because the dirty list got full. Bring this page + * back in from the map (but don't unspill it here, + * leave that unless page_touch happens again). + */ + if (tx2->mt_spill_pgs) { + x = mdb_midl_search(tx2->mt_spill_pgs, pgno); + if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pgno) { + p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); + goto done; + } + } + if (dl[0].mid) { + unsigned x = mdb_mid2l_search(dl, pgno); + if (x <= dl[0].mid && dl[x].mid == pgno) { + p = dl[x].mptr; + goto done; + } + } + level++; + } while ((tx2 = tx2->mt_parent) != NULL); + } + + if (pgno < txn->mt_next_pgno) { + level = 0; + p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); + } else { + DPRINTF("page %zu not found", pgno); + assert(p != NULL); + return MDB_PAGE_NOTFOUND; + } + +done: + *ret = p; + if (lvl) + *lvl = level; + return MDB_SUCCESS; +} + +/** Search for the page a given key should be in. + * Pushes parent pages on the cursor stack. This function continues a + * search on a cursor that has already been initialized. (Usually by + * #mdb_page_search() but also by #mdb_node_move().) + * @param[in,out] mc the cursor for this operation. + * @param[in] key the key to search for. If NULL, search for the lowest + * page. (This is used by #mdb_cursor_first().) 
+ * @param[in] modify If true, visited pages are updated with new page numbers. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + DKBUF; + int rc; + + + while (IS_BRANCH(mp)) { + MDB_node *node; + indx_t i; + + DPRINTF("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp)); + assert(NUMKEYS(mp) > 1); + DPRINTF("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0))); + + if (key == NULL) /* Initialize cursor to first page. */ + i = 0; + else if (key->mv_size > MDB_MAXKEYSIZE && key->mv_data == NULL) { + /* cursor to last page */ + i = NUMKEYS(mp)-1; + } else { + int exact; + node = mdb_node_search(mc, key, &exact); + if (node == NULL) + i = NUMKEYS(mp) - 1; + else { + i = mc->mc_ki[mc->mc_top]; + if (!exact) { + assert(i > 0); + i--; + } + } + } + + if (key) + DPRINTF("following index %u for key [%s]", + i, DKEY(key)); + assert(i < NUMKEYS(mp)); + node = NODEPTR(mp, i); + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = i; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + + if (modify) { + if ((rc = mdb_page_touch(mc)) != 0) + return rc; + mp = mc->mc_pg[mc->mc_top]; + } + } + + if (!IS_LEAF(mp)) { + DPRINTF("internal error, index points to a %02X page!?", + mp->mp_flags); + return MDB_CORRUPTED; + } + + DPRINTF("found leaf page %zu for key [%s]", mp->mp_pgno, + key ? DKEY(key) : NULL); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + return MDB_SUCCESS; +} + +/** Search for the lowest key under the current branch page. + * This just bypasses a NUMKEYS check in the current page + * before calling mdb_page_search_root(), because the callers + * are all in situations where the current page is known to + * be underfilled. + */ +static int +mdb_page_search_lowest(MDB_cursor *mc) +{ + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_node *node = NODEPTR(mp, 0); + int rc; + + if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) + return rc; + + mc->mc_ki[mc->mc_top] = 0; + if ((rc = mdb_cursor_push(mc, mp))) + return rc; + return mdb_page_search_root(mc, NULL, 0); +} + +/** Search for the page a given key should be in. + * Pushes parent pages on the cursor stack. This function just sets up + * the search; it finds the root page for \b mc's database and sets this + * as the root of the cursor's stack. Then #mdb_page_search_root() is + * called to complete the search. + * @param[in,out] mc the cursor for this operation. + * @param[in] key the key to search for. If NULL, search for the lowest + * page. (This is used by #mdb_cursor_first().) + * @param[in] flags If MDB_PS_MODIFY set, visited pages are updated with new page numbers. + * If MDB_PS_ROOTONLY set, just fetch root node, no further lookups. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) +{ + int rc; + pgno_t root; + + /* Make sure the txn is still viable, then find the root from + * the txn's db table. 
+ */ + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("transaction has failed, must abort"); + return EINVAL; + } else { + /* Make sure we're using an up-to-date root */ + if (mc->mc_dbi > MAIN_DBI) { + if ((*mc->mc_dbflag & DB_STALE) || + ((flags & MDB_PS_MODIFY) && !(*mc->mc_dbflag & DB_DIRTY))) { + MDB_cursor mc2; + unsigned char dbflag = 0; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, flags & MDB_PS_MODIFY); + if (rc) + return rc; + if (*mc->mc_dbflag & DB_STALE) { + MDB_val data; + int exact = 0; + uint16_t flags; + MDB_node *leaf = mdb_node_search(&mc2, + &mc->mc_dbx->md_name, &exact); + if (!exact) + return MDB_NOTFOUND; + rc = mdb_node_read(mc->mc_txn, leaf, &data); + if (rc) + return rc; + memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), + sizeof(uint16_t)); + /* The txn may not know this DBI, or another process may + * have dropped and recreated the DB with other flags. + */ + if ((mc->mc_db->md_flags & PERSISTENT_FLAGS) != flags) + return MDB_INCOMPATIBLE; + memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db)); + } + if (flags & MDB_PS_MODIFY) + dbflag = DB_DIRTY; + *mc->mc_dbflag &= ~DB_STALE; + *mc->mc_dbflag |= dbflag; + } + } + root = mc->mc_db->md_root; + + if (root == P_INVALID) { /* Tree is empty. */ + DPUTS("tree is empty"); + return MDB_NOTFOUND; + } + } + + assert(root > 1); + if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) + if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) + return rc; + + mc->mc_snum = 1; + mc->mc_top = 0; + + DPRINTF("db %u root page %zu has flags 0x%X", + mc->mc_dbi, root, mc->mc_pg[0]->mp_flags); + + if (flags & MDB_PS_MODIFY) { + if ((rc = mdb_page_touch(mc))) + return rc; + } + + if (flags & MDB_PS_ROOTONLY) + return MDB_SUCCESS; + + return mdb_page_search_root(mc, key, flags); +} + +static int +mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) +{ + MDB_txn *txn = mc->mc_txn; + pgno_t pg = mp->mp_pgno; + unsigned i, ovpages = mp->mp_pages; + MDB_env *env = txn->mt_env; + int rc; + + DPRINTF("free ov page %zu (%d)", pg, ovpages); + /* If the page is dirty or on the spill list we just acquired it, + * so we should give it back to our current free list, if any. + * Not currently supported in nested txns. + * Otherwise put it onto the list of pages we freed in this txn. + */ + if (!(mp->mp_flags & P_DIRTY) && txn->mt_spill_pgs) { + unsigned x = mdb_midl_search(txn->mt_spill_pgs, pg); + if (x <= txn->mt_spill_pgs[0] && txn->mt_spill_pgs[x] == pg) { + /* This page is no longer spilled */ + for (; x < txn->mt_spill_pgs[0]; x++) + txn->mt_spill_pgs[x] = txn->mt_spill_pgs[x+1]; + txn->mt_spill_pgs[0]--; + goto release; + } + } + if ((mp->mp_flags & P_DIRTY) && !txn->mt_parent && env->me_pghead) { + unsigned j, x; + pgno_t *mop; + MDB_ID2 *dl, ix, iy; + rc = mdb_midl_need(&env->me_pghead, ovpages); + if (rc) + return rc; + /* Remove from dirty list */ + dl = txn->mt_u.dirty_list; + x = dl[0].mid--; + for (ix = dl[x]; ix.mptr != mp; ix = iy) { + if (x > 1) { + x--; + iy = dl[x]; + dl[x] = ix; + } else { + assert(x > 1); + j = ++(dl[0].mid); + dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. 
 */
+				txn->mt_flags |= MDB_TXN_ERROR;
+				return MDB_CORRUPTED;
+			}
+		}
+		if (!(env->me_flags & MDB_WRITEMAP))
+			mdb_dpage_free(env, mp);
+release:
+		/* Insert in me_pghead */
+		mop = env->me_pghead;
+		j = mop[0] + ovpages;
+		for (i = mop[0]; i && mop[i] < pg; i--)
+			mop[j--] = mop[i];
+		while (j>i)
+			mop[j--] = pg++;
+		mop[0] += ovpages;
+	} else {
+		rc = mdb_midl_append_range(&txn->mt_free_pgs, pg, ovpages);
+		if (rc)
+			return rc;
+	}
+	mc->mc_db->md_overflow_pages -= ovpages;
+	return 0;
+}
+
+/** Return the data associated with a given node.
+ * @param[in] txn The transaction for this operation.
+ * @param[in] leaf The node being read.
+ * @param[out] data Updated to point to the node's data.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data)
+{
+	MDB_page	*omp;		/* overflow page */
+	pgno_t		 pgno;
+	int rc;
+
+	if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
+		data->mv_size = NODEDSZ(leaf);
+		data->mv_data = NODEDATA(leaf);
+		return MDB_SUCCESS;
+	}
+
+	/* Read overflow data.
+	 */
+	data->mv_size = NODEDSZ(leaf);
+	memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
+	if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) {
+		DPRINTF("read overflow page %zu failed", pgno);
+		return rc;
+	}
+	data->mv_data = METADATA(omp);
+
+	return MDB_SUCCESS;
+}
+
+int
+mdb_get(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data)
+{
+	MDB_cursor	mc;
+	MDB_xcursor	mx;
+	int exact = 0;
+	DKBUF;
+
+	assert(key);
+	assert(data);
+	DPRINTF("===> get db %u key [%s]", dbi, DKEY(key));
+
+	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
+		return EINVAL;
+	}
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+	return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
+}
+
+/** Find a sibling for a page.
+ * Replaces the page at the top of the cursor's stack with the
+ * specified sibling, if one exists.
+ * @param[in] mc The cursor for this operation.
+ * @param[in] move_right Non-zero if the right sibling is requested,
+ * otherwise the left sibling.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_cursor_sibling(MDB_cursor *mc, int move_right)
+{
+	int		 rc;
+	MDB_node	*indx;
+	MDB_page	*mp;
+
+	if (mc->mc_snum < 2) {
+		return MDB_NOTFOUND;		/* root has no siblings */
+	}
+
+	mdb_cursor_pop(mc);
+	DPRINTF("parent page is page %zu, index %u",
+		mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]);
+
+	if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
+		       : (mc->mc_ki[mc->mc_top] == 0)) {
+		DPRINTF("no more keys left, moving to %s sibling",
+		    move_right ? "right" : "left");
+		if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) {
+			/* undo cursor_pop before returning */
+			mc->mc_top++;
+			mc->mc_snum++;
+			return rc;
+		}
+	} else {
+		if (move_right)
+			mc->mc_ki[mc->mc_top]++;
+		else
+			mc->mc_ki[mc->mc_top]--;
+		DPRINTF("just moving to %s index key %u",
+		    move_right ? "right" : "left", mc->mc_ki[mc->mc_top]);
+	}
+	assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
+
+	indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
+	if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0)
+		return rc;
+
+	mdb_cursor_push(mc, mp);
+	if (!move_right)
+		mc->mc_ki[mc->mc_top] = NUMKEYS(mp)-1;
+
+	return MDB_SUCCESS;
+}
+
+/** Move the cursor to the next data item.
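+ *
+ * Usage sketch for context (not part of this function): callers normally
+ * reach this through the public cursor API with one of the MDB_NEXT ops,
+ * e.g.
+ *
+ *	while (mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)
+ *		... process key/data ...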
*/ +static int +mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + if (mc->mc_flags & C_EOF) { + return MDB_NOTFOUND; + } + + assert(mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (op == MDB_NEXT || op == MDB_NEXT_DUP) { + rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); + if (op != MDB_NEXT || rc != MDB_NOTFOUND) + return rc; + } + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_NEXT_DUP) + return MDB_NOTFOUND; + } + } + + DPRINTF("cursor_next: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); + + if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { + DPUTS("=====> move to next sibling page"); + if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) { + mc->mc_flags |= C_EOF; + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); + } else + mc->mc_ki[mc->mc_top]++; + + DPRINTF("==> cursor points to page %zu with %u keys, key index %u", + mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + assert(IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the previous data item. 
*/ +static int +mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_page *mp; + MDB_node *leaf; + int rc; + + assert(mc->mc_flags & C_INITIALIZED); + + mp = mc->mc_pg[mc->mc_top]; + + if (mc->mc_db->md_flags & MDB_DUPSORT) { + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + if (op == MDB_PREV || op == MDB_PREV_DUP) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV); + if (op != MDB_PREV || rc != MDB_NOTFOUND) + return rc; + } else { + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + if (op == MDB_PREV_DUP) + return MDB_NOTFOUND; + } + } + } + + DPRINTF("cursor_prev: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); + + if (mc->mc_ki[mc->mc_top] == 0) { + DPUTS("=====> move to prev sibling page"); + if ((rc = mdb_cursor_sibling(mc, 0)) != MDB_SUCCESS) { + return rc; + } + mp = mc->mc_pg[mc->mc_top]; + mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; + DPRINTF("prev page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); + } else + mc->mc_ki[mc->mc_top]--; + + mc->mc_flags &= ~C_EOF; + + DPRINTF("==> cursor points to page %zu with %u keys, key index %u", + mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); + + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + assert(IS_LEAF(mp)); + leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + } + if (data) { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Set the cursor on a specific data item. */ +static int +mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op, int *exactp) +{ + int rc; + MDB_page *mp; + MDB_node *leaf = NULL; + DKBUF; + + assert(mc); + assert(key); + assert(key->mv_size > 0); + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + /* See if we're already on the right page */ + if (mc->mc_flags & C_INITIALIZED) { + MDB_val nodekey; + + mp = mc->mc_pg[mc->mc_top]; + if (!NUMKEYS(mp)) { + mc->mc_ki[mc->mc_top] = 0; + return MDB_NOTFOUND; + } + if (mp->mp_flags & P_LEAF2) { + nodekey.mv_size = mc->mc_db->md_pad; + nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size); + } else { + leaf = NODEPTR(mp, 0); + MDB_GET_KEY(leaf, &nodekey); + } + rc = mc->mc_dbx->md_cmp(key, &nodekey); + if (rc == 0) { + /* Probably happens rarely, but first node on the page + * was the one we wanted. 
+ */
+			mc->mc_ki[mc->mc_top] = 0;
+			if (exactp)
+				*exactp = 1;
+			goto set1;
+		}
+		if (rc > 0) {
+			unsigned int i;
+			unsigned int nkeys = NUMKEYS(mp);
+			if (nkeys > 1) {
+				if (mp->mp_flags & P_LEAF2) {
+					nodekey.mv_data = LEAF2KEY(mp,
+						 nkeys-1, nodekey.mv_size);
+				} else {
+					leaf = NODEPTR(mp, nkeys-1);
+					MDB_GET_KEY(leaf, &nodekey);
+				}
+				rc = mc->mc_dbx->md_cmp(key, &nodekey);
+				if (rc == 0) {
+					/* last node was the one we wanted */
+					mc->mc_ki[mc->mc_top] = nkeys-1;
+					if (exactp)
+						*exactp = 1;
+					goto set1;
+				}
+				if (rc < 0) {
+					if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
+						/* This is definitely the right page, skip search_page */
+						if (mp->mp_flags & P_LEAF2) {
+							nodekey.mv_data = LEAF2KEY(mp,
+								 mc->mc_ki[mc->mc_top], nodekey.mv_size);
+						} else {
+							leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
+							MDB_GET_KEY(leaf, &nodekey);
+						}
+						rc = mc->mc_dbx->md_cmp(key, &nodekey);
+						if (rc == 0) {
+							/* current node was the one we wanted */
+							if (exactp)
+								*exactp = 1;
+							goto set1;
+						}
+					}
+					rc = 0;
+					goto set2;
+				}
+			}
+			/* If any parents have right-sibs, search.
+			 * Otherwise, there's nothing further.
+			 */
+			for (i=0; i<mc->mc_top; i++)
+				if (mc->mc_ki[i] <
+					NUMKEYS(mc->mc_pg[i])-1)
+					break;
+			if (i == mc->mc_top) {
+				/* There are no other pages */
+				mc->mc_ki[mc->mc_top] = nkeys;
+				return MDB_NOTFOUND;
+			}
+		}
+		if (!mc->mc_top) {
+			/* There are no other pages */
+			mc->mc_ki[mc->mc_top] = 0;
+			return MDB_NOTFOUND;
+		}
+	}
+
+	rc = mdb_page_search(mc, key, 0);
+	if (rc != MDB_SUCCESS)
+		return rc;
+
+	mp = mc->mc_pg[mc->mc_top];
+	assert(IS_LEAF(mp));
+
+set2:
+	leaf = mdb_node_search(mc, key, exactp);
+	if (exactp != NULL && !*exactp) {
+		/* MDB_SET specified and not an exact match. */
+		return MDB_NOTFOUND;
+	}
+
+	if (leaf == NULL) {
+		DPUTS("===> inexact leaf not found, goto sibling");
+		if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
+			return rc;		/* no entries matched */
+		mp = mc->mc_pg[mc->mc_top];
+		assert(IS_LEAF(mp));
+		leaf = NODEPTR(mp, 0);
+	}
+
+set1:
+	mc->mc_flags |= C_INITIALIZED;
+	mc->mc_flags &= ~C_EOF;
+
+	if (IS_LEAF2(mp)) {
+		key->mv_size = mc->mc_db->md_pad;
+		key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
+		return MDB_SUCCESS;
+	}
+
+	if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
+		mdb_xcursor_init1(mc, leaf);
+	}
+	if (data) {
+		if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
+			if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
+				rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
+			} else {
+				int ex2, *ex2p;
+				if (op == MDB_GET_BOTH) {
+					ex2p = &ex2;
+					ex2 = 0;
+				} else {
+					ex2p = NULL;
+				}
+				rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p);
+				if (rc != MDB_SUCCESS)
+					return rc;
+			}
+		} else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) {
+			MDB_val d2;
+			if ((rc = mdb_node_read(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS)
+				return rc;
+			rc = mc->mc_dbx->md_dcmp(data, &d2);
+			if (rc) {
+				if (op == MDB_GET_BOTH || rc > 0)
+					return MDB_NOTFOUND;
+			}
+
+		} else {
+			if (mc->mc_xcursor)
+				mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
+			if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
+				return rc;
+		}
+	}
+
+	/* The key already matches in all other cases */
+	if (op == MDB_SET_RANGE || op == MDB_SET_KEY)
+		MDB_GET_KEY(leaf, key);
+	DPRINTF("==> cursor placed on key [%s]", DKEY(key));
+
+	return rc;
+}
+
+/** Move the cursor to the first item in the database.
*/ +static int +mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, 0); + if (rc != MDB_SUCCESS) + return rc; + } + assert(IS_LEAF(mc->mc_pg[mc->mc_top])); + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); + mc->mc_flags |= C_INITIALIZED; + mc->mc_flags &= ~C_EOF; + + mc->mc_ki[mc->mc_top] = 0; + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +/** Move the cursor to the last item in the database. */ +static int +mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_node *leaf; + + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); + + if (!(mc->mc_flags & C_EOF)) { + + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + MDB_val lkey; + + lkey.mv_size = MDB_MAXKEYSIZE+1; + lkey.mv_data = NULL; + rc = mdb_page_search(mc, &lkey, 0); + if (rc != MDB_SUCCESS) + return rc; + } + assert(IS_LEAF(mc->mc_pg[mc->mc_top])); + + } + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; + mc->mc_flags |= C_INITIALIZED|C_EOF; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + return MDB_SUCCESS; + } + + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else { + if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + } + } + + MDB_GET_KEY(leaf, key); + return MDB_SUCCESS; +} + +int +mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, + MDB_cursor_op op) +{ + int rc; + int exact = 0; + + assert(mc); + + switch (op) { + case MDB_GET_CURRENT: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + } else { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + if (!NUMKEYS(mp)) { + mc->mc_ki[mc->mc_top] = 0; + rc = MDB_NOTFOUND; + break; + } + rc = MDB_SUCCESS; + if (IS_LEAF2(mp)) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); + } else { + MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); + MDB_GET_KEY(leaf, key); + if (data) { + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); + } else { + rc = mdb_node_read(mc->mc_txn, leaf, data); + } + } + } + } + break; + case MDB_GET_BOTH: + case MDB_GET_BOTH_RANGE: + if (data == NULL || mc->mc_xcursor == NULL) { + rc = EINVAL; + break; + } + /* FALLTHRU */ + case MDB_SET: + case MDB_SET_KEY: + case MDB_SET_RANGE: + if (key == NULL || key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) { + rc = EINVAL; + } else if (op == MDB_SET_RANGE) + rc = mdb_cursor_set(mc, key, data, op, NULL); + else + rc = mdb_cursor_set(mc, key, data, op, &exact); + break; + case MDB_GET_MULTIPLE: + if (data == NULL 
|| + !(mc->mc_db->md_flags & MDB_DUPFIXED) || + !(mc->mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + rc = MDB_SUCCESS; + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) || + (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF)) + break; + goto fetchm; + case MDB_NEXT_MULTIPLE: + if (data == NULL || + !(mc->mc_db->md_flags & MDB_DUPFIXED)) { + rc = EINVAL; + break; + } + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); + if (rc == MDB_SUCCESS) { + if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + MDB_cursor *mx; +fetchm: + mx = &mc->mc_xcursor->mx_cursor; + data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * + mx->mc_db->md_pad; + data->mv_data = METADATA(mx->mc_pg[mx->mc_top]); + mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1; + } else { + rc = MDB_NOTFOUND; + } + } + break; + case MDB_NEXT: + case MDB_NEXT_DUP: + case MDB_NEXT_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) + rc = mdb_cursor_first(mc, key, data); + else + rc = mdb_cursor_next(mc, key, data, op); + break; + case MDB_PREV: + case MDB_PREV_DUP: + case MDB_PREV_NODUP: + if (!(mc->mc_flags & C_INITIALIZED)) { + rc = mdb_cursor_last(mc, key, data); + if (rc) + break; + mc->mc_flags |= C_INITIALIZED; + mc->mc_ki[mc->mc_top]++; + } + rc = mdb_cursor_prev(mc, key, data, op); + break; + case MDB_FIRST: + rc = mdb_cursor_first(mc, key, data); + break; + case MDB_FIRST_DUP: + if (data == NULL || + !(mc->mc_db->md_flags & MDB_DUPSORT) || + !(mc->mc_flags & C_INITIALIZED) || + !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + break; + case MDB_LAST: + rc = mdb_cursor_last(mc, key, data); + break; + case MDB_LAST_DUP: + if (data == NULL || + !(mc->mc_db->md_flags & MDB_DUPSORT) || + !(mc->mc_flags & C_INITIALIZED) || + !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { + rc = EINVAL; + break; + } + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + break; + default: + DPRINTF("unhandled/unimplemented cursor operation %u", op); + rc = EINVAL; + break; + } + + return rc; +} + +/** Touch all the pages in the cursor stack. + * Makes sure all the pages are writable, before attempting a write operation. + * @param[in] mc The cursor to operate on. + */ +static int +mdb_cursor_touch(MDB_cursor *mc) +{ + int rc; + + if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { + MDB_cursor mc2; + MDB_xcursor mcx; + mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx); + rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY); + if (rc) + return rc; + *mc->mc_dbflag |= DB_DIRTY; + } + for (mc->mc_top = 0; mc->mc_top < mc->mc_snum; mc->mc_top++) { + rc = mdb_page_touch(mc); + if (rc) + return rc; + } + mc->mc_top = mc->mc_snum-1; + return MDB_SUCCESS; +} + +/** Do not spill pages to disk if txn is getting full, may fail instead */ +#define MDB_NOSPILL 0x8000 + +int +mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, + unsigned int flags) +{ + enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */ + MDB_node *leaf = NULL; + MDB_val xdata, *rdata, dkey; + MDB_page *fp; + MDB_db dummy; + int do_sub = 0, insert = 0; + unsigned int mcount = 0, dcount = 0, nospill; + size_t nsize; + int rc, rc2; + MDB_pagebuf pbuf; + char dbuf[MDB_MAXKEYSIZE+1]; + unsigned int nflags; + DKBUF; + + /* Check this first so counter will always be zero on any + * early failures. 
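+	 * (With MDB_MULTIPLE the caller passes an MDB_val[2]: data[0]
+	 * describes one fixed-size item and data[1].mv_size carries the item
+	 * count, so zeroing it here makes an early failure report that zero
+	 * items were written.)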
+ */ + if (flags & MDB_MULTIPLE) { + dcount = data[1].mv_size; + data[1].mv_size = 0; + if (!F_ISSET(mc->mc_db->md_flags, MDB_DUPFIXED)) + return EINVAL; + } + + nospill = flags & MDB_NOSPILL; + flags &= ~MDB_NOSPILL; + + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE)) + return EINVAL; + + if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE) + return EINVAL; + +#if SIZE_MAX > MAXDATASIZE + if (data->mv_size > MAXDATASIZE) + return EINVAL; +#endif + + DPRINTF("==> put db %u key [%s], size %zu, data size %zu", + mc->mc_dbi, DKEY(key), key ? key->mv_size:0, data->mv_size); + + dkey.mv_size = 0; + + if (flags == MDB_CURRENT) { + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + rc = MDB_SUCCESS; + } else if (mc->mc_db->md_root == P_INVALID) { + /* new database, cursor has nothing to point to */ + mc->mc_snum = 0; + mc->mc_flags &= ~C_INITIALIZED; + rc = MDB_NO_ROOT; + } else { + int exact = 0; + MDB_val d2; + if (flags & MDB_APPEND) { + MDB_val k2; + rc = mdb_cursor_last(mc, &k2, &d2); + if (rc == 0) { + rc = mc->mc_dbx->md_cmp(key, &k2); + if (rc > 0) { + rc = MDB_NOTFOUND; + mc->mc_ki[mc->mc_top]++; + } else { + /* new key is <= last key */ + rc = MDB_KEYEXIST; + } + } + } else { + rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); + } + if ((flags & MDB_NOOVERWRITE) && rc == 0) { + DPRINTF("duplicate key [%s]", DKEY(key)); + *data = d2; + return MDB_KEYEXIST; + } + if (rc && rc != MDB_NOTFOUND) + return rc; + } + + /* Cursor is positioned, check for room in the dirty list */ + if (!nospill) { + if (flags & MDB_MULTIPLE) { + rdata = &xdata; + xdata.mv_size = data->mv_size * dcount; + } else { + rdata = data; + } + if ((rc2 = mdb_page_spill(mc, key, rdata))) + return rc2; + } + + if (rc == MDB_NO_ROOT) { + MDB_page *np; + /* new database, write a root leaf page */ + DPUTS("allocating new root leaf page"); + if ((rc2 = mdb_page_new(mc, P_LEAF, 1, &np))) { + return rc2; + } + mdb_cursor_push(mc, np); + mc->mc_db->md_root = np->mp_pgno; + mc->mc_db->md_depth++; + *mc->mc_dbflag |= DB_DIRTY; + if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) + == MDB_DUPFIXED) + np->mp_flags |= P_LEAF2; + mc->mc_flags |= C_INITIALIZED; + } else { + /* make sure all cursor pages are writable */ + rc2 = mdb_cursor_touch(mc); + if (rc2) + return rc2; + } + + /* The key already exists */ + if (rc == MDB_SUCCESS) { + /* there's only a key anyway, so this is a no-op */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { + unsigned int ksize = mc->mc_db->md_pad; + if (key->mv_size != ksize) + return EINVAL; + if (flags == MDB_CURRENT) { + char *ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize); + memcpy(ptr, key->mv_data, ksize); + } + return MDB_SUCCESS; + } + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + + /* DB has dups? 
*/
+	if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) {
+		/* Was a single item before, must convert now */
+more:
+		if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
+			/* Just overwrite the current item */
+			if (flags == MDB_CURRENT)
+				goto current;
+
+			dkey.mv_size = NODEDSZ(leaf);
+			dkey.mv_data = NODEDATA(leaf);
+#if UINT_MAX < SIZE_MAX
+			if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
+#ifdef MISALIGNED_OK
+				mc->mc_dbx->md_dcmp = mdb_cmp_long;
+#else
+				mc->mc_dbx->md_dcmp = mdb_cmp_cint;
+#endif
+#endif
+			/* if data matches, ignore it */
+			if (!mc->mc_dbx->md_dcmp(data, &dkey))
+				return (flags == MDB_NODUPDATA) ? MDB_KEYEXIST : MDB_SUCCESS;
+
+			/* create a fake page for the dup items */
+			memcpy(dbuf, dkey.mv_data, dkey.mv_size);
+			dkey.mv_data = dbuf;
+			fp = (MDB_page *)&pbuf;
+			fp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
+			fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
+			fp->mp_lower = PAGEHDRSZ;
+			fp->mp_upper = PAGEHDRSZ + dkey.mv_size + data->mv_size;
+			if (mc->mc_db->md_flags & MDB_DUPFIXED) {
+				fp->mp_flags |= P_LEAF2;
+				fp->mp_pad = data->mv_size;
+				fp->mp_upper += 2 * data->mv_size;	/* leave space for 2 more */
+			} else {
+				fp->mp_upper += 2 * sizeof(indx_t) + 2 * NODESIZE +
+					(dkey.mv_size & 1) + (data->mv_size & 1);
+			}
+			mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
+			do_sub = 1;
+			rdata = &xdata;
+			xdata.mv_size = fp->mp_upper;
+			xdata.mv_data = fp;
+			flags |= F_DUPDATA;
+			goto new_sub;
+		}
+		if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
+			/* See if we need to convert from fake page to subDB */
+			MDB_page *mp;
+			unsigned int offset;
+			unsigned int i;
+			uint16_t fp_flags;
+
+			fp = NODEDATA(leaf);
+			if (flags == MDB_CURRENT) {
+reuse:
+				fp->mp_flags |= P_DIRTY;
+				COPY_PGNO(fp->mp_pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
+				mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
+				flags |= F_DUPDATA;
+				goto put_sub;
+			}
+			if (mc->mc_db->md_flags & MDB_DUPFIXED) {
+				offset = fp->mp_pad;
+				if (SIZELEFT(fp) >= offset)
+					goto reuse;
+				offset *= 4; /* space for 4 more */
+			} else {
+				offset = NODESIZE + sizeof(indx_t) + data->mv_size;
+			}
+			offset += offset & 1;
+			fp_flags = fp->mp_flags;
+			if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + NODEDSZ(leaf) +
+				offset >= mc->mc_txn->mt_env->me_nodemax) {
+				/* yes, convert it */
+				dummy.md_flags = 0;
+				if (mc->mc_db->md_flags & MDB_DUPFIXED) {
+					dummy.md_pad = fp->mp_pad;
+					dummy.md_flags = MDB_DUPFIXED;
+					if (mc->mc_db->md_flags & MDB_INTEGERDUP)
+						dummy.md_flags |= MDB_INTEGERKEY;
+				}
+				dummy.md_depth = 1;
+				dummy.md_branch_pages = 0;
+				dummy.md_leaf_pages = 1;
+				dummy.md_overflow_pages = 0;
+				dummy.md_entries = NUMKEYS(fp);
+				rdata = &xdata;
+				xdata.mv_size = sizeof(MDB_db);
+				xdata.mv_data = &dummy;
+				if ((rc = mdb_page_alloc(mc, 1, &mp)))
+					return rc;
+				offset = mc->mc_txn->mt_env->me_psize - NODEDSZ(leaf);
+				flags |= F_DUPDATA|F_SUBDATA;
+				dummy.md_root = mp->mp_pgno;
+				fp_flags &= ~P_SUBP;
+			} else {
+				/* no, just grow it */
+				rdata = &xdata;
+				xdata.mv_size = NODEDSZ(leaf) + offset;
+				xdata.mv_data = &pbuf;
+				mp = (MDB_page *)&pbuf;
+				mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
+				flags |= F_DUPDATA;
+			}
+			mp->mp_flags = fp_flags | P_DIRTY;
+			mp->mp_pad = fp->mp_pad;
+			mp->mp_lower = fp->mp_lower;
+			mp->mp_upper = fp->mp_upper + offset;
+			if (IS_LEAF2(fp)) {
+				memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
+			} else {
+				nsize = NODEDSZ(leaf) - fp->mp_upper;
+				memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper, nsize);
+				for (i=0; i<NUMKEYS(fp); i++)
+					mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
+			}
+			
mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0); + do_sub = 1; + goto new_sub; + } + /* data is on sub-DB, just store it */ + flags |= F_DUPDATA|F_SUBDATA; + goto put_sub; + } +current: + /* overflow page overwrites need special handling */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + unsigned psize = mc->mc_txn->mt_env->me_psize; + int level, ovpages, dpages = OVPAGES(data->mv_size, psize); + + memcpy(&pg, NODEDATA(leaf), sizeof(pg)); + if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0) + return rc2; + ovpages = omp->mp_pages; + + /* Is the ov page large enough? */ + if (ovpages >= dpages) { + if (!(omp->mp_flags & P_DIRTY) && + (level || (mc->mc_txn->mt_env->me_flags & MDB_WRITEMAP))) + { + rc = mdb_page_unspill(mc->mc_txn, omp, &omp); + if (rc) + return rc; + level = 0; /* dirty in this txn or clean */ + } + /* Is it dirty? */ + if (omp->mp_flags & P_DIRTY) { + /* yes, overwrite it. Note in this case we don't + * bother to try shrinking the page if the new data + * is smaller than the overflow threshold. + */ + if (level > 1) { + /* It is writable only in a parent txn */ + size_t sz = (size_t) psize * ovpages, off; + MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages); + MDB_ID2 id2; + if (!np) + return ENOMEM; + id2.mid = pg; + id2.mptr = np; + mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); + if (!(flags & MDB_RESERVE)) { + /* Copy end of page, adjusting alignment so + * compiler may copy words instead of bytes. + */ + off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); + memcpy((size_t *)((char *)np + off), + (size_t *)((char *)omp + off), sz - off); + sz = PAGEHDRSZ; + } + memcpy(np, omp, sz); /* Copy beginning of page */ + omp = np; + } + SETDSZ(leaf, data->mv_size); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(omp); + else + memcpy(METADATA(omp), data->mv_data, data->mv_size); + goto done; + } + } + if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS) + return rc2; + } else if (NODEDSZ(leaf) == data->mv_size) { + /* same size, just replace it. Note that we could + * also reuse this node if the new data is smaller, + * but instead we opt to shrink the node in that case. + */ + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = NODEDATA(leaf); + else if (data->mv_size) + memcpy(NODEDATA(leaf), data->mv_data, data->mv_size); + else + memcpy(NODEKEY(leaf), key->mv_data, key->mv_size); + goto done; + } + mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0); + mc->mc_db->md_entries--; + } else { + DPRINTF("inserting key at index %i", mc->mc_ki[mc->mc_top]); + insert = 1; + } + + rdata = data; + +new_sub: + nflags = flags & NODE_ADD_FLAGS; + nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(mc->mc_txn->mt_env, key, rdata); + if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) { + if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA ) + nflags &= ~MDB_APPEND; + if (!insert) + nflags |= MDB_SPLIT_REPLACE; + rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); + } else { + /* There is room already in this leaf page. 
*/ + rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); + if (rc == 0 && !do_sub && insert) { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + if (mc->mc_flags & C_SUB) + dbi--; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[i] == mp && m3->mc_ki[i] >= mc->mc_ki[i]) { + m3->mc_ki[i]++; + } + } + } + } + + if (rc != MDB_SUCCESS) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + else { + /* Now store the actual data in the child DB. Note that we're + * storing the user data in the keys field, so there are strict + * size limits on dupdata. The actual data fields of the child + * DB are all zero size. + */ + if (do_sub) { + int xflags; +put_sub: + xdata.mv_size = 0; + xdata.mv_data = ""; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (flags & MDB_CURRENT) { + xflags = MDB_CURRENT|MDB_NOSPILL; + } else { + mdb_xcursor_init1(mc, leaf); + xflags = (flags & MDB_NODUPDATA) ? + MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL; + } + /* converted, write the original data first */ + if (dkey.mv_size) { + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); + if (rc) + return rc; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2; + unsigned i = mc->mc_top; + MDB_page *mp = mc->mc_pg[i]; + + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (!(m2->mc_flags & C_INITIALIZED)) continue; + if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { + mdb_xcursor_init1(m2, leaf); + } + } + } + /* we've done our job */ + dkey.mv_size = 0; + } + if (flags & MDB_APPENDDUP) + xflags |= MDB_APPEND; + rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); + if (flags & F_SUBDATA) { + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } + } + /* sub-writes might have failed so check rc again. + * Don't increment count if we just replaced an existing item. + */ + if (!rc && !(flags & MDB_CURRENT)) + mc->mc_db->md_entries++; + if (flags & MDB_MULTIPLE) { + if (!rc) { + mcount++; + if (mcount < dcount) { + data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size; + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + goto more; + } + } + /* let caller know how many succeeded, if any */ + data[1].mv_size = mcount; + } + } +done: + /* If we succeeded and the key didn't exist before, make sure + * the cursor is marked valid. 
+ */ + if (!rc && insert) + mc->mc_flags |= C_INITIALIZED; + return rc; +} + +int +mdb_cursor_del(MDB_cursor *mc, unsigned int flags) +{ + MDB_node *leaf; + int rc; + + if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + if (!(mc->mc_flags & C_INITIALIZED)) + return EINVAL; + + if (!(flags & MDB_NOSPILL) && (rc = mdb_page_spill(mc, NULL, NULL))) + return rc; + flags &= ~MDB_NOSPILL; /* TODO: Or change (flags != MDB_NODUPDATA) to ~(flags & MDB_NODUPDATA), not looking at the logic of that code just now */ + + rc = mdb_cursor_touch(mc); + if (rc) + return rc; + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + + if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_DUPDATA)) { + if (flags != MDB_NODUPDATA) { + if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) { + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL); + /* If sub-DB still has entries, we're done */ + if (mc->mc_xcursor->mx_db.md_entries) { + if (leaf->mn_flags & F_SUBDATA) { + /* update subDB info */ + void *db = NODEDATA(leaf); + memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); + } else { + MDB_cursor *m2; + /* shrink fake page */ + mdb_node_shrink(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + /* fix other sub-DB cursors pointed at this fake page */ + for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { + if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; + if (m2->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top] && + m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) + m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); + } + } + mc->mc_db->md_entries--; + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } + + if (leaf->mn_flags & F_SUBDATA) { + /* add all the child DB's pages to the free list */ + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc == MDB_SUCCESS) { + mc->mc_db->md_entries -= + mc->mc_xcursor->mx_db.md_entries; + } + } + } + + return mdb_cursor_del0(mc, leaf); +} + +/** Allocate and initialize new pages for a database. + * @param[in] mc a cursor on the database being added to. + * @param[in] flags flags defining what type of page is being allocated. + * @param[in] num the number of pages to allocate. This is usually 1, + * unless allocating overflow pages for a large record. + * @param[out] mp Address of a page, or NULL on failure. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) +{ + MDB_page *np; + int rc; + + if ((rc = mdb_page_alloc(mc, num, &np))) + return rc; + DPRINTF("allocated new mpage %zu, page size %u", + np->mp_pgno, mc->mc_txn->mt_env->me_psize); + np->mp_flags = flags | P_DIRTY; + np->mp_lower = PAGEHDRSZ; + np->mp_upper = mc->mc_txn->mt_env->me_psize; + + if (IS_BRANCH(np)) + mc->mc_db->md_branch_pages++; + else if (IS_LEAF(np)) + mc->mc_db->md_leaf_pages++; + else if (IS_OVERFLOW(np)) { + mc->mc_db->md_overflow_pages += num; + np->mp_pages = num; + } + *mp = np; + + return 0; +} + +/** Calculate the size of a leaf node. + * The size depends on the environment's page size; if a data item + * is too large it will be put onto an overflow page and the node + * size will only include the key and not the data. Sizes are always + * rounded up to an even number of bytes, to guarantee 2-byte alignment + * of the #MDB_node headers. 
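+ * As an illustrative example (figures assumed, not normative): a node
+ * holding a 10-byte key and a 100-byte value costs NODESIZE + 10 + 100
+ * bytes rounded up to even, plus sizeof(indx_t) for its mp_ptrs slot;
+ * when LEAFSIZE reaches me_nodemax, only the key plus a pgno_t is
+ * charged here and the data moves to an overflow page.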
+ * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @param[in] data The data for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data) +{ + size_t sz; + + sz = LEAFSIZE(key, data); + if (sz >= env->me_nodemax) { + /* put on overflow page */ + sz -= data->mv_size - sizeof(pgno_t); + } + sz += sz & 1; + + return sz + sizeof(indx_t); +} + +/** Calculate the size of a branch node. + * The size should depend on the environment's page size but since + * we currently don't support spilling large keys onto overflow + * pages, it's simply the size of the #MDB_node header plus the + * size of the key. Sizes are always rounded up to an even number + * of bytes, to guarantee 2-byte alignment of the #MDB_node headers. + * @param[in] env The environment handle. + * @param[in] key The key for the node. + * @return The number of bytes needed to store the node. + */ +static size_t +mdb_branch_size(MDB_env *env, MDB_val *key) +{ + size_t sz; + + sz = INDXSIZE(key); + if (sz >= env->me_nodemax) { + /* put on overflow page */ + /* not implemented */ + /* sz -= key->size - sizeof(pgno_t); */ + } + + return sz + sizeof(indx_t); +} + +/** Add a node to the page pointed to by the cursor. + * @param[in] mc The cursor for this operation. + * @param[in] indx The index on the page where the new node should be added. + * @param[in] key The key for the new node. + * @param[in] data The data for the new node, if any. + * @param[in] pgno The page number, if adding a branch node. + * @param[in] flags Flags for the node. + * @return 0 on success, non-zero on failure. Possible errors are: + *
+ * <ul>
+ *	<li>ENOMEM - failed to allocate overflow pages for the node.
+ *	<li>MDB_PAGE_FULL - there is insufficient room in the page. This error
+ *	should never happen since all callers already calculate the
+ *	page's free space before calling this function.
+ * </ul>
+ */ +static int +mdb_node_add(MDB_cursor *mc, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags) +{ + unsigned int i; + size_t node_size = NODESIZE; + indx_t ofs; + MDB_node *node; + MDB_page *mp = mc->mc_pg[mc->mc_top]; + MDB_page *ofp = NULL; /* overflow page */ + DKBUF; + + assert(mp->mp_upper >= mp->mp_lower); + + DPRINTF("add to %s %spage %zu index %i, data size %zu key size %zu [%s]", + IS_LEAF(mp) ? "leaf" : "branch", + IS_SUBP(mp) ? "sub-" : "", + mp->mp_pgno, indx, data ? data->mv_size : 0, + key ? key->mv_size : 0, key ? DKEY(key) : NULL); + + if (IS_LEAF2(mp)) { + /* Move higher keys up one slot. */ + int ksize = mc->mc_db->md_pad, dif; + char *ptr = LEAF2KEY(mp, indx, ksize); + dif = NUMKEYS(mp) - indx; + if (dif > 0) + memmove(ptr+ksize, ptr, dif*ksize); + /* insert new key */ + memcpy(ptr, key->mv_data, ksize); + + /* Just using these for counting */ + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + return MDB_SUCCESS; + } + + if (key != NULL) + node_size += key->mv_size; + + if (IS_LEAF(mp)) { + assert(data); + if (F_ISSET(flags, F_BIGDATA)) { + /* Data already on overflow page. */ + node_size += sizeof(pgno_t); + } else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) { + int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); + int rc; + /* Put data on overflow page. */ + DPRINTF("data size is %zu, node would be %zu, put data on overflow page", + data->mv_size, node_size+data->mv_size); + node_size += sizeof(pgno_t); + if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) + return rc; + DPRINTF("allocated overflow page %zu", ofp->mp_pgno); + flags |= F_BIGDATA; + } else { + node_size += data->mv_size; + } + } + node_size += node_size & 1; + + if (node_size + sizeof(indx_t) > SIZELEFT(mp)) { + DPRINTF("not enough room in page %zu, got %u ptrs", + mp->mp_pgno, NUMKEYS(mp)); + DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower, + mp->mp_upper - mp->mp_lower); + DPRINTF("node size = %zu", node_size); + return MDB_PAGE_FULL; + } + + /* Move higher pointers up one slot. */ + for (i = NUMKEYS(mp); i > indx; i--) + mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; + + /* Adjust free space offsets. */ + ofs = mp->mp_upper - node_size; + assert(ofs >= mp->mp_lower + sizeof(indx_t)); + mp->mp_ptrs[indx] = ofs; + mp->mp_upper = ofs; + mp->mp_lower += sizeof(indx_t); + + /* Write the node data. */ + node = NODEPTR(mp, indx); + node->mn_ksize = (key == NULL) ? 0 : key->mv_size; + node->mn_flags = flags; + if (IS_LEAF(mp)) + SETDSZ(node,data->mv_size); + else + SETPGNO(node,pgno); + + if (key) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + if (IS_LEAF(mp)) { + assert(key); + if (ofp == NULL) { + if (F_ISSET(flags, F_BIGDATA)) + memcpy(node->mn_data + key->mv_size, data->mv_data, + sizeof(pgno_t)); + else if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = node->mn_data + key->mv_size; + else + memcpy(node->mn_data + key->mv_size, data->mv_data, + data->mv_size); + } else { + memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno, + sizeof(pgno_t)); + if (F_ISSET(flags, MDB_RESERVE)) + data->mv_data = METADATA(ofp); + else + memcpy(METADATA(ofp), data->mv_data, data->mv_size); + } + } + + return MDB_SUCCESS; +} + +/** Delete the specified node from a page. + * @param[in] mp The page to operate on. + * @param[in] indx The index of the node to delete. + * @param[in] ksize The size of a node. Only used if the page is + * part of a #MDB_DUPFIXED database. 
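+ * For #MDB_DUPFIXED (LEAF2) pages this is the fixed key stride,
+ * normally the database's md_pad value.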
+ */ +static void +mdb_node_del(MDB_page *mp, indx_t indx, int ksize) +{ + unsigned int sz; + indx_t i, j, numkeys, ptr; + MDB_node *node; + char *base; + +#if MDB_DEBUG + { + pgno_t pgno; + COPY_PGNO(pgno, mp->mp_pgno); + DPRINTF("delete node %u on %s page %zu", indx, + IS_LEAF(mp) ? "leaf" : "branch", pgno); + } +#endif + assert(indx < NUMKEYS(mp)); + + if (IS_LEAF2(mp)) { + int x = NUMKEYS(mp) - 1 - indx; + base = LEAF2KEY(mp, indx, ksize); + if (x) + memmove(base, base + ksize, x * ksize); + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += ksize - sizeof(indx_t); + return; + } + + node = NODEPTR(mp, indx); + sz = NODESIZE + node->mn_ksize; + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + sz += sizeof(pgno_t); + else + sz += NODEDSZ(node); + } + sz += sz & 1; + + ptr = mp->mp_ptrs[indx]; + numkeys = NUMKEYS(mp); + for (i = j = 0; i < numkeys; i++) { + if (i != indx) { + mp->mp_ptrs[j] = mp->mp_ptrs[i]; + if (mp->mp_ptrs[i] < ptr) + mp->mp_ptrs[j] += sz; + j++; + } + } + + base = (char *)mp + mp->mp_upper; + memmove(base + sz, base, ptr - mp->mp_upper); + + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += sz; +} + +/** Compact the main page after deleting a node on a subpage. + * @param[in] mp The main page to operate on. + * @param[in] indx The index of the subpage on the main page. + */ +static void +mdb_node_shrink(MDB_page *mp, indx_t indx) +{ + MDB_node *node; + MDB_page *sp, *xp; + char *base; + int osize, nsize; + int delta; + indx_t i, numkeys, ptr; + + node = NODEPTR(mp, indx); + sp = (MDB_page *)NODEDATA(node); + osize = NODEDSZ(node); + + delta = sp->mp_upper - sp->mp_lower; + SETDSZ(node, osize - delta); + xp = (MDB_page *)((char *)sp + delta); + + /* shift subpage upward */ + if (IS_LEAF2(sp)) { + nsize = NUMKEYS(sp) * sp->mp_pad; + memmove(METADATA(xp), METADATA(sp), nsize); + } else { + int i; + nsize = osize - sp->mp_upper; + numkeys = NUMKEYS(sp); + for (i=numkeys-1; i>=0; i--) + xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta; + } + xp->mp_upper = sp->mp_lower; + xp->mp_lower = sp->mp_lower; + xp->mp_flags = sp->mp_flags; + xp->mp_pad = sp->mp_pad; + COPY_PGNO(xp->mp_pgno, mp->mp_pgno); + + /* shift lower nodes upward */ + ptr = mp->mp_ptrs[indx]; + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] += delta; + } + + base = (char *)mp + mp->mp_upper; + memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); + mp->mp_upper += delta; +} + +/** Initial setup of a sorted-dups cursor. + * Sorted duplicates are implemented as a sub-database for the given key. + * The duplicate data items are actually keys of the sub-database. + * Operations on the duplicate data items are performed using a sub-cursor + * initialized when the sub-database is first accessed. This function does + * the preliminary setup of the sub-cursor, filling in the fields that + * depend only on the parent DB. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. 
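+ * For example, with #MDB_DUPSORT, storing (key, "a") and (key, "b")
+ * yields a single node for key whose data is a subpage or sub-database
+ * with keys "a" and "b"; the sub-database's data items are zero-sized.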
+ */ +static void +mdb_xcursor_init0(MDB_cursor *mc) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + mx->mx_cursor.mc_xcursor = NULL; + mx->mx_cursor.mc_txn = mc->mc_txn; + mx->mx_cursor.mc_db = &mx->mx_db; + mx->mx_cursor.mc_dbx = &mx->mx_dbx; + mx->mx_cursor.mc_dbi = mc->mc_dbi+1; + mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_flags = C_SUB; + mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; + mx->mx_dbx.md_dcmp = NULL; + mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; +} + +/** Final setup of a sorted-dups cursor. + * Sets up the fields that depend on the data from the main cursor. + * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. + * @param[in] node The data containing the #MDB_db record for the + * sorted-dup database. + */ +static void +mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) +{ + MDB_xcursor *mx = mc->mc_xcursor; + + if (node->mn_flags & F_SUBDATA) { + memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); + mx->mx_cursor.mc_pg[0] = 0; + mx->mx_cursor.mc_snum = 0; + mx->mx_cursor.mc_flags = C_SUB; + } else { + MDB_page *fp = NODEDATA(node); + mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad; + mx->mx_db.md_flags = 0; + mx->mx_db.md_depth = 1; + mx->mx_db.md_branch_pages = 0; + mx->mx_db.md_leaf_pages = 1; + mx->mx_db.md_overflow_pages = 0; + mx->mx_db.md_entries = NUMKEYS(fp); + COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); + mx->mx_cursor.mc_snum = 1; + mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; + mx->mx_cursor.mc_top = 0; + mx->mx_cursor.mc_pg[0] = fp; + mx->mx_cursor.mc_ki[0] = 0; + if (mc->mc_db->md_flags & MDB_DUPFIXED) { + mx->mx_db.md_flags = MDB_DUPFIXED; + mx->mx_db.md_pad = fp->mp_pad; + if (mc->mc_db->md_flags & MDB_INTEGERDUP) + mx->mx_db.md_flags |= MDB_INTEGERKEY; + } + } + DPRINTF("Sub-db %u for db %u root page %zu", mx->mx_cursor.mc_dbi, mc->mc_dbi, + mx->mx_db.md_root); + mx->mx_dbflag = DB_VALID | (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY) ? + DB_DIRTY : 0); + mx->mx_dbx.md_name.mv_data = NODEKEY(node); + mx->mx_dbx.md_name.mv_size = node->mn_ksize; +#if UINT_MAX < SIZE_MAX + if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) +#ifdef MISALIGNED_OK + mx->mx_dbx.md_cmp = mdb_cmp_long; +#else + mx->mx_dbx.md_cmp = mdb_cmp_cint; +#endif +#endif +} + +/** Initialize a cursor for a given transaction and database. 
*/ +static void +mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +{ + mc->mc_next = NULL; + mc->mc_backup = NULL; + mc->mc_dbi = dbi; + mc->mc_txn = txn; + mc->mc_db = &txn->mt_dbs[dbi]; + mc->mc_dbx = &txn->mt_dbxs[dbi]; + mc->mc_dbflag = &txn->mt_dbflags[dbi]; + mc->mc_snum = 0; + mc->mc_top = 0; + mc->mc_pg[0] = 0; + mc->mc_flags = 0; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + assert(mx != NULL); + mc->mc_xcursor = mx; + mdb_xcursor_init0(mc); + } else { + mc->mc_xcursor = NULL; + } + if (*mc->mc_dbflag & DB_STALE) { + mdb_page_search(mc, NULL, MDB_PS_ROOTONLY); + } +} + +int +mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) +{ + MDB_cursor *mc; + size_t size = sizeof(MDB_cursor); + + if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + /* Allow read access to the freelist */ + if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EINVAL; + + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) + size += sizeof(MDB_xcursor); + + if ((mc = malloc(size)) != NULL) { + mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1)); + if (txn->mt_cursors) { + mc->mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = mc; + mc->mc_flags |= C_UNTRACK; + } + } else { + return ENOMEM; + } + + *ret = mc; + + return MDB_SUCCESS; +} + +int +mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) +{ + if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs) + return EINVAL; + + if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) + return EINVAL; + + mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); + return MDB_SUCCESS; +} + +/* Return the count of duplicate data items for the current key */ +int +mdb_cursor_count(MDB_cursor *mc, size_t *countp) +{ + MDB_node *leaf; + + if (mc == NULL || countp == NULL) + return EINVAL; + + if (!(mc->mc_db->md_flags & MDB_DUPSORT)) + return EINVAL; + + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { + *countp = 1; + } else { + if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + return EINVAL; + + *countp = mc->mc_xcursor->mx_db.md_entries; + } + return MDB_SUCCESS; +} + +void +mdb_cursor_close(MDB_cursor *mc) +{ + if (mc && !mc->mc_backup) { + /* remove from txn, if tracked */ + if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { + MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + while (*prev && *prev != mc) prev = &(*prev)->mc_next; + if (*prev == mc) + *prev = mc->mc_next; + } + free(mc); + } +} + +MDB_txn * +mdb_cursor_txn(MDB_cursor *mc) +{ + if (!mc) return NULL; + return mc->mc_txn; +} + +MDB_dbi +mdb_cursor_dbi(MDB_cursor *mc) +{ + assert(mc != NULL); + return mc->mc_dbi; +} + +/** Replace the key for a node with a new key. + * @param[in] mc Cursor pointing to the node to operate on. + * @param[in] key The new key to use. + * @return 0 on success, non-zero on failure. 
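+ * If the page has no room for a longer key, the node is deleted and
+ * re-inserted via mdb_page_split() instead.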
+ */ +static int +mdb_update_key(MDB_cursor *mc, MDB_val *key) +{ + MDB_page *mp; + MDB_node *node; + char *base; + size_t len; + int delta, delta0; + indx_t ptr, i, numkeys, indx; + DKBUF; + + indx = mc->mc_ki[mc->mc_top]; + mp = mc->mc_pg[mc->mc_top]; + node = NODEPTR(mp, indx); + ptr = mp->mp_ptrs[indx]; +#if MDB_DEBUG + { + MDB_val k2; + char kbuf2[(MDB_MAXKEYSIZE*2+1)]; + k2.mv_data = NODEKEY(node); + k2.mv_size = node->mn_ksize; + DPRINTF("update key %u (ofs %u) [%s] to [%s] on page %zu", + indx, ptr, + mdb_dkey(&k2, kbuf2), + DKEY(key), + mp->mp_pgno); + } +#endif + + delta0 = delta = key->mv_size - node->mn_ksize; + + /* Must be 2-byte aligned. If new key is + * shorter by 1, the shift will be skipped. + */ + delta += (delta & 1); + if (delta) { + if (delta > 0 && SIZELEFT(mp) < delta) { + pgno_t pgno; + /* not enough space left, do a delete and split */ + DPRINTF("Not enough room, delta = %d, splitting...", delta); + pgno = NODEPGNO(node); + mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0); + return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); + } + + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] -= delta; + } + + base = (char *)mp + mp->mp_upper; + len = ptr - mp->mp_upper + NODESIZE; + memmove(base - delta, base, len); + mp->mp_upper -= delta; + + node = NODEPTR(mp, indx); + } + + /* But even if no shift was needed, update ksize */ + if (delta0) + node->mn_ksize = key->mv_size; + + if (key->mv_size) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + return MDB_SUCCESS; +} + +static void +mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst); + +/** Move a node from csrc to cdst. + */ +static int +mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) +{ + MDB_node *srcnode; + MDB_val key, data; + pgno_t srcpg; + MDB_cursor mn; + int rc; + unsigned short flags; + + DKBUF; + + /* Mark src and dst as dirty. 
*/ + if ((rc = mdb_page_touch(csrc)) || + (rc = mdb_page_touch(cdst))) + return rc; + + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */ + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); + data.mv_size = 0; + data.mv_data = NULL; + srcpg = 0; + flags = 0; + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); + assert(!((long)srcnode&1)); + srcpg = NODEPGNO(srcnode); + flags = srcnode->mn_flags; + if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + unsigned int snum = csrc->mc_snum; + MDB_node *s2; + /* must find the lowest key below src */ + mdb_page_search_lowest(csrc); + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_size = csrc->mc_db->md_pad; + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(s2); + key.mv_data = NODEKEY(s2); + } + csrc->mc_snum = snum--; + csrc->mc_top = snum; + } else { + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { + unsigned int snum = cdst->mc_snum; + MDB_node *s2; + MDB_val bkey; + /* must find the lowest key below dst */ + mdb_page_search_lowest(cdst); + if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) { + bkey.mv_size = cdst->mc_db->md_pad; + bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size); + } else { + s2 = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); + bkey.mv_size = NODEKSZ(s2); + bkey.mv_data = NODEKEY(s2); + } + cdst->mc_snum = snum--; + cdst->mc_top = snum; + mdb_cursor_copy(cdst, &mn); + mn.mc_ki[snum] = 0; + rc = mdb_update_key(&mn, &bkey); + if (rc) + return rc; + } + + DPRINTF("moving %s node %u [%s] on page %zu to node %u on page %zu", + IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", + csrc->mc_ki[csrc->mc_top], + DKEY(&key), + csrc->mc_pg[csrc->mc_top]->mp_pgno, + cdst->mc_ki[cdst->mc_top], cdst->mc_pg[cdst->mc_top]->mp_pgno); + + /* Add the node to the destination page. + */ + rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); + if (rc != MDB_SUCCESS) + return rc; + + /* Delete the node from the source page. + */ + mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = csrc->mc_dbi; + MDB_page *mp = csrc->mc_pg[csrc->mc_top]; + + if (csrc->mc_flags & C_SUB) + dbi--; + + for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (csrc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == csrc) continue; + if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == + csrc->mc_ki[csrc->mc_top]) { + m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; + m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; + } + } + } + + /* Update the parent separators. 
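+	 * When the first node of a page changed, the separator key in the
+	 * parent must be updated to the new lowest key, and slot 0 of a
+	 * branch page is reset to a null key, since it carries no key data.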
+ */ + if (csrc->mc_ki[csrc->mc_top] == 0) { + if (csrc->mc_ki[csrc->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF("update separator for source page %zu to [%s]", + csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)); + mdb_cursor_copy(csrc, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { + MDB_val nullkey; + indx_t ix = csrc->mc_ki[csrc->mc_top]; + nullkey.mv_size = 0; + csrc->mc_ki[csrc->mc_top] = 0; + rc = mdb_update_key(csrc, &nullkey); + csrc->mc_ki[csrc->mc_top] = ix; + assert(rc == MDB_SUCCESS); + } + } + + if (cdst->mc_ki[cdst->mc_top] == 0) { + if (cdst->mc_ki[cdst->mc_top-1] != 0) { + if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { + key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size); + } else { + srcnode = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); + key.mv_size = NODEKSZ(srcnode); + key.mv_data = NODEKEY(srcnode); + } + DPRINTF("update separator for destination page %zu to [%s]", + cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)); + mdb_cursor_copy(cdst, &mn); + mn.mc_snum--; + mn.mc_top--; + if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) + return rc; + } + if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) { + MDB_val nullkey; + indx_t ix = cdst->mc_ki[cdst->mc_top]; + nullkey.mv_size = 0; + cdst->mc_ki[cdst->mc_top] = 0; + rc = mdb_update_key(cdst, &nullkey); + cdst->mc_ki[cdst->mc_top] = ix; + assert(rc == MDB_SUCCESS); + } + } + + return MDB_SUCCESS; +} + +/** Merge one page into another. + * The nodes from the page pointed to by \b csrc will + * be copied to the page pointed to by \b cdst and then + * the \b csrc page will be freed. + * @param[in] csrc Cursor pointing to the source page. + * @param[in] cdst Cursor pointing to the destination page. + */ +static int +mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) +{ + int rc; + indx_t i, j; + MDB_node *srcnode; + MDB_val key, data; + unsigned nkeys; + + DPRINTF("merging page %zu into %zu", csrc->mc_pg[csrc->mc_top]->mp_pgno, + cdst->mc_pg[cdst->mc_top]->mp_pgno); + + assert(csrc->mc_snum > 1); /* can't merge root page */ + assert(cdst->mc_snum > 1); + + /* Mark dst as dirty. */ + if ((rc = mdb_page_touch(cdst))) + return rc; + + /* Move all nodes from src to dst. 
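+	 * For a source branch page, the implicit (empty) key of node 0 is
+	 * recovered from the lowest leaf beneath it before being re-added.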
+ */
+	j = nkeys = NUMKEYS(cdst->mc_pg[cdst->mc_top]);
+	if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
+		key.mv_size = csrc->mc_db->md_pad;
+		key.mv_data = METADATA(csrc->mc_pg[csrc->mc_top]);
+		for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
+			rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
+			if (rc != MDB_SUCCESS)
+				return rc;
+			key.mv_data = (char *)key.mv_data + key.mv_size;
+		}
+	} else {
+		for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
+			srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], i);
+			if (i == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
+				unsigned int snum = csrc->mc_snum;
+				MDB_node *s2;
+				/* must find the lowest key below src */
+				mdb_page_search_lowest(csrc);
+				if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
+					key.mv_size = csrc->mc_db->md_pad;
+					key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
+				} else {
+					s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
+					key.mv_size = NODEKSZ(s2);
+					key.mv_data = NODEKEY(s2);
+				}
+				csrc->mc_snum = snum--;
+				csrc->mc_top = snum;
+			} else {
+				key.mv_size = srcnode->mn_ksize;
+				key.mv_data = NODEKEY(srcnode);
+			}
+
+			data.mv_size = NODEDSZ(srcnode);
+			data.mv_data = NODEDATA(srcnode);
+			rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags);
+			if (rc != MDB_SUCCESS)
+				return rc;
+		}
+	}
+
+	DPRINTF("dst page %zu now has %u keys (%.1f%% filled)",
+	    cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10);
+
+	/* Unlink the src page from parent and add to free list.
+	 */
+	mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
+	if (csrc->mc_ki[csrc->mc_top-1] == 0) {
+		key.mv_size = 0;
+		csrc->mc_top--;
+		rc = mdb_update_key(csrc, &key);
+		csrc->mc_top++;
+		if (rc)
+			return rc;
+	}
+
+	rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
+		csrc->mc_pg[csrc->mc_top]->mp_pgno);
+	if (rc)
+		return rc;
+	if (IS_LEAF(csrc->mc_pg[csrc->mc_top]))
+		csrc->mc_db->md_leaf_pages--;
+	else
+		csrc->mc_db->md_branch_pages--;
+	{
+		/* Adjust other cursors pointing to mp */
+		MDB_cursor *m2, *m3;
+		MDB_dbi dbi = csrc->mc_dbi;
+		MDB_page *mp = cdst->mc_pg[cdst->mc_top];
+
+		if (csrc->mc_flags & C_SUB)
+			dbi--;
+
+		for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+			if (csrc->mc_flags & C_SUB)
+				m3 = &m2->mc_xcursor->mx_cursor;
+			else
+				m3 = m2;
+			if (m3 == csrc) continue;
+			if (m3->mc_snum < csrc->mc_snum) continue;
+			if (m3->mc_pg[csrc->mc_top] == csrc->mc_pg[csrc->mc_top]) {
+				m3->mc_pg[csrc->mc_top] = mp;
+				m3->mc_ki[csrc->mc_top] += nkeys;
+			}
+		}
+	}
+	mdb_cursor_pop(csrc);
+
+	return mdb_rebalance(csrc);
+}
+
+/** Copy the contents of a cursor.
+ * @param[in] csrc The cursor to copy from.
+ * @param[out] cdst The cursor to copy to.
+ */
+static void
+mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst)
+{
+	unsigned int i;
+
+	cdst->mc_txn = csrc->mc_txn;
+	cdst->mc_dbi = csrc->mc_dbi;
+	cdst->mc_db = csrc->mc_db;
+	cdst->mc_dbx = csrc->mc_dbx;
+	cdst->mc_snum = csrc->mc_snum;
+	cdst->mc_top = csrc->mc_top;
+	cdst->mc_flags = csrc->mc_flags;
+
+	for (i=0; i<csrc->mc_snum; i++) {
+		cdst->mc_pg[i] = csrc->mc_pg[i];
+		cdst->mc_ki[i] = csrc->mc_ki[i];
+	}
+}
+
+/** Rebalance the tree after a delete operation.
+ * @param[in] mc Cursor pointing to the page where rebalancing
+ * should begin.
+ * @return 0 on success, non-zero on failure.
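+ * A page is left as-is while it is at least FILL_THRESHOLD full
+ * (PAGEFILL reports fill in tenths of a percent) and still holds the
+ * minimum key count: one entry for a leaf page, two for a branch page.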
+ */ +static int +mdb_rebalance(MDB_cursor *mc) +{ + MDB_node *node; + int rc; + unsigned int ptop, minkeys; + MDB_cursor mn; + + minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top])); +#if MDB_DEBUG + { + pgno_t pgno; + COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno); + DPRINTF("rebalancing %s page %zu (has %u keys, %.1f%% full)", + IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", + pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10); + } +#endif + + if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD && + NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { +#if MDB_DEBUG + pgno_t pgno; + COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno); + DPRINTF("no need to rebalance page %zu, above fill threshold", + pgno); +#endif + return MDB_SUCCESS; + } + + if (mc->mc_snum < 2) { + MDB_page *mp = mc->mc_pg[0]; + if (IS_SUBP(mp)) { + DPUTS("Can't rebalance a subpage, ignoring"); + return MDB_SUCCESS; + } + if (NUMKEYS(mp) == 0) { + DPUTS("tree is completely empty"); + mc->mc_db->md_root = P_INVALID; + mc->mc_db->md_depth = 0; + mc->mc_db->md_leaf_pages = 0; + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + /* Adjust cursors pointing to mp */ + mc->mc_snum = 0; + mc->mc_top = 0; + { + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + if (mc->mc_flags & C_SUB) + dbi--; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_snum = 0; + m3->mc_top = 0; + } + } + } + } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { + DPUTS("collapsing root page!"); + rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); + if (rc) + return rc; + mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); + rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); + if (rc) + return rc; + mc->mc_db->md_depth--; + mc->mc_db->md_branch_pages--; + mc->mc_ki[0] = mc->mc_ki[1]; + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2, *m3; + MDB_dbi dbi = mc->mc_dbi; + + if (mc->mc_flags & C_SUB) + dbi--; + + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (mc->mc_flags & C_SUB) + m3 = &m2->mc_xcursor->mx_cursor; + else + m3 = m2; + if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; + if (m3->mc_pg[0] == mp) { + m3->mc_pg[0] = mc->mc_pg[0]; + m3->mc_snum = 1; + m3->mc_top = 0; + m3->mc_ki[0] = m3->mc_ki[1]; + } + } + } + } else + DPUTS("root page doesn't need rebalancing"); + return MDB_SUCCESS; + } + + /* The parent (branch page) must have at least 2 pointers, + * otherwise the tree is invalid. + */ + ptop = mc->mc_top-1; + assert(NUMKEYS(mc->mc_pg[ptop]) > 1); + + /* Leaf page fill factor is below the threshold. + * Try to move keys from left or right neighbor, or + * merge with a neighbor page. + */ + + /* Find neighbors. + */ + mdb_cursor_copy(mc, &mn); + mn.mc_xcursor = NULL; + + if (mc->mc_ki[ptop] == 0) { + /* We're the leftmost leaf in our parent. + */ + DPUTS("reading right neighbor"); + mn.mc_ki[ptop]++; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = 0; + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + } else { + /* There is at least one neighbor to the left. 
+ */ + DPUTS("reading left neighbor"); + mn.mc_ki[ptop]--; + node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); + rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); + if (rc) + return rc; + mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; + mc->mc_ki[mc->mc_top] = 0; + } + + DPRINTF("found neighbor page %zu (%u keys, %.1f%% full)", + mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10); + + /* If the neighbor page is above threshold and has enough keys, + * move one key from it. Otherwise we should try to merge them. + * (A branch page must never have less than 2 keys.) + */ + minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top])); + if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) + return mdb_node_move(&mn, mc); + else { + if (mc->mc_ki[ptop] == 0) + rc = mdb_page_merge(&mn, mc); + else + rc = mdb_page_merge(mc, &mn); + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + } + return rc; +} + +/** Complete a delete operation started by #mdb_cursor_del(). */ +static int +mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf) +{ + int rc; + MDB_page *mp; + indx_t ki; + + mp = mc->mc_pg[mc->mc_top]; + ki = mc->mc_ki[mc->mc_top]; + + /* add overflow pages to free list */ + if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_BIGDATA)) { + MDB_page *omp; + pgno_t pg; + + memcpy(&pg, NODEDATA(leaf), sizeof(pg)); + if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || + (rc = mdb_ovpage_free(mc, omp))) + return rc; + } + mdb_node_del(mp, ki, mc->mc_db->md_pad); + mc->mc_db->md_entries--; + rc = mdb_rebalance(mc); + if (rc != MDB_SUCCESS) + mc->mc_txn->mt_flags |= MDB_TXN_ERROR; + /* if mc points past last node in page, invalidate */ + else if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + mc->mc_flags &= ~(C_INITIALIZED|C_EOF); + + { + /* Adjust other cursors pointing to mp */ + MDB_cursor *m2; + unsigned int nkeys; + MDB_dbi dbi = mc->mc_dbi; + + mp = mc->mc_pg[mc->mc_top]; + nkeys = NUMKEYS(mp); + for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { + if (m2 == mc) + continue; + if (!(m2->mc_flags & C_INITIALIZED)) + continue; + if (m2->mc_pg[mc->mc_top] == mp) { + if (m2->mc_ki[mc->mc_top] > ki) + m2->mc_ki[mc->mc_top]--; + if (m2->mc_ki[mc->mc_top] >= nkeys) + m2->mc_flags &= ~(C_INITIALIZED|C_EOF); + } + } + } + + return rc; +} + +int +mdb_del(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + MDB_cursor mc; + MDB_xcursor mx; + MDB_cursor_op op; + MDB_val rdata, *xdata; + int rc, exact; + DKBUF; + + assert(key != NULL); + + DPRINTF("====> delete db %u key [%s]", dbi, DKEY(key)); + + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + return EACCES; + } + + if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) { + return EINVAL; + } + + mdb_cursor_init(&mc, txn, dbi, &mx); + + exact = 0; + if (data) { + op = MDB_GET_BOTH; + rdata = *data; + xdata = &rdata; + } else { + op = MDB_SET; + xdata = NULL; + } + rc = mdb_cursor_set(&mc, key, xdata, op, &exact); + if (rc == 0) { + /* let mdb_page_split know about this cursor if needed: + * delete will trigger a rebalance; if it needs to move + * a node from one page to another, it will have to + * update the parent's separator key(s). If the new sepkey + * is larger than the current one, the parent page may + * run out of space, triggering a split. 
We need this + * cursor to be consistent until the end of the rebalance. + */ + mc.mc_flags |= C_UNTRACK; + mc.mc_next = txn->mt_cursors[dbi]; + txn->mt_cursors[dbi] = &mc; + rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA); + txn->mt_cursors[dbi] = mc.mc_next; + } + return rc; +} + +/** Split a page and insert a new node. + * @param[in,out] mc Cursor pointing to the page and desired insertion index. + * The cursor will be updated to point to the actual page and index where + * the node got inserted after the split. + * @param[in] newkey The key for the newly inserted node. + * @param[in] newdata The data for the newly inserted node. + * @param[in] newpgno The page number, if the new node is a branch node. + * @param[in] nflags The #NODE_ADD_FLAGS for the new node. + * @return 0 on success, non-zero on failure. + */ +static int +mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, + unsigned int nflags) +{ + unsigned int flags; + int rc = MDB_SUCCESS, ins_new = 0, new_root = 0, newpos = 1, did_split = 0; + indx_t newindx; + pgno_t pgno = 0; + unsigned int i, j, split_indx, nkeys, pmax; + MDB_node *node; + MDB_val sepkey, rkey, xdata, *rdata = &xdata; + MDB_page *copy; + MDB_page *mp, *rp, *pp; + unsigned int ptop; + MDB_cursor mn; + DKBUF; + + mp = mc->mc_pg[mc->mc_top]; + newindx = mc->mc_ki[mc->mc_top]; + + DPRINTF("-----> splitting %s page %zu and adding [%s] at index %i", + IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, + DKEY(newkey), mc->mc_ki[mc->mc_top]); + + /* Create a right sibling. */ + if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) + return rc; + DPRINTF("new right sibling: page %zu", rp->mp_pgno); + + if (mc->mc_snum < 2) { + if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp))) + return rc; + /* shift current top to make room for new parent */ + mc->mc_pg[1] = mc->mc_pg[0]; + mc->mc_ki[1] = mc->mc_ki[0]; + mc->mc_pg[0] = pp; + mc->mc_ki[0] = 0; + mc->mc_db->md_root = pp->mp_pgno; + DPRINTF("root split! new root = %zu", pp->mp_pgno); + mc->mc_db->md_depth++; + new_root = 1; + + /* Add left (implicit) pointer. 
*/ + if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) { + /* undo the pre-push */ + mc->mc_pg[0] = mc->mc_pg[1]; + mc->mc_ki[0] = mc->mc_ki[1]; + mc->mc_db->md_root = mp->mp_pgno; + mc->mc_db->md_depth--; + return rc; + } + mc->mc_snum = 2; + mc->mc_top = 1; + ptop = 0; + } else { + ptop = mc->mc_top-1; + DPRINTF("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno); + } + + mc->mc_flags |= C_SPLITTING; + mdb_cursor_copy(mc, &mn); + mn.mc_pg[mn.mc_top] = rp; + mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; + + if (nflags & MDB_APPEND) { + mn.mc_ki[mn.mc_top] = 0; + sepkey = *newkey; + split_indx = newindx; + nkeys = 0; + goto newsep; + } + + nkeys = NUMKEYS(mp); + split_indx = nkeys / 2; + if (newindx < split_indx) + newpos = 0; + + if (IS_LEAF2(rp)) { + char *split, *ins; + int x; + unsigned int lsize, rsize, ksize; + /* Move half of the keys to the right sibling */ + copy = NULL; + x = mc->mc_ki[mc->mc_top] - split_indx; + ksize = mc->mc_db->md_pad; + split = LEAF2KEY(mp, split_indx, ksize); + rsize = (nkeys - split_indx) * ksize; + lsize = (nkeys - split_indx) * sizeof(indx_t); + mp->mp_lower -= lsize; + rp->mp_lower += lsize; + mp->mp_upper += rsize - lsize; + rp->mp_upper -= rsize - lsize; + sepkey.mv_size = ksize; + if (newindx == split_indx) { + sepkey.mv_data = newkey->mv_data; + } else { + sepkey.mv_data = split; + } + if (x<0) { + ins = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], ksize); + memcpy(rp->mp_ptrs, split, rsize); + sepkey.mv_data = rp->mp_ptrs; + memmove(ins+ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize); + memcpy(ins, newkey->mv_data, ksize); + mp->mp_lower += sizeof(indx_t); + mp->mp_upper -= ksize - sizeof(indx_t); + } else { + if (x) + memcpy(rp->mp_ptrs, split, x * ksize); + ins = LEAF2KEY(rp, x, ksize); + memcpy(ins, newkey->mv_data, ksize); + memcpy(ins+ksize, split + x * ksize, rsize - x * ksize); + rp->mp_lower += sizeof(indx_t); + rp->mp_upper -= ksize - sizeof(indx_t); + mc->mc_ki[mc->mc_top] = x; + mc->mc_pg[mc->mc_top] = rp; + } + goto newsep; + } + + /* For leaf pages, check the split point based on what + * fits where, since otherwise mdb_node_add can fail. + * + * This check is only needed when the data items are + * relatively large, such that being off by one will + * make the difference between success or failure. + * + * It's also relevant if a page happens to be laid out + * such that one half of its nodes are all "small" and + * the other half of its nodes are "large." If the new + * item is also "large" and falls on the half with + * "large" nodes, it also may not fit. 
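+	 * The scan below charges each node NODESIZE + key + data (or a
+	 * pgno_t for F_BIGDATA) rounded up to even, plus an indx_t slot,
+	 * mirroring the accounting in mdb_node_add.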
+ */
+	if (IS_LEAF(mp)) {
+		unsigned int psize, nsize;
+		/* Maximum free space in an empty page */
+		pmax = mc->mc_txn->mt_env->me_psize - PAGEHDRSZ;
+		nsize = mdb_leaf_size(mc->mc_txn->mt_env, newkey, newdata);
+		if ((nkeys < 20) || (nsize > pmax/16)) {
+			if (newindx <= split_indx) {
+				psize = nsize;
+				newpos = 0;
+				for (i=0; i<split_indx; i++) {
+					node = NODEPTR(mp, i);
+					psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
+					if (F_ISSET(node->mn_flags, F_BIGDATA))
+						psize += sizeof(pgno_t);
+					else
+						psize += NODEDSZ(node);
+					psize += psize & 1;
+					if (psize > pmax) {
+						if (i <= newindx) {
+							split_indx = newindx;
+							if (i < newindx)
+								newpos = 1;
+						}
+						else
+							split_indx = i;
+						break;
+					}
+				}
+			} else {
+				psize = nsize;
+				for (i=nkeys-1; i>=split_indx; i--) {
+					node = NODEPTR(mp, i);
+					psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
+					if (F_ISSET(node->mn_flags, F_BIGDATA))
+						psize += sizeof(pgno_t);
+					else
+						psize += NODEDSZ(node);
+					psize += psize & 1;
+					if (psize > pmax) {
+						if (i >= newindx) {
+							split_indx = newindx;
+							newpos = 0;
+						} else
+							split_indx = i+1;
+						break;
+					}
+				}
+			}
+		}
+	}
+
+	/* First find the separating key between the split pages.
+	 * The case where newindx == split_indx is ambiguous; the
+	 * new item could go to the new page or stay on the original
+	 * page. If newpos == 1 it goes to the new page.
+	 */
+	if (newindx == split_indx && newpos) {
+		sepkey.mv_size = newkey->mv_size;
+		sepkey.mv_data = newkey->mv_data;
+	} else {
+		node = NODEPTR(mp, split_indx);
+		sepkey.mv_size = node->mn_ksize;
+		sepkey.mv_data = NODEKEY(node);
+	}
+
+newsep:
+	DPRINTF("separator is [%s]", DKEY(&sepkey));
+
+	/* Copy separator key to the parent.
+	 */
+	if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(mc->mc_txn->mt_env, &sepkey)) {
+		mn.mc_snum--;
+		mn.mc_top--;
+		did_split = 1;
+		rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
+
+		/* root split? */
+		if (mn.mc_snum == mc->mc_snum) {
+			mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top];
+			mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top];
+			mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop];
+			mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop];
+			mc->mc_snum++;
+			mc->mc_top++;
+			ptop++;
+		}
+		/* Right page might now have changed parent.
+		 * Check if left page also changed parent.
+		 */
+		if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
+		    mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
+			for (i=0; i<ptop; i++) {
+				mc->mc_pg[i] = mn.mc_pg[i];
+				mc->mc_ki[i] = mn.mc_ki[i];
+			}
+			mc->mc_pg[ptop] = mn.mc_pg[ptop];
+			mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
+		}
+	} else {
+		mn.mc_top--;
+		rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0);
+		mn.mc_top++;
+	}
+	mc->mc_flags ^= C_SPLITTING;
+	if (rc != MDB_SUCCESS) {
+		return rc;
+	}
+	if (nflags & MDB_APPEND) {
+		mc->mc_pg[mc->mc_top] = rp;
+		mc->mc_ki[mc->mc_top] = 0;
+		rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
+		if (rc)
+			return rc;
+		for (i=0; i<mc->mc_top; i++)
+			mc->mc_ki[i] = mn.mc_ki[i];
+		goto done;
+	}
+	if (IS_LEAF2(rp)) {
+		goto done;
+	}
+
+	/* Move half of the keys to the right sibling. */
+
+	/* grab a page to hold a temporary copy */
+	copy = mdb_page_malloc(mc->mc_txn, 1);
+	if (copy == NULL)
+		return ENOMEM;
+
+	copy->mp_pgno = mp->mp_pgno;
+	copy->mp_flags = mp->mp_flags;
+	copy->mp_lower = PAGEHDRSZ;
+	copy->mp_upper = mc->mc_txn->mt_env->me_psize;
+	mc->mc_pg[mc->mc_top] = copy;
+	for (i = j = 0; i <= nkeys; j++) {
+		if (i == split_indx) {
+		/* Insert in right sibling. */
+		/* Reset insert index for right sibling. */
+			if (i != newindx || (newpos ^ ins_new)) {
+				j = 0;
+				mc->mc_pg[mc->mc_top] = rp;
+			}
+		}
+
+		if (i == newindx && !ins_new) {
+			/* Insert the original entry that caused the split. */
+			rkey.mv_data = newkey->mv_data;
+			rkey.mv_size = newkey->mv_size;
+			if (IS_LEAF(mp)) {
+				rdata = newdata;
+			} else
+				pgno = newpgno;
+			flags = nflags;
+
+			ins_new = 1;
+
+			/* Update index for the new key. */
+			mc->mc_ki[mc->mc_top] = j;
+		} else if (i == nkeys) {
+			break;
+		} else {
+			node = NODEPTR(mp, i);
+			rkey.mv_data = NODEKEY(node);
+			rkey.mv_size = node->mn_ksize;
+			if (IS_LEAF(mp)) {
+				xdata.mv_data = NODEDATA(node);
+				xdata.mv_size = NODEDSZ(node);
+				rdata = &xdata;
+			} else
+				pgno = NODEPGNO(node);
+			flags = node->mn_flags;
+
+			i++;
+		}
+
+		if (!IS_LEAF(mp) && j == 0) {
+			/* First branch index doesn't need key data. */
+			rkey.mv_size = 0;
+		}
+
+		rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
+		if (rc) break;
+	}
+
+	nkeys = NUMKEYS(copy);
+	for (i=0; i<nkeys; i++)
+		mp->mp_ptrs[i] = copy->mp_ptrs[i];
+	mp->mp_lower = copy->mp_lower;
+	mp->mp_upper = copy->mp_upper;
+	memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
+		mc->mc_txn->mt_env->me_psize - copy->mp_upper);
+
+	/* reset back to original page */
+	if (newindx < split_indx || (!newpos && newindx == split_indx)) {
+		mc->mc_pg[mc->mc_top] = mp;
+		if (nflags & MDB_RESERVE) {
+			node = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
+			if (!(node->mn_flags & F_BIGDATA))
+				newdata->mv_data = NODEDATA(node);
+		}
+	} else {
+		mc->mc_ki[ptop]++;
+		/* Make sure mc_ki is still valid.
+		 */
+		if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
+		    mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
+			for (i=0; i<ptop; i++) {
+				mc->mc_pg[i] = mn.mc_pg[i];
+				mc->mc_ki[i] = mn.mc_ki[i];
+			}
+			mc->mc_pg[ptop] = mn.mc_pg[ptop];
+			mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
+		}
+	}
+
+	/* return tmp page to freelist */
+	mdb_page_free(mc->mc_txn->mt_env, copy);
+done:
+	{
+		/* Adjust other cursors pointing to mp */
+		MDB_cursor *m2, *m3;
+		MDB_dbi dbi = mc->mc_dbi;
+		int fixup = NUMKEYS(mp);
+
+		if (mc->mc_flags & C_SUB)
+			dbi--;
+
+		for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+			if (mc->mc_flags & C_SUB)
+				m3 = &m2->mc_xcursor->mx_cursor;
+			else
+				m3 = m2;
+			if (m3 == mc)
+				continue;
+			if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
+				continue;
+			if (m3->mc_flags & C_SPLITTING)
+				continue;
+			if (new_root) {
+				int k;
+				/* root split */
+				for (k=m3->mc_top; k>=0; k--) {
+					m3->mc_ki[k+1] = m3->mc_ki[k];
+					m3->mc_pg[k+1] = m3->mc_pg[k];
+				}
+				if (m3->mc_ki[0] >= split_indx) {
+					m3->mc_ki[0] = 1;
+				} else {
+					m3->mc_ki[0] = 0;
+				}
+				m3->mc_pg[0] = mc->mc_pg[0];
+				m3->mc_snum++;
+				m3->mc_top++;
+			}
+			if (m3->mc_pg[mc->mc_top] == mp) {
+				if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE))
+					m3->mc_ki[mc->mc_top]++;
+				if (m3->mc_ki[mc->mc_top] >= fixup) {
+					m3->mc_pg[mc->mc_top] = rp;
+					m3->mc_ki[mc->mc_top] -= fixup;
+					m3->mc_ki[ptop] = mn.mc_ki[ptop];
+				}
+			} else if (!did_split && m3->mc_pg[ptop] == mc->mc_pg[ptop] &&
+				m3->mc_ki[ptop] >= mc->mc_ki[ptop]) {
+				m3->mc_ki[ptop]++;
+			}
+		}
+	}
+	return rc;
+}
+
+int
+mdb_put(MDB_txn *txn, MDB_dbi dbi,
+	MDB_val *key, MDB_val *data, unsigned int flags)
+{
+	MDB_cursor mc;
+	MDB_xcursor mx;
+
+	assert(key != NULL);
+	assert(data != NULL);
+
+	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+		return EACCES;
+	}
+
+	if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
+		return EINVAL;
+	}
+
+	if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
+		return EINVAL;
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+	return mdb_cursor_put(&mc, key, data, flags);
+}
+
+int
+mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff) +{ + if ((flag & CHANGEABLE) != flag) + return EINVAL; + if (onoff) + env->me_flags |= flag; + else + env->me_flags &= ~flag; + return MDB_SUCCESS; +} + +int +mdb_env_get_flags(MDB_env *env, unsigned int *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_flags; + return MDB_SUCCESS; +} + +int +mdb_env_get_path(MDB_env *env, const char **arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_path; + return MDB_SUCCESS; +} + +/** Common code for #mdb_stat() and #mdb_env_stat(). + * @param[in] env the environment to operate in. + * @param[in] db the #MDB_db record containing the stats to return. + * @param[out] arg the address of an #MDB_stat structure to receive the stats. + * @return 0, this function always succeeds. + */ +static int +mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) +{ + arg->ms_psize = env->me_psize; + arg->ms_depth = db->md_depth; + arg->ms_branch_pages = db->md_branch_pages; + arg->ms_leaf_pages = db->md_leaf_pages; + arg->ms_overflow_pages = db->md_overflow_pages; + arg->ms_entries = db->md_entries; + + return MDB_SUCCESS; +} +int +mdb_env_stat(MDB_env *env, MDB_stat *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + + return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); +} + +int +mdb_env_info(MDB_env *env, MDB_envinfo *arg) +{ + int toggle; + + if (env == NULL || arg == NULL) + return EINVAL; + + toggle = mdb_env_pick_meta(env); + arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0; + arg->me_mapsize = env->me_mapsize; + arg->me_maxreaders = env->me_maxreaders; + arg->me_numreaders = env->me_numreaders; + arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; + arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; + return MDB_SUCCESS; +} + +/** Set the default comparison functions for a database. + * Called immediately after a database is opened to set the defaults. + * The user can then override them with #mdb_set_compare() or + * #mdb_set_dupsort(). + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + */ +static void +mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi) +{ + uint16_t f = txn->mt_dbs[dbi].md_flags; + + txn->mt_dbxs[dbi].md_cmp = + (f & MDB_REVERSEKEY) ? mdb_cmp_memnr : + (f & MDB_INTEGERKEY) ? mdb_cmp_cint : mdb_cmp_memn; + + txn->mt_dbxs[dbi].md_dcmp = + !(f & MDB_DUPSORT) ? 0 : + ((f & MDB_INTEGERDUP) + ? ((f & MDB_DUPFIXED) ? mdb_cmp_int : mdb_cmp_cint) + : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn)); +} + +int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) +{ + MDB_val key, data; + MDB_dbi i; + MDB_cursor mc; + int rc, dbflag, exact; + unsigned int unused = 0; + size_t len; + + if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, FREE_DBI); + } + + if ((flags & VALID_FLAGS) != flags) + return EINVAL; + + /* main DB? */ + if (!name) { + *dbi = MAIN_DBI; + if (flags & PERSISTENT_FLAGS) { + uint16_t f2 = flags & PERSISTENT_FLAGS; + /* make sure flag changes get committed */ + if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) { + txn->mt_dbs[MAIN_DBI].md_flags |= f2; + txn->mt_flags |= MDB_TXN_DIRTY; + } + } + mdb_default_cmp(txn, MAIN_DBI); + return MDB_SUCCESS; + } + + if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) { + mdb_default_cmp(txn, MAIN_DBI); + } + + /* Is the DB already open? 
*/ + len = strlen(name); + for (i=2; imt_numdbs; i++) { + if (!txn->mt_dbxs[i].md_name.mv_size) { + /* Remember this free slot */ + if (!unused) unused = i; + continue; + } + if (len == txn->mt_dbxs[i].md_name.mv_size && + !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) { + *dbi = i; + return MDB_SUCCESS; + } + } + + /* If no free slot and max hit, fail */ + if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs) + return MDB_DBS_FULL; + + /* Cannot mix named databases with some mainDB flags */ + if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY)) + return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; + + /* Find the DB info */ + dbflag = DB_NEW|DB_VALID; + exact = 0; + key.mv_size = len; + key.mv_data = (void *)name; + mdb_cursor_init(&mc, txn, MAIN_DBI, NULL); + rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact); + if (rc == MDB_SUCCESS) { + /* make sure this is actually a DB */ + MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); + if (!(node->mn_flags & F_SUBDATA)) + return EINVAL; + } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) { + /* Create if requested */ + MDB_db dummy; + data.mv_size = sizeof(MDB_db); + data.mv_data = &dummy; + memset(&dummy, 0, sizeof(dummy)); + dummy.md_root = P_INVALID; + dummy.md_flags = flags & PERSISTENT_FLAGS; + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + dbflag |= DB_DIRTY; + } + + /* OK, got info, add to table */ + if (rc == MDB_SUCCESS) { + unsigned int slot = unused ? unused : txn->mt_numdbs; + txn->mt_dbxs[slot].md_name.mv_data = strdup(name); + txn->mt_dbxs[slot].md_name.mv_size = len; + txn->mt_dbxs[slot].md_rel = NULL; + txn->mt_dbflags[slot] = dbflag; + memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db)); + *dbi = slot; + txn->mt_env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags; + mdb_default_cmp(txn, slot); + if (!unused) { + txn->mt_numdbs++; + } + } + + return rc; +} + +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) +{ + if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs) + return EINVAL; + + if (txn->mt_dbflags[dbi] & DB_STALE) { + MDB_cursor mc; + MDB_xcursor mx; + /* Stale, must read the DB's root. cursor_init does it for us. */ + mdb_cursor_init(&mc, txn, dbi, &mx); + } + return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); +} + +void mdb_dbi_close(MDB_env *env, MDB_dbi dbi) +{ + char *ptr; + if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs) + return; + ptr = env->me_dbxs[dbi].md_name.mv_data; + env->me_dbxs[dbi].md_name.mv_data = NULL; + env->me_dbxs[dbi].md_name.mv_size = 0; + env->me_dbflags[dbi] = 0; + free(ptr); +} + +int mdb_dbi_flags(MDB_env *env, MDB_dbi dbi, unsigned int *flags) +{ + /* We could return the flags for the FREE_DBI too but what's the point? */ + if (dbi <= MAIN_DBI || dbi >= env->me_numdbs) + return EINVAL; + *flags = env->me_dbflags[dbi]; + return MDB_SUCCESS; +} + +/** Add all the DB's pages to the free list. + * @param[in] mc Cursor on the DB to free. + * @param[in] subs non-Zero to check for sub-DBs in this DB. + * @return 0 on success, non-zero on failure. 
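+ *
+ * The walk is depth-first: on leaf pages, F_BIGDATA nodes release their
+ * overflow chains and (when @a subs is nonzero) F_SUBDATA nodes recurse
+ * into the sub-DB; branch-page numbers are appended level by level, and
+ * the root page is freed last.
+ *
+ * Callers normally reach this through the public mdb_drop() wrapper; a
+ * minimal usage sketch (hypothetical env/dbi handles):
+ *
+ *	MDB_txn *txn;
+ *	mdb_txn_begin(env, NULL, 0, &txn);
+ *	mdb_drop(txn, dbi, 1);	 (nonzero del deletes the DB itself)
+ *	mdb_txn_commit(txn);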
+ */ +static int +mdb_drop0(MDB_cursor *mc, int subs) +{ + int rc; + + rc = mdb_page_search(mc, NULL, 0); + if (rc == MDB_SUCCESS) { + MDB_txn *txn = mc->mc_txn; + MDB_node *ni; + MDB_cursor mx; + unsigned int i; + + /* LEAF2 pages have no nodes, cannot have sub-DBs */ + if (IS_LEAF2(mc->mc_pg[mc->mc_top])) + mdb_cursor_pop(mc); + + mdb_cursor_copy(mc, &mx); + while (mc->mc_snum > 0) { + MDB_page *mp = mc->mc_pg[mc->mc_top]; + unsigned n = NUMKEYS(mp); + if (IS_LEAF(mp)) { + for (i=0; imn_flags & F_BIGDATA) { + MDB_page *omp; + pgno_t pg; + memcpy(&pg, NODEDATA(ni), sizeof(pg)); + rc = mdb_page_get(txn, pg, &omp, NULL); + if (rc != 0) + return rc; + assert(IS_OVERFLOW(omp)); + rc = mdb_midl_append_range(&txn->mt_free_pgs, + pg, omp->mp_pages); + if (rc) + return rc; + } else if (subs && (ni->mn_flags & F_SUBDATA)) { + mdb_xcursor_init1(mc, ni); + rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); + if (rc) + return rc; + } + } + } else { + if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0) + return rc; + for (i=0; imt_free_pgs, pg); + } + } + if (!mc->mc_top) + break; + mc->mc_ki[mc->mc_top] = i; + rc = mdb_cursor_sibling(mc, 1); + if (rc) { + /* no more siblings, go back to beginning + * of previous level. + */ + mdb_cursor_pop(mc); + mc->mc_ki[0] = 0; + for (i=1; imc_snum; i++) { + mc->mc_ki[i] = 0; + mc->mc_pg[i] = mx.mc_pg[i]; + } + } + } + /* free it */ + rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root); + } else if (rc == MDB_NOTFOUND) { + rc = MDB_SUCCESS; + } + return rc; +} + +int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del) +{ + MDB_cursor *mc, *m2; + int rc; + + if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; + + rc = mdb_cursor_open(txn, dbi, &mc); + if (rc) + return rc; + + rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT); + /* Invalidate the dropped DB's cursors */ + for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) + m2->mc_flags &= ~(C_INITIALIZED|C_EOF); + if (rc) + goto leave; + + /* Can't delete the main DB */ + if (del && dbi > MAIN_DBI) { + rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL); + if (!rc) { + txn->mt_dbflags[dbi] = DB_STALE; + mdb_dbi_close(txn->mt_env, dbi); + } + } else { + /* reset the DB record, mark it dirty */ + txn->mt_dbflags[dbi] |= DB_DIRTY; + txn->mt_dbs[dbi].md_depth = 0; + txn->mt_dbs[dbi].md_branch_pages = 0; + txn->mt_dbs[dbi].md_leaf_pages = 0; + txn->mt_dbs[dbi].md_overflow_pages = 0; + txn->mt_dbs[dbi].md_entries = 0; + txn->mt_dbs[dbi].md_root = P_INVALID; + + txn->mt_flags |= MDB_TXN_DIRTY; + } +leave: + mdb_cursor_close(mc); + return rc; +} + +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + txn->mt_dbxs[dbi].md_cmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) +{ + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + txn->mt_dbxs[dbi].md_dcmp = cmp; + return MDB_SUCCESS; +} + +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) +{ + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + txn->mt_dbxs[dbi].md_rel = rel; + return MDB_SUCCESS; +} + +int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx) +{ + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || 
!(txn->mt_dbflags[dbi] & DB_VALID)) + return EINVAL; + + txn->mt_dbxs[dbi].md_relctx = ctx; + return MDB_SUCCESS; +} + +int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) +{ + unsigned int i, rdrs; + MDB_reader *mr; + char buf[64]; + int first = 1; + + if (!env || !func) + return -1; + if (!env->me_txns) { + return func("(no reader locks)\n", ctx); + } + rdrs = env->me_txns->mti_numreaders; + mr = env->me_txns->mti_readers; + for (i=0; i> 1; + cursor = base + pivot + 1; + val = pid - ids[cursor]; + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + /* found, so it's a duplicate */ + return -1; + } + } + + if( val > 0 ) { + ++cursor; + } + ids[0]++; + for (n = ids[0]; n > cursor; n--) + ids[n] = ids[n-1]; + ids[n] = pid; + return 0; +} + +int mdb_reader_check(MDB_env *env, int *dead) +{ + unsigned int i, j, rdrs; + MDB_reader *mr; + pid_t *pids, pid; + int count = 0; + + if (!env) + return EINVAL; + if (dead) + *dead = 0; + if (!env->me_txns) + return MDB_SUCCESS; + rdrs = env->me_txns->mti_numreaders; + pids = malloc((rdrs+1) * sizeof(pid_t)); + if (!pids) + return ENOMEM; + pids[0] = 0; + mr = env->me_txns->mti_readers; + j = 0; + for (i=0; ime_pid) { + pid = mr[i].mr_pid; + if (mdb_pid_insert(pids, pid) == 0) { + if (mdb_reader_pid(env, Pidcheck, pid)) { + LOCK_MUTEX_R(env); + if (mdb_reader_pid(env, Pidcheck, pid)) { + for (j=i; j diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c new file mode 100644 index 0000000000..ca92009cff --- /dev/null +++ b/libraries/liblmdb/mdb_copy.c @@ -0,0 +1,66 @@ +/* mdb_copy.c - memory-mapped database backup tool */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ +#ifdef _WIN32 +#include +#define MDB_STDOUT GetStdHandle(STD_OUTPUT_HANDLE) +#else +#define MDB_STDOUT 1 +#endif +#include +#include +#include +#include "lmdb.h" + +static void +sighandle(int sig) +{ +} + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + char *envname = argv[1]; + + if (argc<2 || argc>3) { + fprintf(stderr, "usage: %s srcpath [dstpath]\n", argv[0]); + exit(EXIT_FAILURE); + } + +#ifdef SIGPIPE + signal(SIGPIPE, sighandle); +#endif +#ifdef SIGHUP + signal(SIGHUP, sighandle); +#endif + signal(SIGINT, sighandle); + signal(SIGTERM, sighandle); + + rc = mdb_env_create(&env); + + rc = mdb_env_open(env, envname, MDB_RDONLY, 0); + if (rc) { + printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + } else { + if (argc == 2) + rc = mdb_env_copyfd(env, MDB_STDOUT); + else + rc = mdb_env_copy(env, argv[2]); + if (rc) + printf("mdb_env_copy failed, error %d %s\n", rc, mdb_strerror(rc)); + } + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/libraries/liblmdb/mdb_stat.1 b/libraries/liblmdb/mdb_stat.1 new file mode 100644 index 0000000000..3622772ddf --- /dev/null +++ b/libraries/liblmdb/mdb_stat.1 @@ -0,0 +1,59 @@ +.TH MDB_STAT 1 "2012/12/12" "LMDB 0.9.5" +.\" Copyright 2012 Howard Chu, Symas Corp. All Rights Reserved. +.\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
+.SH NAME +mdb_stat \- LMDB environment status tool +.SH SYNOPSIS +.B mdb_stat +.BR \ envpath +[\c +.BR \-e ] +[\c +.BR \-f [ f [ f ]]] +[\c +.BR \-n ] +[\c +.BR \-r [ r ]] +[\c +.BR \-a \ | +.BI \-s \ subdb\fR] +.SH DESCRIPTION +The +.B mdb_stat +utility displays the status of an LMDB environment. +.SH OPTIONS +.TP +.BR \-e +Display information about the database environment. +.TP +.BR \-f +Display information about the environment freelist. +If \fB\-ff\fP is given, summarize each freelist entry. +If \fB\-fff\fP is given, display the full list of page IDs in the freelist. +.TP +.BR \-n +Display the status of an LMDB database which does not use subdirectories. +.TP +.BR \-r +Display information about the environment reader table. +Shows the process ID, thread ID, and transaction ID for each active +reader slot. The process ID and transaction ID are in decimal, the +thread ID is in hexadecimal. The transaction ID is displayed as "-" +if the reader does not currently have a read transaction open. +If \fB\-rr\fP is given, check for stale entries in the reader +table and clear them. The reader table will be printed again +after the check is performed. +.TP +.BR \-a +Display the status of all of the subdatabases in the environment. +.TP +.BR \-s \ subdb +Display the status of a specific subdatabase. +.SH DIAGNOSTICS +Exit status is zero if no errors occur. +Errors result in a non-zero exit status and +a diagnostic message being written to standard error. +.SH "SEE ALSO" +.BR mdb_copy (1) +.SH AUTHOR +Howard Chu of Symas Corporation diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c new file mode 100644 index 0000000000..aaad2d75a3 --- /dev/null +++ b/libraries/liblmdb/mdb_stat.c @@ -0,0 +1,248 @@ +/* mdb_stat.c - memory-mapped database status tool */ +/* + * Copyright 2011-2013 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ +#include +#include +#include +#include +#include "lmdb.h" + +static void prstat(MDB_stat *ms) +{ +#if 0 + printf(" Page size: %u\n", ms->ms_psize); +#endif + printf(" Tree depth: %u\n", ms->ms_depth); + printf(" Branch pages: %zu\n", ms->ms_branch_pages); + printf(" Leaf pages: %zu\n", ms->ms_leaf_pages); + printf(" Overflow pages: %zu\n", ms->ms_overflow_pages); + printf(" Entries: %zu\n", ms->ms_entries); +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s dbpath [-n] [-e] [-r[r]] [-f[f[f]]] [-a|-s subdb]\n", prog); + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + int i, rc; + MDB_env *env; + MDB_txn *txn; + MDB_dbi dbi; + MDB_stat mst; + MDB_envinfo mei; + char *prog = argv[0]; + char *envname; + char *subname = NULL; + int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0, rdrinfo = 0; + + if (argc < 2) { + usage(prog); + } + + /* -a: print stat of main DB and all subDBs + * -s: print stat of only the named subDB + * -e: print env info + * -f: print freelist info + * -r: print reader info + * -n: use NOSUBDIR flag on env_open + * (default) print stat of only the main DB + */ + while ((i = getopt(argc, argv, "aefnrs:")) != EOF) { + switch(i) { + case 'a': + if (subname) + usage(prog); + alldbs++; + break; + case 'e': + envinfo++; + break; + case 'f': + freinfo++; + break; + case 'n': + envflags |= MDB_NOSUBDIR; + break; + case 'r': + rdrinfo++; + break; + case 's': + if (alldbs) + usage(prog); + subname = optarg; + break; + default: + usage(prog); + } + } + + if (optind != argc - 1) + usage(prog); + + envname = argv[optind]; + rc = mdb_env_create(&env); + + if (alldbs || subname) { + mdb_env_set_maxdbs(env, 4); + } + + rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); + if (rc) { + printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (envinfo) { + rc = mdb_env_stat(env, &mst); + rc = mdb_env_info(env, &mei); + printf("Environment Info\n"); + printf(" Map address: %p\n", mei.me_mapaddr); + printf(" Map size: %zu\n", mei.me_mapsize); + printf(" Page size: %u\n", mst.ms_psize); + printf(" Max pages: %zu\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %zu\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %zu\n", mei.me_last_txnid); + printf(" Max readers: %u\n", mei.me_maxreaders); + printf(" Number of readers used: %u\n", mei.me_numreaders); + } + + if (rdrinfo) { + printf("Reader Table Status\n"); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + if (rdrinfo > 1) { + int dead; + mdb_reader_check(env, &dead); + printf(" %d stale readers cleared.\n", dead); + rc = mdb_reader_list(env, (MDB_msg_func *)fputs, stdout); + } + if (!(subname || alldbs || freinfo)) + goto env_close; + } + + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + if (rc) { + printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); + goto env_close; + } + + if (freinfo) { + MDB_cursor *cursor; + MDB_val key, data; + size_t pages = 0, *iptr; + + printf("Freelist Status\n"); + dbi = 0; + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + iptr = data.mv_data; + pages += *iptr; + if (freinfo > 1) { + char *bad = ""; + size_t pg, prev; + ssize_t i, j, span = 0; + j = 
*iptr++; + for (i = j, prev = 1; --i >= 0; ) { + pg = iptr[i]; + if (pg <= prev) + bad = " [bad sequence]"; + prev = pg; + pg += span; + for (; i >= span && iptr[i-span] == pg; span++, pg++) ; + } + printf(" Transaction %zu, %zd pages, maxspan %zd%s\n", + *(size_t *)key.mv_data, j, span, bad); + if (freinfo > 2) { + for (--j; j >= 0; ) { + pg = iptr[j]; + for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ; + printf(span>1 ? " %9zu[%zd]\n" : " %9zu\n", + pg, span); + } + } + } + } + mdb_cursor_close(cursor); + printf(" Free pages: %zu\n", pages); + } + + rc = mdb_open(txn, subname, 0, &dbi); + if (rc) { + printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + + rc = mdb_stat(txn, dbi, &mst); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + printf("Status of %s\n", subname ? subname : "Main DB"); + prstat(&mst); + + if (alldbs) { + MDB_cursor *cursor; + MDB_val key; + + rc = mdb_cursor_open(txn, dbi, &cursor); + if (rc) { + printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { + char *str; + MDB_dbi db2; + if (memchr(key.mv_data, '\0', key.mv_size)) + continue; + str = malloc(key.mv_size+1); + memcpy(str, key.mv_data, key.mv_size); + str[key.mv_size] = '\0'; + rc = mdb_open(txn, str, 0, &db2); + if (rc == MDB_SUCCESS) + printf("Status of %s\n", str); + free(str); + if (rc) continue; + rc = mdb_stat(txn, db2, &mst); + if (rc) { + printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); + goto txn_abort; + } + prstat(&mst); + mdb_close(env, db2); + } + mdb_cursor_close(cursor); + } + + if (rc == MDB_NOTFOUND) + rc = MDB_SUCCESS; + + mdb_close(env, dbi); +txn_abort: + mdb_txn_abort(txn); +env_close: + mdb_env_close(env); + + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/libraries/liblmdb/midl.c b/libraries/liblmdb/midl.c new file mode 100644 index 0000000000..86e4592d2d --- /dev/null +++ b/libraries/liblmdb/midl.c @@ -0,0 +1,348 @@ +/** @file midl.c + * @brief ldap bdb back-end ID List functions */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2013 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include +#include +#include +#include +#include +#include +#include "midl.h" + +/** @defgroup internal MDB Internals + * @{ + */ +/** @defgroup idls ID List Management + * @{ + */ +#define CMP(x,y) ( (x) < (y) ? 
-1 : (x) > (y) ) + +unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( ids[cursor], id ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +#if 0 /* superseded by append/sort */ +int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) +{ + unsigned x, i; + + x = mdb_midl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + assert(0); + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + /* no room */ + --ids[0]; + return -2; + + } else { + /* insert id */ + for (i=ids[0]; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = id; + } + + return 0; +} +#endif + +MDB_IDL mdb_midl_alloc(int num) +{ + MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID)); + if (ids) { + *ids++ = num; + *ids = 0; + } + return ids; +} + +void mdb_midl_free(MDB_IDL ids) +{ + if (ids) + free(ids-1); +} + +int mdb_midl_shrink( MDB_IDL *idp ) +{ + MDB_IDL ids = *idp; + if (*(--ids) > MDB_IDL_UM_MAX && + (ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID)))) + { + *ids++ = MDB_IDL_UM_MAX; + *idp = ids; + return 1; + } + return 0; +} + +static int mdb_midl_grow( MDB_IDL *idp, int num ) +{ + MDB_IDL idn = *idp-1; + /* grow it */ + idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); + if (!idn) + return ENOMEM; + *idn++ += num; + *idp = idn; + return 0; +} + +int mdb_midl_need( MDB_IDL *idp, unsigned num ) +{ + MDB_IDL ids = *idp; + num += ids[0]; + if (num > ids[-1]) { + num = (num + num/4 + (256 + 2)) & -256; + if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) + return ENOMEM; + *ids++ = num -= 2; + *idp = ids; + } + return 0; +} + +int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] >= ids[-1]) { + if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0]++; + ids[ids[0]] = id; + return 0; +} + +int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) +{ + MDB_IDL ids = *idp; + /* Too big? */ + if (ids[0] + app[0] >= ids[-1]) { + if (mdb_midl_grow(idp, app[0])) + return ENOMEM; + ids = *idp; + } + memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID)); + ids[0] += app[0]; + return 0; +} + +int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) +{ + MDB_ID *ids = *idp, len = ids[0]; + /* Too big? 
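+	 * ids[-1] holds the allocated capacity and ids[0] the live count
+	 * (see mdb_midl_alloc); growing by n | MDB_IDL_UM_MAX adds at
+	 * least MDB_IDL_UM_MAX slots, keeping repeated appends amortized.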
*/ + if (len + n > ids[-1]) { + if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) + return ENOMEM; + ids = *idp; + } + ids[0] = len + n; + ids += len; + while (n) + ids[n--] = id++; + return 0; +} + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } + +void +mdb_midl_sort( MDB_IDL ids ) +{ + /* Max possible depth of int-indexed tree * 2 items/level */ + int istack[sizeof(int)*CHAR_BIT * 2]; + int i,j,k,l,ir,jstack; + MDB_ID a, itmp; + + ir = (int)ids[0]; + l = 1; + jstack = 0; + for(;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j=l+1;j<=ir;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] >= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + if (jstack == 0) break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + SWAP(ids[k], ids[l+1]); + if (ids[l] < ids[ir]) { + SWAP(ids[l], ids[ir]); + } + if (ids[l+1] < ids[ir]) { + SWAP(ids[l+1], ids[ir]); + } + if (ids[l] < ids[l+1]) { + SWAP(ids[l], ids[l+1]); + } + i = l+1; + j = ir; + a = ids[l+1]; + for(;;) { + do i++; while(ids[i] > a); + do j--; while(ids[j] < a); + if (j < i) break; + SWAP(ids[i],ids[j]); + } + ids[l+1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir-i+1 >= j-l) { + istack[jstack] = ir; + istack[jstack-1] = i; + ir = j-1; + } else { + istack[jstack] = j-1; + istack[jstack-1] = l; + l = i; + } + } + } +} + +unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 1; + int val = 0; + unsigned n = (unsigned)ids[0].mid; + + while( 0 < n ) { + unsigned pivot = n >> 1; + cursor = base + pivot + 1; + val = CMP( id, ids[cursor].mid ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor; + n -= pivot + 1; + + } else { + return cursor; + } + } + + if( val > 0 ) { + ++cursor; + } + return cursor; +} + +int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ) +{ + unsigned x, i; + + x = mdb_mid2l_search( ids, id->mid ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0].mid && ids[x].mid == id->mid ) { + /* duplicate */ + return -1; + } + + if ( ids[0].mid >= MDB_IDL_UM_MAX ) { + /* too big */ + return -2; + + } else { + /* insert id */ + ids[0].mid++; + for (i=(unsigned)ids[0].mid; i>x; i--) + ids[i] = ids[i-1]; + ids[x] = *id; + } + + return 0; +} + +int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) +{ + /* Too big? */ + if (ids[0].mid >= MDB_IDL_UM_MAX) { + return -2; + } + ids[0].mid++; + ids[ids[0].mid] = *id; + return 0; +} + +/** @} */ +/** @} */ diff --git a/libraries/liblmdb/midl.h b/libraries/liblmdb/midl.h new file mode 100644 index 0000000000..b0bdff3f49 --- /dev/null +++ b/libraries/liblmdb/midl.h @@ -0,0 +1,177 @@ +/** @file midl.h + * @brief mdb ID List header file. + * + * This file was originally part of back-bdb but has been + * modified for use in libmdb. Most of the macros defined + * in this file are unused, just left over from the original. + * + * This file is only used internally in libmdb and its definitions + * are not exposed publicly. + */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2013 The OpenLDAP Foundation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#ifndef _MDB_MIDL_H_ +#define _MDB_MIDL_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** @defgroup internal MDB Internals + * @{ + */ + +/** @defgroup idls ID List Management + * @{ + */ + /** A generic ID number. These were entryIDs in back-bdb. + * Preferably it should have the same size as a pointer. + */ +typedef size_t MDB_ID; + + /** An IDL is an ID List, a sorted array of IDs. The first + * element of the array is a counter for how many actual + * IDs are in the list. In the original back-bdb code, IDLs are + * sorted in ascending order. For libmdb IDLs are sorted in + * descending order. + */ +typedef MDB_ID *MDB_IDL; + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#define MDB_IDL_DB_SIZE (1<. + */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor, *cur2; + int count; + int *values; + char sval[32]; + + srandom(time(NULL)); + + count = (random()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(sval, "%03x ", values[i]); + rc = mdb_del(txn, dbi, &key, NULL); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor last\n"); + rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor last/prev\n"); + rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV); + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + + mdb_txn_abort(txn); + + printf("Deleting with cursor\n"); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_cursor_open(txn, dbi, &cur2); + for (i=0; i<50; i++) { + rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT); + if (rc) + break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + rc = mdb_del(txn, dbi, &key, NULL); + } + + printf("Restarting cursor in 
txn\n"); + rc = mdb_cursor_get(cur2, &key, &data, MDB_FIRST); + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + for (i=0; i<32; i++) { + rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT); + if (rc) break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cur2); + rc = mdb_txn_commit(txn); + + printf("Restarting cursor outside txn\n"); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST); + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + for (i=0; i<32; i++) { + rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT); + if (rc) break; + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/mtest2.c b/libraries/liblmdb/mtest2.c new file mode 100644 index 0000000000..44d1de7ccd --- /dev/null +++ b/libraries/liblmdb/mtest2.c @@ -0,0 +1,117 @@ +/* mtest2.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Just like mtest.c, but using a subDB instead of the main DB */ + +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + + srandom(time(NULL)); + + count = (random()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(sval, "%03x ", values[i]); + rc = mdb_del(txn, dbi, &key, NULL); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/mtest3.c b/libraries/liblmdb/mtest3.c new file mode 100644 index 0000000000..c189eaa952 --- /dev/null +++ b/libraries/liblmdb/mtest3.c @@ -0,0 +1,127 @@ +/* mtest3.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Tests for sorted duplicate DBs */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srandom(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (random()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + rc = mdb_del(txn, dbi, &key, &data); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/mtest4.c b/libraries/liblmdb/mtest4.c new file mode 100644 index 0000000000..e0ba7e20b6 --- /dev/null +++ b/libraries/liblmdb/mtest4.c @@ -0,0 +1,161 @@ +/* mtest4.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Tests for sorted duplicate DBs with fixed-size keys */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[8]; + char kval[sizeof(int)]; + + memset(sval, 0, sizeof(sval)); + + count = 510; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i -1; i-= (random()%3)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(sval, "%07x", values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + rc = mdb_del(txn, dbi, &key, &data); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/mtest5.c b/libraries/liblmdb/mtest5.c new file mode 100644 index 0000000000..bc472fa093 --- /dev/null +++ b/libraries/liblmdb/mtest5.c @@ -0,0 +1,129 @@ +/* mtest5.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Tests for sorted duplicate DBs using cursor_put */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + char sval[32]; + char kval[sizeof(int)]; + + srandom(time(NULL)); + + memset(sval, 0, sizeof(sval)); + + count = (random()%384) + 64; + values = (int *)malloc(count*sizeof(int)); + + for(i = 0;i -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + rc = mdb_del(txn, dbi, &key, &data); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(env, dbi); + + mdb_txn_abort(txn); + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/mtest6.c b/libraries/liblmdb/mtest6.c new file mode 100644 index 0000000000..0bf26ccc45 --- /dev/null +++ b/libraries/liblmdb/mtest6.c @@ -0,0 +1,131 @@ +/* mtest6.c - memory-mapped database tester/toy */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +/* Tests for DB splits and merges */ +#define _XOPEN_SOURCE 500 /* srandom(), random() */ +#include +#include +#include +#include +#include "lmdb.h" + +char dkbuf[1024]; + +int main(int argc,char * argv[]) +{ + int i = 0, j = 0, rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_stat mst; + MDB_cursor *cursor; + int count; + int *values; + long kval; + char *sval; + + srandom(time(NULL)); + + rc = mdb_env_create(&env); + rc = mdb_env_set_mapsize(env, 10485760); + rc = mdb_env_set_maxdbs(env, 4); + rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_open(txn, "id2", MDB_CREATE|MDB_INTEGERKEY, &dbi); + rc = mdb_cursor_open(txn, dbi, &cursor); + rc = mdb_stat(txn, dbi, &mst); + + sval = calloc(1, mst.ms_psize / 4); + key.mv_size = sizeof(long); + key.mv_data = &kval; + data.mv_size = mst.ms_psize / 4 - 30; + data.mv_data = sval; + + printf("Adding 12 values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+4; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + printf("Adding 12 more values, should yield 3 splits\n"); + for (i=0;i<12;i++) { + kval = i*5+1; + sprintf(sval, "%08x", kval); + rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); + } + rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST); + + do { + printf("key: %p %s, data: %p %.*s\n", + key.mv_data, mdb_dkey(&key, dkbuf), + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0); + mdb_cursor_close(cursor); + mdb_txn_commit(txn); + +#if 0 + j=0; + + for (i= count - 1; i > -1; i-= (random()%5)) { + j++; + txn=NULL; + rc = mdb_txn_begin(env, NULL, 0, &txn); + sprintf(kval, "%03x", values[i & ~0x0f]); + sprintf(sval, "%03x %d foo bar", values[i], values[i]); + key.mv_size = sizeof(int); + key.mv_data = kval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + rc = mdb_del(txn, dbi, &key, &data); + if (rc) { + j--; + mdb_txn_abort(txn); + } else { + rc = mdb_txn_commit(txn); + } + } + free(values); + printf("Deleted %d values\n", j); + + rc = mdb_env_stat(env, &mst); + rc = mdb_txn_begin(env, NULL, 1, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + printf("Cursor next\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + printf("Cursor prev\n"); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { + printf("key: %.*s, data: %.*s\n", + (int) key.mv_size, (char *) key.mv_data, + (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_close(txn, dbi); + + mdb_txn_abort(txn); +#endif + mdb_env_close(env); + + return 0; +} diff --git a/libraries/liblmdb/sample-bdb.c b/libraries/liblmdb/sample-bdb.c new file mode 100644 index 0000000000..2c11bb38a0 --- /dev/null +++ b/libraries/liblmdb/sample-bdb.c @@ -0,0 +1,71 @@ +/* sample-bdb.c - BerkeleyDB toy/sample + * + * Do a line-by-line comparison of this and sample-mdb.c + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ +#include +#include +#include + +int main(int argc,char * argv[]) +{ + int rc; + DB_ENV *env; + DB *dbi; + DBT key, data; + DB_TXN *txn; + DBC *cursor; + char sval[32], kval[32]; + +#define FLAGS (DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_INIT_MPOOL|DB_CREATE|DB_THREAD) + rc = db_env_create(&env, 0); + rc = env->open(env, "./testdb", FLAGS, 0664); + rc = db_create(&dbi, env, 0); + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->open(dbi, txn, "test.bdb", NULL, DB_BTREE, DB_CREATE, 0664); + + memset(&key, 0, sizeof(DBT)); + memset(&data, 0, sizeof(DBT)); + key.size = sizeof(int); + key.data = sval; + data.size = sizeof(sval); + data.data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = dbi->put(dbi, txn, &key, &data, 0); + rc = txn->commit(txn, 0); + if (rc) { + fprintf(stderr, "txn->commit: (%d) %s\n", rc, db_strerror(rc)); + goto leave; + } + rc = env->txn_begin(env, NULL, &txn, 0); + rc = dbi->cursor(dbi, txn, &cursor, 0); + key.flags = DB_DBT_USERMEM; + key.data = kval; + key.ulen = sizeof(kval); + data.flags = DB_DBT_USERMEM; + data.data = sval; + data.ulen = sizeof(sval); + while ((rc = cursor->c_get(cursor, &key, &data, DB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.data, (int) key.size, (char *) key.data, + data.data, (int) data.size, (char *) data.data); + } + rc = cursor->c_close(cursor); + rc = txn->abort(txn); +leave: + rc = dbi->close(dbi, 0); + rc = env->close(env, 0); + return rc; +} diff --git a/libraries/liblmdb/sample-mdb.c b/libraries/liblmdb/sample-mdb.c new file mode 100644 index 0000000000..0b10f47173 --- /dev/null +++ b/libraries/liblmdb/sample-mdb.c @@ -0,0 +1,60 @@ +/* sample-mdb.c - MDB toy/sample + * + * Do a line-by-line comparison of this and sample-bdb.c + */ +/* + * Copyright 2012 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ +#include +#include "lmdb.h" + +int main(int argc,char * argv[]) +{ + int rc; + MDB_env *env; + MDB_dbi dbi; + MDB_val key, data; + MDB_txn *txn; + MDB_cursor *cursor; + char sval[32]; + + rc = mdb_env_create(&env); + rc = mdb_env_open(env, "./testdb", 0, 0664); + rc = mdb_txn_begin(env, NULL, 0, &txn); + rc = mdb_open(txn, NULL, 0, &dbi); + + key.mv_size = sizeof(int); + key.mv_data = sval; + data.mv_size = sizeof(sval); + data.mv_data = sval; + + sprintf(sval, "%03x %d foo bar", 32, 3141592); + rc = mdb_put(txn, dbi, &key, &data, 0); + rc = mdb_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdb_txn_commit: (%d) %s\n", rc, mdb_strerror(rc)); + goto leave; + } + rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); + rc = mdb_cursor_open(txn, dbi, &cursor); + while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { + printf("key: %p %.*s, data: %p %.*s\n", + key.mv_data, (int) key.mv_size, (char *) key.mv_data, + data.mv_data, (int) data.mv_size, (char *) data.mv_data); + } + mdb_cursor_close(cursor); + mdb_txn_abort(txn); +leave: + mdb_close(env, dbi); + mdb_env_close(env); + return 0; +} From d60fc410aeef7cbd028c5b79314663c2e0f34d9a Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Sun, 21 Jul 2013 14:51:25 -0700 Subject: [PATCH 39/50] Remove old mdb tree after subtree add --- Subtrees/README.md | 13 + TODO.txt | 4 - libraries/liblmdb/.gitignore | 16 - libraries/liblmdb/COPYRIGHT | 20 - libraries/liblmdb/Doxyfile | 1631 ------- libraries/liblmdb/LICENSE | Bin 2214 -> 0 bytes libraries/liblmdb/Makefile | 87 - libraries/liblmdb/lmdb.h | 1297 ------ libraries/liblmdb/mdb.c | 7488 ------------------------------- libraries/liblmdb/mdb_copy.1 | 28 - libraries/liblmdb/mdb_copy.c | 66 - libraries/liblmdb/mdb_stat.1 | 47 - libraries/liblmdb/mdb_stat.c | 230 - libraries/liblmdb/midl.c | 348 -- libraries/liblmdb/midl.h | 179 - libraries/liblmdb/mtest.c | 176 - libraries/liblmdb/mtest2.c | 117 - libraries/liblmdb/mtest3.c | 127 - libraries/liblmdb/mtest4.c | 161 - libraries/liblmdb/mtest5.c | 129 - libraries/liblmdb/mtest6.c | 131 - libraries/liblmdb/sample-bdb.c | 71 - libraries/liblmdb/sample-mdb.c | 60 - modules/ripple_mdb/ripple_mdb.c | 4 +- modules/ripple_mdb/ripple_mdb.h | 2 +- 25 files changed, 16 insertions(+), 12416 deletions(-) delete mode 100644 libraries/liblmdb/.gitignore delete mode 100644 libraries/liblmdb/COPYRIGHT delete mode 100644 libraries/liblmdb/Doxyfile delete mode 100644 libraries/liblmdb/LICENSE delete mode 100644 libraries/liblmdb/Makefile delete mode 100644 libraries/liblmdb/lmdb.h delete mode 100644 libraries/liblmdb/mdb.c delete mode 100644 libraries/liblmdb/mdb_copy.1 delete mode 100644 libraries/liblmdb/mdb_copy.c delete mode 100644 libraries/liblmdb/mdb_stat.1 delete mode 100644 libraries/liblmdb/mdb_stat.c delete mode 100644 libraries/liblmdb/midl.c delete mode 100644 libraries/liblmdb/midl.h delete mode 100644 libraries/liblmdb/mtest.c delete mode 100644 libraries/liblmdb/mtest2.c delete mode 100644 libraries/liblmdb/mtest3.c delete mode 100644 libraries/liblmdb/mtest4.c delete mode 100644 libraries/liblmdb/mtest5.c delete mode 100644 libraries/liblmdb/mtest6.c delete mode 100644 libraries/liblmdb/sample-bdb.c delete mode 100644 libraries/liblmdb/sample-mdb.c diff --git a/Subtrees/README.md b/Subtrees/README.md index 51435b4def..457688b264 100644 --- a/Subtrees/README.md +++ b/Subtrees/README.md @@ -21,6 +21,19 @@ Branch ripple-fork ``` +## LightningDB (a.k.a. MDB) + +A supposedly fast memory-mapped key value database system + +Repository
+``` +git://gitorious.org/mdb/mdb.git +``` +Branch +``` +mdb.master +``` + ## websocket Ripple's fork of websocketpp has some incompatible changes and Ripple specific includes. diff --git a/TODO.txt b/TODO.txt index db50945f86..5621072c5a 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,11 +3,7 @@ RIPPLE TODO -------------------------------------------------------------------------------- Vinnie's Short List (Changes day to day) -- Give mdb a proper spot in Subtrees/ -- Finish writing the NodeStore unit tests -- Finish converting backends to new API - Memory NodeStore::Backend for unit tests -- Performance test for NodeStore::Backend - Improved Mutex to track deadlocks - Work on KeyvaDB - Import beast::db and use it in SQliteBackend diff --git a/libraries/liblmdb/.gitignore b/libraries/liblmdb/.gitignore deleted file mode 100644 index 0d493fe188..0000000000 --- a/libraries/liblmdb/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -mtest -mtest[23456] -testdb -mdb_copy -mdb_stat -*.[ao] -*.so -*[~#] -*.bak -*.orig -*.rej -core -core.* -valgrind.* -man/ -html/ diff --git a/libraries/liblmdb/COPYRIGHT b/libraries/liblmdb/COPYRIGHT deleted file mode 100644 index 4482816cf5..0000000000 --- a/libraries/liblmdb/COPYRIGHT +++ /dev/null @@ -1,20 +0,0 @@ -Copyright 2011-2013 Howard Chu, Symas Corp. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted only as authorized by the OpenLDAP -Public License. - -A copy of this license is available in the file LICENSE in the -top-level directory of the distribution or, alternatively, at -. - -OpenLDAP is a registered trademark of the OpenLDAP Foundation. - -Individual files and/or contributed packages may be copyright by -other parties and/or subject to additional restrictions. - -This work also contains materials derived from public sources. - -Additional information about OpenLDAP can be obtained at -. diff --git a/libraries/liblmdb/Doxyfile b/libraries/liblmdb/Doxyfile deleted file mode 100644 index 3fd0365c7d..0000000000 --- a/libraries/liblmdb/Doxyfile +++ /dev/null @@ -1,1631 +0,0 @@ -# Doxyfile 1.7.1 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = MDB - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. 
- -PROJECT_NUMBER = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, -# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English -# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, -# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, -# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. 
- -FULL_PATH_NAMES = YES - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. 
-
-OPTIMIZE_OUTPUT_FOR_C = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for
-# Java. For instance, namespaces will be presented as packages, qualified
-# scopes will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources only. Doxygen will then generate output that is more tailored for
-# Fortran.
-
-OPTIMIZE_FOR_FORTRAN = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for
-# VHDL.
-
-OPTIMIZE_OUTPUT_VHDL = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given extension.
-# Doxygen has a built-in mapping, but you can override or extend it using this
-# tag. The format is ext=language, where ext is a file extension, and language
-# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp,
-# C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
-# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
-# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
-# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
-
-EXTENSION_MAPPING =
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match function declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); vs.
-# func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate getter
-# and setter methods for a property. Setting this option to YES (the default)
-# will make doxygen replace the get and set methods by a property in the
-# documentation. This will only work if the methods are indeed getting or
-# setting a simple type. If this is not the case, or you want to show the
-# methods anyway, you should set this option to NO.
-
-IDL_PROPERTY_SUPPORT = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING = YES
-
-INLINE_GROUPED_CLASSES = YES
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
-# is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically
-# be useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-
-TYPEDEF_HIDES_STRUCT = YES
-
-# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
-# determine which symbols to keep in memory and which to flush to disk.
-# When the cache is full, less often used symbols will be written to disk.
-# For small to medium size projects (<1000 input files) the default value is
-# probably good enough. For larger projects a too small cache size can cause
-# doxygen to be busy swapping symbols to and from disk most of the time
-# causing a significant performance penalty.
-# If the system has enough physical memory increasing the cache will improve the
-# performance by keeping more symbols in memory. Note that the value works on
-# a logarithmic scale so increasing the size by one will roughly double the
-# memory usage. The cache size is given by this formula:
-# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
-# corresponding to a cache size of 2^16 = 65536 symbols.
-
-SYMBOL_CACHE_SIZE = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES.
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE = NO
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base
-# name of the file that contains the anonymous namespace. By default
-# anonymous namespaces are hidden.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen -# will list include files with double quotes in the documentation -# rather than with sharp brackets. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = NO - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen -# will sort the (brief and detailed) documentation of class members so that -# constructors and destructors are listed first. If set to NO (the default) -# the constructors will appear in the respective orders defined by -# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. -# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO -# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. 
If set to NO (the default)
-# the group names will appear in their defined order.
-
-SORT_GROUP_NAMES = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or define consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and defines in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES = YES
-
-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES = NO
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
-# This will remove the Files entry from the Quick Index and from the
-# Folder Tree View (if specified). The default is YES.
-
-SHOW_FILES = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page.
-# This will remove the Namespaces entry from the Quick Index
-# and from the Folder Tree View (if specified). The default is YES.
-
-SHOW_NAMESPACES = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
-
-FILE_VERSION_FILTER =
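(Editorial aside, not part of the deleted Doxyfile: a typical filter here wraps the version control system. A hedged sketch, assuming a git checkout:

    FILE_VERSION_FILTER = "git log -n 1 --pretty=format:%h --"

Doxygen appends each input file name to this command, so every page gets labeled with the abbreviated hash of the last commit that touched that file. Any program that prints a version string to stdout works equally well.)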
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option.
-# You can optionally specify a file name after the option, if omitted
-# DoxygenLayout.xml will be used as the name of the layout file.
-
-LAYOUT_FILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR = YES
-
-# The WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER).
-
-WARN_FORMAT = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT = lmdb.h midl.h mdb.c midl.c
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
-# also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
-# the list of possible encodings.
-
-INPUT_ENCODING = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
-# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
-
-FILE_PATTERNS =
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-RECURSIVE = NO
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-
-EXCLUDE =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix filesystem feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain images that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output. If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter).
See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. -# Otherwise they will link to the documentation. - -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = YES - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. 
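(Editorial aside: for a C library like liblmdb, whose public names all carry the mdb_/MDB_ prefixes, a plausible setting would be

    IGNORE_PREFIX = mdb_ MDB_

so that the alphabetical index sorts a symbol such as mdb_env_create under E rather than piling everything under M. The deleted Doxyfile below leaves the tag empty.)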
- -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. -# Doxygen will adjust the colors in the stylesheet and background images -# according to this color. Hue is specified as an angle on a colorwheel, -# see http://en.wikipedia.org/wiki/Hue for more information. -# For instance the value 0 represents red, 60 is yellow, 120 is green, -# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. -# The allowed range is 0 to 359. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of -# the colors in the HTML output. For a value of 0 the output will use -# grayscales only. A value of 255 will produce the most vivid colors. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to -# the luminance component of the colors in the HTML output. Values below -# 100 gradually make the output lighter, whereas values above 100 make -# the output darker. The value divided by 100 is the actual gamma applied, -# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, -# and 100 does not change the gamma. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting -# this to NO can help when comparing the output of multiple runs. - -HTML_TIMESTAMP = YES - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. 
For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
-# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
-
-HTML_DYNAMIC_SECTIONS = NO
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files
-# will be generated that can be used as input for Apple's Xcode 3
-# integrated development environment, introduced with OSX 10.5 (Leopard).
-# To create a documentation set, doxygen will generate a Makefile in the
-# HTML output directory. Running make will produce the docset in that
-# directory and running "make install" will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
-# it at startup.
-# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
-
-GENERATE_DOCSET = NO
-
-# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the
-# feed. A documentation feed provides an umbrella under which multiple
-# documentation sets from a single provider (such as a company or product suite)
-# can be grouped.
-
-DOCSET_FEEDNAME = "Doxygen generated docs"
-
-# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that
-# should uniquely identify the documentation set bundle. This should be a
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
-# will append .docset to the name.
-
-DOCSET_BUNDLE_ID = org.doxygen.Project
-
-# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
-# the documentation publisher. This should be a reverse domain-name style
-# string, e.g. com.mycompany.MyDocSet.documentation.
-
-DOCSET_PUBLISHER_ID = org.doxygen.Publisher
-
-# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
-
-DOCSET_PUBLISHER_NAME = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls if a separate .chi index file is generated (YES) or that
-# it should be included in the master .chm file (NO).
-
-GENERATE_CHI = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file
-# content.
-
-CHM_INDEX_ENCODING =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
-# that can be used as input for Qt's qhelpgenerator to generate a
-# Qt Compressed Help (.qch) of the generated HTML documentation.
-
-GENERATE_QHP = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
-# be used to specify the file name of the resulting .qch file.
-# The path specified is relative to the HTML output folder.
-
-QCH_FILE =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating
-# Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#namespace
-
-QHP_NAMESPACE = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
-# Qt Help Project output. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#virtual-folders
-
-QHP_VIRTUAL_FOLDER = doc
-
-# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
-# add. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#custom-filters
-# (Qt Help Project / Custom Filters).
-
-QHP_CUST_FILTER_NAME =
-
-# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#custom-filters
-# (Qt Help Project / Custom Filters).
-
-QHP_CUST_FILTER_ATTRS =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's filter section matches. For more information please see
-# http://doc.trolltech.com/qthelpproject.html#filter-attributes
-# (Qt Help Project / Filter Attributes).
-
-QHP_SECT_FILTER_ATTRS =
-
-# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
-# be used to specify the location of Qt's qhelpgenerator.
-# If non-empty doxygen will try to run qhelpgenerator on the generated
-# .qhp file.
-
-QHG_LOCATION =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
-# will be generated, which together with the HTML files, form an Eclipse help
-# plugin. To install this plugin and make it available under the help contents
-# menu in Eclipse, the contents of the directory containing the HTML and XML
-# files need to be copied into the plugins directory of eclipse. The name of
-# the directory within the plugins directory should be the same as
-# the ECLIPSE_DOC_ID value. After copying, Eclipse needs to be restarted before
-# the help appears.
-
-GENERATE_ECLIPSEHELP = NO
-
-# A unique identifier for the eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have
-# this name.
-
-ECLIPSE_DOC_ID = org.doxygen.Project
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
-# top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it.
-
-DISABLE_INDEX = NO
-
-# This tag can be used to set the number of enum values (range [1..20])
-# that doxygen will group on one line in the generated HTML documentation.
-
-ENUM_VALUES_PER_LINE = 4
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information.
-# If the tag value is set to YES, a side panel will be generated
-# containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
-# Windows users are probably better off using the HTML help feature.
-
-GENERATE_TREEVIEW = NO
-
-# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
-# and Class Hierarchy pages using a tree view instead of an ordered list.
-
-USE_INLINE_TREES = NO
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH = 250
-
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
-# links to external symbols imported via tag files in a separate window.
-
-EXT_LINKS_IN_WINDOW = NO
-
-# Use this tag to change the font size of Latex formulas included
-# as images in the HTML documentation. The default is 10. Note that
-# when you change the font size after a successful doxygen run you need
-# to manually remove any form_*.png images from the HTML output directory
-# to force them to be regenerated.
-
-FORMULA_FONTSIZE = 10
-
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are
-# not supported properly for IE 6.0, but are supported on all modern browsers.
-# Note that when changing this option you need to delete any form_*.png files
-# in the HTML output before the changes have effect.
-
-FORMULA_TRANSPARENT = YES
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box
-# for the HTML output. The underlying search engine uses javascript
-# and DHTML and should work on any modern browser. Note that when using
-# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
-# (GENERATE_DOCSET) there is already a search function so this one should
-# typically be disabled. For large projects the javascript based search engine
-# can be slow; in that case enabling SERVER_BASED_SEARCH may provide a better
-# solution.
-
-SEARCHENGINE = YES
-
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a PHP enabled web server instead of at the web client
-# using Javascript. Doxygen will generate the search PHP script and index
-# file to put on the web server. The advantage of the server
-# based approach is that it scales better to large projects and allows
-# full text search. The disadvantage is that it is more difficult to set up
-# and does not have live searching capabilities.
-
-SERVER_BASED_SEARCH = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX = NO
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-# Note that when enabling USE_PDFLATEX this option is only used for
-# generating bitmaps for formulas in the HTML output, but not in the
-# Makefile that is written to the output directory.
-
-LATEX_CMD_NAME = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, a4wide, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE = a4wide
-
-# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES = NO
-
-# If LATEX_SOURCE_CODE is set to YES then doxygen will include
-# source code with syntax highlighting in the LaTeX output.
-# Note that which sources are shown also depends on other settings
-# such as SOURCE_BROWSER.
-
-LATEX_SOURCE_CODE = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT = rtf
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments.
You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = YES - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. - -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. 
Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader. This is useful
-# if you want to understand what is going on. On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION = NO
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF = NO
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
-# in the INCLUDE_PATH (see below) will be searched if a #include is found.
-
-SEARCH_INCLUDES = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS =
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
-
-PREDEFINED = DEBUG=2 __GNUC__=1
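(Editorial aside: PREDEFINED feeds doxygen's preprocessor, so declarations guarded by __GNUC__ are parsed and documented even though doxygen itself is not GCC. A hedged C sketch of the kind of guard this setting unlocks; MDB_NORETURN is an invented name, not from the sources:

    #ifdef __GNUC__
    /** Marks a function that never returns (GCC-style attribute). */
    # define MDB_NORETURN __attribute__((noreturn))
    #else
    # define MDB_NORETURN
    #endif

With __GNUC__=1 predefined, doxygen follows the first branch and documents the attribute-bearing definition.)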
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition.
-
-EXPAND_AS_DEFINED =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all function-like macros that are alone
-# on a line, have an all uppercase name, and do not end with a semicolon. Such
-# function macros are typically used for boiler-plate code, and will confuse
-# the parser if not removed.
-
-SKIP_FUNCTION_MACROS = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles.
-# Optionally an initial location of the external documentation
-# can be added for each tagfile. The format of a tag file without
-# this location is as follows:
-#
-# TAGFILES = file1 file2 ...
-# Adding a location for the tag files is done as follows:
-#
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths or
-# URLs. If a location is present for each tag, the installdox tool
-# does not have to be run to correct the links.
-# Note that each tag file must have a unique name
-# (where the name does NOT include the path).
-# If a tag file is not located in the directory in which doxygen
-# is run, you must also specify the path to the tagfile here.
-
-TAGFILES =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS = YES
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option is superseded by the HAVE_DOT option below. This is only a
-# fallback. It is recommended to install and use dot, since it yields more
-# powerful graphs.
-
-CLASS_DIAGRAMS = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs.
The other options in this section
-# have no effect if this option is set to NO (the default).
-
-HAVE_DOT = NO
-
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
-# allowed to run in parallel. When set to 0 (the default) doxygen will
-# base this on the number of processors available in the system. You can set it
-# explicitly to a value larger than 0 to get control over the balance
-# between CPU load and processing speed.
-
-DOT_NUM_THREADS = 0
-
-# By default doxygen will write a font called FreeSans.ttf to the output
-# directory and reference it in all dot files that doxygen generates. This
-# font does not include all possible unicode characters however, so when you need
-# these (or just want a different looking font) you can specify the font name
-# using DOT_FONTNAME. You need to make sure dot is able to find the font,
-# which can be done by putting it in a standard location or by setting the
-# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
-# containing the font.
-
-DOT_FONTNAME = FreeSans.ttf
-
-# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
-# The default size is 10pt.
-
-DOT_FONTSIZE = 10
-
-# By default doxygen will tell dot to use the output directory to look for the
-# FreeSans.ttf font (which doxygen will put there itself). If you specify a
-# different font using DOT_FONTNAME you can set the path where dot
-# can find it using this tag.
-
-DOT_FONTPATH =
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force the
-# CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH = YES
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct groups dependencies.
-
-GROUP_GRAPHS = YES
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK = NO
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH = YES
-
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then
-# doxygen will generate a call dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable call graphs
-# for selected functions only using the \callgraph command.
-
-CALL_GRAPH = NO
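(Editorial aside: with CALL_GRAPH left at NO, a graph can still be requested for individual hot spots from their documentation blocks, provided HAVE_DOT is enabled. A hedged sketch; the function shown is hypothetical, not taken from the sources:

    /** Flush a transaction's dirty pages to disk.
     *  @callgraph
     */
    static int mdb_page_flush(MDB_txn *txn);

Only the annotated function then pays the extra dot processing time.)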
-
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
-# doxygen will generate a caller dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable caller
-# graphs for selected functions only using the \callergraph command.
-
-CALLER_GRAPH = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will show a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY = YES
-
-# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are png, jpg, or gif.
-# If left blank png will be used.
-
-DOT_IMAGE_FORMAT = png
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that if the
-# number of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lie further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not
-# seem to support this out of the box. Warning: Depending on the platform used,
-# enabling this option may lead to badly anti-aliased labels on the edges of
-# a graph (i.e. they become hard to read).
-
-DOT_TRANSPARENT = NO
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS = YES
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
- -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES diff --git a/libraries/liblmdb/LICENSE b/libraries/liblmdb/LICENSE deleted file mode 100644 index 05ad7571e448b9d83ead5d4691274d9484574714..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2214 zcmZ`*%W|AJ6y579Ze^3Wit*&Jn8{>CgW9bs3_S?!^e%KGyH&%Z2Z3Gw`<$zOx}2;~ zq5C}da4yBxP_{E&5{FA#PV2^QD6t#UIU|Vf%;0R_(dX;G1N!tAh128YM*95m;};=v zQ(HF&QSkA$hA zSj4pu#U9^F$d{k=vzWSyzc$;TF7d%-P z1uxbwRn?WT5ZhHIi#KlvPBCLc&?7kw-0?-b2TlgPstL}x| zw07qI3G_JE`uiprrC;$g`{jLQ~wFhvoejtLS-q1 zX-ulnBFfStSF^Ina!o&egc`=9xV_hoT)?=E5QZLRgoea&5}yvXiT z$!9K+FHgA6BpM56iQHEQY+{wms6gQKaAGikJ9Vv;S8hr6^xi@ROa#)Zd{lyXSz^DSxCCcvRHE`TIP2ulKPuSGC&BcQp*4t ziqI!U2*9hqvF)tX$^)&^Lgsn7WJ+AoBD)8Yxrjoz#-0P7DV{}HzT%*afge~v_X`Px zK#O#bYsg?|uqi6Ug&Cuq<;6vhl**e#-N-ZoHvu_ExmQ|VVZI8L-5@0RumaJL6RbMS z0#)sg7kmf47(S^vh4CF?fT(_O0({YGUn;-SWwbcJeGrPSYR%!JgmO;fwjUtq5WcfG zZ~F$HuiHyEbeYzMw@?lJ6IBg48m;4va8$i#uP{AJUfYi>Hilf)m|(xVj@nq8s)3vE zDtf6FERVdI+|F!xHDPBBypK9Wv`FIjlVj}x^$R;Tkc7XHy`wSD^f_3IzJ&FMF~ z9Pr9~?)#b|JH7-W0xVdSovi|pH$;yi%FcODI+!{$JeeAAm%OGu46NJS1kX*Jei+QF XO<|8yW-&kf9. - * - * @par Derived From: - * This code is derived from btree.c written by Martin Hedenfalk. - * - * Copyright (c) 2009, 2010 Martin Hedenfalk - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifndef _LMDB_H_ -#define _LMDB_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _MSC_VER -typedef int mdb_mode_t; -#else -typedef mode_t mdb_mode_t; -#endif - -/** An abstraction for a file handle. - * On POSIX systems file handles are small integers. On Windows - * they're opaque pointers. - */ -#ifdef _WIN32 -typedef void *mdb_filehandle_t; -#else -typedef int mdb_filehandle_t; -#endif - -/** @defgroup mdb MDB API - * @{ - * @brief OpenLDAP Lightning Memory-Mapped Database Manager - */ -/** @defgroup Version Version Macros - * @{ - */ -/** Library major version */ -#define MDB_VERSION_MAJOR 0 -/** Library minor version */ -#define MDB_VERSION_MINOR 9 -/** Library patch version */ -#define MDB_VERSION_PATCH 6 - -/** Combine args a,b,c into a single integer for easy version comparisons */ -#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) - -/** The full library version as a single integer */ -#define MDB_VERSION_FULL \ - MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) - -/** The release date of this library version */ -#define MDB_VERSION_DATE "January 10, 2013" - -/** A stringifier for the version info */ -#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." 
#c ": (" d ")" - -/** A helper for the stringifier macro */ -#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d) - -/** The full library version as a C string */ -#define MDB_VERSION_STRING \ - MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE) -/** @} */ - -/** @brief Opaque structure for a database environment. - * - * A DB environment supports multiple databases, all residing in the same - * shared-memory map. - */ -typedef struct MDB_env MDB_env; - -/** @brief Opaque structure for a transaction handle. - * - * All database operations require a transaction handle. Transactions may be - * read-only or read-write. - */ -typedef struct MDB_txn MDB_txn; - -/** @brief A handle for an individual database in the DB environment. */ -typedef unsigned int MDB_dbi; - -/** @brief Opaque structure for navigating through a database */ -typedef struct MDB_cursor MDB_cursor; - -/** @brief Generic structure used for passing keys and data in and out - * of the database. - * - * Key sizes must be between 1 and the liblmdb build-time constant - * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The - * same applies to data sizes in databases with the #MDB_DUPSORT flag. - * Other data items can in theory be from 0 to 0xffffffff bytes long. - * - * Values returned from the database are valid only until a subsequent - * update operation, or the end of the transaction. - */ -typedef struct MDB_val { - size_t mv_size; /**< size of the data item */ - void *mv_data; /**< address of the data item */ -} MDB_val; - -/** @brief A callback function used to compare two keys in a database */ -typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); - -/** @brief A callback function used to relocate a position-dependent data item - * in a fixed-address database. - * - * The \b newptr gives the item's desired address in - * the memory map, and \b oldptr gives its previous address. The item's actual - * data resides at the address in \b item. This callback is expected to walk - * through the fields of the record in \b item and modify any - * values based at the \b oldptr address to be relative to the \b newptr address. - * @param[in,out] item The item that is to be relocated. - * @param[in] oldptr The previous address. - * @param[in] newptr The new address to relocate to. - * @param[in] relctx An application-provided context, set by #mdb_set_relctx(). - * @todo This feature is currently unimplemented. - */ -typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); - -/** @defgroup mdb_env Environment Flags - * - * Values do not overlap Database Flags. - * @{ - */ - /** mmap at a fixed address (experimental) */ -#define MDB_FIXEDMAP 0x01 - /** no environment directory */ -#define MDB_NOSUBDIR 0x4000 - /** don't fsync after commit */ -#define MDB_NOSYNC 0x10000 - /** read only */ -#define MDB_RDONLY 0x20000 - /** don't fsync metapage after commit */ -#define MDB_NOMETASYNC 0x40000 - /** use writable mmap */ -#define MDB_WRITEMAP 0x80000 - /** use asynchronous msync when MDB_WRITEMAP is used */ -#define MDB_MAPASYNC 0x100000 - /** tie reader locktable slots to #MDB_txn objects instead of to threads */ -#define MDB_NOTLS 0x200000 -/** @} */ - -/** @defgroup mdb_dbi_open Database Flags - * - * Values do not overlap Environment Flags. - * @{ - */ - /** use reverse string keys */ -#define MDB_REVERSEKEY 0x02 - /** use sorted duplicates */ -#define MDB_DUPSORT 0x04 - /** numeric keys in native byte order. - * The keys must all be of the same size. 
*/ -#define MDB_INTEGERKEY 0x08 - /** with #MDB_DUPSORT, sorted dup items have fixed size */ -#define MDB_DUPFIXED 0x10 - /** with #MDB_DUPSORT, dups are numeric in native byte order */ -#define MDB_INTEGERDUP 0x20 - /** with #MDB_DUPSORT, use reverse string dups */ -#define MDB_REVERSEDUP 0x40 - /** create DB if not already existing */ -#define MDB_CREATE 0x40000 -/** @} */ - -/** @defgroup mdb_put Write Flags - * @{ - */ -/** For put: Don't write if the key already exists. */ -#define MDB_NOOVERWRITE 0x10 -/** Only for #MDB_DUPSORT
- * For put: don't write if the key and data pair already exist.
- * For mdb_cursor_del: remove all duplicate data items. - */ -#define MDB_NODUPDATA 0x20 -/** For mdb_cursor_put: overwrite the current key/data pair */ -#define MDB_CURRENT 0x40 -/** For put: Just reserve space for data, don't copy it. Return a - * pointer to the reserved space. - */ -#define MDB_RESERVE 0x10000 -/** Data is being appended, don't split full pages. */ -#define MDB_APPEND 0x20000 -/** Duplicate data is being appended, don't split full pages. */ -#define MDB_APPENDDUP 0x40000 -/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ -#define MDB_MULTIPLE 0x80000 -/* @} */ - -/** @brief Cursor Get operations. - * - * This is the set of all operations for retrieving data - * using a cursor. - */ -typedef enum MDB_cursor_op { - MDB_FIRST, /**< Position at first key/data item */ - MDB_FIRST_DUP, /**< Position at first data item of current key. - Only for #MDB_DUPSORT */ - MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ - MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ - MDB_GET_CURRENT, /**< Return key/data at current cursor position */ - MDB_GET_MULTIPLE, /**< Return all the duplicate data items at the current - cursor position. Only for #MDB_DUPFIXED */ - MDB_LAST, /**< Position at last key/data item */ - MDB_LAST_DUP, /**< Position at last data item of current key. - Only for #MDB_DUPSORT */ - MDB_NEXT, /**< Position at next data item */ - MDB_NEXT_DUP, /**< Position at next data item of current key. - Only for #MDB_DUPSORT */ - MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next - cursor position. Only for #MDB_DUPFIXED */ - MDB_NEXT_NODUP, /**< Position at first data item of next key */ - MDB_PREV, /**< Position at previous data item */ - MDB_PREV_DUP, /**< Position at previous data item of current key. - Only for #MDB_DUPSORT */ - MDB_PREV_NODUP, /**< Position at last data item of previous key */ - MDB_SET, /**< Position at specified key */ - MDB_SET_KEY, /**< Position at specified key, return key + data */ - MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. 
*/ -} MDB_cursor_op; - -/** @defgroup errors Return Codes - * - * BerkeleyDB uses -30800 to -30999, we'll go under them - * @{ - */ - /** Successful result */ -#define MDB_SUCCESS 0 - /** key/data pair already exists */ -#define MDB_KEYEXIST (-30799) - /** key/data pair not found (EOF) */ -#define MDB_NOTFOUND (-30798) - /** Requested page not found - this usually indicates corruption */ -#define MDB_PAGE_NOTFOUND (-30797) - /** Located page was wrong type */ -#define MDB_CORRUPTED (-30796) - /** Update of meta page failed, probably I/O error */ -#define MDB_PANIC (-30795) - /** Environment version mismatch */ -#define MDB_VERSION_MISMATCH (-30794) - /** File is not a valid MDB file */ -#define MDB_INVALID (-30793) - /** Environment mapsize reached */ -#define MDB_MAP_FULL (-30792) - /** Environment maxdbs reached */ -#define MDB_DBS_FULL (-30791) - /** Environment maxreaders reached */ -#define MDB_READERS_FULL (-30790) - /** Too many TLS keys in use - Windows only */ -#define MDB_TLS_FULL (-30789) - /** Txn has too many dirty pages */ -#define MDB_TXN_FULL (-30788) - /** Cursor stack too deep - internal error */ -#define MDB_CURSOR_FULL (-30787) - /** Page has not enough space - internal error */ -#define MDB_PAGE_FULL (-30786) - /** Database contents grew beyond environment mapsize */ -#define MDB_MAP_RESIZED (-30785) - /** Database flags changed or would change */ -#define MDB_INCOMPATIBLE (-30784) - /** Invalid reuse of reader locktable slot */ -#define MDB_BAD_RSLOT (-30783) -#define MDB_LAST_ERRCODE MDB_BAD_RSLOT -/** @} */ - -/** @brief Statistics for a database in the environment */ -typedef struct MDB_stat { - unsigned int ms_psize; /**< Size of a database page. - This is currently the same for all databases. */ - unsigned int ms_depth; /**< Depth (height) of the B-tree */ - size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ - size_t ms_leaf_pages; /**< Number of leaf pages */ - size_t ms_overflow_pages; /**< Number of overflow pages */ - size_t ms_entries; /**< Number of data items */ -} MDB_stat; - -/** @brief Information about the environment */ -typedef struct MDB_envinfo { - void *me_mapaddr; /**< Address of map, if fixed */ - size_t me_mapsize; /**< Size of the data memory map */ - size_t me_last_pgno; /**< ID of the last used page */ - size_t me_last_txnid; /**< ID of the last committed transaction */ - unsigned int me_maxreaders; /**< max reader slots in the environment */ - unsigned int me_numreaders; /**< max reader slots used in the environment */ -} MDB_envinfo; - - /** @brief Return the mdb library version information. - * - * @param[out] major if non-NULL, the library major version number is copied here - * @param[out] minor if non-NULL, the library minor version number is copied here - * @param[out] patch if non-NULL, the library patch version number is copied here - * @retval "version string" The library version as a string - */ -char *mdb_version(int *major, int *minor, int *patch); - - /** @brief Return a string describing a given error code. - * - * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) - * function. If the error code is greater than or equal to 0, then the string - * returned by the system function strerror(3) is returned. If the error code - * is less than 0, an error string corresponding to the MDB library error is - * returned. See @ref errors for a list of MDB-specific error codes. 
- * @param[in] err The error code - * @retval "error message" The description of the error - */ -char *mdb_strerror(int err); - - /** @brief Create an MDB environment handle. - * - * This function allocates memory for a #MDB_env structure. To release - * the allocated memory and discard the handle, call #mdb_env_close(). - * Before the handle may be used, it must be opened using #mdb_env_open(). - * Various other options may also need to be set before opening the handle, - * e.g. #mdb_env_set_mapsize(), #mdb_env_set_maxreaders(), #mdb_env_set_maxdbs(), - * depending on usage requirements. - * @param[out] env The address where the new handle will be stored - * @return A non-zero error value on failure and 0 on success. - */ -int mdb_env_create(MDB_env **env); - - /** @brief Open an environment handle. - * - * If this function fails, #mdb_env_close() must be called to discard the #MDB_env handle. - * @param[in] env An environment handle returned by #mdb_env_create() - * @param[in] path The directory in which the database files reside. This - * directory must already exist and be writable. - * @param[in] flags Special options for this environment. This parameter - * must be set to 0 or by bitwise OR'ing together one or more of the - * values described here. - * Flags set by mdb_env_set_flags() are also used. - *
- *	<ul>
- *	<li>#MDB_FIXEDMAP
- *	use a fixed address for the mmap region. This flag must be specified
- *	when creating the environment, and is stored persistently in the environment.
- *	If successful, the memory map will always reside at the same virtual address
- *	and pointers used to reference data items in the database will be constant
- *	across multiple invocations. This option may not always work, depending on
- *	how the operating system has allocated memory to shared libraries and other uses.
- *	The feature is highly experimental.
- *	<li>#MDB_NOSUBDIR
- *	By default, MDB creates its environment in a directory whose
- *	pathname is given in \b path, and creates its data and lock files
- *	under that directory. With this option, \b path is used as-is for
- *	the database main data file. The database lock file is the \b path
- *	with "-lock" appended.
- *	<li>#MDB_RDONLY
- *	Open the environment in read-only mode. No write operations will be
- *	allowed. MDB will still modify the lock file - except on read-only
- *	filesystems, where MDB does not use locks.
- *	<li>#MDB_WRITEMAP
- *	Use a writeable memory map unless MDB_RDONLY is set. This is faster
- *	and uses fewer mallocs, but loses protection from application bugs
- *	like wild pointer writes and other bad updates into the database.
- *	Incompatible with nested transactions.
- *	<li>#MDB_NOMETASYNC
- *	Flush system buffers to disk only once per transaction, omit the
- *	metadata flush. Defer that until the system flushes files to disk,
- *	or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
- *	maintains database integrity, but a system crash may undo the last
- *	committed transaction. I.e. it preserves the ACI (atomicity,
- *	consistency, isolation) but not D (durability) database property.
- *	This flag may be changed at any time using #mdb_env_set_flags().
- *	<li>#MDB_NOSYNC
- *	Don't flush system buffers to disk when committing a transaction.
- *	This optimization means a system crash can corrupt the database or
- *	lose the last transactions if buffers are not yet flushed to disk.
- *	The risk is governed by how often the system flushes dirty buffers
- *	to disk and how often #mdb_env_sync() is called. However, if the
- *	filesystem preserves write order and the #MDB_WRITEMAP flag is not
- *	used, transactions exhibit ACI (atomicity, consistency, isolation)
- *	properties and only lose D (durability). I.e. database integrity
- *	is maintained, but a system crash may undo the final transactions.
- *	Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
- *	hint for when to write transactions to disk, unless #mdb_env_sync()
- *	is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
- *	This flag may be changed at any time using #mdb_env_set_flags().
- *	<li>#MDB_MAPASYNC
- *	When using #MDB_WRITEMAP, use asynchronous flushes to disk.
- *	As with #MDB_NOSYNC, a system crash can then corrupt the
- *	database or lose the last transactions. Calling #mdb_env_sync()
- *	ensures on-disk database integrity until next commit.
- *	This flag may be changed at any time using #mdb_env_set_flags().
- *	<li>#MDB_NOTLS
- *	Don't use Thread-Local Storage. Tie reader locktable slots to
- *	#MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
- *	the slot reseved for the #MDB_txn object. A thread may use parallel
- *	read-only transactions. A read-only transaction may span threads if
- *	the user synchronizes its use. Applications that multiplex many
- *	user threads over individual OS threads need this option. Such an
- *	application must also serialize the write transactions in an OS
- *	thread, since MDB's write locking is unaware of the user threads.
- *	</ul>
- * @param[in] mode The UNIX permissions to set on created files. This parameter
- * is ignored on Windows.
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the
- *	version that created the database environment.
- *	<li>#MDB_INVALID - the environment file headers are corrupted.
- *	<li>ENOENT - the directory specified by the path parameter doesn't exist.
- *	<li>EACCES - the user didn't have permission to access the environment files.
- *	<li>EAGAIN - the environment was locked by another process.
- *	</ul>
- */
-int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
-
- /** @brief Copy an MDB environment to the specified path.
- *
- * This function may be used to make a backup of an existing environment.
- * @param[in] env An environment handle returned by #mdb_env_create(). It
- * must have already been opened successfully.
- * @param[in] path The directory in which the copy will reside. This
- * directory must already exist and be writable but must otherwise be
- * empty.
- * @return A non-zero error value on failure and 0 on success.
- */
-int mdb_env_copy(MDB_env *env, const char *path);
-
- /** @brief Copy an MDB environment to the specified file descriptor.
- *
- * This function may be used to make a backup of an existing environment.
- * @param[in] env An environment handle returned by #mdb_env_create(). It
- * must have already been opened successfully.
- * @param[in] fd The filedescriptor to write the copy to. It must
- * have already been opened for Write access.
- * @return A non-zero error value on failure and 0 on success.
- */
-int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
-
- /** @brief Return statistics about the MDB environment.
- *
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[out] stat The address of an #MDB_stat structure
- * where the statistics will be copied
- */
-int mdb_env_stat(MDB_env *env, MDB_stat *stat);
-
- /** @brief Return information about the MDB environment.
- *
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[out] stat The address of an #MDB_envinfo structure
- * where the information will be copied
- */
-int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
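The environment calls deleted above are easiest to read alongside a usage sketch. The following fragment is illustrative only and is not part of the patch; the "./testdb" path and the 0664 mode are assumptions, and error handling is abbreviated.

/* Illustrative sketch: create, open, inspect, and close an environment. */
#include <stdio.h>
#include "lmdb.h"

int main(void)
{
    MDB_env *env;
    MDB_stat st;
    int rc = mdb_env_create(&env);
    if (rc) { fprintf(stderr, "create: %s\n", mdb_strerror(rc)); return 1; }
    /* "./testdb" must be an existing, writable directory (assumed here). */
    rc = mdb_env_open(env, "./testdb", 0, 0664);
    if (rc) { fprintf(stderr, "open: %s\n", mdb_strerror(rc)); mdb_env_close(env); return 1; }
    if (mdb_env_stat(env, &st) == 0)
        printf("page size %u, tree depth %u\n", st.ms_psize, st.ms_depth);
    mdb_env_close(env);  /* declared a little further below in this header */
    return 0;
}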
-
- /** @brief Flush the data buffers to disk.
- *
- * Data is always written to disk when #mdb_txn_commit() is called,
- * but the operating system may keep it buffered. MDB always flushes
- * the OS buffers upon commit as well, unless the environment was
- * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] force If non-zero, force a synchronous flush. Otherwise
- * if the environment has the #MDB_NOSYNC flag set the flushes
- * will be omitted, and with #MDB_MAPASYNC they will be asynchronous.
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	<li>EIO - an error occurred during synchronization.
- *	</ul>
- */
-int mdb_env_sync(MDB_env *env, int force);
-
- /** @brief Close the environment and release the memory map.
- *
- * Only a single thread may call this function. All transactions, databases,
- * and cursors must already be closed before calling this function. Attempts to
- * use any such handles after calling this function will cause a SIGSEGV.
- * The environment handle will be freed and must not be used again after this call.
- * @param[in] env An environment handle returned by #mdb_env_create()
- */
-void mdb_env_close(MDB_env *env);
-
- /** @brief Set environment flags.
- *
- * This may be used to set some flags in addition to those from
- * #mdb_env_open(), or to unset these flags.
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] flags The flags to change, bitwise OR'ed together
- * @param[in] onoff A non-zero value sets the flags, zero clears them.
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_env_set_flags(MDB_env *env, unsigned int flags, int onoff);
-
- /** @brief Get environment flags.
- *
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[out] flags The address of an integer to store the flags
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_env_get_flags(MDB_env *env, unsigned int *flags);
-
- /** @brief Return the path that was used in #mdb_env_open().
- *
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[out] path Address of a string pointer to contain the path. This
- * is the actual string in the environment, not a copy. It should not be
- * altered in any way.
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_env_get_path(MDB_env *env, const char **path);
-
- /** @brief Set the size of the memory map to use for this environment.
- *
- * The size should be a multiple of the OS page size. The default is
- * 10485760 bytes. The size of the memory map is also the maximum size
- * of the database. The value should be chosen as large as possible,
- * to accommodate future growth of the database.
- * This function may only be called after #mdb_env_create() and before #mdb_env_open().
- * The size may be changed by closing and reopening the environment.
- * Any attempt to set a size smaller than the space already consumed
- * by the environment will be silently changed to the current size of the used space.
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] size The size in bytes
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified, or the environment is already open.
- *	</ul>
- */
-int mdb_env_set_mapsize(MDB_env *env, size_t size);
-
- /** @brief Set the maximum number of threads/reader slots for the environment.
- *
- * This defines the number of slots in the lock table that is used to track readers in the
- * the environment. The default is 126.
- * Starting a read-only transaction normally ties a lock table slot to the
- * current thread until the environment closes or the thread exits. If
- * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the
- * MDB_txn object until it or the #MDB_env object is destroyed.
- * This function may only be called after #mdb_env_create() and before #mdb_env_open().
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] readers The maximum number of reader lock table slots
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified, or the environment is already open.
- *	</ul>
- */
-int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
-
- /** @brief Get the maximum number of threads/reader slots for the environment.
- *
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[out] readers Address of an integer to store the number of readers
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
-
- /** @brief Set the maximum number of named databases for the environment.
- *
- * This function is only needed if multiple databases will be used in the
- * environment. Simpler applications that use the environment as a single
- * unnamed database can ignore this option.
- * This function may only be called after #mdb_env_create() and before #mdb_env_open().
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] dbs The maximum number of databases
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified, or the environment is already open.
- *	</ul>
- */
-int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
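Taken together, the three setters above must run in the window between mdb_env_create() and mdb_env_open(). A minimal sketch, not part of the patch; the sizes shown are arbitrary assumptions, not recommendations:

/* Illustrative pre-open tuning; each setter fails with EINVAL once the
 * environment is already open. */
MDB_env *env;
int rc = mdb_env_create(&env);
if (rc == 0) rc = mdb_env_set_mapsize(env, 100UL * 1048576);  /* 100 MiB map */
if (rc == 0) rc = mdb_env_set_maxreaders(env, 64);            /* reader slots */
if (rc == 0) rc = mdb_env_set_maxdbs(env, 4);                 /* named databases */
if (rc == 0) rc = mdb_env_open(env, "./testdb", 0, 0664);     /* assumed path */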
-
- /** @brief Create a transaction for use with the environment.
- *
- * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
- * @note A transaction and its cursors must only be used by a single
- * thread, and a thread may only have a single transaction at a time.
- * If #MDB_NOTLS is in use, this does not apply to read-only transactions.
- * @note Cursors may not span transactions.
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] parent If this parameter is non-NULL, the new transaction
- * will be a nested transaction, with the transaction indicated by \b parent
- * as its parent. Transactions may be nested to any level. A parent
- * transaction may not issue any other operations besides mdb_txn_begin,
- * mdb_txn_abort, or mdb_txn_commit while it has active child transactions.
- * @param[in] flags Special options for this transaction. This parameter
- * must be set to 0 or by bitwise OR'ing together one or more of the
- * values described here.
- *	<ul>
- *	<li>#MDB_RDONLY
- *	This transaction will not perform any write operations.
- *	</ul>
- * @param[out] txn Address where the new #MDB_txn handle will be stored
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_PANIC - a fatal error occurred earlier and the environment
- *	must be shut down.
- *	<li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
- *	mapsize and the environment must be shut down.
- *	<li>#MDB_READERS_FULL - a read-only transaction was requested and
- *	the reader lock table is full. See #mdb_env_set_maxreaders().
- *	<li>ENOMEM - out of memory.
- *	</ul>
- */
-int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);
-
- /** @brief Commit all the operations of a transaction into the database.
- *
- * The transaction handle is freed. It and its cursors must not be used
- * again after this call, except with #mdb_cursor_renew().
- * @note Earlier documentation incorrectly said all cursors would be freed.
- * Only write-transactions free cursors.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	<li>ENOSPC - no more disk space.
- *	<li>EIO - a low-level I/O error occurred while writing.
- *	<li>ENOMEM - out of memory.
- *	</ul>
- */
-int mdb_txn_commit(MDB_txn *txn);
-
- /** @brief Abandon all the operations of the transaction instead of saving them.
- *
- * The transaction handle is freed. It and its cursors must not be used
- * again after this call, except with #mdb_cursor_renew().
- * @note Earlier documentation incorrectly said all cursors would be freed.
- * Only write-transactions free cursors.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- */
-void mdb_txn_abort(MDB_txn *txn);
-
- /** @brief Reset a read-only transaction.
- *
- * Abort the transaction like #mdb_txn_abort(), but keep the transaction
- * handle. #mdb_txn_renew() may reuse the handle. This saves allocation
- * overhead if the process will start a new read-only transaction soon,
- * and also locking overhead if #MDB_NOTLS is in use. The reader table
- * lock is released, but the table slot stays tied to its thread or
- * #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free
- * its lock table slot if MDB_NOTLS is in use.
- * Cursors opened within the transaction must not be used
- * again after this call, except with #mdb_cursor_renew().
- * Reader locks generally don't interfere with writers, but they keep old
- * versions of database pages allocated. Thus they prevent the old pages
- * from being reused when writers commit new data, and so under heavy load
- * the database size may grow much more rapidly than otherwise.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- */
-void mdb_txn_reset(MDB_txn *txn);
-
- /** @brief Renew a read-only transaction.
- *
- * This acquires a new reader lock for a transaction handle that had been
- * released by #mdb_txn_reset(). It must be called before a reset transaction
- * may be used again.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_PANIC - a fatal error occurred earlier and the environment
- *	must be shut down.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_txn_renew(MDB_txn *txn);
-
-/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */
-#define mdb_open(txn,name,flags,dbi)	mdb_dbi_open(txn,name,flags,dbi)
-/** Compat with version <= 0.9.4, avoid clash with libmdb from MDB Tools project */
-#define mdb_close(env,dbi)	mdb_dbi_close(env,dbi)
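A lifecycle sketch for the transaction calls above (illustrative, not part of the patch; `env` is assumed to be an already-opened environment):

/* Write transactions end in commit or abort; read-only transactions can
 * additionally be reset and renewed to recycle the handle and reader slot. */
MDB_txn *txn;
int rc = mdb_txn_begin(env, NULL, 0, &txn);        /* read-write */
if (rc == 0) {
    /* ... updates ... */
    rc = mdb_txn_commit(txn);                      /* or mdb_txn_abort(txn) */
}
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);   /* read-only */
if (rc == 0) {
    /* ... reads ... */
    mdb_txn_reset(txn);                            /* release reader lock, keep handle */
    rc = mdb_txn_renew(txn);                       /* reacquire before reusing it */
    mdb_txn_abort(txn);                            /* finally discard the handle */
}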
-
- /** @brief Open a database in the environment.
- *
- * A database handle denotes the name and parameters of a database,
- * independently of whether such a database exists.
- * The database handle may be discarded by calling #mdb_dbi_close().
- * The old database handle is returned if the database was already open.
- * The handle must only be closed once.
- * The database handle will be private to the current transaction until
- * the transaction is successfully committed. If the transaction is
- * aborted the handle will be closed automatically.
- * After a successful commit the
- * handle will reside in the shared environment, and may be used
- * by other transactions. This function must not be called from
- * multiple concurrent transactions. A transaction that uses this function
- * must finish (either commit or abort) before any other transaction may
- * use this function.
- *
- * To use named databases (with name != NULL), #mdb_env_set_maxdbs()
- * must be called before opening the environment.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] name The name of the database to open. If only a single
- * database is needed in the environment, this value may be NULL.
- * @param[in] flags Special options for this database. This parameter
- * must be set to 0 or by bitwise OR'ing together one or more of the
- * values described here.
- *	<ul>
- *	<li>#MDB_REVERSEKEY
- *	Keys are strings to be compared in reverse order, from the end
- *	of the strings to the beginning. By default, Keys are treated as strings and
- *	compared from beginning to end.
- *	<li>#MDB_DUPSORT
- *	Duplicate keys may be used in the database. (Or, from another perspective,
- *	keys may have multiple data items, stored in sorted order.) By default
- *	keys must be unique and may have only a single data item.
- *	<li>#MDB_INTEGERKEY
- *	Keys are binary integers in native byte order. Setting this option
- *	requires all keys to be the same size, typically sizeof(int)
- *	or sizeof(size_t).
- *	<li>#MDB_DUPFIXED
- *	This flag may only be used in combination with #MDB_DUPSORT. This option
- *	tells the library that the data items for this database are all the same
- *	size, which allows further optimizations in storage and retrieval. When
- *	all data items are the same size, the #MDB_GET_MULTIPLE and #MDB_NEXT_MULTIPLE
- *	cursor operations may be used to retrieve multiple items at once.
- *	<li>#MDB_INTEGERDUP
- *	This option specifies that duplicate data items are also integers, and
- *	should be sorted as such.
- *	<li>#MDB_REVERSEDUP
- *	This option specifies that duplicate data items should be compared as
- *	strings in reverse order.
- *	<li>#MDB_CREATE
- *	Create the named database if it doesn't exist. This option is not
- *	allowed in a read-only transaction or a read-only environment.
- *	</ul>
- * @param[out] dbi Address where the new #MDB_dbi handle will be stored
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_NOTFOUND - the specified database doesn't exist in the environment
- *	and #MDB_CREATE was not specified.
- *	<li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs().
- *	</ul>
- */
-int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
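A sketch of the commit-to-publish behavior described above (illustrative; the database name "mydb" is an assumption and requires an mdb_env_set_maxdbs() of at least 1):

/* Open (or create) a named database; committing the transaction makes
 * the handle visible to other transactions in the environment. */
MDB_txn *txn;
MDB_dbi dbi;
int rc = mdb_txn_begin(env, NULL, 0, &txn);
if (rc == 0) {
    rc = mdb_dbi_open(txn, "mydb", MDB_CREATE, &dbi);
    if (rc == 0)
        rc = mdb_txn_commit(txn);   /* publishes dbi to the environment */
    else
        mdb_txn_abort(txn);
}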
-
- /** @brief Retrieve statistics for a database.
- *
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[out] stat The address of an #MDB_stat structure
- * where the statistics will be copied
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
-
- /** @brief Close a database handle.
- *
- * This call is not mutex protected. Handles should only be closed by
- * a single thread, and only if no other threads are going to reference
- * the database handle or one of its cursors any further. Do not close
- * a handle if an existing transaction has modified its database.
- * @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- */
-void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
-
- /** @brief Delete a database and/or free all its pages.
- *
- * If the \b del parameter is 1, the DB handle will be closed
- * and the DB will be deleted.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] del 1 to delete the DB from the environment,
- * 0 to just free its pages.
- * @return A non-zero error value on failure and 0 on success.
- */
-int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
-
- /** @brief Set a custom key comparison function for a database.
- *
- * The comparison function is called whenever it is necessary to compare a
- * key specified by the application with a key currently stored in the database.
- * If no comparison function is specified, and no special key flags were specified
- * with #mdb_dbi_open(), the keys are compared lexically, with shorter keys collating
- * before longer keys.
- * @warning This function must be called before any data access functions are used,
- * otherwise data corruption may occur. The same comparison function must be used by every
- * program accessing the database, every time the database is used.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] cmp A #MDB_cmp_func function
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
-
- /** @brief Set a custom data comparison function for a #MDB_DUPSORT database.
- *
- * This comparison function is called whenever it is necessary to compare a data
- * item specified by the application with a data item currently stored in the database.
- * This function only takes effect if the database was opened with the #MDB_DUPSORT
- * flag.
- * If no comparison function is specified, and no special key flags were specified
- * with #mdb_dbi_open(), the data items are compared lexically, with shorter items collating
- * before longer items.
- * @warning This function must be called before any data access functions are used,
- * otherwise data corruption may occur. The same comparison function must be used by every
- * program accessing the database, every time the database is used.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] cmp A #MDB_cmp_func function
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
-
- /** @brief Set a relocation function for a #MDB_FIXEDMAP database.
- *
- * @todo The relocation function is called whenever it is necessary to move the data
- * of an item to a different position in the database (e.g. through tree
- * balancing operations, shifts as a result of adds or deletes, etc.). It is
- * intended to allow address/position-dependent data items to be stored in
- * a database in an environment opened with the #MDB_FIXEDMAP option.
- * Currently the relocation feature is unimplemented and setting
- * this function has no effect.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] rel A #MDB_rel_func function
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel);
-
- /** @brief Set a context pointer for a #MDB_FIXEDMAP database's relocation function.
- *
- * See #mdb_set_relfunc and #MDB_rel_func for more details.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] ctx An arbitrary pointer for whatever the application needs.
- * It will be passed to the callback function set by #mdb_set_relfunc
- * as its \b relctx parameter whenever the callback is invoked.
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx);
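To make the comparator contract above concrete, here is an illustrative MDB_cmp_func that orders keys as native unsigned ints; as the warnings note, it must be installed before any data access and by every program that opens the database. The key layout is an assumption for this sketch.

/* Example comparator (assumed key layout: one unsigned int per key). */
static int uint_key_cmp(const MDB_val *a, const MDB_val *b)
{
    unsigned int ua = *(const unsigned int *)a->mv_data;
    unsigned int ub = *(const unsigned int *)b->mv_data;
    return (ua > ub) - (ua < ub);
}
/* Inside a transaction, immediately after mdb_dbi_open():
 *     rc = mdb_set_compare(txn, dbi, uint_key_cmp);
 */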
-
- /** @brief Get items from a database.
- *
- * This function retrieves key/data pairs from the database. The address
- * and length of the data associated with the specified \b key are returned
- * in the structure to which \b data refers.
- * If the database supports duplicate keys (#MDB_DUPSORT) then the
- * first data item for the key will be returned. Retrieval of other
- * items requires the use of #mdb_cursor_get().
- *
- * @note The memory pointed to by the returned values is owned by the
- * database. The caller need not dispose of the memory, and may not
- * modify it in any way. For values returned in a read-only transaction
- * any modification attempts will cause a SIGSEGV.
- * @note Values returned from the database are valid only until a
- * subsequent update operation, or the end of the transaction.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] key The key to search for in the database
- * @param[out] data The data corresponding to the key
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_NOTFOUND - the key was not in the database.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
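An illustrative lookup with mdb_get(); the key "greeting" is an assumption, and note that data.mv_data points into the map, so it is only valid until the transaction ends or the next update:

MDB_val key, data;
key.mv_size = sizeof "greeting" - 1;
key.mv_data = "greeting";
int rc = mdb_get(txn, dbi, &key, &data);
if (rc == 0)
    printf("%.*s\n", (int)data.mv_size, (char *)data.mv_data);
else if (rc == MDB_NOTFOUND)
    puts("no such key");   /* an absent key is reported, not a hard error */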
-
- /** @brief Store items into a database.
- *
- * This function stores key/data pairs in the database. The default behavior
- * is to enter the new key/data pair, replacing any previously existing key
- * if duplicates are disallowed, or adding a duplicate data item if
- * duplicates are allowed (#MDB_DUPSORT).
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] key The key to store in the database
- * @param[in,out] data The data to store
- * @param[in] flags Special options for this operation. This parameter
- * must be set to 0 or by bitwise OR'ing together one or more of the
- * values described here.
- *	<ul>
- *	<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
- *	already appear in the database. This flag may only be specified
- *	if the database was opened with #MDB_DUPSORT. The function will
- *	return #MDB_KEYEXIST if the key/data pair already appears in the
- *	database.
- *	<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
- *	does not already appear in the database. The function will return
- *	#MDB_KEYEXIST if the key already appears in the database, even if
- *	the database supports duplicates (#MDB_DUPSORT). The \b data
- *	parameter will be set to point to the existing item.
- *	<li>#MDB_RESERVE - reserve space for data of the given size, but
- *	don't copy the given data. Instead, return a pointer to the
- *	reserved space, which the caller can fill in later - before
- *	the next update operation or the transaction ends. This saves
- *	an extra memcpy if the data is being generated later.
- *	<li>#MDB_APPEND - append the given key/data pair to the end of the
- *	database. No key comparisons are performed. This option allows
- *	fast bulk loading when keys are already known to be in the
- *	correct order. Loading unsorted keys with this flag will cause
- *	data corruption.
- *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
- *	</ul>
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
- *	<li>#MDB_TXN_FULL - the transaction has too many dirty pages.
- *	<li>EACCES - an attempt was made to write in a read-only transaction.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
-	    unsigned int flags);
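And the corresponding store, sketched with MDB_NOOVERWRITE so an existing key is reported rather than replaced (the key and value are assumptions):

MDB_val key, data;
key.mv_size  = sizeof "greeting" - 1;
key.mv_data  = "greeting";
data.mv_size = sizeof "hello" - 1;
data.mv_data = "hello";
int rc = mdb_put(txn, dbi, &key, &data, MDB_NOOVERWRITE);
if (rc == MDB_KEYEXIST) {
    /* per the flag description above, data now points at the existing item */
}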
-
- /** @brief Delete items from a database.
- *
- * This function removes key/data pairs from the database.
- * If the database does not support sorted duplicate data items
- * (#MDB_DUPSORT) the data parameter is ignored.
- * If the database supports sorted duplicates and the data parameter
- * is NULL, all of the duplicate data items for the key will be
- * deleted. Otherwise, if the data parameter is non-NULL
- * only the matching data item will be deleted.
- * This function will return #MDB_NOTFOUND if the specified key/data
- * pair is not in the database.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] key The key to delete from the database
- * @param[in] data The data to delete
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EACCES - an attempt was made to write in a read-only transaction.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
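For completeness, an illustrative delete; passing NULL data removes the key outright (and every duplicate in an #MDB_DUPSORT database):

MDB_val key;
key.mv_size = sizeof "greeting" - 1;
key.mv_data = "greeting";
int rc = mdb_del(txn, dbi, &key, NULL);
if (rc == MDB_NOTFOUND) {
    /* nothing matched; usually not fatal for the caller */
}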
-
- /** @brief Create a cursor handle.
- *
- * A cursor is associated with a specific transaction and database.
- * A cursor cannot be used when its database handle is closed. Nor
- * when its transaction has ended, except with #mdb_cursor_renew().
- * It can be discarded with #mdb_cursor_close().
- * A cursor in a write-transaction can be closed before its transaction
- * ends, and will otherwise be closed when its transaction ends.
- * A cursor in a read-only transaction must be closed explicitly, before
- * or after its transaction ends. It can be reused with
- * #mdb_cursor_renew() before finally closing it.
- * @note Earlier documentation said that cursors in every transaction
- * were closed when the transaction committed or aborted.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[out] cursor Address where the new #MDB_cursor handle will be stored
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor);
-
- /** @brief Close a cursor handle.
- *
- * The cursor handle will be freed and must not be used again after this call.
- * Its transaction must still be live if it is a write-transaction.
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- */
-void mdb_cursor_close(MDB_cursor *cursor);
-
- /** @brief Renew a cursor handle.
- *
- * A cursor is associated with a specific transaction and database.
- * Cursors that are only used in read-only
- * transactions may be re-used, to avoid unnecessary malloc/free overhead.
- * The cursor may be associated with a new read-only transaction, and
- * referencing the same database handle as it was created with.
- * This may be done whether the previous transaction is live or dead.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor);
-
- /** @brief Return the cursor's transaction handle.
- *
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- */
-MDB_txn *mdb_cursor_txn(MDB_cursor *cursor);
-
- /** @brief Return the cursor's database handle.
- *
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- */
-MDB_dbi mdb_cursor_dbi(MDB_cursor *cursor);
-
- /** @brief Retrieve by cursor.
- *
- * This function retrieves key/data pairs from the database. The address and length
- * of the key are returned in the object to which \b key refers (except for the
- * case of the #MDB_SET option, in which the \b key object is unchanged), and
- * the address and length of the data are returned in the object to which \b data
- * refers.
- * See #mdb_get() for restrictions on using the output values.
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- * @param[in,out] key The key for a retrieved item
- * @param[in,out] data The data of a retrieved item
- * @param[in] op A cursor operation #MDB_cursor_op
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_NOTFOUND - no matching key found.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
-	    MDB_cursor_op op);
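A full-scan sketch for the cursor calls above (illustrative; assumes string keys and values). MDB_NEXT on a fresh cursor starts at the first item, and MDB_NOTFOUND marks the clean end of iteration:

MDB_cursor *cur;
MDB_val key, data;
int rc = mdb_cursor_open(txn, dbi, &cur);
if (rc == 0) {
    while ((rc = mdb_cursor_get(cur, &key, &data, MDB_NEXT)) == 0)
        printf("%.*s = %.*s\n",
               (int)key.mv_size,  (char *)key.mv_data,
               (int)data.mv_size, (char *)data.mv_data);
    if (rc == MDB_NOTFOUND)
        rc = 0;              /* ran off the end, as expected */
    mdb_cursor_close(cur);
}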
-
- /** @brief Store by cursor.
- *
- * This function stores key/data pairs into the database.
- * If the function fails for any reason, the state of the cursor will be
- * unchanged. If the function succeeds and an item is inserted into the
- * database, the cursor is always positioned to refer to the newly inserted item.
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- * @param[in] key The key operated on.
- * @param[in] data The data operated on.
- * @param[in] flags Options for this operation. This parameter
- * must be set to 0 or one of the values described here.
- *	<ul>
- *	<li>#MDB_CURRENT - overwrite the data of the key/data pair to which
- *	the cursor refers with the specified data item. The \b key
- *	parameter is ignored.
- *	<li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
- *	already appear in the database. This flag may only be specified
- *	if the database was opened with #MDB_DUPSORT. The function will
- *	return #MDB_KEYEXIST if the key/data pair already appears in the
- *	database.
- *	<li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
- *	does not already appear in the database. The function will return
- *	#MDB_KEYEXIST if the key already appears in the database, even if
- *	the database supports duplicates (#MDB_DUPSORT).
- *	<li>#MDB_RESERVE - reserve space for data of the given size, but
- *	don't copy the given data. Instead, return a pointer to the
- *	reserved space, which the caller can fill in later. This saves
- *	an extra memcpy if the data is being generated later.
- *	<li>#MDB_APPEND - append the given key/data pair to the end of the
- *	database. No key comparisons are performed. This option allows
- *	fast bulk loading when keys are already known to be in the
- *	correct order. Loading unsorted keys with this flag will cause
- *	data corruption.
- *	<li>#MDB_APPENDDUP - as above, but for sorted dup data.
- *	<li>#MDB_MULTIPLE - store multiple contiguous data elements in a
- *	single request. This flag may only be specified if the database
- *	was opened with #MDB_DUPFIXED. The \b data argument must be an
- *	array of two MDB_vals. The mv_size of the first MDB_val must be
- *	the size of a single data element. The mv_data of the first MDB_val
- *	must point to the beginning of the array of contiguous data elements.
- *	The mv_size of the second MDB_val must be the count of the number
- *	of data elements to store. On return this field will be set to
- *	the count of the number of elements actually written. The mv_data
- *	of the second MDB_val is unused.
- *	</ul>
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
- *	<li>#MDB_TXN_FULL - the transaction has too many dirty pages.
- *	<li>EACCES - an attempt was made to modify a read-only database.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
-		    unsigned int flags);
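An illustrative in-place overwrite with MDB_CURRENT: position the cursor first, then replace only the data. Per the flag description above the key argument is ignored, but one must still be passed:

MDB_val key, data, newdata;
int rc = mdb_cursor_get(cur, &key, &data, MDB_FIRST);
if (rc == 0) {
    newdata.mv_size = sizeof "updated" - 1;
    newdata.mv_data = "updated";
    rc = mdb_cursor_put(cur, &key, &newdata, MDB_CURRENT);
}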
-
- /** @brief Delete current key/data pair
- *
- * This function deletes the key/data pair to which the cursor refers.
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- * @param[in] flags Options for this operation. This parameter
- * must be set to 0 or one of the values described here.
- *	<ul>
- *	<li>#MDB_NODUPDATA - delete all of the data items for the current key.
- *	This flag may only be specified if the database was opened with #MDB_DUPSORT.
- *	</ul>
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EACCES - an attempt was made to modify a read-only database.
- *	<li>EINVAL - an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags);
-
- /** @brief Return count of duplicates for current key.
- *
- * This call is only valid on databases that support sorted duplicate
- * data items #MDB_DUPSORT.
- * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
- * @param[out] countp Address where the count will be stored
- * @return A non-zero error value on failure and 0 on success. Some possible
- * errors are:
- *	<ul>
- *	<li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
- *	</ul>
- */
-int mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
-
- /** @brief Compare two data items according to a particular database.
- *
- * This returns a comparison as if the two data items were keys in the
- * specified database.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] a The first item to compare
- * @param[in] b The second item to compare
- * @return < 0 if a < b, 0 if a == b, > 0 if a > b
- */
-int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
-
- /** @brief Compare two data items according to a particular database.
- *
- * This returns a comparison as if the two items were data items of
- * the specified database. The database must have the #MDB_DUPSORT flag.
- * @param[in] txn A transaction handle returned by #mdb_txn_begin()
- * @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] a The first item to compare
- * @param[in] b The second item to compare
- * @return < 0 if a < b, 0 if a == b, > 0 if a > b
- */
-int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* _LMDB_H_ */
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
deleted file mode 100644
index 620e5b51ff..0000000000
--- a/libraries/liblmdb/mdb.c
+++ /dev/null
@@ -1,7488 +0,0 @@
-/** @file mdb.c
- *	@brief memory-mapped database library
- *
- *	A Btree-based database management library modeled loosely on the
- *	BerkeleyDB API, but much simplified.
- */
-/*
- * Copyright 2011-2013 Howard Chu, Symas Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in the file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <http://www.OpenLDAP.org/license.html>.
- *
- * This code is derived from btree.c written by Martin Hedenfalk.
- *
- * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#include -#ifdef HAVE_SYS_FILE_H -#include -#endif -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) -#include -#include /* defines BYTE_ORDER on HPUX and Solaris */ -#endif - -#if defined(__APPLE__) || defined (BSD) -# define MDB_USE_POSIX_SEM 1 -# define MDB_FDATASYNC fsync -#elif defined(ANDROID) -# define MDB_FDATASYNC fsync -#endif - -#ifndef _WIN32 -#include -#ifdef MDB_USE_POSIX_SEM -#include -#endif -#endif - -#ifdef USE_VALGRIND -#include -#define VGMEMP_CREATE(h,r,z) VALGRIND_CREATE_MEMPOOL(h,r,z) -#define VGMEMP_ALLOC(h,a,s) VALGRIND_MEMPOOL_ALLOC(h,a,s) -#define VGMEMP_FREE(h,a) VALGRIND_MEMPOOL_FREE(h,a) -#define VGMEMP_DESTROY(h) VALGRIND_DESTROY_MEMPOOL(h) -#define VGMEMP_DEFINED(a,s) VALGRIND_MAKE_MEM_DEFINED(a,s) -#else -#define VGMEMP_CREATE(h,r,z) -#define VGMEMP_ALLOC(h,a,s) -#define VGMEMP_FREE(h,a) -#define VGMEMP_DESTROY(h) -#define VGMEMP_DEFINED(a,s) -#endif - -#ifndef BYTE_ORDER -# if (defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)) -/* Solaris just defines one or the other */ -# define LITTLE_ENDIAN 1234 -# define BIG_ENDIAN 4321 -# ifdef _LITTLE_ENDIAN -# define BYTE_ORDER LITTLE_ENDIAN -# else -# define BYTE_ORDER BIG_ENDIAN -# endif -# else -# define BYTE_ORDER __BYTE_ORDER -# endif -#endif - -#ifndef LITTLE_ENDIAN -#define LITTLE_ENDIAN __LITTLE_ENDIAN -#endif -#ifndef BIG_ENDIAN -#define BIG_ENDIAN __BIG_ENDIAN -#endif - -#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) -#define MISALIGNED_OK 1 -#endif - -#include "lmdb.h" -#include "midl.h" - -#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN) -# error "Unknown or unsupported endianness (BYTE_ORDER)" -#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF -# error "Two's complement, reasonably sized integer types, please" -#endif - -/** @defgroup internal MDB Internals - * @{ - */ -/** @defgroup compat Windows Compatibility Macros - * A bunch of macros to minimize the amount of platform-specific ifdefs - * needed throughout the rest of the code. When the features this library - * needs are similar enough to POSIX to be hidden in a one-or-two line - * replacement, this macro approach is used. - * @{ - */ -#ifdef _WIN32 -#define pthread_t DWORD -#define pthread_mutex_t HANDLE -#define pthread_key_t DWORD -#define pthread_self() GetCurrentThreadId() -#define pthread_key_create(x,y) \ - ((*(x) = TlsAlloc()) == TLS_OUT_OF_INDEXES ? ErrCode() : 0) -#define pthread_key_delete(x) TlsFree(x) -#define pthread_getspecific(x) TlsGetValue(x) -#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 
0 : ErrCode()) -#define pthread_mutex_unlock(x) ReleaseMutex(x) -#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) -#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex) -#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex) -#define getpid() GetCurrentProcessId() -#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd)) -#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len)) -#define ErrCode() GetLastError() -#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} -#define close(fd) (CloseHandle(fd) ? 0 : -1) -#define munmap(ptr,len) UnmapViewOfFile(ptr) -#else - -#ifdef MDB_USE_POSIX_SEM - -#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex) -#define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex) -#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex) -#define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex) - -static int -mdb_sem_wait(sem_t *sem) -{ - int rc; - while ((rc = sem_wait(sem)) && (rc = errno) == EINTR) ; - return rc; -} - -#else - /** Lock the reader mutex. - */ -#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex) - /** Unlock the reader mutex. - */ -#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex) - - /** Lock the writer mutex. - * Only a single write transaction is allowed at a time. Other writers - * will block waiting for this mutex. - */ -#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex) - /** Unlock the writer mutex. - */ -#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex) -#endif /* MDB_USE_POSIX_SEM */ - - /** Get the error code for the last failed system function. - */ -#define ErrCode() errno - - /** An abstraction for a file handle. - * On POSIX systems file handles are small integers. On Windows - * they're opaque pointers. - */ -#define HANDLE int - - /** A value for an invalid file handle. - * Mainly used to initialize file variables and signify that they are - * unused. - */ -#define INVALID_HANDLE_VALUE (-1) - - /** Get the size of a memory page for the system. - * This is the basic size that the platform's memory manager uses, and is - * fundamental to the use of memory-mapped files. - */ -#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) -#endif - -#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) -#define MNAME_LEN 32 -#else -#define MNAME_LEN (sizeof(pthread_mutex_t)) -#endif - -/** @} */ - -#ifndef _WIN32 -/** A flag for opening a file and requesting synchronous data writes. - * This is only used when writing a meta page. It's not strictly needed; - * we could just do a normal write and then immediately perform a flush. - * But if this flag is available it saves us an extra system call. - * - * @note If O_DSYNC is undefined but exists in /usr/include, - * preferably set some compiler flag to get the definition. - * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC. - */ -#ifndef MDB_DSYNC -# define MDB_DSYNC O_DSYNC -#endif -#endif - -/** Function for flushing the data of a file. Define this to fsync - * if fdatasync() is not supported. - */ -#ifndef MDB_FDATASYNC -# define MDB_FDATASYNC fdatasync -#endif - -#ifndef MDB_MSYNC -# define MDB_MSYNC(addr,len,flags) msync(addr,len,flags) -#endif - -#ifndef MS_SYNC -#define MS_SYNC 1 -#endif - -#ifndef MS_ASYNC -#define MS_ASYNC 0 -#endif - - /** A page number in the database. 
- * Note that 64 bit page numbers are overkill, since pages themselves - * already represent 12-13 bits of addressable memory, and the OS will - * always limit applications to a maximum of 63 bits of address space. - * - * @note In the #MDB_node structure, we only store 48 bits of this value, - * which thus limits us to only 60 bits of addressable data. - */ -typedef MDB_ID pgno_t; - - /** A transaction ID. - * See struct MDB_txn.mt_txnid for details. - */ -typedef MDB_ID txnid_t; - -/** @defgroup debug Debug Macros - * @{ - */ -#ifndef MDB_DEBUG - /** Enable debug output. - * Set this to 1 for copious tracing. Set to 2 to add dumps of all IDLs - * read from and written to the database (used for free space management). - */ -#define MDB_DEBUG 0 -#endif - -#if !(__STDC_VERSION__ >= 199901L || defined(__GNUC__)) -# undef MDB_DEBUG -# define MDB_DEBUG 0 -# define DPRINTF (void) /* Vararg macros may be unsupported */ -#elif MDB_DEBUG -static int mdb_debug; -static txnid_t mdb_debug_start; - - /** Print a debug message with printf formatting. */ -# define DPRINTF(fmt, ...) /**< Requires 2 or more args */ \ - ((void) ((mdb_debug) && \ - fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__))) -#else -# define DPRINTF(fmt, ...) ((void) 0) -# define MDB_DEBUG_SKIP -#endif - /** Print a debug string. - * The string is printed literally, with no format processing. - */ -#define DPUTS(arg) DPRINTF("%s", arg) -/** @} */ - - /** A default memory page size. - * The actual size is platform-dependent, but we use this for - * boot-strapping. We probably should not be using this any more. - * The #GET_PAGESIZE() macro is used to get the actual size. - * - * Note that we don't currently support Huge pages. On Linux, - * regular data files cannot use Huge pages, and in general - * Huge pages aren't actually pageable. We rely on the OS - * demand-pager to read our data and page it out when memory - * pressure from other processes is high. So until OSs have - * actual paging support for Huge pages, they're not viable. - */ -#define MDB_PAGESIZE 4096 - - /** The minimum number of keys required in a database page. - * Setting this to a larger value will place a smaller bound on the - * maximum size of a data item. Data items larger than this size will - * be pushed into overflow pages instead of being stored directly in - * the B-tree node. This value used to default to 4. With a page size - * of 4096 bytes that meant that any item larger than 1024 bytes would - * go into an overflow page. That also meant that on average 2-3KB of - * each overflow page was wasted space. The value cannot be lower than - * 2 because then there would no longer be a tree structure. With this - * value, items larger than 2KB will go into overflow pages, and on - * average only 1KB will be wasted. - */ -#define MDB_MINKEYS 2 - - /** A stamp that identifies a file as an MDB file. - * There's nothing special about this value other than that it is easily - * recognizable, and it will reflect any byte order mismatches. - */ -#define MDB_MAGIC 0xBEEFC0DE - - /** The version number for a database's file format. */ -#define MDB_VERSION 1 - - /** @brief The maximum size of a key in the database. - * - * The library rejects bigger keys, and cannot deal with records - * with bigger keys stored by a library with bigger max keysize. - * - * We require that keys all fit onto a regular page. This limit - * could be raised a bit further if needed; to something just - * under #MDB_PAGESIZE / #MDB_MINKEYS. 
- * - * Note that data items in an #MDB_DUPSORT database are actually keys - * of a subDB, so they're also limited to this size. - */ -#ifndef MDB_MAXKEYSIZE -#define MDB_MAXKEYSIZE 511 -#endif - - /** @brief The maximum size of a data item. - * - * We only store a 32 bit value for node sizes. - */ -#define MAXDATASIZE 0xffffffffUL - -#if MDB_DEBUG - /** A key buffer. - * @ingroup debug - * This is used for printing a hex dump of a key's contents. - */ -#define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)] - /** Display a key in hex. - * @ingroup debug - * Invoke a function to display a key in hex. - */ -#define DKEY(x) mdb_dkey(x, kbuf) -#else -#define DKBUF typedef int dummy_kbuf /* so we can put ';' after */ -#define DKEY(x) 0 -#endif - - /** An invalid page number. - * Mainly used to denote an empty tree. - */ -#define P_INVALID (~(pgno_t)0) - - /** Test if the flags \b f are set in a flag word \b w. */ -#define F_ISSET(w, f) (((w) & (f)) == (f)) - - /** Used for offsets within a single page. - * Since memory pages are typically 4 or 8KB in size, 12-13 bits, - * this is plenty. - */ -typedef uint16_t indx_t; - - /** Default size of memory map. - * This is certainly too small for any actual applications. Apps should always set - * the size explicitly using #mdb_env_set_mapsize(). - */ -#define DEFAULT_MAPSIZE 1048576 - -/** @defgroup readers Reader Lock Table - * Readers don't acquire any locks for their data access. Instead, they - * simply record their transaction ID in the reader table. The reader - * mutex is needed just to find an empty slot in the reader table. The - * slot's address is saved in thread-specific data so that subsequent read - * transactions started by the same thread need no further locking to proceed. - * - * If #MDB_NOTLS is set, the slot address is not saved in thread-specific data. - * - * No reader table is used if the database is on a read-only filesystem. - * - * Since the database uses multi-version concurrency control, readers don't - * actually need any locking. This table is used to keep track of which - * readers are using data from which old transactions, so that we'll know - * when a particular old transaction is no longer in use. Old transactions - * that have discarded any data pages can then have those pages reclaimed - * for use by a later write transaction. - * - * The lock table is constructed such that reader slots are aligned with the - * processor's cache line size. Any slot is only ever used by one thread. - * This alignment guarantees that there will be no contention or cache - * thrashing as threads update their own slot info, and also eliminates - * any need for locking when accessing a slot. - * - * A writer thread will scan every slot in the table to determine the oldest - * outstanding reader transaction. Any freed pages older than this will be - * reclaimed by the writer. The writer doesn't use any locks when scanning - * this table. This means that there's no guarantee that the writer will - * see the most up-to-date reader info, but that's not required for correct - * operation - all we need is to know the upper bound on the oldest reader, - * we don't care at all about the newest reader. So the only consequence of - * reading stale information here is that old pages might hang around a - * while longer before being reclaimed. 
That's actually good anyway, because - * the longer we delay reclaiming old pages, the more likely it is that a - * string of contiguous pages can be found after coalescing old pages from - * many old transactions together. - * @{ - */ - /** Number of slots in the reader table. - * This value was chosen somewhat arbitrarily. 126 readers plus a - * couple mutexes fit exactly into 8KB on my development machine. - * Applications should set the table size using #mdb_env_set_maxreaders(). - */ -#define DEFAULT_READERS 126 - - /** The size of a CPU cache line in bytes. We want our lock structures - * aligned to this size to avoid false cache line sharing in the - * lock table. - * This value works for most CPUs. For Itanium this should be 128. - */ -#ifndef CACHELINE -#define CACHELINE 64 -#endif - - /** The information we store in a single slot of the reader table. - * In addition to a transaction ID, we also record the process and - * thread ID that owns a slot, so that we can detect stale information, - * e.g. threads or processes that went away without cleaning up. - * @note We currently don't check for stale records. We simply re-init - * the table when we know that we're the only process opening the - * lock file. - */ -typedef struct MDB_rxbody { - /** Current Transaction ID when this transaction began, or (txnid_t)-1. - * Multiple readers that start at the same time will probably have the - * same ID here. Again, it's not important to exclude them from - * anything; all we need to know is which version of the DB they - * started from so we can avoid overwriting any data used in that - * particular version. - */ - txnid_t mrb_txnid; - /** The process ID of the process owning this reader txn. */ - pid_t mrb_pid; - /** The thread ID of the thread owning this txn. */ - pthread_t mrb_tid; -} MDB_rxbody; - - /** The actual reader record, with cacheline padding. */ -typedef struct MDB_reader { - union { - MDB_rxbody mrx; - /** shorthand for mrb_txnid */ -#define mr_txnid mru.mrx.mrb_txnid -#define mr_pid mru.mrx.mrb_pid -#define mr_tid mru.mrx.mrb_tid - /** cache line alignment */ - char pad[(sizeof(MDB_rxbody)+CACHELINE-1) & ~(CACHELINE-1)]; - } mru; -} MDB_reader; - - /** The header for the reader table. - * The table resides in a memory-mapped file. (This is a different file - * than is used for the main database.) - * - * For POSIX the actual mutexes reside in the shared memory of this - * mapped file. On Windows, mutexes are named objects allocated by the - * kernel; we store the mutex names in this mapped file so that other - * processes can grab them. This same approach is also used on - * MacOSX/Darwin (using named semaphores) since MacOSX doesn't support - * process-shared POSIX mutexes. For these cases where a named object - * is used, the object name is derived from a 64 bit FNV hash of the - * environment pathname. As such, naming collisions are extremely - * unlikely. If a collision occurs, the results are unpredictable. - */ -typedef struct MDB_txbody { - /** Stamp identifying this as an MDB file. It must be set - * to #MDB_MAGIC. */ - uint32_t mtb_magic; - /** Version number of this lock file. Must be set to #MDB_VERSION. */ - uint32_t mtb_version; -#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - char mtb_rmname[MNAME_LEN]; -#else - /** Mutex protecting access to this table. - * This is the reader lock that #LOCK_MUTEX_R acquires. - */ - pthread_mutex_t mtb_mutex; -#endif - /** The ID of the last transaction committed to the database. 
- * This is recorded here only for convenience; the value can always - * be determined by reading the main database meta pages. - */ - txnid_t mtb_txnid; - /** The number of slots that have been used in the reader table. - * This always records the maximum count, it is not decremented - * when readers release their slots. - */ - unsigned mtb_numreaders; -} MDB_txbody; - - /** The actual reader table definition. */ -typedef struct MDB_txninfo { - union { - MDB_txbody mtb; -#define mti_magic mt1.mtb.mtb_magic -#define mti_version mt1.mtb.mtb_version -#define mti_mutex mt1.mtb.mtb_mutex -#define mti_rmname mt1.mtb.mtb_rmname -#define mti_txnid mt1.mtb.mtb_txnid -#define mti_numreaders mt1.mtb.mtb_numreaders - char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; - } mt1; - union { -#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) - char mt2_wmname[MNAME_LEN]; -#define mti_wmname mt2.mt2_wmname -#else - pthread_mutex_t mt2_wmutex; -#define mti_wmutex mt2.mt2_wmutex -#endif - char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)]; - } mt2; - MDB_reader mti_readers[1]; -} MDB_txninfo; -/** @} */ - -/** Common header for all page types. - * Overflow records occupy a number of contiguous pages with no - * headers on any page after the first. - */ -typedef struct MDB_page { -#define mp_pgno mp_p.p_pgno -#define mp_next mp_p.p_next - union { - pgno_t p_pgno; /**< page number */ - void * p_next; /**< for in-memory list of freed structs */ - } mp_p; - uint16_t mp_pad; -/** @defgroup mdb_page Page Flags - * @ingroup internal - * Flags for the page headers. - * @{ - */ -#define P_BRANCH 0x01 /**< branch page */ -#define P_LEAF 0x02 /**< leaf page */ -#define P_OVERFLOW 0x04 /**< overflow page */ -#define P_META 0x08 /**< meta page */ -#define P_DIRTY 0x10 /**< dirty page */ -#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ -#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */ -/** @} */ - uint16_t mp_flags; /**< @ref mdb_page */ -#define mp_lower mp_pb.pb.pb_lower -#define mp_upper mp_pb.pb.pb_upper -#define mp_pages mp_pb.pb_pages - union { - struct { - indx_t pb_lower; /**< lower bound of free space */ - indx_t pb_upper; /**< upper bound of free space */ - } pb; - uint32_t pb_pages; /**< number of overflow pages */ - } mp_pb; - indx_t mp_ptrs[1]; /**< dynamic size */ -} MDB_page; - - /** Size of the page header, excluding dynamic data at the end */ -#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) - - /** Address of first usable data byte in a page, after the header */ -#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) - - /** Number of nodes on a page */ -#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) - - /** The amount of space remaining in the page */ -#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) - - /** The percentage of space used in the page, in tenths of a percent. */ -#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ - ((env)->me_psize - PAGEHDRSZ)) - /** The minimum page fill factor, in tenths of a percent. - * Pages emptier than this are candidates for merging. 
- */ -#define FILL_THRESHOLD 250 - - /** Test if a page is a leaf page */ -#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) - /** Test if a page is a LEAF2 page */ -#define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2) - /** Test if a page is a branch page */ -#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) - /** Test if a page is an overflow page */ -#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) - /** Test if a page is a sub page */ -#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) - - /** The number of overflow pages needed to store the given size. */ -#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) - - /** Header for a single key/data pair within a page. - * We guarantee 2-byte alignment for nodes. - */ -typedef struct MDB_node { - /** lo and hi are used for data size on leaf nodes and for - * child pgno on branch nodes. On 64 bit platforms, flags - * is also used for pgno. (Branch nodes have no flags). - * They are in host byte order in case that lets some - * accesses be optimized into a 32-bit word access. - */ -#define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN] -#define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */ - unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */ -/** @defgroup mdb_node Node Flags - * @ingroup internal - * Flags for node headers. - * @{ - */ -#define F_BIGDATA 0x01 /**< data put on overflow page */ -#define F_SUBDATA 0x02 /**< data is a sub-database */ -#define F_DUPDATA 0x04 /**< data has duplicates */ - -/** valid flags for #mdb_node_add() */ -#define NODE_ADD_FLAGS (F_DUPDATA|F_SUBDATA|MDB_RESERVE|MDB_APPEND) - -/** @} */ - unsigned short mn_flags; /**< @ref mdb_node */ - unsigned short mn_ksize; /**< key size */ - char mn_data[1]; /**< key and data are appended here */ -} MDB_node; - - /** Size of the node header, excluding dynamic data at the end */ -#define NODESIZE offsetof(MDB_node, mn_data) - - /** Bit position of top word in page number, for shifting mn_flags */ -#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) - - /** Size of a node in a branch page with a given key. - * This is just the node header plus the key, there is no data. - */ -#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) - - /** Size of a node in a leaf page with a given key and data. - * This is node header plus key plus data size. - */ -#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) - - /** Address of node \b i in page \b p */ -#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) - - /** Address of the key for the node */ -#define NODEKEY(node) (void *)((node)->mn_data) - - /** Address of the data for a node */ -#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) - - /** Get the page number pointed to by a branch node */ -#define NODEPGNO(node) \ - ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \ - (PGNO_TOPWORD ? 
((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0)) - /** Set the page number in a branch node */ -#define SETPGNO(node,pgno) do { \ - (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ - if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0) - - /** Get the size of the data in a leaf node */ -#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) - /** Set the size of the data for a leaf node */ -#define SETDSZ(node,size) do { \ - (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0) - /** The size of a key in a node */ -#define NODEKSZ(node) ((node)->mn_ksize) - - /** Copy a page number from src to dst */ -#ifdef MISALIGNED_OK -#define COPY_PGNO(dst,src) dst = src -#else -#if SIZE_MAX > 4294967295UL -#define COPY_PGNO(dst,src) do { \ - unsigned short *s, *d; \ - s = (unsigned short *)&(src); \ - d = (unsigned short *)&(dst); \ - *d++ = *s++; \ - *d++ = *s++; \ - *d++ = *s++; \ - *d = *s; \ -} while (0) -#else -#define COPY_PGNO(dst,src) do { \ - unsigned short *s, *d; \ - s = (unsigned short *)&(src); \ - d = (unsigned short *)&(dst); \ - *d++ = *s++; \ - *d = *s; \ -} while (0) -#endif -#endif - /** The address of a key in a LEAF2 page. - * LEAF2 pages are used for #MDB_DUPFIXED sorted-duplicate sub-DBs. - * There are no node headers, keys are stored contiguously. - */ -#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) - - /** Set the \b node's key into \b key, if requested. */ -#define MDB_GET_KEY(node, key) { if ((key) != NULL) { \ - (key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } } - - /** Information about a single database in the environment. */ -typedef struct MDB_db { - uint32_t md_pad; /**< also ksize for LEAF2 pages */ - uint16_t md_flags; /**< @ref mdb_dbi_open */ - uint16_t md_depth; /**< depth of this tree */ - pgno_t md_branch_pages; /**< number of internal pages */ - pgno_t md_leaf_pages; /**< number of leaf pages */ - pgno_t md_overflow_pages; /**< number of overflow pages */ - size_t md_entries; /**< number of data items */ - pgno_t md_root; /**< the root page of this tree */ -} MDB_db; - - /** mdb_dbi_open flags */ -#define MDB_VALID 0x8000 /**< DB handle is valid, for me_dbflags */ -#define PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) -#define VALID_FLAGS (MDB_REVERSEKEY|MDB_DUPSORT|MDB_INTEGERKEY|MDB_DUPFIXED|\ - MDB_INTEGERDUP|MDB_REVERSEDUP|MDB_CREATE) - - /** Handle for the DB used to track free pages. */ -#define FREE_DBI 0 - /** Handle for the default DB. */ -#define MAIN_DBI 1 - - /** Meta page content. */ -typedef struct MDB_meta { - /** Stamp identifying this as an MDB file. It must be set - * to #MDB_MAGIC. */ - uint32_t mm_magic; - /** Version number of this lock file. Must be set to #MDB_VERSION. */ - uint32_t mm_version; - void *mm_address; /**< address for fixed mapping */ - size_t mm_mapsize; /**< size of mmap region */ - MDB_db mm_dbs[2]; /**< first is free space, 2nd is main db */ - /** The size of pages used in this DB */ -#define mm_psize mm_dbs[0].md_pad - /** Any persistent environment flags. @ref mdb_env */ -#define mm_flags mm_dbs[0].md_flags - pgno_t mm_last_pg; /**< last used page in file */ - txnid_t mm_txnid; /**< txnid that committed this page */ -} MDB_meta; - - /** Buffer for a stack-allocated dirty page. - * The members define size and alignment, and silence type - * aliasing warnings. They are not used directly; that could - * mean incorrectly using several union members in parallel. 
- */ -typedef union MDB_pagebuf { - char mb_raw[MDB_PAGESIZE]; - MDB_page mb_page; - struct { - char mm_pad[PAGEHDRSZ]; - MDB_meta mm_meta; - } mb_metabuf; -} MDB_pagebuf; - - /** Auxiliary DB info. - * The information here is mostly static/read-only. There is - * only a single copy of this record in the environment. - */ -typedef struct MDB_dbx { - MDB_val md_name; /**< name of the database */ - MDB_cmp_func *md_cmp; /**< function for comparing keys */ - MDB_cmp_func *md_dcmp; /**< function for comparing data items */ - MDB_rel_func *md_rel; /**< user relocate function */ - void *md_relctx; /**< user-provided context for md_rel */ -} MDB_dbx; - - /** A database transaction. - * Every operation requires a transaction handle. - */ -struct MDB_txn { - MDB_txn *mt_parent; /**< parent of a nested txn */ - MDB_txn *mt_child; /**< nested txn under this txn */ - pgno_t mt_next_pgno; /**< next unallocated page */ - /** The ID of this transaction. IDs are integers incrementing from 1. - * Only committed write transactions increment the ID. If a transaction - * aborts, the ID may be re-used by the next writer. - */ - txnid_t mt_txnid; - MDB_env *mt_env; /**< the DB environment */ - /** The list of pages that became unused during this transaction. - */ - MDB_IDL mt_free_pgs; - union { - MDB_ID2L dirty_list; /**< for write txns: modified pages */ - MDB_reader *reader; /**< this thread's reader table slot or NULL */ - } mt_u; - /** Array of records for each DB known in the environment. */ - MDB_dbx *mt_dbxs; - /** Array of MDB_db records for each known DB */ - MDB_db *mt_dbs; -/** @defgroup mt_dbflag Transaction DB Flags - * @ingroup internal - * @{ - */ -#define DB_DIRTY 0x01 /**< DB was written in this txn */ -#define DB_STALE 0x02 /**< DB record is older than txnID */ -#define DB_NEW 0x04 /**< DB handle opened in this txn */ -#define DB_VALID 0x08 /**< DB handle is valid, see also #MDB_VALID */ -/** @} */ - /** In write txns, array of cursors for each DB */ - MDB_cursor **mt_cursors; - /** Array of flags for each DB */ - unsigned char *mt_dbflags; - /** Number of DB records in use. This number only ever increments; - * we don't decrement it when individual DB handles are closed. - */ - MDB_dbi mt_numdbs; - -/** @defgroup mdb_txn Transaction Flags - * @ingroup internal - * @{ - */ -#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ -#define MDB_TXN_ERROR 0x02 /**< an error has occurred */ -#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */ -/** @} */ - unsigned int mt_flags; /**< @ref mdb_txn */ - /** dirty_list maxsize - # of allocated pages allowed, including in parent txns */ - unsigned int mt_dirty_room; - /** Tracks which of the two meta pages was used at the start - * of this transaction. - */ - unsigned int mt_toggle; -}; - -/** Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. - * At 4 keys per node, enough for 2^64 nodes, so there's probably no need to - * raise this on a 64 bit machine. 
- */ -#define CURSOR_STACK 32 - -struct MDB_xcursor; - - /** Cursors are used for all DB operations */ -struct MDB_cursor { - /** Next cursor on this DB in this txn */ - MDB_cursor *mc_next; - /** Backup of the original cursor if this cursor is a shadow */ - MDB_cursor *mc_backup; - /** Context used for databases with #MDB_DUPSORT, otherwise NULL */ - struct MDB_xcursor *mc_xcursor; - /** The transaction that owns this cursor */ - MDB_txn *mc_txn; - /** The database handle this cursor operates on */ - MDB_dbi mc_dbi; - /** The database record for this cursor */ - MDB_db *mc_db; - /** The database auxiliary record for this cursor */ - MDB_dbx *mc_dbx; - /** The @ref mt_dbflag for this database */ - unsigned char *mc_dbflag; - unsigned short mc_snum; /**< number of pushed pages */ - unsigned short mc_top; /**< index of top page, normally mc_snum-1 */ -/** @defgroup mdb_cursor Cursor Flags - * @ingroup internal - * Cursor state flags. - * @{ - */ -#define C_INITIALIZED 0x01 /**< cursor has been initialized and is valid */ -#define C_EOF 0x02 /**< No more data */ -#define C_SUB 0x04 /**< Cursor is a sub-cursor */ -#define C_SPLITTING 0x20 /**< Cursor is in page_split */ -#define C_UNTRACK 0x40 /**< Un-track cursor when closing */ -/** @} */ - unsigned int mc_flags; /**< @ref mdb_cursor */ - MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */ - indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */ -}; - - /** Context for sorted-dup records. - * We could have gone to a fully recursive design, with arbitrarily - * deep nesting of sub-databases. But for now we only handle these - * levels - main DB, optional sub-DB, sorted-duplicate DB. - */ -typedef struct MDB_xcursor { - /** A sub-cursor for traversing the Dup DB */ - MDB_cursor mx_cursor; - /** The database record for this Dup DB */ - MDB_db mx_db; - /** The auxiliary DB record for this Dup DB */ - MDB_dbx mx_dbx; - /** The @ref mt_dbflag for this Dup DB */ - unsigned char mx_dbflag; -} MDB_xcursor; - - /** State of FreeDB old pages, stored in the MDB_env */ -typedef struct MDB_pgstate { - pgno_t *mf_pghead; /**< Reclaimed freeDB pages, or NULL before use */ - txnid_t mf_pglast; /**< ID of last used record, or 0 if !mf_pghead */ -} MDB_pgstate; - - /** The database environment. */ -struct MDB_env { - HANDLE me_fd; /**< The main data file */ - HANDLE me_lfd; /**< The lock file */ - HANDLE me_mfd; /**< just for writing the meta pages */ - /** Failed to update the meta page. Probably an I/O error. */ -#define MDB_FATAL_ERROR 0x80000000U - /** Some fields are initialized. 
*/ -#define MDB_ENV_ACTIVE 0x20000000U - /** me_txkey is set */ -#define MDB_ENV_TXKEY 0x10000000U - uint32_t me_flags; /**< @ref mdb_env */ - unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */ - unsigned int me_maxreaders; /**< size of the reader table */ - unsigned int me_numreaders; /**< max numreaders set by this env */ - MDB_dbi me_numdbs; /**< number of DBs opened */ - MDB_dbi me_maxdbs; /**< size of the DB table */ - pid_t me_pid; /**< process ID of this env */ - char *me_path; /**< path to the DB files */ - char *me_map; /**< the memory map of the data file */ - MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */ - MDB_meta *me_metas[2]; /**< pointers to the two meta pages */ - MDB_txn *me_txn; /**< current write transaction */ - size_t me_mapsize; /**< size of the data memory map */ - off_t me_size; /**< current file size */ - pgno_t me_maxpg; /**< me_mapsize / me_psize */ - MDB_dbx *me_dbxs; /**< array of static DB info */ - uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */ - pthread_key_t me_txkey; /**< thread-key for readers */ - MDB_pgstate me_pgstate; /**< state of old pages from freeDB */ -# define me_pglast me_pgstate.mf_pglast -# define me_pghead me_pgstate.mf_pghead - MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ - /** IDL of pages that became unused in a write txn */ - MDB_IDL me_free_pgs; - /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ - MDB_ID2L me_dirty_list; - /** Max number of freelist items that can fit in a single overflow page */ - int me_maxfree_1pg; - /** Max size of a node on a page */ - unsigned int me_nodemax; -#ifdef _WIN32 - HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ - HANDLE me_wmutex; -#elif defined(MDB_USE_POSIX_SEM) - sem_t *me_rmutex; /* Shared mutexes are not supported */ - sem_t *me_wmutex; -#endif -}; - - /** Nested transaction */ -typedef struct MDB_ntxn { - MDB_txn mnt_txn; /* the transaction */ - MDB_pgstate mnt_pgstate; /* parent transaction's saved freestate */ -} MDB_ntxn; - - /** max number of pages to commit in one writev() call */ -#define MDB_COMMIT_PAGES 64 -#if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES -#undef MDB_COMMIT_PAGES -#define MDB_COMMIT_PAGES IOV_MAX -#endif - - /* max bytes to write in one call */ -#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) - -static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp); -static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp); -static int mdb_page_touch(MDB_cursor *mc); - -static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl); -static int mdb_page_search_root(MDB_cursor *mc, - MDB_val *key, int modify); -#define MDB_PS_MODIFY 1 -#define MDB_PS_ROOTONLY 2 -static int mdb_page_search(MDB_cursor *mc, - MDB_val *key, int flags); -static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst); - -#define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ -static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, - pgno_t newpgno, unsigned int nflags); - -static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); -static int mdb_env_pick_meta(const MDB_env *env); -static int mdb_env_write_meta(MDB_txn *txn); -#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM)) /* Drop unused excl arg */ -# define mdb_env_close0(env, excl) mdb_env_close1(env) -#endif -static void mdb_env_close0(MDB_env *env, int excl); - -static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp); 
-static int mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags); -static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize); -static void mdb_node_shrink(MDB_page *mp, indx_t indx); -static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst); -static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data); -static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data); -static size_t mdb_branch_size(MDB_env *env, MDB_val *key); - -static int mdb_rebalance(MDB_cursor *mc); -static int mdb_update_key(MDB_cursor *mc, MDB_val *key); - -static void mdb_cursor_pop(MDB_cursor *mc); -static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp); - -static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf); -static int mdb_cursor_sibling(MDB_cursor *mc, int move_right); -static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); -static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op); -static int mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, - int *exactp); -static int mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data); -static int mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data); - -static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); -static void mdb_xcursor_init0(MDB_cursor *mc); -static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node); - -static int mdb_drop0(MDB_cursor *mc, int subs); -static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi); - -/** @cond */ -static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long; -/** @endcond */ - -#ifdef _WIN32 -static SECURITY_DESCRIPTOR mdb_null_sd; -static SECURITY_ATTRIBUTES mdb_all_sa; -static int mdb_sec_inited; -#endif - -/** Return the library version info. */ -char * -mdb_version(int *major, int *minor, int *patch) -{ - if (major) *major = MDB_VERSION_MAJOR; - if (minor) *minor = MDB_VERSION_MINOR; - if (patch) *patch = MDB_VERSION_PATCH; - return MDB_VERSION_STRING; -} - -/** Table of descriptions for MDB @ref errors */ -static char *const mdb_errstr[] = { - "MDB_KEYEXIST: Key/data pair already exists", - "MDB_NOTFOUND: No matching key/data pair found", - "MDB_PAGE_NOTFOUND: Requested page not found", - "MDB_CORRUPTED: Located page was wrong type", - "MDB_PANIC: Update of meta page failed", - "MDB_VERSION_MISMATCH: Database environment version mismatch", - "MDB_INVALID: File is not an MDB file", - "MDB_MAP_FULL: Environment mapsize limit reached", - "MDB_DBS_FULL: Environment maxdbs limit reached", - "MDB_READERS_FULL: Environment maxreaders limit reached", - "MDB_TLS_FULL: Thread-local storage keys full - too many environments open", - "MDB_TXN_FULL: Transaction has too many dirty pages - transaction too big", - "MDB_CURSOR_FULL: Internal error - cursor stack limit reached", - "MDB_PAGE_FULL: Internal error - page has no more space", - "MDB_MAP_RESIZED: Database contents grew beyond environment mapsize", - "MDB_INCOMPATIBLE: Database flags changed or would change", - "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot", -}; - -char * -mdb_strerror(int err) -{ - int i; - if (!err) - return ("Successful return: 0"); - - if (err >= MDB_KEYEXIST && err <= MDB_LAST_ERRCODE) { - i = err - MDB_KEYEXIST; - return mdb_errstr[i]; - } - - return strerror(err); -} - -#if MDB_DEBUG -/** Display a key in hexadecimal and return the address of the result. 
- * @param[in] key the key to display
- * @param[in] buf the buffer to write into. Should always be #DKBUF.
- * @return The key in hexadecimal form.
- */
-char *
-mdb_dkey(MDB_val *key, char *buf)
-{
-    char *ptr = buf;
-    unsigned char *c = key->mv_data;
-    unsigned int i;
-
-    if (!key)
-        return "";
-
-    if (key->mv_size > MDB_MAXKEYSIZE)
-        return "MDB_MAXKEYSIZE";
-    /* may want to make this a dynamic check: if the key is mostly
-     * printable characters, print it as-is instead of converting to hex.
-     */
-#if 1
-    buf[0] = '\0';
-    for (i=0; i<key->mv_size; i++)
-        ptr += sprintf(ptr, "%02x", *c++);
-#else
-    sprintf(buf, "%.*s", key->mv_size, key->mv_data);
-#endif
-    return buf;
-}
-
-/** Display all the keys in the page. */
-void
-mdb_page_list(MDB_page *mp)
-{
-    MDB_node *node;
-    unsigned int i, nkeys, nsize;
-    MDB_val key;
-    DKBUF;
-
-    nkeys = NUMKEYS(mp);
-    fprintf(stderr, "Page %zu numkeys %d\n", mp->mp_pgno, nkeys);
-    for (i=0; i<nkeys; i++) {
-        node = NODEPTR(mp, i);
-        key.mv_size = node->mn_ksize;
-        key.mv_data = node->mn_data;
-        nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
-        if (IS_BRANCH(mp)) {
-            fprintf(stderr, "key %d: page %zu, %s\n", i, NODEPGNO(node),
-                DKEY(&key));
-        } else {
-            if (F_ISSET(node->mn_flags, F_BIGDATA))
-                nsize += sizeof(pgno_t);
-            else
-                nsize += NODEDSZ(node);
-            fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
-        }
-    }
-}
-
-void
-mdb_cursor_chk(MDB_cursor *mc)
-{
-    unsigned int i;
-    MDB_node *node;
-    MDB_page *mp;
-
-    if (!mc->mc_snum && !(mc->mc_flags & C_INITIALIZED)) return;
-    for (i=0; i<mc->mc_top; i++) {
-        mp = mc->mc_pg[i];
-        node = NODEPTR(mp, mc->mc_ki[i]);
-        if (NODEPGNO(node) != mc->mc_pg[i+1]->mp_pgno)
-            printf("oops!\n");
-    }
-    if (mc->mc_ki[i] >= NUMKEYS(mc->mc_pg[i]))
-        printf("ack!\n");
-}
-#endif
-
-#if MDB_DEBUG > 2
-/** Count all the pages in each DB and in the freelist
- * and make sure it matches the actual number of pages
- * being used.
- */
-static void mdb_audit(MDB_txn *txn)
-{
-    MDB_cursor mc;
-    MDB_val key, data;
-    MDB_ID freecount, count;
-    MDB_dbi i;
-    int rc;
-
-    freecount = 0;
-    mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
-    while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
-        freecount += *(MDB_ID *)data.mv_data;
-
-    count = 0;
-    for (i = 0; i<txn->mt_numdbs; i++) {
-        MDB_xcursor mx;
-        mdb_cursor_init(&mc, txn, i, &mx);
-        if (txn->mt_dbs[i].md_root == P_INVALID)
-            continue;
-        count += txn->mt_dbs[i].md_branch_pages +
-            txn->mt_dbs[i].md_leaf_pages +
-            txn->mt_dbs[i].md_overflow_pages;
-        if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
-            mdb_page_search(&mc, NULL, 0);
-            do {
-                unsigned j;
-                MDB_page *mp;
-                mp = mc.mc_pg[mc.mc_top];
-                for (j=0; j<NUMKEYS(mp); j++) {
-                    MDB_node *leaf = NODEPTR(mp, j);
-                    if (leaf->mn_flags & F_SUBDATA) {
-                        MDB_db db;
-                        memcpy(&db, NODEDATA(leaf), sizeof(db));
-                        count += db.md_branch_pages + db.md_leaf_pages +
-                            db.md_overflow_pages;
-                    }
-                }
-            }
-            while (mdb_cursor_sibling(&mc, 1) == 0);
-        }
-    }
-    if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
-        fprintf(stderr, "audit: %lu freecount: %lu count: %lu total: %lu next_pgno: %lu\n",
-            txn->mt_txnid, freecount, count+2, freecount+count+2, txn->mt_next_pgno);
-    }
-}
-#endif
-
-int
-mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
-{
-    return txn->mt_dbxs[dbi].md_cmp(a, b);
-}
-
-int
-mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
-{
-    return txn->mt_dbxs[dbi].md_dcmp(a, b);
-}
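With the vendored copy gone, it is worth noting the error-reporting idiom that mdb_strerror() (deleted above) supported. A hedged sketch, with an illustrative helper name; mdb_put() and MDB_SUCCESS are part of the public API the deleted lmdb.h declared:

    #include <stdio.h>
    #include "lmdb.h"

    /* Store one key/data pair and report any failure in LMDB's own words. */
    static int put_one(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data)
    {
        int rc = mdb_put(txn, dbi, key, data, 0);
        if (rc != MDB_SUCCESS)
            fprintf(stderr, "mdb_put: %s\n", mdb_strerror(rc));
        return rc;
    }
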
-
-/** Allocate a page.
- * Re-use old malloc'd pages first for singletons, otherwise just malloc.
- */
-static MDB_page *
-mdb_page_malloc(MDB_txn *txn, unsigned num)
-{
-    MDB_env *env = txn->mt_env;
-    MDB_page *ret = env->me_dpages;
-    size_t sz = env->me_psize;
-    if (num == 1) {
-        if (ret) {
-            VGMEMP_ALLOC(env, ret, sz);
-            VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
-            env->me_dpages = ret->mp_next;
-            return ret;
-        }
-    } else {
-        sz *= num;
-    }
-    if ((ret = malloc(sz)) != NULL) {
-        VGMEMP_ALLOC(env, ret, sz);
-    }
-    return ret;
-}
-
-/** Free a single page.
- * Saves single pages to a list, for future reuse.
- * (This is not used for multi-page overflow pages.)
- */
-static void
-mdb_page_free(MDB_env *env, MDB_page *mp)
-{
-    mp->mp_next = env->me_dpages;
-    VGMEMP_FREE(env, mp);
-    env->me_dpages = mp;
-}
-
-/* Free a dirty page */
-static void
-mdb_dpage_free(MDB_env *env, MDB_page *dp)
-{
-    if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) {
-        mdb_page_free(env, dp);
-    } else {
-        /* large pages just get freed directly */
-        VGMEMP_FREE(env, dp);
-        free(dp);
-    }
-}
-
-/* Return all dirty pages to dpage list */
-static void
-mdb_dlist_free(MDB_txn *txn)
-{
-    MDB_env *env = txn->mt_env;
-    MDB_ID2L dl = txn->mt_u.dirty_list;
-    unsigned i, n = dl[0].mid;
-
-    for (i = 1; i <= n; i++) {
-        mdb_dpage_free(env, dl[i].mptr);
-    }
-    dl[0].mid = 0;
-}
-
-/** Find oldest txnid still referenced. Expects txn->mt_txnid > 0. */
-static txnid_t
-mdb_find_oldest(MDB_txn *txn)
-{
-    int i;
-    txnid_t mr, oldest = txn->mt_txnid - 1;
-    MDB_reader *r = txn->mt_env->me_txns->mti_readers;
-    for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) {
-        if (r[i].mr_pid) {
-            mr = r[i].mr_txnid;
-            if (oldest > mr)
-                oldest = mr;
-        }
-    }
-    return oldest;
-}
-
-/** Allocate pages for writing.
- * If there are free pages available from older transactions, they
- * will be re-used first. Otherwise a new page will be allocated.
- * @param[in] mc cursor A cursor handle identifying the transaction and
- * database for which we are allocating.
- * @param[in] num the number of pages to allocate.
- * @param[out] mp Address of the allocated page(s). Requests for multiple pages
- * will always be satisfied by a single contiguous chunk of memory.
- * @return 0 on success, non-zero on failure.
- */
-static int
-mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
-{
-#ifdef MDB_PARANOID /* Seems like we can ignore this now */
-    /* Get at most <Max_retries> more freeDB records once me_pghead
-     * has enough pages. If not enough, use new pages from the map.
-     * If <Paranoid> and mc is updating the freeDB, only get new
-     * records if me_pghead is empty. Then the freelist cannot play
-     * catch-up with itself by growing while trying to save it.
-     */
-    enum { Paranoid = 1, Max_retries = 500 };
-#else
-    enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
-#endif
-    int rc, n2 = num-1, retry = Max_retries;
-    MDB_txn *txn = mc->mc_txn;
-    MDB_env *env = txn->mt_env;
-    pgno_t pgno, *mop = env->me_pghead;
-    unsigned i, j, k, mop_len = mop ? mop[0] : 0;
-    MDB_page *np;
-    MDB_ID2 mid;
-    txnid_t oldest = 0, last;
-    MDB_cursor_op op;
-    MDB_cursor m2;
-    int (*insert)(MDB_ID2L, MDB_ID2 *);
-
-    *mp = NULL;
-
-    /* If our dirty list is already full, we can't do anything */
-    if (txn->mt_dirty_room == 0)
-        return MDB_TXN_FULL;
-
-    for (op = MDB_FIRST;; op = MDB_NEXT) {
-        MDB_val key, data;
-        MDB_node *leaf;
-        pgno_t *idl, old_id, new_id;
-
-        /* Seek a big enough contiguous page range. Prefer
-         * pages at the tail, just truncating the list.
- */ - if (mop_len >= (unsigned)num) { - i = mop_len; - do { - pgno = mop[i]; - if (mop[i-n2] == pgno+n2) - goto search_done; - } while (--i >= (unsigned)num); - if (Max_retries < INT_MAX && --retry < 0) - break; - } - - if (op == MDB_FIRST) { /* 1st iteration */ - /* Prepare to fetch more and coalesce */ - oldest = mdb_find_oldest(txn); - last = env->me_pglast; - mdb_cursor_init(&m2, txn, FREE_DBI, NULL); - if (last) { - op = MDB_SET_RANGE; - key.mv_data = &last; /* will loop up last+1 */ - key.mv_size = sizeof(last); - } - if (Paranoid && mc->mc_dbi == FREE_DBI) - retry = -1; - } - if (Paranoid && retry < 0 && mop_len) - break; - - last++; - /* Do not fetch more if the record will be too recent */ - if (oldest <= last) - break; - rc = mdb_cursor_get(&m2, &key, NULL, op); - if (rc) { - if (rc == MDB_NOTFOUND) - break; - return rc; - } - last = *(txnid_t*)key.mv_data; - if (oldest <= last) - break; - np = m2.mc_pg[m2.mc_top]; - leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); - if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS) - return rc; - - idl = (MDB_ID *) data.mv_data; - i = idl[0]; - if (!mop) { - if (!(env->me_pghead = mop = mdb_midl_alloc(i))) - return ENOMEM; - } else { - if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0) - return rc; - mop = env->me_pghead; - } - env->me_pglast = last; -#if MDB_DEBUG > 1 - DPRINTF("IDL read txn %zu root %zu num %u", - last, txn->mt_dbs[FREE_DBI].md_root, i); - for (k = i; k; k--) - DPRINTF("IDL %zu", idl[k]); -#endif - /* Merge in descending sorted order */ - j = mop_len; - k = mop_len += i; - mop[0] = (pgno_t)-1; - old_id = mop[j]; - while (i) { - new_id = idl[i--]; - for (; old_id < new_id; old_id = mop[--j]) - mop[k--] = old_id; - mop[k--] = new_id; - } - mop[0] = mop_len; - } - - /* Use new pages from the map when nothing suitable in the freeDB */ - i = 0; - pgno = txn->mt_next_pgno; - if (pgno + num >= env->me_maxpg) { - DPUTS("DB size maxed out"); - return MDB_MAP_FULL; - } - -search_done: - if (env->me_flags & MDB_WRITEMAP) { - np = (MDB_page *)(env->me_map + env->me_psize * pgno); - insert = mdb_mid2l_append; - } else { - if (!(np = mdb_page_malloc(txn, num))) - return ENOMEM; - insert = mdb_mid2l_insert; - } - if (i) { - mop[0] = mop_len -= num; - /* Move any stragglers down */ - for (j = i-num; j < mop_len; ) - mop[++j] = mop[++i]; - } else { - txn->mt_next_pgno = pgno + num; - } - mid.mid = np->mp_pgno = pgno; - mid.mptr = np; - insert(txn->mt_u.dirty_list, &mid); - txn->mt_dirty_room--; - *mp = np; - - return MDB_SUCCESS; -} - -/** Copy the used portions of a non-overflow page. - * @param[in] dst page to copy into - * @param[in] src page to copy from - * @param[in] psize size of a page - */ -static void -mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize) -{ - enum { Align = sizeof(pgno_t) }; - indx_t upper = src->mp_upper, lower = src->mp_lower, unused = upper-lower; - - /* If page isn't full, just copy the used portion. Adjust - * alignment so memcpy may copy words instead of bytes. - */ - if ((unused &= -Align) && !IS_LEAF2(src)) { - upper &= -Align; - memcpy(dst, src, (lower + (Align-1)) & -Align); - memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper), - psize - upper); - } else { - memcpy(dst, src, psize - unused); - } -} - -/** Touch a page: make it dirty and re-insert into tree with updated pgno. - * @param[in] mc cursor pointing to the page to be touched - * @return 0 on success, non-zero on failure. 
- */ -static int -mdb_page_touch(MDB_cursor *mc) -{ - MDB_page *mp = mc->mc_pg[mc->mc_top], *np; - MDB_txn *txn = mc->mc_txn; - MDB_cursor *m2, *m3; - MDB_dbi dbi; - pgno_t pgno; - int rc; - - if (!F_ISSET(mp->mp_flags, P_DIRTY)) { - if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) || - (rc = mdb_page_alloc(mc, 1, &np))) - return rc; - pgno = np->mp_pgno; - DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi,mp->mp_pgno,pgno); - assert(mp->mp_pgno != pgno); - mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); - /* Update the parent page, if any, to point to the new page */ - if (mc->mc_top) { - MDB_page *parent = mc->mc_pg[mc->mc_top-1]; - MDB_node *node = NODEPTR(parent, mc->mc_ki[mc->mc_top-1]); - SETPGNO(node, pgno); - } else { - mc->mc_db->md_root = pgno; - } - } else if (txn->mt_parent && !IS_SUBP(mp)) { - MDB_ID2 mid, *dl = txn->mt_u.dirty_list; - pgno = mp->mp_pgno; - /* If txn has a parent, make sure the page is in our - * dirty list. - */ - if (dl[0].mid) { - unsigned x = mdb_mid2l_search(dl, pgno); - if (x <= dl[0].mid && dl[x].mid == pgno) { - if (mp != dl[x].mptr) { /* bad cursor? */ - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - return MDB_CORRUPTED; - } - return 0; - } - } - assert(dl[0].mid < MDB_IDL_UM_MAX); - /* No - copy it */ - np = mdb_page_malloc(txn, 1); - if (!np) - return ENOMEM; - mid.mid = pgno; - mid.mptr = np; - mdb_mid2l_insert(dl, &mid); - } else { - return 0; - } - - mdb_page_copy(np, mp, txn->mt_env->me_psize); - np->mp_pgno = pgno; - np->mp_flags |= P_DIRTY; - - /* Adjust cursors pointing to mp */ - mc->mc_pg[mc->mc_top] = np; - dbi = mc->mc_dbi; - if (mc->mc_flags & C_SUB) { - dbi--; - for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - m3 = &m2->mc_xcursor->mx_cursor; - if (m3->mc_snum < mc->mc_snum) continue; - if (m3->mc_pg[mc->mc_top] == mp) - m3->mc_pg[mc->mc_top] = np; - } - } else { - for (m2 = txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2->mc_snum < mc->mc_snum) continue; - if (m2->mc_pg[mc->mc_top] == mp) { - m2->mc_pg[mc->mc_top] = np; - if ((mc->mc_db->md_flags & MDB_DUPSORT) && - m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) - { - MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]); - if (!(leaf->mn_flags & F_SUBDATA)) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } - } - } - } - return 0; -} - -int -mdb_env_sync(MDB_env *env, int force) -{ - int rc = 0; - if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { - if (env->me_flags & MDB_WRITEMAP) { - int flags = ((env->me_flags & MDB_MAPASYNC) && !force) - ? MS_ASYNC : MS_SYNC; - if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) - rc = ErrCode(); -#ifdef _WIN32 - else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) - rc = ErrCode(); -#endif - } else { - if (MDB_FDATASYNC(env->me_fd)) - rc = ErrCode(); - } - } - return rc; -} - -/** Back up parent txn's cursors, then grab the originals for tracking */ -static int -mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst) -{ - MDB_cursor *mc, *bk; - MDB_xcursor *mx; - size_t size; - int i; - - for (i = src->mt_numdbs; --i >= 0; ) { - if ((mc = src->mt_cursors[i]) != NULL) { - size = sizeof(MDB_cursor); - if (mc->mc_xcursor) - size += sizeof(MDB_xcursor); - for (; mc; mc = bk->mc_next) { - bk = malloc(size); - if (!bk) - return ENOMEM; - *bk = *mc; - mc->mc_backup = bk; - mc->mc_db = &dst->mt_dbs[i]; - /* Kill pointers into src - and dst to reduce abuse: The - * user may not use mc until dst ends. Otherwise we'd... 
- */ - mc->mc_txn = NULL; /* ...set this to dst */ - mc->mc_dbflag = NULL; /* ...and &dst->mt_dbflags[i] */ - if ((mx = mc->mc_xcursor) != NULL) { - *(MDB_xcursor *)(bk+1) = *mx; - mx->mx_cursor.mc_txn = NULL; /* ...and dst. */ - } - mc->mc_next = dst->mt_cursors[i]; - dst->mt_cursors[i] = mc; - } - } - } - return MDB_SUCCESS; -} - -/** Close this write txn's cursors, give parent txn's cursors back to parent. - * @param[in] txn the transaction handle. - * @param[in] merge true to keep changes to parent cursors, false to revert. - * @return 0 on success, non-zero on failure. - */ -static void -mdb_cursors_close(MDB_txn *txn, unsigned merge) -{ - MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk; - MDB_xcursor *mx; - int i; - - for (i = txn->mt_numdbs; --i >= 0; ) { - for (mc = cursors[i]; mc; mc = next) { - next = mc->mc_next; - if ((bk = mc->mc_backup) != NULL) { - if (merge) { - /* Commit changes to parent txn */ - mc->mc_next = bk->mc_next; - mc->mc_backup = bk->mc_backup; - mc->mc_txn = bk->mc_txn; - mc->mc_db = bk->mc_db; - mc->mc_dbflag = bk->mc_dbflag; - if ((mx = mc->mc_xcursor) != NULL) - mx->mx_cursor.mc_txn = bk->mc_txn; - } else { - /* Abort nested txn */ - *mc = *bk; - if ((mx = mc->mc_xcursor) != NULL) - *mx = *(MDB_xcursor *)(bk+1); - } - mc = bk; - } - free(mc); - } - cursors[i] = NULL; - } -} - -#ifdef MDB_DEBUG_SKIP -#define mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) -#endif -static void -mdb_txn_reset0(MDB_txn *txn, const char *act); - -/** Common code for #mdb_txn_begin() and #mdb_txn_renew(). - * @param[in] txn the transaction handle to initialize - * @return 0 on success, non-zero on failure. - */ -static int -mdb_txn_renew0(MDB_txn *txn) -{ - MDB_env *env = txn->mt_env; - unsigned int i; - uint16_t x; - int rc, new_notls = 0; - - /* Setup db info */ - txn->mt_numdbs = env->me_numdbs; - txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ - - if (txn->mt_flags & MDB_TXN_RDONLY) { - if (!env->me_txns) { - i = mdb_env_pick_meta(env); - txn->mt_txnid = env->me_metas[i]->mm_txnid; - txn->mt_u.reader = NULL; - } else { - MDB_reader *r = (env->me_flags & MDB_NOTLS) ? 
txn->mt_u.reader :
-            pthread_getspecific(env->me_txkey);
-        if (r) {
-            if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
-                return MDB_BAD_RSLOT;
-        } else {
-            pid_t pid = env->me_pid;
-            pthread_t tid = pthread_self();
-
-            LOCK_MUTEX_R(env);
-            for (i=0; i<env->me_txns->mti_numreaders; i++)
-                if (env->me_txns->mti_readers[i].mr_pid == 0)
-                    break;
-            if (i == env->me_maxreaders) {
-                UNLOCK_MUTEX_R(env);
-                return MDB_READERS_FULL;
-            }
-            env->me_txns->mti_readers[i].mr_pid = pid;
-            env->me_txns->mti_readers[i].mr_tid = tid;
-            if (i >= env->me_txns->mti_numreaders)
-                env->me_txns->mti_numreaders = i+1;
-            /* Save numreaders for un-mutexed mdb_env_close() */
-            env->me_numreaders = env->me_txns->mti_numreaders;
-            UNLOCK_MUTEX_R(env);
-            r = &env->me_txns->mti_readers[i];
-            new_notls = (env->me_flags & MDB_NOTLS);
-            if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
-                r->mr_pid = 0;
-                return rc;
-            }
-        }
-        txn->mt_txnid = r->mr_txnid = env->me_txns->mti_txnid;
-        txn->mt_u.reader = r;
-        }
-        txn->mt_toggle = txn->mt_txnid & 1;
-    } else {
-        LOCK_MUTEX_W(env);
-
-        txn->mt_txnid = env->me_txns->mti_txnid;
-        txn->mt_toggle = txn->mt_txnid & 1;
-        txn->mt_txnid++;
-#if MDB_DEBUG
-        if (txn->mt_txnid == mdb_debug_start)
-            mdb_debug = 1;
-#endif
-        txn->mt_dirty_room = MDB_IDL_UM_MAX;
-        txn->mt_u.dirty_list = env->me_dirty_list;
-        txn->mt_u.dirty_list[0].mid = 0;
-        txn->mt_free_pgs = env->me_free_pgs;
-        txn->mt_free_pgs[0] = 0;
-        env->me_txn = txn;
-    }
-
-    /* Copy the DB info and flags */
-    memcpy(txn->mt_dbs, env->me_metas[txn->mt_toggle]->mm_dbs, 2 * sizeof(MDB_db));
-
-    /* Moved to here to avoid a data race in read TXNs */
-    txn->mt_next_pgno = env->me_metas[txn->mt_toggle]->mm_last_pg+1;
-
-    for (i=2; i<txn->mt_numdbs; i++) {
-        x = env->me_dbflags[i];
-        txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
-        txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
-    }
-    txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
-
-    if (env->me_maxpg < txn->mt_next_pgno) {
-        mdb_txn_reset0(txn, "renew0-mapfail");
-        if (new_notls) {
-            txn->mt_u.reader->mr_pid = 0;
-            txn->mt_u.reader = NULL;
-        }
-        return MDB_MAP_RESIZED;
-    }
-
-    return MDB_SUCCESS;
-}
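mdb_txn_renew0() above is the engine behind the reset/renew idiom for read-only transactions: parking a reader keeps its table slot and its malloc'd txn, so re-binding to a new snapshot is cheap. A sketch of the calling pattern, with error checks elided and do_reads() a hypothetical placeholder for the caller's work:

    #include "lmdb.h"

    extern void do_reads(MDB_txn *txn);  /* hypothetical application code */

    void reader_loop(MDB_env *env)
    {
        MDB_txn *txn;
        mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
        for (;;) {
            do_reads(txn);       /* operate on a stable snapshot */
            mdb_txn_reset(txn);  /* release snapshot, keep slot + allocation */
            /* ... unrelated work; writers may now reclaim our old pages ... */
            mdb_txn_renew(txn);  /* rebind to the newest snapshot */
        }
    }
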
-
-int
-mdb_txn_renew(MDB_txn *txn)
-{
-    int rc;
-
-    if (!txn || txn->mt_dbxs) /* A reset txn has mt_dbxs==NULL */
-        return EINVAL;
-
-    if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
-        DPUTS("environment had fatal error, must shutdown!");
-        return MDB_PANIC;
-    }
-
-    rc = mdb_txn_renew0(txn);
-    if (rc == MDB_SUCCESS) {
-        DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu",
-            txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
-            (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
-    }
-    return rc;
-}
-
-int
-mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
-{
-    MDB_txn *txn;
-    MDB_ntxn *ntxn;
-    int rc, size, tsize = sizeof(MDB_txn);
-
-    if (env->me_flags & MDB_FATAL_ERROR) {
-        DPUTS("environment had fatal error, must shutdown!");
-        return MDB_PANIC;
-    }
-    if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY))
-        return EACCES;
-    if (parent) {
-        /* Nested transactions: Max 1 child, write txns only, no writemap */
-        if (parent->mt_child ||
-            (flags & MDB_RDONLY) || (parent->mt_flags & MDB_TXN_RDONLY) ||
-            (env->me_flags & MDB_WRITEMAP))
-        {
-            return EINVAL;
-        }
-        tsize = sizeof(MDB_ntxn);
-    }
-    size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
-    if (!(flags & MDB_RDONLY))
-        size += env->me_maxdbs * sizeof(MDB_cursor *);
-
-    if ((txn = calloc(1, size)) == NULL) {
-        DPRINTF("calloc: %s", strerror(ErrCode()));
-        return ENOMEM;
-    }
-    txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
-    if (flags & MDB_RDONLY) {
-        txn->mt_flags |= MDB_TXN_RDONLY;
-        txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
-    } else {
-        txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
-        txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
-    }
-    txn->mt_env = env;
-
-    if (parent) {
-        unsigned int i;
-        txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
-        if (!txn->mt_u.dirty_list ||
-            !(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)))
-        {
-            free(txn->mt_u.dirty_list);
-            free(txn);
-            return ENOMEM;
-        }
-        txn->mt_txnid = parent->mt_txnid;
-        txn->mt_toggle = parent->mt_toggle;
-        txn->mt_dirty_room = parent->mt_dirty_room;
-        txn->mt_u.dirty_list[0].mid = 0;
-        txn->mt_next_pgno = parent->mt_next_pgno;
-        parent->mt_child = txn;
-        txn->mt_parent = parent;
-        txn->mt_numdbs = parent->mt_numdbs;
-        txn->mt_flags = parent->mt_flags;
-        txn->mt_dbxs = parent->mt_dbxs;
-        memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
-        /* Copy parent's mt_dbflags, but clear DB_NEW */
-        for (i=0; i<txn->mt_numdbs; i++)
-            txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~DB_NEW;
-        rc = 0;
-        ntxn = (MDB_ntxn *)txn;
-        ntxn->mnt_pgstate = env->me_pgstate; /* save parent me_pghead & co */
-        if (env->me_pghead) {
-            size = MDB_IDL_SIZEOF(env->me_pghead);
-            env->me_pghead = mdb_midl_alloc(env->me_pghead[0]);
-            if (env->me_pghead)
-                memcpy(env->me_pghead, ntxn->mnt_pgstate.mf_pghead, size);
-            else
-                rc = ENOMEM;
-        }
-        if (!rc)
-            rc = mdb_cursor_shadow(parent, txn);
-        if (rc)
-            mdb_txn_reset0(txn, "beginchild-fail");
-    } else {
-        rc = mdb_txn_renew0(txn);
-    }
-    if (rc)
-        free(txn);
-    else {
-        *ret = txn;
-        DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu",
-            txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
-            (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
-    }
-
-    return rc;
-}
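The parent checks above mean a child write transaction can be speculatively committed or rolled back without disturbing the parent. A sketch under the stated constraints (write txns only, no MDB_WRITEMAP, one child at a time); the helper name is illustrative and the env/dbi setup is assumed:

    #include "lmdb.h"

    /* Apply one update in a child txn; on failure the parent is untouched. */
    int try_update(MDB_env *env, MDB_dbi dbi, MDB_val *key, MDB_val *val)
    {
        MDB_txn *parent, *child;
        int rc = mdb_txn_begin(env, NULL, 0, &parent);
        if (rc) return rc;
        rc = mdb_txn_begin(env, parent, 0, &child);   /* nested write txn */
        if (rc == 0) {
            rc = mdb_put(child, dbi, key, val, 0);
            if (rc == 0)
                rc = mdb_txn_commit(child);   /* folds pages into parent */
            else
                mdb_txn_abort(child);
        }
        if (rc == 0)
            rc = mdb_txn_commit(parent);
        else
            mdb_txn_abort(parent);
        return rc;
    }
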
*/ -static void -mdb_dbis_update(MDB_txn *txn, int keep) -{ - int i; - MDB_dbi n = txn->mt_numdbs; - MDB_env *env = txn->mt_env; - unsigned char *tdbflags = txn->mt_dbflags; - - for (i = n; --i >= 2;) { - if (tdbflags[i] & DB_NEW) { - if (keep) { - env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; - } else { - char *ptr = env->me_dbxs[i].md_name.mv_data; - env->me_dbxs[i].md_name.mv_data = NULL; - env->me_dbxs[i].md_name.mv_size = 0; - env->me_dbflags[i] = 0; - free(ptr); - } - } - } - if (keep && env->me_numdbs < n) - env->me_numdbs = n; -} - -/** Common code for #mdb_txn_reset() and #mdb_txn_abort(). - * May be called twice for readonly txns: First reset it, then abort. - * @param[in] txn the transaction handle to reset - */ -static void -mdb_txn_reset0(MDB_txn *txn, const char *act) -{ - MDB_env *env = txn->mt_env; - - /* Close any DBI handles opened in this txn */ - mdb_dbis_update(txn, 0); - - DPRINTF("%s txn %zu%c %p on mdbenv %p, root page %zu", - act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', - (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); - - if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { - if (txn->mt_u.reader) { - txn->mt_u.reader->mr_txnid = (txnid_t)-1; - if (!(env->me_flags & MDB_NOTLS)) - txn->mt_u.reader = NULL; /* txn does not own reader */ - } - txn->mt_numdbs = 0; /* close nothing if called again */ - txn->mt_dbxs = NULL; /* mark txn as reset */ - } else { - mdb_cursors_close(txn, 0); - - if (!(env->me_flags & MDB_WRITEMAP)) { - mdb_dlist_free(txn); - } - mdb_midl_free(env->me_pghead); - - if (txn->mt_parent) { - txn->mt_parent->mt_child = NULL; - env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; - mdb_midl_free(txn->mt_free_pgs); - free(txn->mt_u.dirty_list); - return; - } - - if (mdb_midl_shrink(&txn->mt_free_pgs)) - env->me_free_pgs = txn->mt_free_pgs; - env->me_pghead = NULL; - env->me_pglast = 0; - - env->me_txn = NULL; - /* The writer mutex was locked in mdb_txn_begin. */ - UNLOCK_MUTEX_W(env); - } -} - -void -mdb_txn_reset(MDB_txn *txn) -{ - if (txn == NULL) - return; - - /* This call is only valid for read-only txns */ - if (!(txn->mt_flags & MDB_TXN_RDONLY)) - return; - - mdb_txn_reset0(txn, "reset"); -} - -void -mdb_txn_abort(MDB_txn *txn) -{ - if (txn == NULL) - return; - - if (txn->mt_child) - mdb_txn_abort(txn->mt_child); - - mdb_txn_reset0(txn, "abort"); - /* Free reader slot tied to this txn (if MDB_NOTLS && writable FS) */ - if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader) - txn->mt_u.reader->mr_pid = 0; - - free(txn); -} - -/** Save the freelist as of this transaction to the freeDB. - * This changes the freelist. Keep trying until it stabilizes. - */ -static int -mdb_freelist_save(MDB_txn *txn) -{ - /* env->me_pghead[] can grow and shrink during this call. - * env->me_pglast and txn->mt_free_pgs[] can only grow. - * Page numbers cannot disappear from txn->mt_free_pgs[]. 
- */ - MDB_cursor mc; - MDB_env *env = txn->mt_env; - int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1; - txnid_t pglast = 0, head_id = 0; - pgno_t freecnt = 0, *free_pgs, *mop; - ssize_t head_room = 0, total_room = 0, mop_len; - - mdb_cursor_init(&mc, txn, FREE_DBI, NULL); - - if (env->me_pghead) { - /* Make sure first page of freeDB is touched and on freelist */ - rc = mdb_page_search(&mc, NULL, MDB_PS_MODIFY); - if (rc && rc != MDB_NOTFOUND) - return rc; - } - - for (;;) { - /* Come back here after each Put() in case freelist changed */ - MDB_val key, data; - - /* If using records from freeDB which we have not yet - * deleted, delete them and any we reserved for me_pghead. - */ - while (pglast < env->me_pglast) { - rc = mdb_cursor_first(&mc, &key, NULL); - if (rc) - return rc; - pglast = head_id = *(txnid_t *)key.mv_data; - total_room = head_room = 0; - assert(pglast <= env->me_pglast); - rc = mdb_cursor_del(&mc, 0); - if (rc) - return rc; - } - - /* Save the IDL of pages freed by this txn, to a single record */ - if (freecnt < txn->mt_free_pgs[0]) { - if (!freecnt) { - /* Make sure last page of freeDB is touched and on freelist */ - key.mv_size = MDB_MAXKEYSIZE+1; - key.mv_data = NULL; - rc = mdb_page_search(&mc, &key, MDB_PS_MODIFY); - if (rc && rc != MDB_NOTFOUND) - return rc; - } - free_pgs = txn->mt_free_pgs; - /* Write to last page of freeDB */ - key.mv_size = sizeof(txn->mt_txnid); - key.mv_data = &txn->mt_txnid; - do { - freecnt = free_pgs[0]; - data.mv_size = MDB_IDL_SIZEOF(free_pgs); - rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); - if (rc) - return rc; - /* Retry if mt_free_pgs[] grew during the Put() */ - free_pgs = txn->mt_free_pgs; - } while (freecnt < free_pgs[0]); - mdb_midl_sort(free_pgs); - memcpy(data.mv_data, free_pgs, data.mv_size); -#if MDB_DEBUG > 1 - { - unsigned int i = free_pgs[0]; - DPRINTF("IDL write txn %zu root %zu num %u", - txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i); - for (; i; i--) - DPRINTF("IDL %zu", free_pgs[i]); - } -#endif - continue; - } - - mop = env->me_pghead; - mop_len = mop ? mop[0] : 0; - - /* Reserve records for me_pghead[]. Split it if multi-page, - * to avoid searching freeDB for a page range. Use keys in - * range [1,me_pglast]: Smaller than txnid of oldest reader. - */ - if (total_room >= mop_len) { - if (total_room == mop_len || --more < 0) - break; - } else if (head_room >= maxfree_1pg && head_id > 1) { - /* Keep current record (overflow page), add a new one */ - head_id--; - head_room = 0; - } - /* (Re)write {key = head_id, IDL length = head_room} */ - total_room -= head_room; - head_room = mop_len - total_room; - if (head_room > maxfree_1pg && head_id > 1) { - /* Overflow multi-page for part of me_pghead */ - head_room /= head_id; /* amortize page sizes */ - head_room += maxfree_1pg - head_room % (maxfree_1pg + 1); - } else if (head_room < 0) { - /* Rare case, not bothering to delete this record */ - head_room = 0; - } - key.mv_size = sizeof(head_id); - key.mv_data = &head_id; - data.mv_size = (head_room + 1) * sizeof(pgno_t); - rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE); - if (rc) - return rc; - *(MDB_ID *)data.mv_data = 0; /* IDL is initially empty */ - total_room += head_room; - } - - /* Fill in the reserved, touched me_pghead records. Avoid write ops - * so they cannot rearrange anything, just read the destinations. 
-	 */
-	rc = MDB_SUCCESS;
-	if (mop_len) {
-		MDB_val key, data;
-
-		mop += mop_len + 1;
-		rc = mdb_cursor_first(&mc, &key, &data);
-		for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
-			MDB_IDL dest = data.mv_data;
-			ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
-
-			assert(len >= 0 && *(txnid_t*)key.mv_data <= env->me_pglast);
-			if (len > mop_len)
-				len = mop_len;
-			*dest++ = len;
-			memcpy(dest, mop -= len, len * sizeof(MDB_ID));
-			if (! (mop_len -= len))
-				break;
-		}
-	}
-	return rc;
-}
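/* [Editor's note, not part of the original patch.] The outer for(;;) in
 * mdb_freelist_save() above exists because persisting the freelist can
 * itself dirty and free pages, growing txn->mt_free_pgs while it is being
 * written. The inner do/while is the stabilization step; schematically
 * (variable names here are illustrative):
 *
 *	do {
 *		before = free_pgs[0];			// entries to persist
 *		mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);	// may grow the list
 *		free_pgs = txn->mt_free_pgs;		// re-read; the Put may realloc
 *	} while (before < free_pgs[0]);		// grew during the Put? write again
 */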
-
-/** Flush dirty pages to the map, after clearing their dirty flag.
- */
-static int
-mdb_page_flush(MDB_txn *txn)
-{
-	MDB_env *env = txn->mt_env;
-	MDB_ID2L dl = txn->mt_u.dirty_list;
-	unsigned psize = env->me_psize;
-	int i, pagecount = dl[0].mid, rc;
-	size_t size = 0, pos = 0;
-	pgno_t pgno = 0;
-	MDB_page *dp = NULL;
-#ifdef _WIN32
-	OVERLAPPED ov;
-#else
-	struct iovec iov[MDB_COMMIT_PAGES];
-	ssize_t wpos = 0, wsize = 0, wres;
-	size_t next_pos = 1; /* impossible pos, so pos != next_pos */
-	int n = 0;
-#endif
-
-	if (env->me_flags & MDB_WRITEMAP) {
-		/* Clear dirty flags */
-		for (i = pagecount; i; i--) {
-			dp = dl[i].mptr;
-			dp->mp_flags &= ~P_DIRTY;
-		}
-		dl[0].mid = 0;
-		return MDB_SUCCESS;
-	}
-
-	/* Write the pages */
-	for (i = 1;; i++) {
-		if (i <= pagecount) {
-			dp = dl[i].mptr;
-			pgno = dl[i].mid;
-			/* clear dirty flag */
-			dp->mp_flags &= ~P_DIRTY;
-			pos = pgno * psize;
-			size = psize;
-			if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
-		}
-#ifdef _WIN32
-		else break;
-
-		/* Windows actually supports scatter/gather I/O, but only on
-		 * unbuffered file handles. Since we're relying on the OS page
-		 * cache for all our data, that's self-defeating. So we just
-		 * write pages one at a time. We use the ov structure to set
-		 * the write offset, to at least save the overhead of a Seek
-		 * system call.
-		 */
-		DPRINTF("committing page %zu", pgno);
-		memset(&ov, 0, sizeof(ov));
-		ov.Offset = pos & 0xffffffff;
-		ov.OffsetHigh = pos >> 16 >> 16;
-		if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
-			rc = ErrCode();
-			DPRINTF("WriteFile: %d", rc);
-			return rc;
-		}
-#else
-		/* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
-		if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
-			if (n) {
-				/* Write previous page(s) */
-#ifdef MDB_USE_PWRITEV
-				wres = pwritev(env->me_fd, iov, n, wpos);
-#else
-				if (n == 1) {
-					wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
-				} else {
-					if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
-						rc = ErrCode();
-						DPRINTF("lseek: %s", strerror(rc));
-						return rc;
-					}
-					wres = writev(env->me_fd, iov, n);
-				}
-#endif
-				if (wres != wsize) {
-					if (wres < 0) {
-						rc = ErrCode();
-						DPRINTF("Write error: %s", strerror(rc));
-					} else {
-						rc = EIO; /* TODO: Use which error code? */
-						DPUTS("short write, filesystem full?");
-					}
-					return rc;
-				}
-				n = 0;
-			}
-			if (i > pagecount)
-				break;
-			wpos = pos;
-			wsize = 0;
-		}
-		DPRINTF("committing page %zu", pgno);
-		next_pos = pos + size;
-		iov[n].iov_len = size;
-		iov[n].iov_base = (char *)dp;
-		wsize += size;
-		n++;
-#endif	/* _WIN32 */
-	}
-
-	mdb_dlist_free(txn);
-
-	return MDB_SUCCESS;
-}
-
-int
-mdb_txn_commit(MDB_txn *txn)
-{
-	int rc;
-	unsigned int i;
-	MDB_env *env;
-
-	assert(txn != NULL);
-	assert(txn->mt_env != NULL);
-
-	if (txn->mt_child) {
-		rc = mdb_txn_commit(txn->mt_child);
-		txn->mt_child = NULL;
-		if (rc)
-			goto fail;
-	}
-
-	env = txn->mt_env;
-
-	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
-		mdb_dbis_update(txn, 1);
-		txn->mt_numdbs = 2; /* so txn_abort() doesn't close any new handles */
-		mdb_txn_abort(txn);
-		return MDB_SUCCESS;
-	}
-
-	if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
-		DPUTS("error flag is set, can't commit");
-		if (txn->mt_parent)
-			txn->mt_parent->mt_flags |= MDB_TXN_ERROR;
-		rc = EINVAL;
-		goto fail;
-	}
-
-	if (txn->mt_parent) {
-		MDB_txn *parent = txn->mt_parent;
-		unsigned x, y, len;
-		MDB_ID2L dst, src;
-
-		/* Append our free list to parent's */
-		rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs);
-		if (rc)
-			goto fail;
-		mdb_midl_free(txn->mt_free_pgs);
-
-		parent->mt_next_pgno = txn->mt_next_pgno;
-		parent->mt_flags = txn->mt_flags;
-
-		/* Merge our cursors into parent's and close them */
-		mdb_cursors_close(txn, 1);
-
-		/* Update parent's DB table. */
-		memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
-		txn->mt_parent->mt_numdbs = txn->mt_numdbs;
-		txn->mt_parent->mt_dbflags[0] = txn->mt_dbflags[0];
-		txn->mt_parent->mt_dbflags[1] = txn->mt_dbflags[1];
-		for (i=2; i<txn->mt_numdbs; i++) {
-			/* preserve parent's DB_NEW status */
-			x = txn->mt_parent->mt_dbflags[i] & DB_NEW;
-			txn->mt_parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
-		}
-
-		dst = txn->mt_parent->mt_u.dirty_list;
-		src = txn->mt_u.dirty_list;
-		/* Find len = length of merging our dirty list with parent's */
-		x = dst[0].mid;
-		dst[0].mid = 0;		/* simplify loops */
-		if (parent->mt_parent) {
-			len = x + src[0].mid;
-			y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
-			for (i = x; y && i; y--) {
-				pgno_t yp = src[y].mid;
-				while (yp < dst[i].mid)
-					i--;
-				if (yp == dst[i].mid) {
-					i--;
-					len--;
-				}
-			}
-		} else { /* Simplify the above for single-ancestor case */
-			len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
-		}
-		/* Merge our dirty list with parent's */
-		y = src[0].mid;
-		for (i = len; y; dst[i--] = src[y--]) {
-			pgno_t yp = src[y].mid;
-			while (yp < dst[x].mid)
-				dst[i--] = dst[x--];
-			if (yp == dst[x].mid)
-				free(dst[x--].mptr);
-		}
-		assert(i == x);
-		dst[0].mid = len;
-		free(txn->mt_u.dirty_list);
-		parent->mt_dirty_room = txn->mt_dirty_room;
-
-		txn->mt_parent->mt_child = NULL;
-		mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
-		free(txn);
-		return MDB_SUCCESS;
-	}
-
-	if (txn != env->me_txn) {
-		DPUTS("attempt to commit unknown transaction");
-		rc = EINVAL;
-		goto fail;
-	}
-
-	mdb_cursors_close(txn, 0);
-
-	if (!txn->mt_u.dirty_list[0].mid && !(txn->mt_flags & MDB_TXN_DIRTY))
-		goto done;
-
-	DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu",
-		txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root);
-
-	/* Update DB root pointers */
-	if (txn->mt_numdbs > 2) {
-		MDB_cursor mc;
-		MDB_dbi i;
-		MDB_val data;
-		data.mv_size = sizeof(MDB_db);
-
-		mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
-		for (i = 2; i < txn->mt_numdbs; i++) {
-			if
(txn->mt_dbflags[i] & DB_DIRTY) { - data.mv_data = &txn->mt_dbs[i]; - rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0); - if (rc) - goto fail; - } - } - } - - rc = mdb_freelist_save(txn); - if (rc) - goto fail; - - mdb_midl_free(env->me_pghead); - env->me_pghead = NULL; - if (mdb_midl_shrink(&txn->mt_free_pgs)) - env->me_free_pgs = txn->mt_free_pgs; - -#if MDB_DEBUG > 2 - mdb_audit(txn); -#endif - - if ((rc = mdb_page_flush(txn)) || - (rc = mdb_env_sync(env, 0)) || - (rc = mdb_env_write_meta(txn))) - goto fail; - -done: - env->me_pglast = 0; - env->me_txn = NULL; - mdb_dbis_update(txn, 1); - - UNLOCK_MUTEX_W(env); - free(txn); - - return MDB_SUCCESS; - -fail: - mdb_txn_abort(txn); - return rc; -} - -/** Read the environment parameters of a DB environment before - * mapping it into memory. - * @param[in] env the environment handle - * @param[out] meta address of where to store the meta information - * @return 0 on success, non-zero on failure. - */ -static int -mdb_env_read_header(MDB_env *env, MDB_meta *meta) -{ - MDB_pagebuf pbuf; - MDB_page *p; - MDB_meta *m; - int i, rc, off; - - /* We don't know the page size yet, so use a minimum value. - * Read both meta pages so we can use the latest one. - */ - - for (i=off=0; i<2; i++, off = meta->mm_psize) { -#ifdef _WIN32 - DWORD len; - OVERLAPPED ov; - memset(&ov, 0, sizeof(ov)); - ov.Offset = off; - rc = ReadFile(env->me_fd,&pbuf,MDB_PAGESIZE,&len,&ov) ? (int)len : -1; -#else - rc = pread(env->me_fd, &pbuf, MDB_PAGESIZE, off); -#endif - if (rc != MDB_PAGESIZE) { - if (rc == 0 && off == 0) - return ENOENT; - rc = rc < 0 ? (int) ErrCode() : MDB_INVALID; - DPRINTF("read: %s", mdb_strerror(rc)); - return rc; - } - - p = (MDB_page *)&pbuf; - - if (!F_ISSET(p->mp_flags, P_META)) { - DPRINTF("page %zu not a meta page", p->mp_pgno); - return MDB_INVALID; - } - - m = METADATA(p); - if (m->mm_magic != MDB_MAGIC) { - DPUTS("meta has invalid magic"); - return MDB_INVALID; - } - - if (m->mm_version != MDB_VERSION) { - DPRINTF("database is version %u, expected version %u", - m->mm_version, MDB_VERSION); - return MDB_VERSION_MISMATCH; - } - - if (off == 0 || m->mm_txnid > meta->mm_txnid) - *meta = *m; - } - return 0; -} - -/** Write the environment parameters of a freshly created DB environment. - * @param[in] env the environment handle - * @param[out] meta address of where to store the meta information - * @return 0 on success, non-zero on failure. - */ -static int -mdb_env_init_meta(MDB_env *env, MDB_meta *meta) -{ - MDB_page *p, *q; - int rc; - unsigned int psize; - - DPUTS("writing new meta page"); - - GET_PAGESIZE(psize); - - meta->mm_magic = MDB_MAGIC; - meta->mm_version = MDB_VERSION; - meta->mm_mapsize = env->me_mapsize; - meta->mm_psize = psize; - meta->mm_last_pg = 1; - meta->mm_flags = env->me_flags & 0xffff; - meta->mm_flags |= MDB_INTEGERKEY; - meta->mm_dbs[0].md_root = P_INVALID; - meta->mm_dbs[1].md_root = P_INVALID; - - p = calloc(2, psize); - p->mp_pgno = 0; - p->mp_flags = P_META; - *(MDB_meta *)METADATA(p) = *meta; - - q = (MDB_page *)((char *)p + psize); - q->mp_pgno = 1; - q->mp_flags = P_META; - *(MDB_meta *)METADATA(q) = *meta; - -#ifdef _WIN32 - { - DWORD len; - OVERLAPPED ov; - memset(&ov, 0, sizeof(ov)); - rc = WriteFile(env->me_fd, p, psize * 2, &len, &ov); - rc = rc ? (len == psize * 2 ? MDB_SUCCESS : EIO) : ErrCode(); - } -#else - rc = pwrite(env->me_fd, p, psize * 2, 0); - rc = (rc == (int)psize * 2) ? MDB_SUCCESS : rc < 0 ? 
ErrCode() : EIO; -#endif - free(p); - return rc; -} - -/** Update the environment info to commit a transaction. - * @param[in] txn the transaction that's being committed - * @return 0 on success, non-zero on failure. - */ -static int -mdb_env_write_meta(MDB_txn *txn) -{ - MDB_env *env; - MDB_meta meta, metab, *mp; - off_t off; - int rc, len, toggle; - char *ptr; - HANDLE mfd; -#ifdef _WIN32 - OVERLAPPED ov; -#else - int r2; -#endif - - assert(txn != NULL); - assert(txn->mt_env != NULL); - - toggle = !txn->mt_toggle; - DPRINTF("writing meta page %d for root page %zu", - toggle, txn->mt_dbs[MAIN_DBI].md_root); - - env = txn->mt_env; - mp = env->me_metas[toggle]; - - if (env->me_flags & MDB_WRITEMAP) { - /* Persist any increases of mapsize config */ - if (env->me_mapsize > mp->mm_mapsize) - mp->mm_mapsize = env->me_mapsize; - mp->mm_dbs[0] = txn->mt_dbs[0]; - mp->mm_dbs[1] = txn->mt_dbs[1]; - mp->mm_last_pg = txn->mt_next_pgno - 1; - mp->mm_txnid = txn->mt_txnid; - if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) { - rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; - ptr = env->me_map; - if (toggle) - ptr += env->me_psize; - if (MDB_MSYNC(ptr, env->me_psize, rc)) { - rc = ErrCode(); - goto fail; - } - } - goto done; - } - metab.mm_txnid = env->me_metas[toggle]->mm_txnid; - metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg; - - ptr = (char *)&meta; - if (env->me_mapsize > mp->mm_mapsize) { - /* Persist any increases of mapsize config */ - meta.mm_mapsize = env->me_mapsize; - off = offsetof(MDB_meta, mm_mapsize); - } else { - off = offsetof(MDB_meta, mm_dbs[0].md_depth); - } - len = sizeof(MDB_meta) - off; - - ptr += off; - meta.mm_dbs[0] = txn->mt_dbs[0]; - meta.mm_dbs[1] = txn->mt_dbs[1]; - meta.mm_last_pg = txn->mt_next_pgno - 1; - meta.mm_txnid = txn->mt_txnid; - - if (toggle) - off += env->me_psize; - off += PAGEHDRSZ; - - /* Write to the SYNC fd */ - mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? - env->me_fd : env->me_mfd; -#ifdef _WIN32 - { - memset(&ov, 0, sizeof(ov)); - ov.Offset = off; - if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) - rc = -1; - } -#else - rc = pwrite(mfd, ptr, len, off); -#endif - if (rc != len) { - rc = rc < 0 ? ErrCode() : EIO; - DPUTS("write failed, disk error?"); - /* On a failure, the pagecache still contains the new data. - * Write some old data back, to prevent it from being used. - * Use the non-SYNC fd; we know it will fail anyway. - */ - meta.mm_last_pg = metab.mm_last_pg; - meta.mm_txnid = metab.mm_txnid; -#ifdef _WIN32 - memset(&ov, 0, sizeof(ov)); - ov.Offset = off; - WriteFile(env->me_fd, ptr, len, NULL, &ov); -#else - r2 = pwrite(env->me_fd, ptr, len, off); -#endif -fail: - env->me_flags |= MDB_FATAL_ERROR; - return rc; - } -done: - /* Memory ordering issues are irrelevant; since the entire writer - * is wrapped by wmutex, all of these changes will become visible - * after the wmutex is unlocked. Since the DB is multi-version, - * readers will get consistent data regardless of how fresh or - * how stale their view of these values is. - */ - env->me_txns->mti_txnid = txn->mt_txnid; - - return MDB_SUCCESS; -} - -/** Check both meta pages to see which one is newer. - * @param[in] env the environment handle - * @return meta toggle (0 or 1). 
- */ -static int -mdb_env_pick_meta(const MDB_env *env) -{ - return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); -} - -int -mdb_env_create(MDB_env **env) -{ - MDB_env *e; - - e = calloc(1, sizeof(MDB_env)); - if (!e) - return ENOMEM; - - e->me_maxreaders = DEFAULT_READERS; - e->me_maxdbs = e->me_numdbs = 2; - e->me_fd = INVALID_HANDLE_VALUE; - e->me_lfd = INVALID_HANDLE_VALUE; - e->me_mfd = INVALID_HANDLE_VALUE; -#ifdef MDB_USE_POSIX_SEM - e->me_rmutex = SEM_FAILED; - e->me_wmutex = SEM_FAILED; -#endif - e->me_pid = getpid(); - VGMEMP_CREATE(e,0,0); - *env = e; - return MDB_SUCCESS; -} - -int -mdb_env_set_mapsize(MDB_env *env, size_t size) -{ - if (env->me_map) - return EINVAL; - env->me_mapsize = size; - if (env->me_psize) - env->me_maxpg = env->me_mapsize / env->me_psize; - return MDB_SUCCESS; -} - -int -mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs) -{ - if (env->me_map) - return EINVAL; - env->me_maxdbs = dbs + 2; /* Named databases + main and free DB */ - return MDB_SUCCESS; -} - -int -mdb_env_set_maxreaders(MDB_env *env, unsigned int readers) -{ - if (env->me_map || readers < 1) - return EINVAL; - env->me_maxreaders = readers; - return MDB_SUCCESS; -} - -int -mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers) -{ - if (!env || !readers) - return EINVAL; - *readers = env->me_maxreaders; - return MDB_SUCCESS; -} - -/** Further setup required for opening an MDB environment - */ -static int -mdb_env_open2(MDB_env *env) -{ - unsigned int flags = env->me_flags; - int i, newenv = 0; - MDB_meta meta; - MDB_page *p; -#ifndef _WIN32 - int prot; -#endif - - memset(&meta, 0, sizeof(meta)); - - if ((i = mdb_env_read_header(env, &meta)) != 0) { - if (i != ENOENT) - return i; - DPUTS("new mdbenv"); - newenv = 1; - } - - /* Was a mapsize configured? */ - if (!env->me_mapsize) { - /* If this is a new environment, take the default, - * else use the size recorded in the existing env. - */ - env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize; - } else if (env->me_mapsize < meta.mm_mapsize) { - /* If the configured size is smaller, make sure it's - * still big enough. Silently round up to minimum if not. - */ - size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize; - if (env->me_mapsize < minsize) - env->me_mapsize = minsize; - } - -#ifdef _WIN32 - { - int rc; - HANDLE mh; - LONG sizelo, sizehi; - sizelo = env->me_mapsize & 0xffffffff; - sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */ - /* Windows won't create mappings for zero length files. - * Just allocate the maxsize right now. - */ - if (newenv) { - if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo - || !SetEndOfFile(env->me_fd) - || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) - return ErrCode(); - } - mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ? - PAGE_READWRITE : PAGE_READONLY, - sizehi, sizelo, NULL); - if (!mh) - return ErrCode(); - env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ? - FILE_MAP_WRITE : FILE_MAP_READ, - 0, 0, env->me_mapsize, meta.mm_address); - rc = env->me_map ? 0 : ErrCode(); - CloseHandle(mh); - if (rc) - return rc; - } -#else - i = MAP_SHARED; - prot = PROT_READ; - if (flags & MDB_WRITEMAP) { - prot |= PROT_WRITE; - if (ftruncate(env->me_fd, env->me_mapsize) < 0) - return ErrCode(); - } - env->me_map = mmap(meta.mm_address, env->me_mapsize, prot, i, - env->me_fd, 0); - if (env->me_map == MAP_FAILED) { - env->me_map = NULL; - return ErrCode(); - } - /* Turn off readahead. It's harmful when the DB is larger than RAM. 
 */
-#ifdef MADV_RANDOM
-	madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
-#else
-#ifdef POSIX_MADV_RANDOM
-	posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
-#endif /* POSIX_MADV_RANDOM */
-#endif /* MADV_RANDOM */
-#endif /* _WIN32 */
-
-	if (newenv) {
-		if (flags & MDB_FIXEDMAP)
-			meta.mm_address = env->me_map;
-		i = mdb_env_init_meta(env, &meta);
-		if (i != MDB_SUCCESS) {
-			return i;
-		}
-	} else if (meta.mm_address && env->me_map != meta.mm_address) {
-		/* Can happen because the address argument to mmap() is just a
-		 * hint. mmap() can pick another, e.g. if the range is in use.
-		 * The MAP_FIXED flag would prevent that, but then mmap could
-		 * instead unmap existing pages to make room for the new map.
-		 */
-		return EBUSY;	/* TODO: Make a new MDB_* error code? */
-	}
-	env->me_psize = meta.mm_psize;
-	env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
-	env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
-
-	env->me_maxpg = env->me_mapsize / env->me_psize;
-
-	p = (MDB_page *)env->me_map;
-	env->me_metas[0] = METADATA(p);
-	env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + meta.mm_psize);
-
-#if MDB_DEBUG
-	{
-		int toggle = mdb_env_pick_meta(env);
-		MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI];
-
-		DPRINTF("opened database version %u, pagesize %u",
-			env->me_metas[0]->mm_version, env->me_psize);
-		DPRINTF("using meta page %d", toggle);
-		DPRINTF("depth: %u", db->md_depth);
-		DPRINTF("entries: %zu", db->md_entries);
-		DPRINTF("branch pages: %zu", db->md_branch_pages);
-		DPRINTF("leaf pages: %zu", db->md_leaf_pages);
-		DPRINTF("overflow pages: %zu", db->md_overflow_pages);
-		DPRINTF("root: %zu", db->md_root);
-	}
-#endif
-
-	return MDB_SUCCESS;
-}
-
-
-/** Release a reader thread's slot in the reader lock table.
- * This function is called automatically when a thread exits.
- * @param[in] ptr This points to the slot in the reader lock table.
- */
-static void
-mdb_env_reader_dest(void *ptr)
-{
-	MDB_reader *reader = ptr;
-
-	reader->mr_pid = 0;
-}
-
-#ifdef _WIN32
-/** Junk for arranging thread-specific callbacks on Windows. This is
- * necessarily platform and compiler-specific. Windows supports up
- * to 1088 keys. Let's assume nobody opens more than 64 environments
- * in a single process, for now. They can override this if needed.
- */
-#ifndef MAX_TLS_KEYS
-#define MAX_TLS_KEYS 64
-#endif
-static pthread_key_t mdb_tls_keys[MAX_TLS_KEYS];
-static int mdb_tls_nkeys;
-
-static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr)
-{
-	int i;
-	switch(reason) {
-	case DLL_PROCESS_ATTACH: break;
-	case DLL_THREAD_ATTACH: break;
-	case DLL_THREAD_DETACH:
-		for (i=0; i<mdb_tls_nkeys; i++) {
-			MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
-			if (r) {
-				mdb_env_reader_dest(r);
-				pthread_setspecific(mdb_tls_keys[i], NULL);
-			}
-		}
-		break;
-	case DLL_PROCESS_DETACH: break;
-	}
-}
-#endif	/* _WIN32 */
-
-/** Downgrade the exclusive lock on the region back to shared */
-static int
-mdb_env_share_locks(MDB_env *env, int *excl)
-{
-	int rc = 0, toggle = mdb_env_pick_meta(env);
-
-	env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid;
-
-#ifdef _WIN32
-	{
-		OVERLAPPED ov;
-		/* First acquire a shared lock. The Unlock will
-		 * then release the existing exclusive lock.
-		 */
-		memset(&ov, 0, sizeof(ov));
-		if (!LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) {
-			rc = ErrCode();
-		} else {
-			UnlockFile(env->me_lfd, 0, 0, 1, 0);
-			*excl = 0;
-		}
-	}
-#else
-	{
-		struct flock lock_info;
-		/* The shared lock replaces the existing lock */
-		memset((void *)&lock_info, 0, sizeof(lock_info));
-		lock_info.l_type = F_RDLCK;
-		lock_info.l_whence = SEEK_SET;
-		lock_info.l_start = 0;
-		lock_info.l_len = 1;
-		while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) &&
-			(rc = ErrCode()) == EINTR) ;
-		*excl = rc ? -1 : 0; /* error may mean we lost the lock */
-	}
-#endif
-
-	return rc;
-}
-
-/** Try to get exclusive lock, otherwise shared.
- * Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive. - */ -static int -mdb_env_excl_lock(MDB_env *env, int *excl) -{ - int rc = 0; -#ifdef _WIN32 - if (LockFile(env->me_lfd, 0, 0, 1, 0)) { - *excl = 1; - } else { - OVERLAPPED ov; - memset(&ov, 0, sizeof(ov)); - if (LockFileEx(env->me_lfd, 0, 0, 1, 0, &ov)) { - *excl = 0; - } else { - rc = ErrCode(); - } - } -#else - struct flock lock_info; - memset((void *)&lock_info, 0, sizeof(lock_info)); - lock_info.l_type = F_WRLCK; - lock_info.l_whence = SEEK_SET; - lock_info.l_start = 0; - lock_info.l_len = 1; - while ((rc = fcntl(env->me_lfd, F_SETLK, &lock_info)) && - (rc = ErrCode()) == EINTR) ; - if (!rc) { - *excl = 1; - } else -# ifdef MDB_USE_POSIX_SEM - if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */ -# endif - { - lock_info.l_type = F_RDLCK; - while ((rc = fcntl(env->me_lfd, F_SETLKW, &lock_info)) && - (rc = ErrCode()) == EINTR) ; - if (rc == 0) - *excl = 0; - } -#endif - return rc; -} - -#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) -/* - * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code - * - * @(#) $Revision: 5.1 $ - * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $ - * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $ - * - * http://www.isthe.com/chongo/tech/comp/fnv/index.html - * - *** - * - * Please do not copyright this code. This code is in the public domain. - * - * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO - * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - * - * By: - * chongo /\oo/\ - * http://www.isthe.com/chongo/ - * - * Share and Enjoy! :-) - */ - -typedef unsigned long long mdb_hash_t; -#define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL) - -/** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer - * @param[in] str string to hash - * @param[in] hval initial value for hash - * @return 64 bit hash - * - * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the - * hval arg on the first call. - */ -static mdb_hash_t -mdb_hash_val(MDB_val *val, mdb_hash_t hval) -{ - unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */ - unsigned char *end = s + val->mv_size; - /* - * FNV-1a hash each octet of the string - */ - while (s < end) { - /* xor the bottom with the current octet */ - hval ^= (mdb_hash_t)*s++; - - /* multiply by the 64 bit FNV magic prime mod 2^64 */ - hval += (hval << 1) + (hval << 4) + (hval << 5) + - (hval << 7) + (hval << 8) + (hval << 40); - } - /* return our new hash value */ - return hval; -} - -/** Hash the string and output the hash in hex. - * @param[in] str string to hash - * @param[out] hexbuf an array of 17 chars to hold the hash - */ -static void -mdb_hash_hex(MDB_val *val, char *hexbuf) -{ - int i; - mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT); - for (i=0; i<8; i++) { - hexbuf += sprintf(hexbuf, "%02x", (unsigned int)h & 0xff); - h >>= 8; - } -} -#endif - -/** Open and/or initialize the lock region for the environment. - * @param[in] env The MDB environment. - * @param[in] lpath The pathname of the file used for the lock region. - * @param[in] mode The Unix permissions for the file, if we create it. 
- * @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive - * @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive - * @return 0 on success, non-zero on failure. - */ -static int -mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) -{ -#ifdef _WIN32 -# define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT -#else -# define MDB_ERRCODE_ROFS EROFS -#ifdef O_CLOEXEC /* Linux: Open file and set FD_CLOEXEC atomically */ -# define MDB_CLOEXEC O_CLOEXEC -#else - int fdflags; -# define MDB_CLOEXEC 0 -#endif -#endif - int rc; - off_t size, rsize; - -#ifdef _WIN32 - env->me_lfd = CreateFile(lpath, GENERIC_READ|GENERIC_WRITE, - FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); -#else - env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode); -#endif - if (env->me_lfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - if (rc == MDB_ERRCODE_ROFS && (env->me_flags & MDB_RDONLY)) { - return MDB_SUCCESS; - } - goto fail_errno; - } -#if ! ((MDB_CLOEXEC) || defined(_WIN32)) - /* Lose record locks when exec*() */ - if ((fdflags = fcntl(env->me_lfd, F_GETFD) | FD_CLOEXEC) >= 0) - fcntl(env->me_lfd, F_SETFD, fdflags); -#endif - - if (!(env->me_flags & MDB_NOTLS)) { - rc = pthread_key_create(&env->me_txkey, mdb_env_reader_dest); - if (rc) - goto fail; - env->me_flags |= MDB_ENV_TXKEY; -#ifdef _WIN32 - /* Windows TLS callbacks need help finding their TLS info. */ - if (mdb_tls_nkeys >= MAX_TLS_KEYS) { - rc = MDB_TLS_FULL; - goto fail; - } - mdb_tls_keys[mdb_tls_nkeys++] = env->me_txkey; -#endif - } - - /* Try to get exclusive lock. If we succeed, then - * nobody is using the lock region and we should initialize it. - */ - if ((rc = mdb_env_excl_lock(env, excl))) goto fail; - -#ifdef _WIN32 - size = GetFileSize(env->me_lfd, NULL); -#else - size = lseek(env->me_lfd, 0, SEEK_END); - if (size == -1) goto fail_errno; -#endif - rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); - if (size < rsize && *excl > 0) { -#ifdef _WIN32 - if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != rsize - || !SetEndOfFile(env->me_lfd)) - goto fail_errno; -#else - if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno; -#endif - } else { - rsize = size; - size = rsize - sizeof(MDB_txninfo); - env->me_maxreaders = size/sizeof(MDB_reader) + 1; - } - { -#ifdef _WIN32 - HANDLE mh; - mh = CreateFileMapping(env->me_lfd, NULL, PAGE_READWRITE, - 0, 0, NULL); - if (!mh) goto fail_errno; - env->me_txns = MapViewOfFileEx(mh, FILE_MAP_WRITE, 0, 0, rsize, NULL); - CloseHandle(mh); - if (!env->me_txns) goto fail_errno; -#else - void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, - env->me_lfd, 0); - if (m == MAP_FAILED) goto fail_errno; - env->me_txns = m; -#endif - } - if (*excl > 0) { -#ifdef _WIN32 - BY_HANDLE_FILE_INFORMATION stbuf; - struct { - DWORD volume; - DWORD nhigh; - DWORD nlow; - } idbuf; - MDB_val val; - char hexbuf[17]; - - if (!mdb_sec_inited) { - InitializeSecurityDescriptor(&mdb_null_sd, - SECURITY_DESCRIPTOR_REVISION); - SetSecurityDescriptorDacl(&mdb_null_sd, TRUE, 0, FALSE); - mdb_all_sa.nLength = sizeof(SECURITY_ATTRIBUTES); - mdb_all_sa.bInheritHandle = FALSE; - mdb_all_sa.lpSecurityDescriptor = &mdb_null_sd; - mdb_sec_inited = 1; - } - if (!GetFileInformationByHandle(env->me_lfd, &stbuf)) goto fail_errno; - idbuf.volume = stbuf.dwVolumeSerialNumber; - idbuf.nhigh = stbuf.nFileIndexHigh; - idbuf.nlow = stbuf.nFileIndexLow; - val.mv_data = &idbuf; - val.mv_size = sizeof(idbuf); - mdb_hash_hex(&val, hexbuf); 
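/* [Editor's note, not part of the original patch.] The point of the FNV-1a
 * code above is that every process opening the same environment must derive
 * identical mutex/semaphore names: the lock file's unique identity (volume
 * serial + file index here on Windows; st_dev + st_ino in the POSIX branch
 * that follows) is hashed into a 16-character hex suffix. The shift-and-add
 * chain in mdb_hash_val() is an unrolled multiply by the 64-bit FNV prime:
 *
 *	hval ^= (mdb_hash_t)*s++;	// xor in the next octet
 *	hval *= 0x100000001b3ULL;	// == hval plus the six shifts above
 */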
- sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", hexbuf); - sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", hexbuf); - env->me_rmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_rmname); - if (!env->me_rmutex) goto fail_errno; - env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); - if (!env->me_wmutex) goto fail_errno; -#elif defined(MDB_USE_POSIX_SEM) - struct stat stbuf; - struct { - dev_t dev; - ino_t ino; - } idbuf; - MDB_val val; - char hexbuf[17]; - - if (fstat(env->me_lfd, &stbuf)) goto fail_errno; - idbuf.dev = stbuf.st_dev; - idbuf.ino = stbuf.st_ino; - val.mv_data = &idbuf; - val.mv_size = sizeof(idbuf); - mdb_hash_hex(&val, hexbuf); - sprintf(env->me_txns->mti_rmname, "/MDBr%s", hexbuf); - sprintf(env->me_txns->mti_wmname, "/MDBw%s", hexbuf); - /* Clean up after a previous run, if needed: Try to - * remove both semaphores before doing anything else. - */ - sem_unlink(env->me_txns->mti_rmname); - sem_unlink(env->me_txns->mti_wmname); - env->me_rmutex = sem_open(env->me_txns->mti_rmname, - O_CREAT|O_EXCL, mode, 1); - if (env->me_rmutex == SEM_FAILED) goto fail_errno; - env->me_wmutex = sem_open(env->me_txns->mti_wmname, - O_CREAT|O_EXCL, mode, 1); - if (env->me_wmutex == SEM_FAILED) goto fail_errno; -#else /* MDB_USE_POSIX_SEM */ - pthread_mutexattr_t mattr; - - if ((rc = pthread_mutexattr_init(&mattr)) - || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) - || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr)) - || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) - goto fail; - pthread_mutexattr_destroy(&mattr); -#endif /* _WIN32 || MDB_USE_POSIX_SEM */ - - env->me_txns->mti_version = MDB_VERSION; - env->me_txns->mti_magic = MDB_MAGIC; - env->me_txns->mti_txnid = 0; - env->me_txns->mti_numreaders = 0; - - } else { - if (env->me_txns->mti_magic != MDB_MAGIC) { - DPUTS("lock region has invalid magic"); - rc = MDB_INVALID; - goto fail; - } - if (env->me_txns->mti_version != MDB_VERSION) { - DPRINTF("lock region is version %u, expected version %u", - env->me_txns->mti_version, MDB_VERSION); - rc = MDB_VERSION_MISMATCH; - goto fail; - } - rc = ErrCode(); - if (rc && rc != EACCES && rc != EAGAIN) { - goto fail; - } -#ifdef _WIN32 - env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); - if (!env->me_rmutex) goto fail_errno; - env->me_wmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname); - if (!env->me_wmutex) goto fail_errno; -#elif defined(MDB_USE_POSIX_SEM) - env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0); - if (env->me_rmutex == SEM_FAILED) goto fail_errno; - env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0); - if (env->me_wmutex == SEM_FAILED) goto fail_errno; -#endif - } - return MDB_SUCCESS; - -fail_errno: - rc = ErrCode(); -fail: - return rc; -} - - /** The name of the lock file in the DB environment */ -#define LOCKNAME "/lock.mdb" - /** The name of the data file in the DB environment */ -#define DATANAME "/data.mdb" - /** The suffix of the lock file when no subdir is used */ -#define LOCKSUFF "-lock" - /** Only a subset of the @ref mdb_env flags can be changed - * at runtime. Changing other flags requires closing the - * environment and re-opening it with the new flags. 
- */ -#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC) -#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP|MDB_NOTLS) - -int -mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) -{ - int oflags, rc, len, excl = -1; - char *lpath, *dpath; - - if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) - return EINVAL; - - len = strlen(path); - if (flags & MDB_NOSUBDIR) { - rc = len + sizeof(LOCKSUFF) + len + 1; - } else { - rc = len + sizeof(LOCKNAME) + len + sizeof(DATANAME); - } - lpath = malloc(rc); - if (!lpath) - return ENOMEM; - if (flags & MDB_NOSUBDIR) { - dpath = lpath + len + sizeof(LOCKSUFF); - sprintf(lpath, "%s" LOCKSUFF, path); - strcpy(dpath, path); - } else { - dpath = lpath + len + sizeof(LOCKNAME); - sprintf(lpath, "%s" LOCKNAME, path); - sprintf(dpath, "%s" DATANAME, path); - } - - rc = MDB_SUCCESS; - flags |= env->me_flags; - if (flags & MDB_RDONLY) { - /* silently ignore WRITEMAP when we're only getting read access */ - flags &= ~MDB_WRITEMAP; - } else { - if (!((env->me_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)) && - (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) - rc = ENOMEM; - } - env->me_flags = flags |= MDB_ENV_ACTIVE; - if (rc) - goto leave; - - env->me_path = strdup(path); - env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx)); - env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t)); - if (!(env->me_dbxs && env->me_path && env->me_dbflags)) { - rc = ENOMEM; - goto leave; - } - - rc = mdb_env_setup_locks(env, lpath, mode, &excl); - if (rc) - goto leave; - -#ifdef _WIN32 - if (F_ISSET(flags, MDB_RDONLY)) { - oflags = GENERIC_READ; - len = OPEN_EXISTING; - } else { - oflags = GENERIC_READ|GENERIC_WRITE; - len = OPEN_ALWAYS; - } - mode = FILE_ATTRIBUTE_NORMAL; - env->me_fd = CreateFile(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE, - NULL, len, mode, NULL); -#else - if (F_ISSET(flags, MDB_RDONLY)) - oflags = O_RDONLY; - else - oflags = O_RDWR | O_CREAT; - - env->me_fd = open(dpath, oflags, mode); -#endif - if (env->me_fd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } - - if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { - if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { - env->me_mfd = env->me_fd; - } else { - /* Synchronous fd for meta writes. Needed even with - * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. 
-			 */
-#ifdef _WIN32
-			env->me_mfd = CreateFile(dpath, oflags,
-				FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len,
-				mode | FILE_FLAG_WRITE_THROUGH, NULL);
-#else
-			env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode);
-#endif
-			if (env->me_mfd == INVALID_HANDLE_VALUE) {
-				rc = ErrCode();
-				goto leave;
-			}
-		}
-		DPRINTF("opened dbenv %p", (void *) env);
-		if (excl > 0) {
-			rc = mdb_env_share_locks(env, &excl);
-		}
-	}
-
-leave:
-	if (rc) {
-		mdb_env_close0(env, excl);
-	}
-	free(lpath);
-	return rc;
-}
-
-/** Destroy resources from mdb_env_open(), clear our readers & DBIs */
-static void
-mdb_env_close0(MDB_env *env, int excl)
-{
-	int i;
-
-	if (!(env->me_flags & MDB_ENV_ACTIVE))
-		return;
-
-	/* Doing this here since me_dbxs may not exist during mdb_env_close */
-	for (i = env->me_maxdbs; --i > MAIN_DBI; )
-		free(env->me_dbxs[i].md_name.mv_data);
-
-	free(env->me_dbflags);
-	free(env->me_dbxs);
-	free(env->me_path);
-	free(env->me_dirty_list);
-	mdb_midl_free(env->me_free_pgs);
-
-	if (env->me_flags & MDB_ENV_TXKEY) {
-		pthread_key_delete(env->me_txkey);
-#ifdef _WIN32
-		/* Delete our key from the global list */
-		for (i=0; i<mdb_tls_nkeys; i++)
-			if (mdb_tls_keys[i] == env->me_txkey) {
-				mdb_tls_keys[i] = mdb_tls_keys[mdb_tls_nkeys-1];
-				mdb_tls_nkeys--;
-				break;
-			}
-#endif
-	}
-
-	if (env->me_map) {
-		munmap(env->me_map, env->me_mapsize);
-	}
-	if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE)
-		(void) close(env->me_mfd);
-	if (env->me_fd != INVALID_HANDLE_VALUE)
-		(void) close(env->me_fd);
-	if (env->me_txns) {
-		pid_t pid = env->me_pid;
-		/* Clearing readers is done in this function because
-		 * me_txkey with its destructor must be disabled first.
-		 */
-		for (i = env->me_numreaders; --i >= 0; )
-			if (env->me_txns->mti_readers[i].mr_pid == pid)
-				env->me_txns->mti_readers[i].mr_pid = 0;
-#ifdef _WIN32
-		if (env->me_rmutex) {
-			CloseHandle(env->me_rmutex);
-			if (env->me_wmutex) CloseHandle(env->me_wmutex);
-		}
-		/* Windows automatically destroys the mutexes when
-		 * the last handle closes.
-		 */
-#elif defined(MDB_USE_POSIX_SEM)
-		if (env->me_rmutex != SEM_FAILED) {
-			sem_close(env->me_rmutex);
-			if (env->me_wmutex != SEM_FAILED)
-				sem_close(env->me_wmutex);
-			/* If we have the filelock: If we are the
-			 * only remaining user, clean up semaphores.
-			 */
-			if (excl == 0)
-				mdb_env_excl_lock(env, &excl);
-			if (excl > 0) {
-				sem_unlink(env->me_txns->mti_rmname);
-				sem_unlink(env->me_txns->mti_wmname);
-			}
-		}
-#endif
-		munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
-	}
-	if (env->me_lfd != INVALID_HANDLE_VALUE) {
-#ifdef _WIN32
-		if (excl >= 0) {
-			/* Unlock the lockfile. Windows would have unlocked it
-			 * after closing anyway, but not necessarily at once.
-			 */
-			UnlockFile(env->me_lfd, 0, 0, 1, 0);
-		}
-#endif
-		(void) close(env->me_lfd);
-	}
-
-	env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
-}
-
-int
-mdb_env_copyfd(MDB_env *env, HANDLE fd)
-{
-	MDB_txn *txn = NULL;
-	int rc;
-	size_t wsize;
-	char *ptr;
-
-	/* Do the lock/unlock of the reader mutex before starting the
-	 * write txn. Otherwise other read txns could block writers.
- */ - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) - return rc; - - if (env->me_txns) { - /* We must start the actual read txn after blocking writers */ - mdb_txn_reset0(txn, "reset-stage1"); - - /* Temporarily block writers until we snapshot the meta pages */ - LOCK_MUTEX_W(env); - - rc = mdb_txn_renew0(txn); - if (rc) { - UNLOCK_MUTEX_W(env); - goto leave; - } - } - - wsize = env->me_psize * 2; -#ifdef _WIN32 - { - DWORD len; - rc = WriteFile(fd, env->me_map, wsize, &len, NULL); - rc = rc ? (len == wsize ? MDB_SUCCESS : EIO) : ErrCode(); - } -#else - rc = write(fd, env->me_map, wsize); - rc = rc == (int)wsize ? MDB_SUCCESS : rc < 0 ? ErrCode() : EIO; -#endif - if (env->me_txns) - UNLOCK_MUTEX_W(env); - - if (rc) - goto leave; - - ptr = env->me_map + wsize; - wsize = txn->mt_next_pgno * env->me_psize - wsize; -#ifdef _WIN32 - while (wsize > 0) { - DWORD len, w2; - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - rc = WriteFile(fd, ptr, w2, &len, NULL); - rc = rc ? (len == w2 ? MDB_SUCCESS : EIO) : ErrCode(); - if (rc) break; - wsize -= w2; - ptr += w2; - } -#else - while (wsize > 0) { - size_t w2; - ssize_t wres; - if (wsize > MAX_WRITE) - w2 = MAX_WRITE; - else - w2 = wsize; - wres = write(fd, ptr, w2); - rc = wres == (ssize_t)w2 ? MDB_SUCCESS : wres < 0 ? ErrCode() : EIO; - if (rc) break; - wsize -= wres; - ptr += wres; - } -#endif - -leave: - mdb_txn_abort(txn); - return rc; -} - -int -mdb_env_copy(MDB_env *env, const char *path) -{ - int rc, len; - char *lpath; - HANDLE newfd = INVALID_HANDLE_VALUE; - - if (env->me_flags & MDB_NOSUBDIR) { - lpath = (char *)path; - } else { - len = strlen(path); - len += sizeof(DATANAME); - lpath = malloc(len); - if (!lpath) - return ENOMEM; - sprintf(lpath, "%s" DATANAME, path); - } - - /* The destination path must exist, but the destination file must not. - * We don't want the OS to cache the writes, since the source data is - * already in the OS cache. - */ -#ifdef _WIN32 - newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); -#else - newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL -#ifdef O_DIRECT - |O_DIRECT -#endif - , 0666); -#endif - if (newfd == INVALID_HANDLE_VALUE) { - rc = ErrCode(); - goto leave; - } - -#ifdef F_NOCACHE /* __APPLE__ */ - rc = fcntl(newfd, F_NOCACHE, 1); - if (rc) { - rc = ErrCode(); - goto leave; - } -#endif - - rc = mdb_env_copyfd(env, newfd); - -leave: - if (!(env->me_flags & MDB_NOSUBDIR)) - free(lpath); - if (newfd != INVALID_HANDLE_VALUE) - if (close(newfd) < 0 && rc == MDB_SUCCESS) - rc = ErrCode(); - - return rc; -} - -void -mdb_env_close(MDB_env *env) -{ - MDB_page *dp; - - if (env == NULL) - return; - - VGMEMP_DESTROY(env); - while ((dp = env->me_dpages) != NULL) { - VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next)); - env->me_dpages = dp->mp_next; - free(dp); - } - - mdb_env_close0(env, 0); - free(env); -} - -/** Compare two items pointing at aligned size_t's */ -static int -mdb_cmp_long(const MDB_val *a, const MDB_val *b) -{ - return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 : - *(size_t *)a->mv_data > *(size_t *)b->mv_data; -} - -/** Compare two items pointing at aligned int's */ -static int -mdb_cmp_int(const MDB_val *a, const MDB_val *b) -{ - return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 : - *(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data; -} - -/** Compare two items pointing at ints of unknown alignment. - * Nodes and keys are guaranteed to be 2-byte aligned. 
- */ -static int -mdb_cmp_cint(const MDB_val *a, const MDB_val *b) -{ -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned short *u, *c; - int x; - - u = (unsigned short *) ((char *) a->mv_data + a->mv_size); - c = (unsigned short *) ((char *) b->mv_data + a->mv_size); - do { - x = *--u - *--c; - } while(!x && u > (unsigned short *)a->mv_data); - return x; -#else - return memcmp(a->mv_data, b->mv_data, a->mv_size); -#endif -} - -/** Compare two items lexically */ -static int -mdb_cmp_memn(const MDB_val *a, const MDB_val *b) -{ - int diff; - ssize_t len_diff; - unsigned int len; - - len = a->mv_size; - len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; - if (len_diff > 0) { - len = b->mv_size; - len_diff = 1; - } - - diff = memcmp(a->mv_data, b->mv_data, len); - return diff ? diff : len_diff<0 ? -1 : len_diff; -} - -/** Compare two items in reverse byte order */ -static int -mdb_cmp_memnr(const MDB_val *a, const MDB_val *b) -{ - const unsigned char *p1, *p2, *p1_lim; - ssize_t len_diff; - int diff; - - p1_lim = (const unsigned char *)a->mv_data; - p1 = (const unsigned char *)a->mv_data + a->mv_size; - p2 = (const unsigned char *)b->mv_data + b->mv_size; - - len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size; - if (len_diff > 0) { - p1_lim += len_diff; - len_diff = 1; - } - - while (p1 > p1_lim) { - diff = *--p1 - *--p2; - if (diff) - return diff; - } - return len_diff<0 ? -1 : len_diff; -} - -/** Search for key within a page, using binary search. - * Returns the smallest entry larger or equal to the key. - * If exactp is non-null, stores whether the found entry was an exact match - * in *exactp (1 or 0). - * Updates the cursor index with the index of the found entry. - * If no entry larger or equal to the key is found, returns NULL. - */ -static MDB_node * -mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp) -{ - unsigned int i = 0, nkeys; - int low, high; - int rc = 0; - MDB_page *mp = mc->mc_pg[mc->mc_top]; - MDB_node *node = NULL; - MDB_val nodekey; - MDB_cmp_func *cmp; - DKBUF; - - nkeys = NUMKEYS(mp); - -#if MDB_DEBUG - { - pgno_t pgno; - COPY_PGNO(pgno, mp->mp_pgno); - DPRINTF("searching %u keys in %s %spage %zu", - nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "", - pgno); - } -#endif - - assert(nkeys > 0); - - low = IS_LEAF(mp) ? 0 : 1; - high = nkeys - 1; - cmp = mc->mc_dbx->md_cmp; - - /* Branch pages have no data, so if using integer keys, - * alignment is guaranteed. Use faster mdb_cmp_int. - */ - if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) { - if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t)) - cmp = mdb_cmp_long; - else - cmp = mdb_cmp_int; - } - - if (IS_LEAF2(mp)) { - nodekey.mv_size = mc->mc_db->md_pad; - node = NODEPTR(mp, 0); /* fake */ - while (low <= high) { - i = (low + high) >> 1; - nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size); - rc = cmp(key, &nodekey); - DPRINTF("found leaf index %u [%s], rc = %i", - i, DKEY(&nodekey), rc); - if (rc == 0) - break; - if (rc > 0) - low = i + 1; - else - high = i - 1; - } - } else { - while (low <= high) { - i = (low + high) >> 1; - - node = NODEPTR(mp, i); - nodekey.mv_size = NODEKSZ(node); - nodekey.mv_data = NODEKEY(node); - - rc = cmp(key, &nodekey); -#if MDB_DEBUG - if (IS_LEAF(mp)) - DPRINTF("found leaf index %u [%s], rc = %i", - i, DKEY(&nodekey), rc); - else - DPRINTF("found branch index %u [%s -> %zu], rc = %i", - i, DKEY(&nodekey), NODEPGNO(node), rc); -#endif - if (rc == 0) - break; - if (rc > 0) - low = i + 1; - else - high = i - 1; - } - } - - if (rc > 0) { /* Found entry is less than the key. 
*/ - i++; /* Skip to get the smallest entry larger than key. */ - if (!IS_LEAF2(mp)) - node = NODEPTR(mp, i); - } - if (exactp) - *exactp = (rc == 0); - /* store the key index */ - mc->mc_ki[mc->mc_top] = i; - if (i >= nkeys) - /* There is no entry larger or equal to the key. */ - return NULL; - - /* nodeptr is fake for LEAF2 */ - return node; -} - -#if 0 -static void -mdb_cursor_adjust(MDB_cursor *mc, func) -{ - MDB_cursor *m2; - - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { - if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) { - func(mc, m2); - } - } -} -#endif - -/** Pop a page off the top of the cursor's stack. */ -static void -mdb_cursor_pop(MDB_cursor *mc) -{ - if (mc->mc_snum) { -#ifndef MDB_DEBUG_SKIP - MDB_page *top = mc->mc_pg[mc->mc_top]; -#endif - mc->mc_snum--; - if (mc->mc_snum) - mc->mc_top--; - - DPRINTF("popped page %zu off db %u cursor %p", top->mp_pgno, - mc->mc_dbi, (void *) mc); - } -} - -/** Push a page onto the top of the cursor's stack. */ -static int -mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) -{ - DPRINTF("pushing page %zu on db %u cursor %p", mp->mp_pgno, - mc->mc_dbi, (void *) mc); - - if (mc->mc_snum >= CURSOR_STACK) { - assert(mc->mc_snum < CURSOR_STACK); - return MDB_CURSOR_FULL; - } - - mc->mc_top = mc->mc_snum++; - mc->mc_pg[mc->mc_top] = mp; - mc->mc_ki[mc->mc_top] = 0; - - return MDB_SUCCESS; -} - -/** Find the address of the page corresponding to a given page number. - * @param[in] txn the transaction for this access. - * @param[in] pgno the page number for the page to retrieve. - * @param[out] ret address of a pointer where the page's address will be stored. - * @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page. - * @return 0 on success, non-zero on failure. - */ -static int -mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) -{ - MDB_page *p = NULL; - int level; - - if (!((txn->mt_flags & MDB_TXN_RDONLY) | - (txn->mt_env->me_flags & MDB_WRITEMAP))) - { - MDB_txn *tx2 = txn; - level = 1; - do { - MDB_ID2L dl = tx2->mt_u.dirty_list; - if (dl[0].mid) { - unsigned x = mdb_mid2l_search(dl, pgno); - if (x <= dl[0].mid && dl[x].mid == pgno) { - p = dl[x].mptr; - goto done; - } - } - level++; - } while ((tx2 = tx2->mt_parent) != NULL); - } - - if (pgno < txn->mt_next_pgno) { - level = 0; - p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); - } else { - DPRINTF("page %zu not found", pgno); - assert(p != NULL); - return MDB_PAGE_NOTFOUND; - } - -done: - *ret = p; - if (lvl) - *lvl = level; - return MDB_SUCCESS; -} - -/** Search for the page a given key should be in. - * Pushes parent pages on the cursor stack. This function continues a - * search on a cursor that has already been initialized. (Usually by - * #mdb_page_search() but also by #mdb_node_move().) - * @param[in,out] mc the cursor for this operation. - * @param[in] key the key to search for. If NULL, search for the lowest - * page. (This is used by #mdb_cursor_first().) - * @param[in] modify If true, visited pages are updated with new page numbers. - * @return 0 on success, non-zero on failure. 
- */ -static int -mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int modify) -{ - MDB_page *mp = mc->mc_pg[mc->mc_top]; - DKBUF; - int rc; - - - while (IS_BRANCH(mp)) { - MDB_node *node; - indx_t i; - - DPRINTF("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp)); - assert(NUMKEYS(mp) > 1); - DPRINTF("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0))); - - if (key == NULL) /* Initialize cursor to first page. */ - i = 0; - else if (key->mv_size > MDB_MAXKEYSIZE && key->mv_data == NULL) { - /* cursor to last page */ - i = NUMKEYS(mp)-1; - } else { - int exact; - node = mdb_node_search(mc, key, &exact); - if (node == NULL) - i = NUMKEYS(mp) - 1; - else { - i = mc->mc_ki[mc->mc_top]; - if (!exact) { - assert(i > 0); - i--; - } - } - } - - if (key) - DPRINTF("following index %u for key [%s]", - i, DKEY(key)); - assert(i < NUMKEYS(mp)); - node = NODEPTR(mp, i); - - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) - return rc; - - mc->mc_ki[mc->mc_top] = i; - if ((rc = mdb_cursor_push(mc, mp))) - return rc; - - if (modify) { - if ((rc = mdb_page_touch(mc)) != 0) - return rc; - mp = mc->mc_pg[mc->mc_top]; - } - } - - if (!IS_LEAF(mp)) { - DPRINTF("internal error, index points to a %02X page!?", - mp->mp_flags); - return MDB_CORRUPTED; - } - - DPRINTF("found leaf page %zu for key [%s]", mp->mp_pgno, - key ? DKEY(key) : NULL); - - return MDB_SUCCESS; -} - -/** Search for the lowest key under the current branch page. - * This just bypasses a NUMKEYS check in the current page - * before calling mdb_page_search_root(), because the callers - * are all in situations where the current page is known to - * be underfilled. - */ -static int -mdb_page_search_lowest(MDB_cursor *mc) -{ - MDB_page *mp = mc->mc_pg[mc->mc_top]; - MDB_node *node = NODEPTR(mp, 0); - int rc; - - if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0) - return rc; - - mc->mc_ki[mc->mc_top] = 0; - if ((rc = mdb_cursor_push(mc, mp))) - return rc; - return mdb_page_search_root(mc, NULL, 0); -} - -/** Search for the page a given key should be in. - * Pushes parent pages on the cursor stack. This function just sets up - * the search; it finds the root page for \b mc's database and sets this - * as the root of the cursor's stack. Then #mdb_page_search_root() is - * called to complete the search. - * @param[in,out] mc the cursor for this operation. - * @param[in] key the key to search for. If NULL, search for the lowest - * page. (This is used by #mdb_cursor_first().) - * @param[in] flags If MDB_PS_MODIFY set, visited pages are updated with new page numbers. - * If MDB_PS_ROOTONLY set, just fetch root node, no further lookups. - * @return 0 on success, non-zero on failure. - */ -static int -mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) -{ - int rc; - pgno_t root; - - /* Make sure the txn is still viable, then find the root from - * the txn's db table. 
- */ - if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_ERROR)) { - DPUTS("transaction has failed, must abort"); - return EINVAL; - } else { - /* Make sure we're using an up-to-date root */ - if (mc->mc_dbi > MAIN_DBI) { - if ((*mc->mc_dbflag & DB_STALE) || - ((flags & MDB_PS_MODIFY) && !(*mc->mc_dbflag & DB_DIRTY))) { - MDB_cursor mc2; - unsigned char dbflag = 0; - mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL); - rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, flags & MDB_PS_MODIFY); - if (rc) - return rc; - if (*mc->mc_dbflag & DB_STALE) { - MDB_val data; - int exact = 0; - uint16_t flags; - MDB_node *leaf = mdb_node_search(&mc2, - &mc->mc_dbx->md_name, &exact); - if (!exact) - return MDB_NOTFOUND; - rc = mdb_node_read(mc->mc_txn, leaf, &data); - if (rc) - return rc; - memcpy(&flags, ((char *) data.mv_data + offsetof(MDB_db, md_flags)), - sizeof(uint16_t)); - /* The txn may not know this DBI, or another process may - * have dropped and recreated the DB with other flags. - */ - if ((mc->mc_db->md_flags & PERSISTENT_FLAGS) != flags) - return MDB_INCOMPATIBLE; - memcpy(mc->mc_db, data.mv_data, sizeof(MDB_db)); - } - if (flags & MDB_PS_MODIFY) - dbflag = DB_DIRTY; - *mc->mc_dbflag &= ~DB_STALE; - *mc->mc_dbflag |= dbflag; - } - } - root = mc->mc_db->md_root; - - if (root == P_INVALID) { /* Tree is empty. */ - DPUTS("tree is empty"); - return MDB_NOTFOUND; - } - } - - assert(root > 1); - if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) - if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) - return rc; - - mc->mc_snum = 1; - mc->mc_top = 0; - - DPRINTF("db %u root page %zu has flags 0x%X", - mc->mc_dbi, root, mc->mc_pg[0]->mp_flags); - - if (flags & MDB_PS_MODIFY) { - if ((rc = mdb_page_touch(mc))) - return rc; - } - - if (flags & MDB_PS_ROOTONLY) - return MDB_SUCCESS; - - return mdb_page_search_root(mc, key, flags); -} - -static int -mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) -{ - MDB_txn *txn = mc->mc_txn; - pgno_t pg = mp->mp_pgno; - unsigned i, ovpages = mp->mp_pages; - MDB_env *env = txn->mt_env; - int rc; - - DPRINTF("free ov page %zu (%d)", pg, ovpages); - /* If the page is dirty we just acquired it, so we should - * give it back to our current free list, if any. - * Not currently supported in nested txns. - * Otherwise put it onto the list of pages we freed in this txn. - */ - if ((mp->mp_flags & P_DIRTY) && !txn->mt_parent && env->me_pghead) { - unsigned j, x; - pgno_t *mop; - MDB_ID2 *dl, ix, iy; - rc = mdb_midl_need(&env->me_pghead, ovpages); - if (rc) - return rc; - /* Remove from dirty list */ - dl = txn->mt_u.dirty_list; - x = dl[0].mid--; - for (ix = dl[x]; ix.mptr != mp; ix = iy) { - if (x > 1) { - x--; - iy = dl[x]; - dl[x] = ix; - } else { - assert(x > 1); - j = ++(dl[0].mid); - dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */ - txn->mt_flags |= MDB_TXN_ERROR; - return MDB_CORRUPTED; - } - } - if (!(env->me_flags & MDB_WRITEMAP)) - mdb_dpage_free(env, mp); - /* Insert in me_pghead */ - mop = env->me_pghead; - j = mop[0] + ovpages; - for (i = mop[0]; i && mop[i] < pg; i--) - mop[j--] = mop[i]; - while (j>i) - mop[j--] = pg++; - mop[0] += ovpages; - } else { - rc = mdb_midl_append_range(&txn->mt_free_pgs, pg, ovpages); - if (rc) - return rc; - } - mc->mc_db->md_overflow_pages -= ovpages; - return 0; -} - -/** Return the data associated with a given node. - * @param[in] txn The transaction for this operation. - * @param[in] leaf The node being read. - * @param[out] data Updated to point to the node's data. - * @return 0 on success, non-zero on failure. 
- */
-static int
-mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data)
-{
-	MDB_page	*omp;		/* overflow page */
-	pgno_t		 pgno;
-	int rc;
-
-	if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
-		data->mv_size = NODEDSZ(leaf);
-		data->mv_data = NODEDATA(leaf);
-		return MDB_SUCCESS;
-	}
-
-	/* Read overflow data.
-	 */
-	data->mv_size = NODEDSZ(leaf);
-	memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
-	if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) {
-		DPRINTF("read overflow page %zu failed", pgno);
-		return rc;
-	}
-	data->mv_data = METADATA(omp);
-
-	return MDB_SUCCESS;
-}
-
-int
-mdb_get(MDB_txn *txn, MDB_dbi dbi,
-    MDB_val *key, MDB_val *data)
-{
-	MDB_cursor	mc;
-	MDB_xcursor	mx;
-	int exact = 0;
-	DKBUF;
-
-	assert(key);
-	assert(data);
-	DPRINTF("===> get db %u key [%s]", dbi, DKEY(key));
-
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
-		return EINVAL;
-	}
-
-	mdb_cursor_init(&mc, txn, dbi, &mx);
-	return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
-}
-
-/** Find a sibling for a page.
- * Replaces the page at the top of the cursor's stack with the
- * specified sibling, if one exists.
- * @param[in] mc The cursor for this operation.
- * @param[in] move_right Non-zero if the right sibling is requested,
- * otherwise the left sibling.
- * @return 0 on success, non-zero on failure.
- */
-static int
-mdb_cursor_sibling(MDB_cursor *mc, int move_right)
-{
-	int		 rc;
-	MDB_node	*indx;
-	MDB_page	*mp;
-
-	if (mc->mc_snum < 2) {
-		return MDB_NOTFOUND;		/* root has no siblings */
-	}
-
-	mdb_cursor_pop(mc);
-	DPRINTF("parent page is page %zu, index %u",
-		mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]);
-
-	if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
-		       : (mc->mc_ki[mc->mc_top] == 0)) {
-		DPRINTF("no more keys left, moving to %s sibling",
-		    move_right ? "right" : "left");
-		if ((rc = mdb_cursor_sibling(mc, move_right)) != MDB_SUCCESS) {
-			/* undo cursor_pop before returning */
-			mc->mc_top++;
-			mc->mc_snum++;
-			return rc;
-		}
-	} else {
-		if (move_right)
-			mc->mc_ki[mc->mc_top]++;
-		else
-			mc->mc_ki[mc->mc_top]--;
-		DPRINTF("just moving to %s index key %u",
-		    move_right ? "right" : "left", mc->mc_ki[mc->mc_top]);
-	}
-	assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
-
-	indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
-	if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0)
-		return rc;
-
-	mdb_cursor_push(mc, mp);
-	if (!move_right)
-		mc->mc_ki[mc->mc_top] = NUMKEYS(mp)-1;
-
-	return MDB_SUCCESS;
-}
-
-/** Move the cursor to the next data item.
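- * A typical forward scan drives this through mdb_cursor_get() with
- * #MDB_NEXT. Caller-side sketch (illustrative, not code from this file):
- *
- *	MDB_val key, data;
- *	while (mdb_cursor_get(mc, &key, &data, MDB_NEXT) == MDB_SUCCESS)
- *		;	/* visit key and data */
- *
- * The scan stops when #MDB_NOTFOUND is returned and C_EOF is set.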
*/ -static int -mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) -{ - MDB_page *mp; - MDB_node *leaf; - int rc; - - if (mc->mc_flags & C_EOF) { - return MDB_NOTFOUND; - } - - assert(mc->mc_flags & C_INITIALIZED); - - mp = mc->mc_pg[mc->mc_top]; - - if (mc->mc_db->md_flags & MDB_DUPSORT) { - leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (op == MDB_NEXT || op == MDB_NEXT_DUP) { - rc = mdb_cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); - if (op != MDB_NEXT || rc != MDB_NOTFOUND) - return rc; - } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (op == MDB_NEXT_DUP) - return MDB_NOTFOUND; - } - } - - DPRINTF("cursor_next: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); - - if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { - DPUTS("=====> move to next sibling page"); - if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS) { - mc->mc_flags |= C_EOF; - return rc; - } - mp = mc->mc_pg[mc->mc_top]; - DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]++; - - DPRINTF("==> cursor points to page %zu with %u keys, key index %u", - mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); - - if (IS_LEAF2(mp)) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); - return MDB_SUCCESS; - } - - assert(IS_LEAF(mp)); - leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - } - if (data) { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) - return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } - } - - MDB_GET_KEY(leaf, key); - return MDB_SUCCESS; -} - -/** Move the cursor to the previous data item. 
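- * The reverse counterpart of mdb_cursor_next(). A backward scan is
- * usually seeded with #MDB_LAST; sketch (illustrative, not code from
- * this file):
- *
- *	MDB_val key, data;
- *	int rc = mdb_cursor_get(mc, &key, &data, MDB_LAST);
- *	while (rc == MDB_SUCCESS)
- *		rc = mdb_cursor_get(mc, &key, &data, MDB_PREV);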
*/ -static int -mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) -{ - MDB_page *mp; - MDB_node *leaf; - int rc; - - assert(mc->mc_flags & C_INITIALIZED); - - mp = mc->mc_pg[mc->mc_top]; - - if (mc->mc_db->md_flags & MDB_DUPSORT) { - leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - if (op == MDB_PREV || op == MDB_PREV_DUP) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV); - if (op != MDB_PREV || rc != MDB_NOTFOUND) - return rc; - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (op == MDB_PREV_DUP) - return MDB_NOTFOUND; - } - } - } - - DPRINTF("cursor_prev: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc); - - if (mc->mc_ki[mc->mc_top] == 0) { - DPUTS("=====> move to prev sibling page"); - if ((rc = mdb_cursor_sibling(mc, 0)) != MDB_SUCCESS) { - return rc; - } - mp = mc->mc_pg[mc->mc_top]; - mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; - DPRINTF("prev page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); - } else - mc->mc_ki[mc->mc_top]--; - - mc->mc_flags &= ~C_EOF; - - DPRINTF("==> cursor points to page %zu with %u keys, key index %u", - mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); - - if (IS_LEAF2(mp)) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); - return MDB_SUCCESS; - } - - assert(IS_LEAF(mp)); - leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - } - if (data) { - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) - return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } - } - - MDB_GET_KEY(leaf, key); - return MDB_SUCCESS; -} - -/** Set the cursor on a specific data item. */ -static int -mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, - MDB_cursor_op op, int *exactp) -{ - int rc; - MDB_page *mp; - MDB_node *leaf = NULL; - DKBUF; - - assert(mc); - assert(key); - assert(key->mv_size > 0); - - /* See if we're already on the right page */ - if (mc->mc_flags & C_INITIALIZED) { - MDB_val nodekey; - - mp = mc->mc_pg[mc->mc_top]; - if (!NUMKEYS(mp)) { - mc->mc_ki[mc->mc_top] = 0; - return MDB_NOTFOUND; - } - if (mp->mp_flags & P_LEAF2) { - nodekey.mv_size = mc->mc_db->md_pad; - nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size); - } else { - leaf = NODEPTR(mp, 0); - MDB_GET_KEY(leaf, &nodekey); - } - rc = mc->mc_dbx->md_cmp(key, &nodekey); - if (rc == 0) { - /* Probably happens rarely, but first node on the page - * was the one we wanted. 
- */
-			mc->mc_ki[mc->mc_top] = 0;
-			if (exactp)
-				*exactp = 1;
-			goto set1;
-		}
-		if (rc > 0) {
-			unsigned int i;
-			unsigned int nkeys = NUMKEYS(mp);
-			if (nkeys > 1) {
-				if (mp->mp_flags & P_LEAF2) {
-					nodekey.mv_data = LEAF2KEY(mp,
-						 nkeys-1, nodekey.mv_size);
-				} else {
-					leaf = NODEPTR(mp, nkeys-1);
-					MDB_GET_KEY(leaf, &nodekey);
-				}
-				rc = mc->mc_dbx->md_cmp(key, &nodekey);
-				if (rc == 0) {
-					/* last node was the one we wanted */
-					mc->mc_ki[mc->mc_top] = nkeys-1;
-					if (exactp)
-						*exactp = 1;
-					goto set1;
-				}
-				if (rc < 0) {
-					if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
-						/* This is definitely the right page, skip search_page */
-						if (mp->mp_flags & P_LEAF2) {
-							nodekey.mv_data = LEAF2KEY(mp,
-								 mc->mc_ki[mc->mc_top], nodekey.mv_size);
-						} else {
-							leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
-							MDB_GET_KEY(leaf, &nodekey);
-						}
-						rc = mc->mc_dbx->md_cmp(key, &nodekey);
-						if (rc == 0) {
-							/* current node was the one we wanted */
-							if (exactp)
-								*exactp = 1;
-							goto set1;
-						}
-					}
-					rc = 0;
-					goto set2;
-				}
-			}
-			/* If any parents have right-sibs, search.
-			 * Otherwise, there's nothing further.
-			 */
-			for (i=0; i<mc->mc_top; i++)
-				if (mc->mc_ki[i] <
-					NUMKEYS(mc->mc_pg[i])-1)
-					break;
-			if (i == mc->mc_top) {
-				/* There are no other pages */
-				mc->mc_ki[mc->mc_top] = nkeys;
-				return MDB_NOTFOUND;
-			}
-		}
-		if (!mc->mc_top) {
-			/* There are no other pages */
-			mc->mc_ki[mc->mc_top] = 0;
-			return MDB_NOTFOUND;
-		}
-	}
-
-	rc = mdb_page_search(mc, key, 0);
-	if (rc != MDB_SUCCESS)
-		return rc;
-
-	mp = mc->mc_pg[mc->mc_top];
-	assert(IS_LEAF(mp));
-
-set2:
-	leaf = mdb_node_search(mc, key, exactp);
-	if (exactp != NULL && !*exactp) {
-		/* MDB_SET specified and not an exact match. */
-		return MDB_NOTFOUND;
-	}
-
-	if (leaf == NULL) {
-		DPUTS("===> inexact leaf not found, goto sibling");
-		if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
-			return rc;		/* no entries matched */
-		mp = mc->mc_pg[mc->mc_top];
-		assert(IS_LEAF(mp));
-		leaf = NODEPTR(mp, 0);
-	}
-
-set1:
-	mc->mc_flags |= C_INITIALIZED;
-	mc->mc_flags &= ~C_EOF;
-
-	if (IS_LEAF2(mp)) {
-		key->mv_size = mc->mc_db->md_pad;
-		key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
-		return MDB_SUCCESS;
-	}
-
-	if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
-		mdb_xcursor_init1(mc, leaf);
-	}
-	if (data) {
-		if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
-			if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
-				rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
-			} else {
-				int ex2, *ex2p;
-				if (op == MDB_GET_BOTH) {
-					ex2p = &ex2;
-					ex2 = 0;
-				} else {
-					ex2p = NULL;
-				}
-				rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p);
-				if (rc != MDB_SUCCESS)
-					return rc;
-			}
-		} else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) {
-			MDB_val d2;
-			if ((rc = mdb_node_read(mc->mc_txn, leaf, &d2)) != MDB_SUCCESS)
-				return rc;
-			rc = mc->mc_dbx->md_dcmp(data, &d2);
-			if (rc) {
-				if (op == MDB_GET_BOTH || rc > 0)
-					return MDB_NOTFOUND;
-			}
-
-		} else {
-			if (mc->mc_xcursor)
-				mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
-			if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS)
-				return rc;
-		}
-	}
-
-	/* The key already matches in all other cases */
-	if (op == MDB_SET_RANGE || op == MDB_SET_KEY)
-		MDB_GET_KEY(leaf, key);
-	DPRINTF("==> cursor placed on key [%s]", DKEY(key));
-
-	return rc;
-}
-
-/** Move the cursor to the first item in the database.
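- * For #MDB_DUPSORT databases the sub-cursor is positioned too, so the
- * data item returned is the first duplicate of the first key.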
*/ -static int -mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) -{ - int rc; - MDB_node *leaf; - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = mdb_page_search(mc, NULL, 0); - if (rc != MDB_SUCCESS) - return rc; - } - assert(IS_LEAF(mc->mc_pg[mc->mc_top])); - - leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0); - mc->mc_flags |= C_INITIALIZED; - mc->mc_flags &= ~C_EOF; - - mc->mc_ki[mc->mc_top] = 0; - - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); - return MDB_SUCCESS; - } - - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) - return rc; - } - } - MDB_GET_KEY(leaf, key); - return MDB_SUCCESS; -} - -/** Move the cursor to the last item in the database. */ -static int -mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) -{ - int rc; - MDB_node *leaf; - - if (!(mc->mc_flags & C_EOF)) { - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - MDB_val lkey; - - lkey.mv_size = MDB_MAXKEYSIZE+1; - lkey.mv_data = NULL; - rc = mdb_page_search(mc, &lkey, 0); - if (rc != MDB_SUCCESS) - return rc; - } - assert(IS_LEAF(mc->mc_pg[mc->mc_top])); - - } - mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; - mc->mc_flags |= C_INITIALIZED|C_EOF; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); - return MDB_SUCCESS; - } - - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if ((rc = mdb_node_read(mc->mc_txn, leaf, data)) != MDB_SUCCESS) - return rc; - } - } - - MDB_GET_KEY(leaf, key); - return MDB_SUCCESS; -} - -int -mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, - MDB_cursor_op op) -{ - int rc; - int exact = 0; - - assert(mc); - - switch (op) { - case MDB_GET_CURRENT: - if (!(mc->mc_flags & C_INITIALIZED)) { - rc = EINVAL; - } else { - MDB_page *mp = mc->mc_pg[mc->mc_top]; - if (!NUMKEYS(mp)) { - mc->mc_ki[mc->mc_top] = 0; - rc = MDB_NOTFOUND; - break; - } - rc = MDB_SUCCESS; - if (IS_LEAF2(mp)) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); - } else { - MDB_node *leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); - MDB_GET_KEY(leaf, key); - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_GET_CURRENT); - } else { - rc = mdb_node_read(mc->mc_txn, leaf, data); - } - } - } - } - break; - case MDB_GET_BOTH: - case MDB_GET_BOTH_RANGE: - if (data == NULL || mc->mc_xcursor == NULL) { - rc = EINVAL; - break; - } - /* FALLTHRU */ - case MDB_SET: - case MDB_SET_KEY: - case MDB_SET_RANGE: - if (key == NULL || key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) { - rc = EINVAL; - } else if (op == MDB_SET_RANGE) - rc = mdb_cursor_set(mc, key, data, op, NULL); - else - rc = mdb_cursor_set(mc, key, data, op, &exact); - break; - case MDB_GET_MULTIPLE: - if (data == NULL || 
- !(mc->mc_db->md_flags & MDB_DUPFIXED) || - !(mc->mc_flags & C_INITIALIZED)) { - rc = EINVAL; - break; - } - rc = MDB_SUCCESS; - if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) || - (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF)) - break; - goto fetchm; - case MDB_NEXT_MULTIPLE: - if (data == NULL || - !(mc->mc_db->md_flags & MDB_DUPFIXED)) { - rc = EINVAL; - break; - } - if (!(mc->mc_flags & C_INITIALIZED)) - rc = mdb_cursor_first(mc, key, data); - else - rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP); - if (rc == MDB_SUCCESS) { - if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - MDB_cursor *mx; -fetchm: - mx = &mc->mc_xcursor->mx_cursor; - data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * - mx->mc_db->md_pad; - data->mv_data = METADATA(mx->mc_pg[mx->mc_top]); - mx->mc_ki[mx->mc_top] = NUMKEYS(mx->mc_pg[mx->mc_top])-1; - } else { - rc = MDB_NOTFOUND; - } - } - break; - case MDB_NEXT: - case MDB_NEXT_DUP: - case MDB_NEXT_NODUP: - if (!(mc->mc_flags & C_INITIALIZED)) - rc = mdb_cursor_first(mc, key, data); - else - rc = mdb_cursor_next(mc, key, data, op); - break; - case MDB_PREV: - case MDB_PREV_DUP: - case MDB_PREV_NODUP: - if (!(mc->mc_flags & C_INITIALIZED)) { - rc = mdb_cursor_last(mc, key, data); - if (rc) - break; - mc->mc_flags |= C_INITIALIZED; - mc->mc_ki[mc->mc_top]++; - } - rc = mdb_cursor_prev(mc, key, data, op); - break; - case MDB_FIRST: - rc = mdb_cursor_first(mc, key, data); - break; - case MDB_FIRST_DUP: - if (data == NULL || - !(mc->mc_db->md_flags & MDB_DUPSORT) || - !(mc->mc_flags & C_INITIALIZED) || - !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - rc = EINVAL; - break; - } - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - break; - case MDB_LAST: - rc = mdb_cursor_last(mc, key, data); - break; - case MDB_LAST_DUP: - if (data == NULL || - !(mc->mc_db->md_flags & MDB_DUPSORT) || - !(mc->mc_flags & C_INITIALIZED) || - !(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) { - rc = EINVAL; - break; - } - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - break; - default: - DPRINTF("unhandled/unimplemented cursor operation %u", op); - rc = EINVAL; - break; - } - - return rc; -} - -/** Touch all the pages in the cursor stack. - * Makes sure all the pages are writable, before attempting a write operation. - * @param[in] mc The cursor to operate on. - */ -static int -mdb_cursor_touch(MDB_cursor *mc) -{ - int rc; - - if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) { - MDB_cursor mc2; - MDB_xcursor mcx; - mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx); - rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY); - if (rc) - return rc; - *mc->mc_dbflag |= DB_DIRTY; - } - for (mc->mc_top = 0; mc->mc_top < mc->mc_snum; mc->mc_top++) { - rc = mdb_page_touch(mc); - if (rc) - return rc; - } - mc->mc_top = mc->mc_snum-1; - return MDB_SUCCESS; -} - -int -mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, - unsigned int flags) -{ - MDB_node *leaf = NULL; - MDB_val xdata, *rdata, dkey; - MDB_page *fp; - MDB_db dummy; - int do_sub = 0, insert = 0; - unsigned int mcount = 0, dcount = 0; - size_t nsize; - int rc, rc2; - MDB_pagebuf pbuf; - char dbuf[MDB_MAXKEYSIZE+1]; - unsigned int nflags; - DKBUF; - - /* Check this first so counter will always be zero on any - * early failures. 
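-	 * With MDB_MULTIPLE, data[] holds two elements: data[0] describes
-	 * one fixed-size item and data[1].mv_size carries the item count
-	 * in; on return it holds the number of items actually written.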
- */ - if (flags & MDB_MULTIPLE) { - dcount = data[1].mv_size; - data[1].mv_size = 0; - if (!F_ISSET(mc->mc_db->md_flags, MDB_DUPFIXED)) - return EINVAL; - } - - if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY)) - return EACCES; - - if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE)) - return EINVAL; - - if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE) - return EINVAL; - -#if SIZE_MAX > MAXDATASIZE - if (data->mv_size > MAXDATASIZE) - return EINVAL; -#endif - - DPRINTF("==> put db %u key [%s], size %zu, data size %zu", - mc->mc_dbi, DKEY(key), key ? key->mv_size:0, data->mv_size); - - dkey.mv_size = 0; - - if (flags == MDB_CURRENT) { - if (!(mc->mc_flags & C_INITIALIZED)) - return EINVAL; - rc = MDB_SUCCESS; - } else if (mc->mc_db->md_root == P_INVALID) { - MDB_page *np; - /* new database, write a root leaf page */ - DPUTS("allocating new root leaf page"); - if ((rc = mdb_page_new(mc, P_LEAF, 1, &np))) { - return rc; - } - mc->mc_snum = 0; - mdb_cursor_push(mc, np); - mc->mc_db->md_root = np->mp_pgno; - mc->mc_db->md_depth++; - *mc->mc_dbflag |= DB_DIRTY; - if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) - == MDB_DUPFIXED) - np->mp_flags |= P_LEAF2; - mc->mc_flags |= C_INITIALIZED; - rc = MDB_NOTFOUND; - goto top; - } else { - int exact = 0; - MDB_val d2; - if (flags & MDB_APPEND) { - MDB_val k2; - rc = mdb_cursor_last(mc, &k2, &d2); - if (rc == 0) { - rc = mc->mc_dbx->md_cmp(key, &k2); - if (rc > 0) { - rc = MDB_NOTFOUND; - mc->mc_ki[mc->mc_top]++; - } else { - /* new key is <= last key */ - rc = MDB_KEYEXIST; - } - } - } else { - rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact); - } - if ((flags & MDB_NOOVERWRITE) && rc == 0) { - DPRINTF("duplicate key [%s]", DKEY(key)); - *data = d2; - return MDB_KEYEXIST; - } - if (rc && rc != MDB_NOTFOUND) - return rc; - } - - /* Cursor is positioned, now make sure all pages are writable */ - rc2 = mdb_cursor_touch(mc); - if (rc2) - return rc2; - -top: - /* The key already exists */ - if (rc == MDB_SUCCESS) { - /* there's only a key anyway, so this is a no-op */ - if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - unsigned int ksize = mc->mc_db->md_pad; - if (key->mv_size != ksize) - return EINVAL; - if (flags == MDB_CURRENT) { - char *ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize); - memcpy(ptr, key->mv_data, ksize); - } - return MDB_SUCCESS; - } - - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - - /* DB has dups? */ - if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) { - /* Was a single item before, must convert now */ -more: - if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { - /* Just overwrite the current item */ - if (flags == MDB_CURRENT) - goto current; - - dkey.mv_size = NODEDSZ(leaf); - dkey.mv_data = NODEDATA(leaf); -#if UINT_MAX < SIZE_MAX - if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t)) -#ifdef MISALIGNED_OK - mc->mc_dbx->md_dcmp = mdb_cmp_long; -#else - mc->mc_dbx->md_dcmp = mdb_cmp_cint; -#endif -#endif - /* if data matches, ignore it */ - if (!mc->mc_dbx->md_dcmp(data, &dkey)) - return (flags == MDB_NODUPDATA) ? 
-						MDB_KEYEXIST : MDB_SUCCESS;
-
-				/* create a fake page for the dup items */
-				memcpy(dbuf, dkey.mv_data, dkey.mv_size);
-				dkey.mv_data = dbuf;
-				fp = (MDB_page *)&pbuf;
-				fp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
-				fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
-				fp->mp_lower = PAGEHDRSZ;
-				fp->mp_upper = PAGEHDRSZ + dkey.mv_size + data->mv_size;
-				if (mc->mc_db->md_flags & MDB_DUPFIXED) {
-					fp->mp_flags |= P_LEAF2;
-					fp->mp_pad = data->mv_size;
-					fp->mp_upper += 2 * data->mv_size;	/* leave space for 2 more */
-				} else {
-					fp->mp_upper += 2 * sizeof(indx_t) + 2 * NODESIZE +
-						(dkey.mv_size & 1) + (data->mv_size & 1);
-				}
-				mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
-				do_sub = 1;
-				rdata = &xdata;
-				xdata.mv_size = fp->mp_upper;
-				xdata.mv_data = fp;
-				flags |= F_DUPDATA;
-				goto new_sub;
-			}
-			if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
-				/* See if we need to convert from fake page to subDB */
-				MDB_page *mp;
-				unsigned int offset;
-				unsigned int i;
-				uint16_t fp_flags;
-
-				fp = NODEDATA(leaf);
-				if (flags == MDB_CURRENT) {
-reuse:
-					fp->mp_flags |= P_DIRTY;
-					COPY_PGNO(fp->mp_pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
-					mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
-					flags |= F_DUPDATA;
-					goto put_sub;
-				}
-				if (mc->mc_db->md_flags & MDB_DUPFIXED) {
-					offset = fp->mp_pad;
-					if (SIZELEFT(fp) >= offset)
-						goto reuse;
-					offset *= 4; /* space for 4 more */
-				} else {
-					offset = NODESIZE + sizeof(indx_t) + data->mv_size;
-				}
-				offset += offset & 1;
-				fp_flags = fp->mp_flags;
-				if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + NODEDSZ(leaf) +
-					offset >= mc->mc_txn->mt_env->me_nodemax) {
-					/* yes, convert it */
-					dummy.md_flags = 0;
-					if (mc->mc_db->md_flags & MDB_DUPFIXED) {
-						dummy.md_pad = fp->mp_pad;
-						dummy.md_flags = MDB_DUPFIXED;
-						if (mc->mc_db->md_flags & MDB_INTEGERDUP)
-							dummy.md_flags |= MDB_INTEGERKEY;
-					}
-					dummy.md_depth = 1;
-					dummy.md_branch_pages = 0;
-					dummy.md_leaf_pages = 1;
-					dummy.md_overflow_pages = 0;
-					dummy.md_entries = NUMKEYS(fp);
-					rdata = &xdata;
-					xdata.mv_size = sizeof(MDB_db);
-					xdata.mv_data = &dummy;
-					if ((rc = mdb_page_alloc(mc, 1, &mp)))
-						return rc;
-					offset = mc->mc_txn->mt_env->me_psize - NODEDSZ(leaf);
-					flags |= F_DUPDATA|F_SUBDATA;
-					dummy.md_root = mp->mp_pgno;
-					fp_flags &= ~P_SUBP;
-				} else {
-					/* no, just grow it */
-					rdata = &xdata;
-					xdata.mv_size = NODEDSZ(leaf) + offset;
-					xdata.mv_data = &pbuf;
-					mp = (MDB_page *)&pbuf;
-					mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
-					flags |= F_DUPDATA;
-				}
-				mp->mp_flags = fp_flags | P_DIRTY;
-				mp->mp_pad = fp->mp_pad;
-				mp->mp_lower = fp->mp_lower;
-				mp->mp_upper = fp->mp_upper + offset;
-				if (IS_LEAF2(fp)) {
-					memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
-				} else {
-					nsize = NODEDSZ(leaf) - fp->mp_upper;
-					memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper, nsize);
-					for (i=0; i<NUMKEYS(fp); i++)
-						mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
-				}
-				mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
-				do_sub = 1;
-				goto new_sub;
-			}
-			/* data is on sub-DB, just store it */
-			flags |= F_DUPDATA|F_SUBDATA;
-			goto put_sub;
-		}
-current:
-		/* overflow page overwrites need special handling */
-		if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
-			MDB_page *omp;
-			pgno_t pg;
-			unsigned psize = mc->mc_txn->mt_env->me_psize;
-			int level, ovpages, dpages = OVPAGES(data->mv_size, psize);
-
-			memcpy(&pg, NODEDATA(leaf), sizeof(pg));
-			if ((rc2 = mdb_page_get(mc->mc_txn, pg, &omp, &level)) != 0)
-				return rc2;
-			ovpages = omp->mp_pages;
-
-			/* Is the ov page writable and large enough?
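-			 * (P_DIRTY means some txn in this lineage already owns
-			 * the page; the level check below handles pages that are
-			 * only writable in a parent txn.)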
*/ - if ((omp->mp_flags & P_DIRTY) && ovpages >= dpages) { - /* yes, overwrite it. Note in this case we don't - * bother to try shrinking the page if the new data - * is smaller than the overflow threshold. - */ - if (level > 1) { - /* It is writable only in a parent txn */ - size_t sz = (size_t) psize * ovpages, off; - MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages); - MDB_ID2 id2; - if (!np) - return ENOMEM; - id2.mid = pg; - id2.mptr = np; - mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2); - if (!(flags & MDB_RESERVE)) { - /* Copy end of page, adjusting alignment so - * compiler may copy words instead of bytes. - */ - off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t); - memcpy((size_t *)((char *)np + off), - (size_t *)((char *)omp + off), sz - off); - sz = PAGEHDRSZ; - } - memcpy(np, omp, sz); /* Copy beginning of page */ - omp = np; - } - SETDSZ(leaf, data->mv_size); - if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = METADATA(omp); - else - memcpy(METADATA(omp), data->mv_data, data->mv_size); - goto done; - } else { - if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS) - return rc2; - } - } else if (NODEDSZ(leaf) == data->mv_size) { - /* same size, just replace it. Note that we could - * also reuse this node if the new data is smaller, - * but instead we opt to shrink the node in that case. - */ - if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = NODEDATA(leaf); - else if (data->mv_size) - memcpy(NODEDATA(leaf), data->mv_data, data->mv_size); - else - memcpy(NODEKEY(leaf), key->mv_data, key->mv_size); - goto done; - } - mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0); - mc->mc_db->md_entries--; - } else { - DPRINTF("inserting key at index %i", mc->mc_ki[mc->mc_top]); - insert = 1; - } - - rdata = data; - -new_sub: - nflags = flags & NODE_ADD_FLAGS; - nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(mc->mc_txn->mt_env, key, rdata); - if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) { - if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA ) - nflags &= ~MDB_APPEND; - if (!insert) - nflags |= MDB_SPLIT_REPLACE; - rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags); - } else { - /* There is room already in this leaf page. */ - rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags); - if (rc == 0 && !do_sub && insert) { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2, *m3; - MDB_dbi dbi = mc->mc_dbi; - unsigned i = mc->mc_top; - MDB_page *mp = mc->mc_pg[i]; - - if (mc->mc_flags & C_SUB) - dbi--; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (mc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; - if (m3->mc_pg[i] == mp && m3->mc_ki[i] >= mc->mc_ki[i]) { - m3->mc_ki[i]++; - } - } - } - } - - if (rc != MDB_SUCCESS) - mc->mc_txn->mt_flags |= MDB_TXN_ERROR; - else { - /* Now store the actual data in the child DB. Note that we're - * storing the user data in the keys field, so there are strict - * size limits on dupdata. The actual data fields of the child - * DB are all zero size. - */ - if (do_sub) { - int xflags; -put_sub: - xdata.mv_size = 0; - xdata.mv_data = ""; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (flags & MDB_CURRENT) { - xflags = MDB_CURRENT; - } else { - mdb_xcursor_init1(mc, leaf); - xflags = (flags & MDB_NODUPDATA) ? 
MDB_NOOVERWRITE : 0; - } - /* converted, write the original data first */ - if (dkey.mv_size) { - rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags); - if (rc) - return rc; - { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2; - unsigned i = mc->mc_top; - MDB_page *mp = mc->mc_pg[i]; - - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; - if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) { - mdb_xcursor_init1(m2, leaf); - } - } - } - /* we've done our job */ - dkey.mv_size = 0; - } - if (flags & MDB_APPENDDUP) - xflags |= MDB_APPEND; - rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); - if (flags & F_SUBDATA) { - void *db = NODEDATA(leaf); - memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); - } - } - /* sub-writes might have failed so check rc again. - * Don't increment count if we just replaced an existing item. - */ - if (!rc && !(flags & MDB_CURRENT)) - mc->mc_db->md_entries++; - if (flags & MDB_MULTIPLE) { - if (!rc) { - mcount++; - if (mcount < dcount) { - data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size; - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - goto more; - } - } - /* let caller know how many succeeded, if any */ - data[1].mv_size = mcount; - } - } -done: - /* If we succeeded and the key didn't exist before, make sure - * the cursor is marked valid. - */ - if (!rc && insert) - mc->mc_flags |= C_INITIALIZED; - return rc; -} - -int -mdb_cursor_del(MDB_cursor *mc, unsigned int flags) -{ - MDB_node *leaf; - int rc; - - if (F_ISSET(mc->mc_txn->mt_flags, MDB_TXN_RDONLY)) - return EACCES; - - if (!(mc->mc_flags & C_INITIALIZED)) - return EINVAL; - - rc = mdb_cursor_touch(mc); - if (rc) - return rc; - - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - - if (!IS_LEAF2(mc->mc_pg[mc->mc_top]) && F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (flags != MDB_NODUPDATA) { - if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) { - mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } - rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, 0); - /* If sub-DB still has entries, we're done */ - if (mc->mc_xcursor->mx_db.md_entries) { - if (leaf->mn_flags & F_SUBDATA) { - /* update subDB info */ - void *db = NODEDATA(leaf); - memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db)); - } else { - MDB_cursor *m2; - /* shrink fake page */ - mdb_node_shrink(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - /* fix other sub-DB cursors pointed at this fake page */ - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { - if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; - if (m2->mc_pg[mc->mc_top] == mc->mc_pg[mc->mc_top] && - m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf); - } - } - mc->mc_db->md_entries--; - return rc; - } - /* otherwise fall thru and delete the sub-DB */ - } - - if (leaf->mn_flags & F_SUBDATA) { - /* add all the child DB's pages to the free list */ - rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0); - if (rc == MDB_SUCCESS) { - mc->mc_db->md_entries -= - mc->mc_xcursor->mx_db.md_entries; - } - } - } - - return mdb_cursor_del0(mc, leaf); -} - -/** Allocate and initialize new pages for a database. - * @param[in] mc a cursor on the database being added to. - * @param[in] flags flags defining what type of page is being allocated. 
- * @param[in] num the number of pages to allocate. This is usually 1, - * unless allocating overflow pages for a large record. - * @param[out] mp Address of a page, or NULL on failure. - * @return 0 on success, non-zero on failure. - */ -static int -mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) -{ - MDB_page *np; - int rc; - - if ((rc = mdb_page_alloc(mc, num, &np))) - return rc; - DPRINTF("allocated new mpage %zu, page size %u", - np->mp_pgno, mc->mc_txn->mt_env->me_psize); - np->mp_flags = flags | P_DIRTY; - np->mp_lower = PAGEHDRSZ; - np->mp_upper = mc->mc_txn->mt_env->me_psize; - - if (IS_BRANCH(np)) - mc->mc_db->md_branch_pages++; - else if (IS_LEAF(np)) - mc->mc_db->md_leaf_pages++; - else if (IS_OVERFLOW(np)) { - mc->mc_db->md_overflow_pages += num; - np->mp_pages = num; - } - *mp = np; - - return 0; -} - -/** Calculate the size of a leaf node. - * The size depends on the environment's page size; if a data item - * is too large it will be put onto an overflow page and the node - * size will only include the key and not the data. Sizes are always - * rounded up to an even number of bytes, to guarantee 2-byte alignment - * of the #MDB_node headers. - * @param[in] env The environment handle. - * @param[in] key The key for the node. - * @param[in] data The data for the node. - * @return The number of bytes needed to store the node. - */ -static size_t -mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data) -{ - size_t sz; - - sz = LEAFSIZE(key, data); - if (sz >= env->me_nodemax) { - /* put on overflow page */ - sz -= data->mv_size - sizeof(pgno_t); - } - sz += sz & 1; - - return sz + sizeof(indx_t); -} - -/** Calculate the size of a branch node. - * The size should depend on the environment's page size but since - * we currently don't support spilling large keys onto overflow - * pages, it's simply the size of the #MDB_node header plus the - * size of the key. Sizes are always rounded up to an even number - * of bytes, to guarantee 2-byte alignment of the #MDB_node headers. - * @param[in] env The environment handle. - * @param[in] key The key for the node. - * @return The number of bytes needed to store the node. - */ -static size_t -mdb_branch_size(MDB_env *env, MDB_val *key) -{ - size_t sz; - - sz = INDXSIZE(key); - if (sz >= env->me_nodemax) { - /* put on overflow page */ - /* not implemented */ - /* sz -= key->size - sizeof(pgno_t); */ - } - - return sz + sizeof(indx_t); -} - -/** Add a node to the page pointed to by the cursor. - * @param[in] mc The cursor for this operation. - * @param[in] indx The index on the page where the new node should be added. - * @param[in] key The key for the new node. - * @param[in] data The data for the new node, if any. - * @param[in] pgno The page number, if adding a branch node. - * @param[in] flags Flags for the node. - * @return 0 on success, non-zero on failure. Possible errors are: - *
- * <ul>
- *	<li>ENOMEM - failed to allocate overflow pages for the node.
- *	<li>MDB_PAGE_FULL - there is insufficient room in the page. This error
- *	should never happen since all callers already calculate the
- *	page's free space before calling this function.
- * </ul>
- */ -static int -mdb_node_add(MDB_cursor *mc, indx_t indx, - MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags) -{ - unsigned int i; - size_t node_size = NODESIZE; - indx_t ofs; - MDB_node *node; - MDB_page *mp = mc->mc_pg[mc->mc_top]; - MDB_page *ofp = NULL; /* overflow page */ - DKBUF; - - assert(mp->mp_upper >= mp->mp_lower); - - DPRINTF("add to %s %spage %zu index %i, data size %zu key size %zu [%s]", - IS_LEAF(mp) ? "leaf" : "branch", - IS_SUBP(mp) ? "sub-" : "", - mp->mp_pgno, indx, data ? data->mv_size : 0, - key ? key->mv_size : 0, key ? DKEY(key) : NULL); - - if (IS_LEAF2(mp)) { - /* Move higher keys up one slot. */ - int ksize = mc->mc_db->md_pad, dif; - char *ptr = LEAF2KEY(mp, indx, ksize); - dif = NUMKEYS(mp) - indx; - if (dif > 0) - memmove(ptr+ksize, ptr, dif*ksize); - /* insert new key */ - memcpy(ptr, key->mv_data, ksize); - - /* Just using these for counting */ - mp->mp_lower += sizeof(indx_t); - mp->mp_upper -= ksize - sizeof(indx_t); - return MDB_SUCCESS; - } - - if (key != NULL) - node_size += key->mv_size; - - if (IS_LEAF(mp)) { - assert(data); - if (F_ISSET(flags, F_BIGDATA)) { - /* Data already on overflow page. */ - node_size += sizeof(pgno_t); - } else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) { - int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); - int rc; - /* Put data on overflow page. */ - DPRINTF("data size is %zu, node would be %zu, put data on overflow page", - data->mv_size, node_size+data->mv_size); - node_size += sizeof(pgno_t); - if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp))) - return rc; - DPRINTF("allocated overflow page %zu", ofp->mp_pgno); - flags |= F_BIGDATA; - } else { - node_size += data->mv_size; - } - } - node_size += node_size & 1; - - if (node_size + sizeof(indx_t) > SIZELEFT(mp)) { - DPRINTF("not enough room in page %zu, got %u ptrs", - mp->mp_pgno, NUMKEYS(mp)); - DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower, - mp->mp_upper - mp->mp_lower); - DPRINTF("node size = %zu", node_size); - return MDB_PAGE_FULL; - } - - /* Move higher pointers up one slot. */ - for (i = NUMKEYS(mp); i > indx; i--) - mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; - - /* Adjust free space offsets. */ - ofs = mp->mp_upper - node_size; - assert(ofs >= mp->mp_lower + sizeof(indx_t)); - mp->mp_ptrs[indx] = ofs; - mp->mp_upper = ofs; - mp->mp_lower += sizeof(indx_t); - - /* Write the node data. */ - node = NODEPTR(mp, indx); - node->mn_ksize = (key == NULL) ? 0 : key->mv_size; - node->mn_flags = flags; - if (IS_LEAF(mp)) - SETDSZ(node,data->mv_size); - else - SETPGNO(node,pgno); - - if (key) - memcpy(NODEKEY(node), key->mv_data, key->mv_size); - - if (IS_LEAF(mp)) { - assert(key); - if (ofp == NULL) { - if (F_ISSET(flags, F_BIGDATA)) - memcpy(node->mn_data + key->mv_size, data->mv_data, - sizeof(pgno_t)); - else if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = node->mn_data + key->mv_size; - else - memcpy(node->mn_data + key->mv_size, data->mv_data, - data->mv_size); - } else { - memcpy(node->mn_data + key->mv_size, &ofp->mp_pgno, - sizeof(pgno_t)); - if (F_ISSET(flags, MDB_RESERVE)) - data->mv_data = METADATA(ofp); - else - memcpy(METADATA(ofp), data->mv_data, data->mv_size); - } - } - - return MDB_SUCCESS; -} - -/** Delete the specified node from a page. - * @param[in] mp The page to operate on. - * @param[in] indx The index of the node to delete. - * @param[in] ksize The size of a node. Only used if the page is - * part of a #MDB_DUPFIXED database. 
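- * LEAF2 pages hold bare fixed-size keys with no node headers, which
- * is why the key size must be supplied explicitly here.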
- */ -static void -mdb_node_del(MDB_page *mp, indx_t indx, int ksize) -{ - unsigned int sz; - indx_t i, j, numkeys, ptr; - MDB_node *node; - char *base; - -#if MDB_DEBUG - { - pgno_t pgno; - COPY_PGNO(pgno, mp->mp_pgno); - DPRINTF("delete node %u on %s page %zu", indx, - IS_LEAF(mp) ? "leaf" : "branch", pgno); - } -#endif - assert(indx < NUMKEYS(mp)); - - if (IS_LEAF2(mp)) { - int x = NUMKEYS(mp) - 1 - indx; - base = LEAF2KEY(mp, indx, ksize); - if (x) - memmove(base, base + ksize, x * ksize); - mp->mp_lower -= sizeof(indx_t); - mp->mp_upper += ksize - sizeof(indx_t); - return; - } - - node = NODEPTR(mp, indx); - sz = NODESIZE + node->mn_ksize; - if (IS_LEAF(mp)) { - if (F_ISSET(node->mn_flags, F_BIGDATA)) - sz += sizeof(pgno_t); - else - sz += NODEDSZ(node); - } - sz += sz & 1; - - ptr = mp->mp_ptrs[indx]; - numkeys = NUMKEYS(mp); - for (i = j = 0; i < numkeys; i++) { - if (i != indx) { - mp->mp_ptrs[j] = mp->mp_ptrs[i]; - if (mp->mp_ptrs[i] < ptr) - mp->mp_ptrs[j] += sz; - j++; - } - } - - base = (char *)mp + mp->mp_upper; - memmove(base + sz, base, ptr - mp->mp_upper); - - mp->mp_lower -= sizeof(indx_t); - mp->mp_upper += sz; -} - -/** Compact the main page after deleting a node on a subpage. - * @param[in] mp The main page to operate on. - * @param[in] indx The index of the subpage on the main page. - */ -static void -mdb_node_shrink(MDB_page *mp, indx_t indx) -{ - MDB_node *node; - MDB_page *sp, *xp; - char *base; - int osize, nsize; - int delta; - indx_t i, numkeys, ptr; - - node = NODEPTR(mp, indx); - sp = (MDB_page *)NODEDATA(node); - osize = NODEDSZ(node); - - delta = sp->mp_upper - sp->mp_lower; - SETDSZ(node, osize - delta); - xp = (MDB_page *)((char *)sp + delta); - - /* shift subpage upward */ - if (IS_LEAF2(sp)) { - nsize = NUMKEYS(sp) * sp->mp_pad; - memmove(METADATA(xp), METADATA(sp), nsize); - } else { - int i; - nsize = osize - sp->mp_upper; - numkeys = NUMKEYS(sp); - for (i=numkeys-1; i>=0; i--) - xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta; - } - xp->mp_upper = sp->mp_lower; - xp->mp_lower = sp->mp_lower; - xp->mp_flags = sp->mp_flags; - xp->mp_pad = sp->mp_pad; - COPY_PGNO(xp->mp_pgno, mp->mp_pgno); - - /* shift lower nodes upward */ - ptr = mp->mp_ptrs[indx]; - numkeys = NUMKEYS(mp); - for (i = 0; i < numkeys; i++) { - if (mp->mp_ptrs[i] <= ptr) - mp->mp_ptrs[i] += delta; - } - - base = (char *)mp + mp->mp_upper; - memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node)); - mp->mp_upper += delta; -} - -/** Initial setup of a sorted-dups cursor. - * Sorted duplicates are implemented as a sub-database for the given key. - * The duplicate data items are actually keys of the sub-database. - * Operations on the duplicate data items are performed using a sub-cursor - * initialized when the sub-database is first accessed. This function does - * the preliminary setup of the sub-cursor, filling in the fields that - * depend only on the parent DB. - * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. 
- */ -static void -mdb_xcursor_init0(MDB_cursor *mc) -{ - MDB_xcursor *mx = mc->mc_xcursor; - - mx->mx_cursor.mc_xcursor = NULL; - mx->mx_cursor.mc_txn = mc->mc_txn; - mx->mx_cursor.mc_db = &mx->mx_db; - mx->mx_cursor.mc_dbx = &mx->mx_dbx; - mx->mx_cursor.mc_dbi = mc->mc_dbi+1; - mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB; - mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; - mx->mx_dbx.md_dcmp = NULL; - mx->mx_dbx.md_rel = mc->mc_dbx->md_rel; -} - -/** Final setup of a sorted-dups cursor. - * Sets up the fields that depend on the data from the main cursor. - * @param[in] mc The main cursor whose sorted-dups cursor is to be initialized. - * @param[in] node The data containing the #MDB_db record for the - * sorted-dup database. - */ -static void -mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node) -{ - MDB_xcursor *mx = mc->mc_xcursor; - - if (node->mn_flags & F_SUBDATA) { - memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db)); - mx->mx_cursor.mc_pg[0] = 0; - mx->mx_cursor.mc_snum = 0; - mx->mx_cursor.mc_flags = C_SUB; - } else { - MDB_page *fp = NODEDATA(node); - mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad; - mx->mx_db.md_flags = 0; - mx->mx_db.md_depth = 1; - mx->mx_db.md_branch_pages = 0; - mx->mx_db.md_leaf_pages = 1; - mx->mx_db.md_overflow_pages = 0; - mx->mx_db.md_entries = NUMKEYS(fp); - COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno); - mx->mx_cursor.mc_snum = 1; - mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB; - mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_pg[0] = fp; - mx->mx_cursor.mc_ki[0] = 0; - if (mc->mc_db->md_flags & MDB_DUPFIXED) { - mx->mx_db.md_flags = MDB_DUPFIXED; - mx->mx_db.md_pad = fp->mp_pad; - if (mc->mc_db->md_flags & MDB_INTEGERDUP) - mx->mx_db.md_flags |= MDB_INTEGERKEY; - } - } - DPRINTF("Sub-db %u for db %u root page %zu", mx->mx_cursor.mc_dbi, mc->mc_dbi, - mx->mx_db.md_root); - mx->mx_dbflag = DB_VALID | (F_ISSET(mc->mc_pg[mc->mc_top]->mp_flags, P_DIRTY) ? - DB_DIRTY : 0); - mx->mx_dbx.md_name.mv_data = NODEKEY(node); - mx->mx_dbx.md_name.mv_size = node->mn_ksize; -#if UINT_MAX < SIZE_MAX - if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) -#ifdef MISALIGNED_OK - mx->mx_dbx.md_cmp = mdb_cmp_long; -#else - mx->mx_dbx.md_cmp = mdb_cmp_cint; -#endif -#endif -} - -/** Initialize a cursor for a given transaction and database. 
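- * Callers inside this file pair a stack-allocated MDB_cursor with an
- * MDB_xcursor; sketch of the pattern used by mdb_get() above:
- *
- *	MDB_cursor mc;
- *	MDB_xcursor mx;
- *	mdb_cursor_init(&mc, txn, dbi, &mx);
- *
- * External callers use mdb_cursor_open() below, which allocates both
- * structures in a single malloc.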
*/ -static void -mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) -{ - mc->mc_backup = NULL; - mc->mc_dbi = dbi; - mc->mc_txn = txn; - mc->mc_db = &txn->mt_dbs[dbi]; - mc->mc_dbx = &txn->mt_dbxs[dbi]; - mc->mc_dbflag = &txn->mt_dbflags[dbi]; - mc->mc_snum = 0; - mc->mc_top = 0; - mc->mc_pg[0] = 0; - mc->mc_flags = 0; - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { - assert(mx != NULL); - mc->mc_xcursor = mx; - mdb_xcursor_init0(mc); - } else { - mc->mc_xcursor = NULL; - } - if (*mc->mc_dbflag & DB_STALE) { - mdb_page_search(mc, NULL, MDB_PS_ROOTONLY); - } -} - -int -mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) -{ - MDB_cursor *mc; - size_t size = sizeof(MDB_cursor); - - if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) - return EINVAL; - - /* Allow read access to the freelist */ - if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) - return EINVAL; - - if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) - size += sizeof(MDB_xcursor); - - if ((mc = malloc(size)) != NULL) { - mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1)); - if (txn->mt_cursors) { - mc->mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = mc; - mc->mc_flags |= C_UNTRACK; - } - } else { - return ENOMEM; - } - - *ret = mc; - - return MDB_SUCCESS; -} - -int -mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc) -{ - if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs) - return EINVAL; - - if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors) - return EINVAL; - - mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); - return MDB_SUCCESS; -} - -/* Return the count of duplicate data items for the current key */ -int -mdb_cursor_count(MDB_cursor *mc, size_t *countp) -{ - MDB_node *leaf; - - if (mc == NULL || countp == NULL) - return EINVAL; - - if (!(mc->mc_db->md_flags & MDB_DUPSORT)) - return EINVAL; - - leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { - *countp = 1; - } else { - if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - return EINVAL; - - *countp = mc->mc_xcursor->mx_db.md_entries; - } - return MDB_SUCCESS; -} - -void -mdb_cursor_close(MDB_cursor *mc) -{ - if (mc && !mc->mc_backup) { - /* remove from txn, if tracked */ - if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) { - MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) prev = &(*prev)->mc_next; - if (*prev == mc) - *prev = mc->mc_next; - } - free(mc); - } -} - -MDB_txn * -mdb_cursor_txn(MDB_cursor *mc) -{ - if (!mc) return NULL; - return mc->mc_txn; -} - -MDB_dbi -mdb_cursor_dbi(MDB_cursor *mc) -{ - assert(mc != NULL); - return mc->mc_dbi; -} - -/** Replace the key for a node with a new key. - * @param[in] mc Cursor pointing to the node to operate on. - * @param[in] key The new key to use. - * @return 0 on success, non-zero on failure. 
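- * Only the key changes; the node's data and flags stay in place, with
- * the page contents shifted by the (2-byte aligned) size delta.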
- */ -static int -mdb_update_key(MDB_cursor *mc, MDB_val *key) -{ - MDB_page *mp; - MDB_node *node; - char *base; - size_t len; - int delta, delta0; - indx_t ptr, i, numkeys, indx; - DKBUF; - - indx = mc->mc_ki[mc->mc_top]; - mp = mc->mc_pg[mc->mc_top]; - node = NODEPTR(mp, indx); - ptr = mp->mp_ptrs[indx]; -#if MDB_DEBUG - { - MDB_val k2; - char kbuf2[(MDB_MAXKEYSIZE*2+1)]; - k2.mv_data = NODEKEY(node); - k2.mv_size = node->mn_ksize; - DPRINTF("update key %u (ofs %u) [%s] to [%s] on page %zu", - indx, ptr, - mdb_dkey(&k2, kbuf2), - DKEY(key), - mp->mp_pgno); - } -#endif - - delta0 = delta = key->mv_size - node->mn_ksize; - - /* Must be 2-byte aligned. If new key is - * shorter by 1, the shift will be skipped. - */ - delta += (delta & 1); - if (delta) { - if (delta > 0 && SIZELEFT(mp) < delta) { - pgno_t pgno; - /* not enough space left, do a delete and split */ - DPRINTF("Not enough room, delta = %d, splitting...", delta); - pgno = NODEPGNO(node); - mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0); - return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE); - } - - numkeys = NUMKEYS(mp); - for (i = 0; i < numkeys; i++) { - if (mp->mp_ptrs[i] <= ptr) - mp->mp_ptrs[i] -= delta; - } - - base = (char *)mp + mp->mp_upper; - len = ptr - mp->mp_upper + NODESIZE; - memmove(base - delta, base, len); - mp->mp_upper -= delta; - - node = NODEPTR(mp, indx); - } - - /* But even if no shift was needed, update ksize */ - if (delta0) - node->mn_ksize = key->mv_size; - - if (key->mv_size) - memcpy(NODEKEY(node), key->mv_data, key->mv_size); - - return MDB_SUCCESS; -} - -static void -mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst); - -/** Move a node from csrc to cdst. - */ -static int -mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst) -{ - MDB_node *srcnode; - MDB_val key, data; - pgno_t srcpg; - MDB_cursor mn; - int rc; - unsigned short flags; - - DKBUF; - - /* Mark src and dst as dirty. 
*/ - if ((rc = mdb_page_touch(csrc)) || - (rc = mdb_page_touch(cdst))) - return rc; - - if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */ - key.mv_size = csrc->mc_db->md_pad; - key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); - data.mv_size = 0; - data.mv_data = NULL; - srcpg = 0; - flags = 0; - } else { - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]); - assert(!((long)srcnode&1)); - srcpg = NODEPGNO(srcnode); - flags = srcnode->mn_flags; - if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { - unsigned int snum = csrc->mc_snum; - MDB_node *s2; - /* must find the lowest key below src */ - mdb_page_search_lowest(csrc); - if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - key.mv_size = csrc->mc_db->md_pad; - key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); - } else { - s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); - key.mv_size = NODEKSZ(s2); - key.mv_data = NODEKEY(s2); - } - csrc->mc_snum = snum--; - csrc->mc_top = snum; - } else { - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); - } - data.mv_size = NODEDSZ(srcnode); - data.mv_data = NODEDATA(srcnode); - } - if (IS_BRANCH(cdst->mc_pg[cdst->mc_top]) && cdst->mc_ki[cdst->mc_top] == 0) { - unsigned int snum = cdst->mc_snum; - MDB_node *s2; - MDB_val bkey; - /* must find the lowest key below dst */ - mdb_page_search_lowest(cdst); - if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) { - bkey.mv_size = cdst->mc_db->md_pad; - bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size); - } else { - s2 = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); - bkey.mv_size = NODEKSZ(s2); - bkey.mv_data = NODEKEY(s2); - } - cdst->mc_snum = snum--; - cdst->mc_top = snum; - mdb_cursor_copy(cdst, &mn); - mn.mc_ki[snum] = 0; - rc = mdb_update_key(&mn, &bkey); - if (rc) - return rc; - } - - DPRINTF("moving %s node %u [%s] on page %zu to node %u on page %zu", - IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", - csrc->mc_ki[csrc->mc_top], - DKEY(&key), - csrc->mc_pg[csrc->mc_top]->mp_pgno, - cdst->mc_ki[cdst->mc_top], cdst->mc_pg[cdst->mc_top]->mp_pgno); - - /* Add the node to the destination page. - */ - rc = mdb_node_add(cdst, cdst->mc_ki[cdst->mc_top], &key, &data, srcpg, flags); - if (rc != MDB_SUCCESS) - return rc; - - /* Delete the node from the source page. - */ - mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size); - - { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2, *m3; - MDB_dbi dbi = csrc->mc_dbi; - MDB_page *mp = csrc->mc_pg[csrc->mc_top]; - - if (csrc->mc_flags & C_SUB) - dbi--; - - for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (csrc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3 == csrc) continue; - if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] == - csrc->mc_ki[csrc->mc_top]) { - m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top]; - m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; - } - } - } - - /* Update the parent separators. 
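-	 * Moving index 0 changes the lowest key on a page, so the
-	 * separator key stored in the parent must be refreshed on
-	 * whichever page gained or lost its first node.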
- */ - if (csrc->mc_ki[csrc->mc_top] == 0) { - if (csrc->mc_ki[csrc->mc_top-1] != 0) { - if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size); - } else { - srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); - } - DPRINTF("update separator for source page %zu to [%s]", - csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)); - mdb_cursor_copy(csrc, &mn); - mn.mc_snum--; - mn.mc_top--; - if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) - return rc; - } - if (IS_BRANCH(csrc->mc_pg[csrc->mc_top])) { - MDB_val nullkey; - indx_t ix = csrc->mc_ki[csrc->mc_top]; - nullkey.mv_size = 0; - csrc->mc_ki[csrc->mc_top] = 0; - rc = mdb_update_key(csrc, &nullkey); - csrc->mc_ki[csrc->mc_top] = ix; - assert(rc == MDB_SUCCESS); - } - } - - if (cdst->mc_ki[cdst->mc_top] == 0) { - if (cdst->mc_ki[cdst->mc_top-1] != 0) { - if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) { - key.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, key.mv_size); - } else { - srcnode = NODEPTR(cdst->mc_pg[cdst->mc_top], 0); - key.mv_size = NODEKSZ(srcnode); - key.mv_data = NODEKEY(srcnode); - } - DPRINTF("update separator for destination page %zu to [%s]", - cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)); - mdb_cursor_copy(cdst, &mn); - mn.mc_snum--; - mn.mc_top--; - if ((rc = mdb_update_key(&mn, &key)) != MDB_SUCCESS) - return rc; - } - if (IS_BRANCH(cdst->mc_pg[cdst->mc_top])) { - MDB_val nullkey; - indx_t ix = cdst->mc_ki[cdst->mc_top]; - nullkey.mv_size = 0; - cdst->mc_ki[cdst->mc_top] = 0; - rc = mdb_update_key(cdst, &nullkey); - cdst->mc_ki[cdst->mc_top] = ix; - assert(rc == MDB_SUCCESS); - } - } - - return MDB_SUCCESS; -} - -/** Merge one page into another. - * The nodes from the page pointed to by \b csrc will - * be copied to the page pointed to by \b cdst and then - * the \b csrc page will be freed. - * @param[in] csrc Cursor pointing to the source page. - * @param[in] cdst Cursor pointing to the destination page. - */ -static int -mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) -{ - int rc; - indx_t i, j; - MDB_node *srcnode; - MDB_val key, data; - unsigned nkeys; - - DPRINTF("merging page %zu into %zu", csrc->mc_pg[csrc->mc_top]->mp_pgno, - cdst->mc_pg[cdst->mc_top]->mp_pgno); - - assert(csrc->mc_snum > 1); /* can't merge root page */ - assert(cdst->mc_snum > 1); - - /* Mark dst as dirty. */ - if ((rc = mdb_page_touch(cdst))) - return rc; - - /* Move all nodes from src to dst. 
- */
-	j = nkeys = NUMKEYS(cdst->mc_pg[cdst->mc_top]);
-	if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
-		key.mv_size = csrc->mc_db->md_pad;
-		key.mv_data = METADATA(csrc->mc_pg[csrc->mc_top]);
-		for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
-			rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
-			if (rc != MDB_SUCCESS)
-				return rc;
-			key.mv_data = (char *)key.mv_data + key.mv_size;
-		}
-	} else {
-		for (i = 0; i < NUMKEYS(csrc->mc_pg[csrc->mc_top]); i++, j++) {
-			srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], i);
-			if (i == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
-				unsigned int snum = csrc->mc_snum;
-				MDB_node *s2;
-				/* must find the lowest key below src */
-				mdb_page_search_lowest(csrc);
-				if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
-					key.mv_size = csrc->mc_db->md_pad;
-					key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
-				} else {
-					s2 = NODEPTR(csrc->mc_pg[csrc->mc_top], 0);
-					key.mv_size = NODEKSZ(s2);
-					key.mv_data = NODEKEY(s2);
-				}
-				csrc->mc_snum = snum--;
-				csrc->mc_top = snum;
-			} else {
-				key.mv_size = srcnode->mn_ksize;
-				key.mv_data = NODEKEY(srcnode);
-			}
-
-			data.mv_size = NODEDSZ(srcnode);
-			data.mv_data = NODEDATA(srcnode);
-			rc = mdb_node_add(cdst, j, &key, &data, NODEPGNO(srcnode), srcnode->mn_flags);
-			if (rc != MDB_SUCCESS)
-				return rc;
-		}
-	}
-
-	DPRINTF("dst page %zu now has %u keys (%.1f%% filled)",
-	    cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10);
-
-	/* Unlink the src page from parent and add to free list.
-	 */
-	mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
-	if (csrc->mc_ki[csrc->mc_top-1] == 0) {
-		key.mv_size = 0;
-		csrc->mc_top--;
-		rc = mdb_update_key(csrc, &key);
-		csrc->mc_top++;
-		if (rc)
-			return rc;
-	}
-
-	rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
-		csrc->mc_pg[csrc->mc_top]->mp_pgno);
-	if (rc)
-		return rc;
-	if (IS_LEAF(csrc->mc_pg[csrc->mc_top]))
-		csrc->mc_db->md_leaf_pages--;
-	else
-		csrc->mc_db->md_branch_pages--;
-	{
-		/* Adjust other cursors pointing to mp */
-		MDB_cursor *m2, *m3;
-		MDB_dbi dbi = csrc->mc_dbi;
-		MDB_page *mp = cdst->mc_pg[cdst->mc_top];
-
-		if (csrc->mc_flags & C_SUB)
-			dbi--;
-
-		for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
-			if (csrc->mc_flags & C_SUB)
-				m3 = &m2->mc_xcursor->mx_cursor;
-			else
-				m3 = m2;
-			if (m3 == csrc) continue;
-			if (m3->mc_snum < csrc->mc_snum) continue;
-			if (m3->mc_pg[csrc->mc_top] == csrc->mc_pg[csrc->mc_top]) {
-				m3->mc_pg[csrc->mc_top] = mp;
-				m3->mc_ki[csrc->mc_top] += nkeys;
-			}
-		}
-	}
-	mdb_cursor_pop(csrc);
-
-	return mdb_rebalance(csrc);
-}
-
-/** Copy the contents of a cursor.
- * @param[in] csrc The cursor to copy from.
- * @param[out] cdst The cursor to copy to.
- */
-static void
-mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst)
-{
-	unsigned int i;
-
-	cdst->mc_txn = csrc->mc_txn;
-	cdst->mc_dbi = csrc->mc_dbi;
-	cdst->mc_db = csrc->mc_db;
-	cdst->mc_dbx = csrc->mc_dbx;
-	cdst->mc_snum = csrc->mc_snum;
-	cdst->mc_top = csrc->mc_top;
-	cdst->mc_flags = csrc->mc_flags;
-
-	for (i=0; i<csrc->mc_snum; i++) {
-		cdst->mc_pg[i] = csrc->mc_pg[i];
-		cdst->mc_ki[i] = csrc->mc_ki[i];
-	}
-}
-
-/** Rebalance the tree after a delete operation.
- * @param[in] mc Cursor pointing to the page where rebalancing
- * should begin.
- * @return 0 on success, non-zero on failure.
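- * Pages at or above FILL_THRESHOLD that keep the minimum key count are
- * left alone; otherwise a key is pulled from a richer sibling, or the
- * page is merged into a sibling and the parent rebalanced in turn.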
- */ -static int -mdb_rebalance(MDB_cursor *mc) -{ - MDB_node *node; - int rc; - unsigned int ptop, minkeys; - MDB_cursor mn; - - minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top])); -#if MDB_DEBUG - { - pgno_t pgno; - COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno); - DPRINTF("rebalancing %s page %zu (has %u keys, %.1f%% full)", - IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", - pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10); - } -#endif - - if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD && - NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) { -#if MDB_DEBUG - pgno_t pgno; - COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno); - DPRINTF("no need to rebalance page %zu, above fill threshold", - pgno); -#endif - return MDB_SUCCESS; - } - - if (mc->mc_snum < 2) { - MDB_page *mp = mc->mc_pg[0]; - if (IS_SUBP(mp)) { - DPUTS("Can't rebalance a subpage, ignoring"); - return MDB_SUCCESS; - } - if (NUMKEYS(mp) == 0) { - DPUTS("tree is completely empty"); - mc->mc_db->md_root = P_INVALID; - mc->mc_db->md_depth = 0; - mc->mc_db->md_leaf_pages = 0; - rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); - if (rc) - return rc; - /* Adjust cursors pointing to mp */ - mc->mc_snum = 0; - mc->mc_top = 0; - { - MDB_cursor *m2, *m3; - MDB_dbi dbi = mc->mc_dbi; - - if (mc->mc_flags & C_SUB) - dbi--; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (mc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3->mc_snum < mc->mc_snum) continue; - if (m3->mc_pg[0] == mp) { - m3->mc_snum = 0; - m3->mc_top = 0; - } - } - } - } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) { - DPUTS("collapsing root page!"); - rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno); - if (rc) - return rc; - mc->mc_db->md_root = NODEPGNO(NODEPTR(mp, 0)); - rc = mdb_page_get(mc->mc_txn,mc->mc_db->md_root,&mc->mc_pg[0],NULL); - if (rc) - return rc; - mc->mc_db->md_depth--; - mc->mc_db->md_branch_pages--; - mc->mc_ki[0] = mc->mc_ki[1]; - { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2, *m3; - MDB_dbi dbi = mc->mc_dbi; - - if (mc->mc_flags & C_SUB) - dbi--; - - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (mc->mc_flags & C_SUB) - m3 = &m2->mc_xcursor->mx_cursor; - else - m3 = m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum) continue; - if (m3->mc_pg[0] == mp) { - m3->mc_pg[0] = mc->mc_pg[0]; - m3->mc_snum = 1; - m3->mc_top = 0; - m3->mc_ki[0] = m3->mc_ki[1]; - } - } - } - } else - DPUTS("root page doesn't need rebalancing"); - return MDB_SUCCESS; - } - - /* The parent (branch page) must have at least 2 pointers, - * otherwise the tree is invalid. - */ - ptop = mc->mc_top-1; - assert(NUMKEYS(mc->mc_pg[ptop]) > 1); - - /* Leaf page fill factor is below the threshold. - * Try to move keys from left or right neighbor, or - * merge with a neighbor page. - */ - - /* Find neighbors. - */ - mdb_cursor_copy(mc, &mn); - mn.mc_xcursor = NULL; - - if (mc->mc_ki[ptop] == 0) { - /* We're the leftmost leaf in our parent. - */ - DPUTS("reading right neighbor"); - mn.mc_ki[ptop]++; - node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); - if (rc) - return rc; - mn.mc_ki[mn.mc_top] = 0; - mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); - } else { - /* There is at least one neighbor to the left. 
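-		 * Borrowing from the left always takes the neighbor's rightmost
-		 * node, so mn is pointed at the last entry of that page.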
- */ - DPUTS("reading left neighbor"); - mn.mc_ki[ptop]--; - node = NODEPTR(mc->mc_pg[ptop], mn.mc_ki[ptop]); - rc = mdb_page_get(mc->mc_txn,NODEPGNO(node),&mn.mc_pg[mn.mc_top],NULL); - if (rc) - return rc; - mn.mc_ki[mn.mc_top] = NUMKEYS(mn.mc_pg[mn.mc_top]) - 1; - mc->mc_ki[mc->mc_top] = 0; - } - - DPRINTF("found neighbor page %zu (%u keys, %.1f%% full)", - mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10); - - /* If the neighbor page is above threshold and has enough keys, - * move one key from it. Otherwise we should try to merge them. - * (A branch page must never have less than 2 keys.) - */ - minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top])); - if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) - return mdb_node_move(&mn, mc); - else { - if (mc->mc_ki[ptop] == 0) - rc = mdb_page_merge(&mn, mc); - else - rc = mdb_page_merge(mc, &mn); - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - } - return rc; -} - -/** Complete a delete operation started by #mdb_cursor_del(). */ -static int -mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf) -{ - int rc; - MDB_page *mp; - indx_t ki; - - mp = mc->mc_pg[mc->mc_top]; - ki = mc->mc_ki[mc->mc_top]; - - /* add overflow pages to free list */ - if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_BIGDATA)) { - MDB_page *omp; - pgno_t pg; - - memcpy(&pg, NODEDATA(leaf), sizeof(pg)); - if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) || - (rc = mdb_ovpage_free(mc, omp))) - return rc; - } - mdb_node_del(mp, ki, mc->mc_db->md_pad); - mc->mc_db->md_entries--; - rc = mdb_rebalance(mc); - if (rc != MDB_SUCCESS) - mc->mc_txn->mt_flags |= MDB_TXN_ERROR; - /* if mc points past last node in page, invalidate */ - else if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) - mc->mc_flags &= ~(C_INITIALIZED|C_EOF); - - { - /* Adjust other cursors pointing to mp */ - MDB_cursor *m2; - unsigned int nkeys; - MDB_dbi dbi = mc->mc_dbi; - - mp = mc->mc_pg[mc->mc_top]; - nkeys = NUMKEYS(mp); - for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) { - if (m2 == mc) - continue; - if (!(m2->mc_flags & C_INITIALIZED)) - continue; - if (m2->mc_pg[mc->mc_top] == mp) { - if (m2->mc_ki[mc->mc_top] > ki) - m2->mc_ki[mc->mc_top]--; - if (m2->mc_ki[mc->mc_top] >= nkeys) - m2->mc_flags &= ~(C_INITIALIZED|C_EOF); - } - } - } - - return rc; -} - -int -mdb_del(MDB_txn *txn, MDB_dbi dbi, - MDB_val *key, MDB_val *data) -{ - MDB_cursor mc; - MDB_xcursor mx; - MDB_cursor_op op; - MDB_val rdata, *xdata; - int rc, exact; - DKBUF; - - assert(key != NULL); - - DPRINTF("====> delete db %u key [%s]", dbi, DKEY(key)); - - if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID)) - return EINVAL; - - if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { - return EACCES; - } - - if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) { - return EINVAL; - } - - mdb_cursor_init(&mc, txn, dbi, &mx); - - exact = 0; - if (data) { - op = MDB_GET_BOTH; - rdata = *data; - xdata = &rdata; - } else { - op = MDB_SET; - xdata = NULL; - } - rc = mdb_cursor_set(&mc, key, xdata, op, &exact); - if (rc == 0) { - /* let mdb_page_split know about this cursor if needed: - * delete will trigger a rebalance; if it needs to move - * a node from one page to another, it will have to - * update the parent's separator key(s). If the new sepkey - * is larger than the current one, the parent page may - * run out of space, triggering a split. 
We need this - * cursor to be consistent until the end of the rebalance. - */ - mc.mc_next = txn->mt_cursors[dbi]; - txn->mt_cursors[dbi] = &mc; - rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA); - txn->mt_cursors[dbi] = mc.mc_next; - } - return rc; -} - -/** Split a page and insert a new node. - * @param[in,out] mc Cursor pointing to the page and desired insertion index. - * The cursor will be updated to point to the actual page and index where - * the node got inserted after the split. - * @param[in] newkey The key for the newly inserted node. - * @param[in] newdata The data for the newly inserted node. - * @param[in] newpgno The page number, if the new node is a branch node. - * @param[in] nflags The #NODE_ADD_FLAGS for the new node. - * @return 0 on success, non-zero on failure. - */ -static int -mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, - unsigned int nflags) -{ - unsigned int flags; - int rc = MDB_SUCCESS, ins_new = 0, new_root = 0, newpos = 1, did_split = 0; - indx_t newindx; - pgno_t pgno = 0; - unsigned int i, j, split_indx, nkeys, pmax; - MDB_node *node; - MDB_val sepkey, rkey, xdata, *rdata = &xdata; - MDB_page *copy; - MDB_page *mp, *rp, *pp; - unsigned int ptop; - MDB_cursor mn; - DKBUF; - - mp = mc->mc_pg[mc->mc_top]; - newindx = mc->mc_ki[mc->mc_top]; - - DPRINTF("-----> splitting %s page %zu and adding [%s] at index %i", - IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, - DKEY(newkey), mc->mc_ki[mc->mc_top]); - - /* Create a right sibling. */ - if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp))) - return rc; - DPRINTF("new right sibling: page %zu", rp->mp_pgno); - - if (mc->mc_snum < 2) { - if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp))) - return rc; - /* shift current top to make room for new parent */ - mc->mc_pg[1] = mc->mc_pg[0]; - mc->mc_ki[1] = mc->mc_ki[0]; - mc->mc_pg[0] = pp; - mc->mc_ki[0] = 0; - mc->mc_db->md_root = pp->mp_pgno; - DPRINTF("root split! new root = %zu", pp->mp_pgno); - mc->mc_db->md_depth++; - new_root = 1; - - /* Add left (implicit) pointer. 
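-		 * The leftmost node of a branch page never carries a key, so this
-		 * entry is added with a NULL key and only the child page number.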
*/ - if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) { - /* undo the pre-push */ - mc->mc_pg[0] = mc->mc_pg[1]; - mc->mc_ki[0] = mc->mc_ki[1]; - mc->mc_db->md_root = mp->mp_pgno; - mc->mc_db->md_depth--; - return rc; - } - mc->mc_snum = 2; - mc->mc_top = 1; - ptop = 0; - } else { - ptop = mc->mc_top-1; - DPRINTF("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno); - } - - mc->mc_flags |= C_SPLITTING; - mdb_cursor_copy(mc, &mn); - mn.mc_pg[mn.mc_top] = rp; - mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; - - if (nflags & MDB_APPEND) { - mn.mc_ki[mn.mc_top] = 0; - sepkey = *newkey; - split_indx = newindx; - nkeys = 0; - goto newsep; - } - - nkeys = NUMKEYS(mp); - split_indx = nkeys / 2; - if (newindx < split_indx) - newpos = 0; - - if (IS_LEAF2(rp)) { - char *split, *ins; - int x; - unsigned int lsize, rsize, ksize; - /* Move half of the keys to the right sibling */ - copy = NULL; - x = mc->mc_ki[mc->mc_top] - split_indx; - ksize = mc->mc_db->md_pad; - split = LEAF2KEY(mp, split_indx, ksize); - rsize = (nkeys - split_indx) * ksize; - lsize = (nkeys - split_indx) * sizeof(indx_t); - mp->mp_lower -= lsize; - rp->mp_lower += lsize; - mp->mp_upper += rsize - lsize; - rp->mp_upper -= rsize - lsize; - sepkey.mv_size = ksize; - if (newindx == split_indx) { - sepkey.mv_data = newkey->mv_data; - } else { - sepkey.mv_data = split; - } - if (x<0) { - ins = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], ksize); - memcpy(rp->mp_ptrs, split, rsize); - sepkey.mv_data = rp->mp_ptrs; - memmove(ins+ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize); - memcpy(ins, newkey->mv_data, ksize); - mp->mp_lower += sizeof(indx_t); - mp->mp_upper -= ksize - sizeof(indx_t); - } else { - if (x) - memcpy(rp->mp_ptrs, split, x * ksize); - ins = LEAF2KEY(rp, x, ksize); - memcpy(ins, newkey->mv_data, ksize); - memcpy(ins+ksize, split + x * ksize, rsize - x * ksize); - rp->mp_lower += sizeof(indx_t); - rp->mp_upper -= ksize - sizeof(indx_t); - mc->mc_ki[mc->mc_top] = x; - mc->mc_pg[mc->mc_top] = rp; - } - goto newsep; - } - - /* For leaf pages, check the split point based on what - * fits where, since otherwise mdb_node_add can fail. - * - * This check is only needed when the data items are - * relatively large, such that being off by one will - * make the difference between success or failure. - * - * It's also relevant if a page happens to be laid out - * such that one half of its nodes are all "small" and - * the other half of its nodes are "large." If the new - * item is also "large" and falls on the half with - * "large" nodes, it also may not fit. 
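-	 * As a rough example, assuming a typical 4096-byte page, pmax is
-	 * 4096 minus the page header, so the exact scan below runs for any
-	 * new item larger than about 250 bytes, or whenever the page holds
-	 * fewer than 20 keys.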
- */
-	if (IS_LEAF(mp)) {
-		unsigned int psize, nsize;
-		/* Maximum free space in an empty page */
-		pmax = mc->mc_txn->mt_env->me_psize - PAGEHDRSZ;
-		nsize = mdb_leaf_size(mc->mc_txn->mt_env, newkey, newdata);
-		if ((nkeys < 20) || (nsize > pmax/16)) {
-			if (newindx <= split_indx) {
-				psize = nsize;
-				newpos = 0;
-				for (i=0; i<split_indx; i++) {
-					node = NODEPTR(mp, i);
-					psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
-					if (F_ISSET(node->mn_flags, F_BIGDATA))
-						psize += sizeof(pgno_t);
-					else
-						psize += NODEDSZ(node);
-					psize += psize & 1;
-					if (psize > pmax) {
-						if (i <= newindx) {
-							split_indx = newindx;
-							if (i < newindx)
-								newpos = 1;
-						}
-						else
-							split_indx = i;
-						break;
-					}
-				}
-			} else {
-				psize = nsize;
-				for (i=nkeys-1; i>=split_indx; i--) {
-					node = NODEPTR(mp, i);
-					psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
-					if (F_ISSET(node->mn_flags, F_BIGDATA))
-						psize += sizeof(pgno_t);
-					else
-						psize += NODEDSZ(node);
-					psize += psize & 1;
-					if (psize > pmax) {
-						if (i >= newindx) {
-							split_indx = newindx;
-							newpos = 0;
-						} else
-							split_indx = i+1;
-						break;
-					}
-				}
-			}
-		}
-	}
-
-	/* First find the separating key between the split pages.
-	 * The case where newindx == split_indx is ambiguous; the
-	 * new item could go to the new page or stay on the original
-	 * page. If newpos == 1 it goes to the new page.
-	 */
-	if (newindx == split_indx && newpos) {
-		sepkey.mv_size = newkey->mv_size;
-		sepkey.mv_data = newkey->mv_data;
-	} else {
-		node = NODEPTR(mp, split_indx);
-		sepkey.mv_size = node->mn_ksize;
-		sepkey.mv_data = NODEKEY(node);
-	}
-
-newsep:
-	DPRINTF("separator is [%s]", DKEY(&sepkey));
-
-	/* Copy separator key to the parent.
-	 */
-	if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(mc->mc_txn->mt_env, &sepkey)) {
-		mn.mc_snum--;
-		mn.mc_top--;
-		did_split = 1;
-		rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
-
-		/* root split? */
-		if (mn.mc_snum == mc->mc_snum) {
-			mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top];
-			mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top];
-			mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop];
-			mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop];
-			mc->mc_snum++;
-			mc->mc_top++;
-			ptop++;
-		}
-		/* Right page might now have changed parent.
-		 * Check if left page also changed parent.
-		 */
-		if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
-		    mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
-			for (i=0; i<ptop; i++) {
-				mc->mc_pg[i] = mn.mc_pg[i];
-				mc->mc_ki[i] = mn.mc_ki[i];
-			}
-			mc->mc_pg[ptop] = mn.mc_pg[ptop];
-			mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
-		}
-	} else {
-		mn.mc_top--;
-		rc = mdb_node_add(&mn, mn.mc_ki[ptop], &sepkey, NULL, rp->mp_pgno, 0);
-		mn.mc_top++;
-	}
-	mc->mc_flags ^= C_SPLITTING;
-	if (rc != MDB_SUCCESS) {
-		return rc;
-	}
-	if (nflags & MDB_APPEND) {
-		mc->mc_pg[mc->mc_top] = rp;
-		mc->mc_ki[mc->mc_top] = 0;
-		rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
-		if (rc)
-			return rc;
-		for (i=0; i<mc->mc_top; i++)
-			mc->mc_ki[i] = mn.mc_ki[i];
-		goto done;
-	}
-	if (IS_LEAF2(rp)) {
-		goto done;
-	}
-
-	/* Move half of the keys to the right sibling. */
-
-	/* grab a page to hold a temporary copy */
-	copy = mdb_page_malloc(mc->mc_txn, 1);
-	if (copy == NULL)
-		return ENOMEM;
-
-	copy->mp_pgno = mp->mp_pgno;
-	copy->mp_flags = mp->mp_flags;
-	copy->mp_lower = PAGEHDRSZ;
-	copy->mp_upper = mc->mc_txn->mt_env->me_psize;
-	mc->mc_pg[mc->mc_top] = copy;
-	for (i = j = 0; i <= nkeys; j++) {
-		if (i == split_indx) {
-			/* Insert in right sibling. */
-			/* Reset insert index for right sibling. */
-			if (i != newindx || (newpos ^ ins_new)) {
-				j = 0;
-				mc->mc_pg[mc->mc_top] = rp;
-			}
-		}
-
-		if (i == newindx && !ins_new) {
-			/* Insert the original entry that caused the split.
-			 */
-			rkey.mv_data = newkey->mv_data;
-			rkey.mv_size = newkey->mv_size;
-			if (IS_LEAF(mp)) {
-				rdata = newdata;
-			} else
-				pgno = newpgno;
-			flags = nflags;
-
-			ins_new = 1;
-
-			/* Update index for the new key. */
-			mc->mc_ki[mc->mc_top] = j;
-		} else if (i == nkeys) {
-			break;
-		} else {
-			node = NODEPTR(mp, i);
-			rkey.mv_data = NODEKEY(node);
-			rkey.mv_size = node->mn_ksize;
-			if (IS_LEAF(mp)) {
-				xdata.mv_data = NODEDATA(node);
-				xdata.mv_size = NODEDSZ(node);
-				rdata = &xdata;
-			} else
-				pgno = NODEPGNO(node);
-			flags = node->mn_flags;
-
-			i++;
-		}
-
-		if (!IS_LEAF(mp) && j == 0) {
-			/* First branch index doesn't need key data. */
-			rkey.mv_size = 0;
-		}
-
-		rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
-		if (rc) break;
-	}
-
-	nkeys = NUMKEYS(copy);
-	for (i=0; i<nkeys; i++)
-		mp->mp_ptrs[i] = copy->mp_ptrs[i];
-	mp->mp_lower = copy->mp_lower;
-	mp->mp_upper = copy->mp_upper;
-	memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
-		mc->mc_txn->mt_env->me_psize - copy->mp_upper);
-
-	/* reset back to original page */
-	if (newindx < split_indx || (!newpos && newindx == split_indx)) {
-		mc->mc_pg[mc->mc_top] = mp;
-		if (nflags & MDB_RESERVE) {
-			node = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
-			if (!(node->mn_flags & F_BIGDATA))
-				newdata->mv_data = NODEDATA(node);
-		}
-	} else {
-		mc->mc_ki[ptop]++;
-		/* Make sure mc_ki is still valid.
-		 */
-		if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
-		    mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
-			for (i=0; i<ptop; i++) {
-				mc->mc_pg[i] = mn.mc_pg[i];
-				mc->mc_ki[i] = mn.mc_ki[i];
-			}
-			mc->mc_pg[ptop] = mn.mc_pg[ptop];
-			mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
-		}
-	}
-
-	/* return tmp page to freelist */
-	mdb_page_free(mc->mc_txn->mt_env, copy);
-done:
-	{
-		/* Adjust other cursors pointing to mp */
-		MDB_cursor *m2, *m3;
-		MDB_dbi dbi = mc->mc_dbi;
-		int fixup = NUMKEYS(mp);
-
-		if (mc->mc_flags & C_SUB)
-			dbi--;
-
-		for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
-			if (mc->mc_flags & C_SUB)
-				m3 = &m2->mc_xcursor->mx_cursor;
-			else
-				m3 = m2;
-			if (m3 == mc)
-				continue;
-			if (!(m3->mc_flags & C_INITIALIZED))
-				continue;
-			if (m3->mc_flags & C_SPLITTING)
-				continue;
-			if (new_root) {
-				int k;
-				/* root split */
-				for (k=m3->mc_top; k>=0; k--) {
-					m3->mc_ki[k+1] = m3->mc_ki[k];
-					m3->mc_pg[k+1] = m3->mc_pg[k];
-				}
-				if (m3->mc_ki[0] >= split_indx) {
-					m3->mc_ki[0] = 1;
-				} else {
-					m3->mc_ki[0] = 0;
-				}
-				m3->mc_pg[0] = mc->mc_pg[0];
-				m3->mc_snum++;
-				m3->mc_top++;
-			}
-			if (m3->mc_pg[mc->mc_top] == mp) {
-				if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE))
-					m3->mc_ki[mc->mc_top]++;
-				if (m3->mc_ki[mc->mc_top] >= fixup) {
-					m3->mc_pg[mc->mc_top] = rp;
-					m3->mc_ki[mc->mc_top] -= fixup;
-					m3->mc_ki[ptop] = mn.mc_ki[ptop];
-				}
-			} else if (!did_split && m3->mc_pg[ptop] == mc->mc_pg[ptop] &&
-				m3->mc_ki[ptop] >= mc->mc_ki[ptop]) {
-				m3->mc_ki[ptop]++;
-			}
-		}
-	}
-	return rc;
-}
-
-int
-mdb_put(MDB_txn *txn, MDB_dbi dbi,
-    MDB_val *key, MDB_val *data, unsigned int flags)
-{
-	MDB_cursor mc;
-	MDB_xcursor mx;
-
-	assert(key != NULL);
-	assert(data != NULL);
-
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
-		return EACCES;
-	}
-
-	if (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE) {
-		return EINVAL;
-	}
-
-	if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
-		return EINVAL;
-
-	mdb_cursor_init(&mc, txn, dbi, &mx);
-	return mdb_cursor_put(&mc, key, data, flags);
-}
-
-int
-mdb_env_set_flags(MDB_env *env,
unsigned int flag, int onoff) -{ - if ((flag & CHANGEABLE) != flag) - return EINVAL; - if (onoff) - env->me_flags |= flag; - else - env->me_flags &= ~flag; - return MDB_SUCCESS; -} - -int -mdb_env_get_flags(MDB_env *env, unsigned int *arg) -{ - if (!env || !arg) - return EINVAL; - - *arg = env->me_flags; - return MDB_SUCCESS; -} - -int -mdb_env_get_path(MDB_env *env, const char **arg) -{ - if (!env || !arg) - return EINVAL; - - *arg = env->me_path; - return MDB_SUCCESS; -} - -/** Common code for #mdb_stat() and #mdb_env_stat(). - * @param[in] env the environment to operate in. - * @param[in] db the #MDB_db record containing the stats to return. - * @param[out] arg the address of an #MDB_stat structure to receive the stats. - * @return 0, this function always succeeds. - */ -static int -mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) -{ - arg->ms_psize = env->me_psize; - arg->ms_depth = db->md_depth; - arg->ms_branch_pages = db->md_branch_pages; - arg->ms_leaf_pages = db->md_leaf_pages; - arg->ms_overflow_pages = db->md_overflow_pages; - arg->ms_entries = db->md_entries; - - return MDB_SUCCESS; -} -int -mdb_env_stat(MDB_env *env, MDB_stat *arg) -{ - int toggle; - - if (env == NULL || arg == NULL) - return EINVAL; - - toggle = mdb_env_pick_meta(env); - - return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg); -} - -int -mdb_env_info(MDB_env *env, MDB_envinfo *arg) -{ - int toggle; - - if (env == NULL || arg == NULL) - return EINVAL; - - toggle = mdb_env_pick_meta(env); - arg->me_mapaddr = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : 0; - arg->me_mapsize = env->me_mapsize; - arg->me_maxreaders = env->me_maxreaders; - arg->me_numreaders = env->me_numreaders; - arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg; - arg->me_last_txnid = env->me_metas[toggle]->mm_txnid; - return MDB_SUCCESS; -} - -/** Set the default comparison functions for a database. - * Called immediately after a database is opened to set the defaults. - * The user can then override them with #mdb_set_compare() or - * #mdb_set_dupsort(). - * @param[in] txn A transaction handle returned by #mdb_txn_begin() - * @param[in] dbi A database handle returned by #mdb_dbi_open() - */ -static void -mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi) -{ - uint16_t f = txn->mt_dbs[dbi].md_flags; - - txn->mt_dbxs[dbi].md_cmp = - (f & MDB_REVERSEKEY) ? mdb_cmp_memnr : - (f & MDB_INTEGERKEY) ? mdb_cmp_cint : mdb_cmp_memn; - - txn->mt_dbxs[dbi].md_dcmp = - !(f & MDB_DUPSORT) ? 0 : - ((f & MDB_INTEGERDUP) - ? ((f & MDB_DUPFIXED) ? mdb_cmp_int : mdb_cmp_cint) - : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn)); -} - -int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) -{ - MDB_val key, data; - MDB_dbi i; - MDB_cursor mc; - int rc, dbflag, exact; - unsigned int unused = 0; - size_t len; - - if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) { - mdb_default_cmp(txn, FREE_DBI); - } - - if ((flags & VALID_FLAGS) != flags) - return EINVAL; - - /* main DB? */ - if (!name) { - *dbi = MAIN_DBI; - if (flags & PERSISTENT_FLAGS) { - uint16_t f2 = flags & PERSISTENT_FLAGS; - /* make sure flag changes get committed */ - if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) { - txn->mt_dbs[MAIN_DBI].md_flags |= f2; - txn->mt_flags |= MDB_TXN_DIRTY; - } - } - mdb_default_cmp(txn, MAIN_DBI); - return MDB_SUCCESS; - } - - if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) { - mdb_default_cmp(txn, MAIN_DBI); - } - - /* Is the DB already open? 
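-	 * Named databases start at slot 2; slots 0 and 1 are reserved
-	 * for the free-list DB and the main DB.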
- */
-	len = strlen(name);
-	for (i=2; i<txn->mt_numdbs; i++) {
-		if (!txn->mt_dbxs[i].md_name.mv_size) {
-			/* Remember this free slot */
-			if (!unused) unused = i;
-			continue;
-		}
-		if (len == txn->mt_dbxs[i].md_name.mv_size &&
-			!strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
-			*dbi = i;
-			return MDB_SUCCESS;
-		}
-	}
-
-	/* If no free slot and max hit, fail */
-	if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs)
-		return MDB_DBS_FULL;
-
-	/* Cannot mix named databases with some mainDB flags */
-	if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY))
-		return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND;
-
-	/* Find the DB info */
-	dbflag = DB_NEW|DB_VALID;
-	exact = 0;
-	key.mv_size = len;
-	key.mv_data = (void *)name;
-	mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
-	rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
-	if (rc == MDB_SUCCESS) {
-		/* make sure this is actually a DB */
-		MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
-		if (!(node->mn_flags & F_SUBDATA))
-			return EINVAL;
-	} else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
-		/* Create if requested */
-		MDB_db dummy;
-		data.mv_size = sizeof(MDB_db);
-		data.mv_data = &dummy;
-		memset(&dummy, 0, sizeof(dummy));
-		dummy.md_root = P_INVALID;
-		dummy.md_flags = flags & PERSISTENT_FLAGS;
-		rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
-		dbflag |= DB_DIRTY;
-	}
-
-	/* OK, got info, add to table */
-	if (rc == MDB_SUCCESS) {
-		unsigned int slot = unused ? unused : txn->mt_numdbs;
-		txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
-		txn->mt_dbxs[slot].md_name.mv_size = len;
-		txn->mt_dbxs[slot].md_rel = NULL;
-		txn->mt_dbflags[slot] = dbflag;
-		memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
-		*dbi = slot;
-		txn->mt_env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags;
-		mdb_default_cmp(txn, slot);
-		if (!unused) {
-			txn->mt_numdbs++;
-		}
-	}
-
-	return rc;
-}
-
-int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
-{
-	if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
-		return EINVAL;
-
-	if (txn->mt_dbflags[dbi] & DB_STALE) {
-		MDB_cursor mc;
-		MDB_xcursor mx;
-		/* Stale, must read the DB's root. cursor_init does it for us. */
-		mdb_cursor_init(&mc, txn, dbi, &mx);
-	}
-	return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
-}
-
-void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
-{
-	char *ptr;
-	if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
-		return;
-	ptr = env->me_dbxs[dbi].md_name.mv_data;
-	env->me_dbxs[dbi].md_name.mv_data = NULL;
-	env->me_dbxs[dbi].md_name.mv_size = 0;
-	env->me_dbflags[dbi] = 0;
-	free(ptr);
-}
-
-/** Add all the DB's pages to the free list.
- * @param[in] mc Cursor on the DB to free.
- * @param[in] subs non-Zero to check for sub-DBs in this DB.
- * @return 0 on success, non-zero on failure.
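- * When subs is non-zero, F_SUBDATA nodes are recursed into so that
- * the pages of any sub-databases are freed as well.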
- */
-static int
-mdb_drop0(MDB_cursor *mc, int subs)
-{
-	int rc;
-
-	rc = mdb_page_search(mc, NULL, 0);
-	if (rc == MDB_SUCCESS) {
-		MDB_txn *txn = mc->mc_txn;
-		MDB_node *ni;
-		MDB_cursor mx;
-		unsigned int i;
-
-		/* LEAF2 pages have no nodes, cannot have sub-DBs */
-		if (IS_LEAF2(mc->mc_pg[mc->mc_top]))
-			mdb_cursor_pop(mc);
-
-		mdb_cursor_copy(mc, &mx);
-		while (mc->mc_snum > 0) {
-			MDB_page *mp = mc->mc_pg[mc->mc_top];
-			unsigned n = NUMKEYS(mp);
-			if (IS_LEAF(mp)) {
-				for (i=0; i<n; i++) {
-					ni = NODEPTR(mp, i);
-					if (ni->mn_flags & F_BIGDATA) {
-						MDB_page *omp;
-						pgno_t pg;
-						memcpy(&pg, NODEDATA(ni), sizeof(pg));
-						rc = mdb_page_get(txn, pg, &omp, NULL);
-						if (rc != 0)
-							return rc;
-						assert(IS_OVERFLOW(omp));
-						rc = mdb_midl_append_range(&txn->mt_free_pgs,
-							pg, omp->mp_pages);
-						if (rc)
-							return rc;
-					} else if (subs && (ni->mn_flags & F_SUBDATA)) {
-						mdb_xcursor_init1(mc, ni);
-						rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
-						if (rc)
-							return rc;
-					}
-				}
-			} else {
-				if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0)
-					return rc;
-				for (i=0; i<n; i++) {
-					pgno_t pg = NODEPGNO(NODEPTR(mp, i));
-					mdb_midl_xappend(txn->mt_free_pgs, pg);
-				}
-			}
-			if (!mc->mc_top)
-				break;
-			mc->mc_ki[mc->mc_top] = i;
-			rc = mdb_cursor_sibling(mc, 1);
-			if (rc) {
-				/* no more siblings, go back to beginning
-				 * of previous level.
-				 */
-				mdb_cursor_pop(mc);
-				mc->mc_ki[0] = 0;
-				for (i=1; i<mc->mc_snum; i++) {
-					mc->mc_ki[i] = 0;
-					mc->mc_pg[i] = mx.mc_pg[i];
-				}
-			}
-		}
-		/* free it */
-		rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root);
-	} else if (rc == MDB_NOTFOUND) {
-		rc = MDB_SUCCESS;
-	}
-	return rc;
-}
-
-int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
-{
-	MDB_cursor *mc, *m2;
-	int rc;
-
-	if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
-		return EACCES;
-
-	rc = mdb_cursor_open(txn, dbi, &mc);
-	if (rc)
-		return rc;
-
-	rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
-	/* Invalidate the dropped DB's cursors */
-	for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
-		m2->mc_flags &= ~(C_INITIALIZED|C_EOF);
-	if (rc)
-		goto leave;
-
-	/* Can't delete the main DB */
-	if (del && dbi > MAIN_DBI) {
-		rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
-		if (!rc) {
-			txn->mt_dbflags[dbi] = DB_STALE;
-			mdb_dbi_close(txn->mt_env, dbi);
-		}
-	} else {
-		/* reset the DB record, mark it dirty */
-		txn->mt_dbflags[dbi] |= DB_DIRTY;
-		txn->mt_dbs[dbi].md_depth = 0;
-		txn->mt_dbs[dbi].md_branch_pages = 0;
-		txn->mt_dbs[dbi].md_leaf_pages = 0;
-		txn->mt_dbs[dbi].md_overflow_pages = 0;
-		txn->mt_dbs[dbi].md_entries = 0;
-		txn->mt_dbs[dbi].md_root = P_INVALID;
-
-		txn->mt_flags |= MDB_TXN_DIRTY;
-	}
-leave:
-	mdb_cursor_close(mc);
-	return rc;
-}
-
-int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
-{
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	txn->mt_dbxs[dbi].md_cmp = cmp;
-	return MDB_SUCCESS;
-}
-
-int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
-{
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	txn->mt_dbxs[dbi].md_dcmp = cmp;
-	return MDB_SUCCESS;
-}
-
-int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
-{
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
-		return EINVAL;
-
-	txn->mt_dbxs[dbi].md_rel = rel;
-	return MDB_SUCCESS;
-}
-
-int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
-{
-	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs ||
!(txn->mt_dbflags[dbi] & DB_VALID)) - return EINVAL; - - txn->mt_dbxs[dbi].md_relctx = ctx; - return MDB_SUCCESS; -} - -/** @} */ diff --git a/libraries/liblmdb/mdb_copy.1 b/libraries/liblmdb/mdb_copy.1 deleted file mode 100644 index 7837de5f6b..0000000000 --- a/libraries/liblmdb/mdb_copy.1 +++ /dev/null @@ -1,28 +0,0 @@ -.TH MDB_COPY 1 "2012/12/12" "LMDB 0.9.5" -.\" Copyright 2012 Howard Chu, Symas Corp. All Rights Reserved. -.\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.SH NAME -mdb_copy \- LMDB environment copy tool -.SH SYNOPSIS -.B mdb_copy -.I srcpath\ [dstpath] -.SH DESCRIPTION -The -.B mdb_copy -utility copies an LMDB environment. The environment can -be copied regardless of whether it is currently in use. - -If -.I dstpath -is specified it must be the path of an empty directory -for storing the backup. Otherwise, the backup will be -written to stdout. - -.SH DIAGNOSTICS -Exit status is zero if no errors occur. -Errors result in a non-zero exit status and -a diagnostic message being written to standard error. -.SH "SEE ALSO" -.BR mdb_stat (1) -.SH AUTHOR -Howard Chu of Symas Corporation diff --git a/libraries/liblmdb/mdb_copy.c b/libraries/liblmdb/mdb_copy.c deleted file mode 100644 index ca92009cff..0000000000 --- a/libraries/liblmdb/mdb_copy.c +++ /dev/null @@ -1,66 +0,0 @@ -/* mdb_copy.c - memory-mapped database backup tool */ -/* - * Copyright 2012 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ -#ifdef _WIN32 -#include -#define MDB_STDOUT GetStdHandle(STD_OUTPUT_HANDLE) -#else -#define MDB_STDOUT 1 -#endif -#include -#include -#include -#include "lmdb.h" - -static void -sighandle(int sig) -{ -} - -int main(int argc,char * argv[]) -{ - int rc; - MDB_env *env; - char *envname = argv[1]; - - if (argc<2 || argc>3) { - fprintf(stderr, "usage: %s srcpath [dstpath]\n", argv[0]); - exit(EXIT_FAILURE); - } - -#ifdef SIGPIPE - signal(SIGPIPE, sighandle); -#endif -#ifdef SIGHUP - signal(SIGHUP, sighandle); -#endif - signal(SIGINT, sighandle); - signal(SIGTERM, sighandle); - - rc = mdb_env_create(&env); - - rc = mdb_env_open(env, envname, MDB_RDONLY, 0); - if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); - } else { - if (argc == 2) - rc = mdb_env_copyfd(env, MDB_STDOUT); - else - rc = mdb_env_copy(env, argv[2]); - if (rc) - printf("mdb_env_copy failed, error %d %s\n", rc, mdb_strerror(rc)); - } - mdb_env_close(env); - - return rc ? EXIT_FAILURE : EXIT_SUCCESS; -} diff --git a/libraries/liblmdb/mdb_stat.1 b/libraries/liblmdb/mdb_stat.1 deleted file mode 100644 index 1307c39d09..0000000000 --- a/libraries/liblmdb/mdb_stat.1 +++ /dev/null @@ -1,47 +0,0 @@ -.TH MDB_STAT 1 "2012/12/12" "LMDB 0.9.5" -.\" Copyright 2012 Howard Chu, Symas Corp. All Rights Reserved. -.\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.SH NAME -mdb_stat \- LMDB environment status tool -.SH SYNOPSIS -.B mdb_stat -.BR \ envpath -[\c -.BR \-e ] -[\c -.BR \-f [ f [ f ]]] -[\c -.BR \-n ] -[\c -.BR \-a \ | -.BI \-s \ subdb\fR] -.SH DESCRIPTION -The -.B mdb_stat -utility displays the status of an LMDB environment. -.SH OPTIONS -.TP -.BR \-e -Display information about the database environment. -.TP -.BR \-f -Display information about the environment freelist. 
-If \fB\-ff\fP is given, summarize each freelist entry. -If \fB\-fff\fP is given, display the full list of page IDs in the freelist. -.TP -.BR \-n -Display the status of an LMDB database which does not use subdirectories. -.TP -.BR \-a -Display the status of all of the subdatabases in the environment. -.TP -.BR \-s \ subdb -Display the status of a specific subdatabase. -.SH DIAGNOSTICS -Exit status is zero if no errors occur. -Errors result in a non-zero exit status and -a diagnostic message being written to standard error. -.SH "SEE ALSO" -.BR mdb_copy (1) -.SH AUTHOR -Howard Chu of Symas Corporation diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c deleted file mode 100644 index 3e6be21597..0000000000 --- a/libraries/liblmdb/mdb_stat.c +++ /dev/null @@ -1,230 +0,0 @@ -/* mdb_stat.c - memory-mapped database status tool */ -/* - * Copyright 2011-2013 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ -#include -#include -#include -#include -#include "lmdb.h" - -static void prstat(MDB_stat *ms) -{ -#if 0 - printf(" Page size: %u\n", ms->ms_psize); -#endif - printf(" Tree depth: %u\n", ms->ms_depth); - printf(" Branch pages: %zu\n", ms->ms_branch_pages); - printf(" Leaf pages: %zu\n", ms->ms_leaf_pages); - printf(" Overflow pages: %zu\n", ms->ms_overflow_pages); - printf(" Entries: %zu\n", ms->ms_entries); -} - -static void usage(char *prog) -{ - fprintf(stderr, "usage: %s dbpath [-e] [-f[f[f]]] [-n] [-a|-s subdb]\n", prog); - exit(EXIT_FAILURE); -} - -int main(int argc, char *argv[]) -{ - int i, rc; - MDB_env *env; - MDB_txn *txn; - MDB_dbi dbi; - MDB_stat mst; - MDB_envinfo mei; - char *prog = argv[0]; - char *envname; - char *subname = NULL; - int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0; - - if (argc < 2) { - usage(prog); - } - - /* -a: print stat of main DB and all subDBs - * -s: print stat of only the named subDB - * -e: print env info - * -f: print freelist info - * -n: use NOSUBDIR flag on env_open - * (default) print stat of only the main DB - */ - while ((i = getopt(argc, argv, "aefns:")) != EOF) { - switch(i) { - case 'a': - if (subname) - usage(prog); - alldbs++; - break; - case 'e': - envinfo++; - break; - case 'f': - freinfo++; - break; - case 'n': - envflags |= MDB_NOSUBDIR; - break; - case 's': - if (alldbs) - usage(prog); - subname = optarg; - break; - default: - usage(prog); - } - } - - if (optind != argc - 1) - usage(prog); - - envname = argv[optind]; - rc = mdb_env_create(&env); - - if (alldbs || subname) { - mdb_env_set_maxdbs(env, 4); - } - - rc = mdb_env_open(env, envname, envflags | MDB_RDONLY, 0664); - if (rc) { - printf("mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); - goto env_close; - } - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - if (rc) { - printf("mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); - goto env_close; - } - - if (envinfo) { - rc = mdb_env_stat(env, &mst); - rc = mdb_env_info(env, &mei); - printf("Environment Info\n"); - printf(" Map address: %p\n", mei.me_mapaddr); - printf(" Map size: %zu\n", mei.me_mapsize); - printf(" Page size: %u\n", mst.ms_psize); - printf(" Max pages: %zu\n", mei.me_mapsize / mst.ms_psize); - printf(" Number of pages used: %zu\n", mei.me_last_pgno+1); - 
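-		/* me_last_pgno is the index of the last used page, hence the +1 above. */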
printf(" Last transaction ID: %zu\n", mei.me_last_txnid); - printf(" Max readers: %u\n", mei.me_maxreaders); - printf(" Number of readers used: %u\n", mei.me_numreaders); - } - - if (freinfo) { - MDB_cursor *cursor; - MDB_val key, data; - size_t pages = 0, *iptr; - - printf("Freelist Status\n"); - dbi = 0; - rc = mdb_cursor_open(txn, dbi, &cursor); - if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - rc = mdb_stat(txn, dbi, &mst); - if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - prstat(&mst); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - iptr = data.mv_data; - pages += *iptr; - if (freinfo > 1) { - char *bad = ""; - size_t pg, prev; - ssize_t i, j, span = 0; - j = *iptr++; - for (i = j, prev = 1; --i >= 0; ) { - pg = iptr[i]; - if (pg <= prev) - bad = " [bad sequence]"; - prev = pg; - pg += span; - for (; i >= span && iptr[i-span] == pg; span++, pg++) ; - } - printf(" Transaction %zu, %zd pages, maxspan %zd%s\n", - *(size_t *)key.mv_data, j, span, bad); - if (freinfo > 2) { - for (--j; j >= 0; ) { - pg = iptr[j]; - for (span=1; --j >= 0 && iptr[j] == pg+span; span++) ; - printf(span>1 ? " %9zu[%zd]\n" : " %9zu\n", - pg, span); - } - } - } - } - mdb_cursor_close(cursor); - printf(" Free pages: %zu\n", pages); - } - - rc = mdb_open(txn, subname, 0, &dbi); - if (rc) { - printf("mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - - rc = mdb_stat(txn, dbi, &mst); - if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - printf("Status of %s\n", subname ? subname : "Main DB"); - prstat(&mst); - - if (alldbs) { - MDB_cursor *cursor; - MDB_val key; - - rc = mdb_cursor_open(txn, dbi, &cursor); - if (rc) { - printf("mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) { - char *str; - MDB_dbi db2; - if (memchr(key.mv_data, '\0', key.mv_size)) - continue; - str = malloc(key.mv_size+1); - memcpy(str, key.mv_data, key.mv_size); - str[key.mv_size] = '\0'; - rc = mdb_open(txn, str, 0, &db2); - if (rc == MDB_SUCCESS) - printf("Status of %s\n", str); - free(str); - if (rc) continue; - rc = mdb_stat(txn, db2, &mst); - if (rc) { - printf("mdb_stat failed, error %d %s\n", rc, mdb_strerror(rc)); - goto txn_abort; - } - prstat(&mst); - mdb_close(env, db2); - } - mdb_cursor_close(cursor); - } - - if (rc == MDB_NOTFOUND) - rc = MDB_SUCCESS; - - mdb_close(env, dbi); -txn_abort: - mdb_txn_abort(txn); -env_close: - mdb_env_close(env); - - return rc ? EXIT_FAILURE : EXIT_SUCCESS; -} diff --git a/libraries/liblmdb/midl.c b/libraries/liblmdb/midl.c deleted file mode 100644 index e7bd680cb0..0000000000 --- a/libraries/liblmdb/midl.c +++ /dev/null @@ -1,348 +0,0 @@ -/** @file midl.c - * @brief ldap bdb back-end ID List functions */ -/* $OpenLDAP$ */ -/* This work is part of OpenLDAP Software . - * - * Copyright 2000-2013 The OpenLDAP Foundation. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -#include -#include -#include -#include -#include -#include -#include "midl.h" - -/** @defgroup internal MDB Internals - * @{ - */ -/** @defgroup idls ID List Management - * @{ - */ -#define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) ) - -#if 0 /* superseded by append/sort */ -static unsigned mdb_midl_search( MDB_IDL ids, MDB_ID id ) -{ - /* - * binary search of id in ids - * if found, returns position of id - * if not found, returns first position greater than id - */ - unsigned base = 0; - unsigned cursor = 1; - int val = 0; - unsigned n = ids[0]; - - while( 0 < n ) { - unsigned pivot = n >> 1; - cursor = base + pivot + 1; - val = CMP( ids[cursor], id ); - - if( val < 0 ) { - n = pivot; - - } else if ( val > 0 ) { - base = cursor; - n -= pivot + 1; - - } else { - return cursor; - } - } - - if( val > 0 ) { - ++cursor; - } - return cursor; -} - -int mdb_midl_insert( MDB_IDL ids, MDB_ID id ) -{ - unsigned x, i; - - x = mdb_midl_search( ids, id ); - assert( x > 0 ); - - if( x < 1 ) { - /* internal error */ - return -2; - } - - if ( x <= ids[0] && ids[x] == id ) { - /* duplicate */ - assert(0); - return -1; - } - - if ( ++ids[0] >= MDB_IDL_DB_MAX ) { - /* no room */ - --ids[0]; - return -2; - - } else { - /* insert id */ - for (i=ids[0]; i>x; i--) - ids[i] = ids[i-1]; - ids[x] = id; - } - - return 0; -} -#endif - -MDB_IDL mdb_midl_alloc(int num) -{ - MDB_IDL ids = malloc((num+2) * sizeof(MDB_ID)); - if (ids) { - *ids++ = num; - *ids = 0; - } - return ids; -} - -void mdb_midl_free(MDB_IDL ids) -{ - if (ids) - free(ids-1); -} - -int mdb_midl_shrink( MDB_IDL *idp ) -{ - MDB_IDL ids = *idp; - if (*(--ids) > MDB_IDL_UM_MAX && - (ids = realloc(ids, (MDB_IDL_UM_MAX+1) * sizeof(MDB_ID)))) - { - *ids++ = MDB_IDL_UM_MAX; - *idp = ids; - return 1; - } - return 0; -} - -static int mdb_midl_grow( MDB_IDL *idp, int num ) -{ - MDB_IDL idn = *idp-1; - /* grow it */ - idn = realloc(idn, (*idn + num + 2) * sizeof(MDB_ID)); - if (!idn) - return ENOMEM; - *idn++ += num; - *idp = idn; - return 0; -} - -int mdb_midl_need( MDB_IDL *idp, unsigned num ) -{ - MDB_IDL ids = *idp; - num += ids[0]; - if (num > ids[-1]) { - num = (num + num/4 + (256 + 2)) & -256; - if (!(ids = realloc(ids-1, num * sizeof(MDB_ID)))) - return ENOMEM; - *ids++ = num -= 2; - *idp = ids; - } - return 0; -} - -int mdb_midl_append( MDB_IDL *idp, MDB_ID id ) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] >= ids[-1]) { - if (mdb_midl_grow(idp, MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0]++; - ids[ids[0]] = id; - return 0; -} - -int mdb_midl_append_list( MDB_IDL *idp, MDB_IDL app ) -{ - MDB_IDL ids = *idp; - /* Too big? */ - if (ids[0] + app[0] >= ids[-1]) { - if (mdb_midl_grow(idp, app[0])) - return ENOMEM; - ids = *idp; - } - memcpy(&ids[ids[0]+1], &app[1], app[0] * sizeof(MDB_ID)); - ids[0] += app[0]; - return 0; -} - -int mdb_midl_append_range( MDB_IDL *idp, MDB_ID id, unsigned n ) -{ - MDB_ID *ids = *idp, len = ids[0]; - /* Too big? 
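-	 * Grow by (n | MDB_IDL_UM_MAX), i.e. by at least MDB_IDL_UM_MAX
-	 * entries at a time.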
*/ - if (len + n > ids[-1]) { - if (mdb_midl_grow(idp, n | MDB_IDL_UM_MAX)) - return ENOMEM; - ids = *idp; - } - ids[0] = len + n; - ids += len; - while (n) - ids[n--] = id++; - return 0; -} - -/* Quicksort + Insertion sort for small arrays */ - -#define SMALL 8 -#define SWAP(a,b) { itmp=(a); (a)=(b); (b)=itmp; } - -void -mdb_midl_sort( MDB_IDL ids ) -{ - /* Max possible depth of int-indexed tree * 2 items/level */ - int istack[sizeof(int)*CHAR_BIT * 2]; - int i,j,k,l,ir,jstack; - MDB_ID a, itmp; - - ir = (int)ids[0]; - l = 1; - jstack = 0; - for(;;) { - if (ir - l < SMALL) { /* Insertion sort */ - for (j=l+1;j<=ir;j++) { - a = ids[j]; - for (i=j-1;i>=1;i--) { - if (ids[i] >= a) break; - ids[i+1] = ids[i]; - } - ids[i+1] = a; - } - if (jstack == 0) break; - ir = istack[jstack--]; - l = istack[jstack--]; - } else { - k = (l + ir) >> 1; /* Choose median of left, center, right */ - SWAP(ids[k], ids[l+1]); - if (ids[l] < ids[ir]) { - SWAP(ids[l], ids[ir]); - } - if (ids[l+1] < ids[ir]) { - SWAP(ids[l+1], ids[ir]); - } - if (ids[l] < ids[l+1]) { - SWAP(ids[l], ids[l+1]); - } - i = l+1; - j = ir; - a = ids[l+1]; - for(;;) { - do i++; while(ids[i] > a); - do j--; while(ids[j] < a); - if (j < i) break; - SWAP(ids[i],ids[j]); - } - ids[l+1] = ids[j]; - ids[j] = a; - jstack += 2; - if (ir-i+1 >= j-l) { - istack[jstack] = ir; - istack[jstack-1] = i; - ir = j-1; - } else { - istack[jstack] = j-1; - istack[jstack-1] = l; - l = i; - } - } - } -} - -unsigned mdb_mid2l_search( MDB_ID2L ids, MDB_ID id ) -{ - /* - * binary search of id in ids - * if found, returns position of id - * if not found, returns first position greater than id - */ - unsigned base = 0; - unsigned cursor = 1; - int val = 0; - unsigned n = (unsigned)ids[0].mid; - - while( 0 < n ) { - unsigned pivot = n >> 1; - cursor = base + pivot + 1; - val = CMP( id, ids[cursor].mid ); - - if( val < 0 ) { - n = pivot; - - } else if ( val > 0 ) { - base = cursor; - n -= pivot + 1; - - } else { - return cursor; - } - } - - if( val > 0 ) { - ++cursor; - } - return cursor; -} - -int mdb_mid2l_insert( MDB_ID2L ids, MDB_ID2 *id ) -{ - unsigned x, i; - - x = mdb_mid2l_search( ids, id->mid ); - assert( x > 0 ); - - if( x < 1 ) { - /* internal error */ - return -2; - } - - if ( x <= ids[0].mid && ids[x].mid == id->mid ) { - /* duplicate */ - return -1; - } - - if ( ids[0].mid >= MDB_IDL_UM_MAX ) { - /* too big */ - return -2; - - } else { - /* insert id */ - ids[0].mid++; - for (i=(unsigned)ids[0].mid; i>x; i--) - ids[i] = ids[i-1]; - ids[x] = *id; - } - - return 0; -} - -int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id ) -{ - /* Too big? */ - if (ids[0].mid >= MDB_IDL_UM_MAX) { - return -2; - } - ids[0].mid++; - ids[ids[0].mid] = *id; - return 0; -} - -/** @} */ -/** @} */ diff --git a/libraries/liblmdb/midl.h b/libraries/liblmdb/midl.h deleted file mode 100644 index 9ce7133c6e..0000000000 --- a/libraries/liblmdb/midl.h +++ /dev/null @@ -1,179 +0,0 @@ -/** @file midl.h - * @brief mdb ID List header file. - * - * This file was originally part of back-bdb but has been - * modified for use in libmdb. Most of the macros defined - * in this file are unused, just left over from the original. - * - * This file is only used internally in libmdb and its definitions - * are not exposed publicly. - */ -/* $OpenLDAP$ */ -/* This work is part of OpenLDAP Software . - * - * Copyright 2000-2013 The OpenLDAP Foundation. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ - -#ifndef _MDB_MIDL_H_ -#define _MDB_MIDL_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** @defgroup internal MDB Internals - * @{ - */ - -/** @defgroup idls ID List Management - * @{ - */ - /** A generic ID number. These were entryIDs in back-bdb. - * Preferably it should have the same size as a pointer. - */ -typedef size_t MDB_ID; - - /** An IDL is an ID List, a sorted array of IDs. The first - * element of the array is a counter for how many actual - * IDs are in the list. In the original back-bdb code, IDLs are - * sorted in ascending order. For libmdb IDLs are sorted in - * descending order. - */ -typedef MDB_ID *MDB_IDL; - -/* IDL sizes - likely should be even bigger - * limiting factors: sizeof(ID), thread stack size - */ -#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ -#define MDB_IDL_DB_SIZE (1<. - */ -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor, *cur2; - int count; - int *values; - char sval[32]; - - srandom(time(NULL)); - - count = (random()%384) + 64; - values = (int *)malloc(count*sizeof(int)); - - for(i = 0;i -1; i-= (random()%5)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(sval, "%03x ", values[i]); - rc = mdb_del(txn, dbi, &key, NULL); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor last\n"); - rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST); - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor last/prev\n"); - rc = mdb_cursor_get(cursor, &key, &data, MDB_LAST); - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV); - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - - mdb_txn_abort(txn); - - printf("Deleting with cursor\n"); - rc = mdb_txn_begin(env, NULL, 0, &txn); - rc = mdb_cursor_open(txn, dbi, &cur2); - for (i=0; i<50; i++) { - rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT); - if (rc) - break; - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - rc = mdb_del(txn, dbi, &key, NULL); - } - - printf("Restarting cursor in 
txn\n"); - rc = mdb_cursor_get(cur2, &key, &data, MDB_FIRST); - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - for (i=0; i<32; i++) { - rc = mdb_cursor_get(cur2, &key, &data, MDB_NEXT); - if (rc) break; - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cur2); - rc = mdb_txn_commit(txn); - - printf("Restarting cursor outside txn\n"); - rc = mdb_txn_begin(env, NULL, 0, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST); - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - for (i=0; i<32; i++) { - rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT); - if (rc) break; - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); - - mdb_txn_abort(txn); - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/mtest2.c b/libraries/liblmdb/mtest2.c deleted file mode 100644 index 44d1de7ccd..0000000000 --- a/libraries/liblmdb/mtest2.c +++ /dev/null @@ -1,117 +0,0 @@ -/* mtest2.c - memory-mapped database tester/toy */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -/* Just like mtest.c, but using a subDB instead of the main DB */ - -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor; - int count; - int *values; - char sval[32]; - - srandom(time(NULL)); - - count = (random()%384) + 64; - values = (int *)malloc(count*sizeof(int)); - - for(i = 0;i -1; i-= (random()%5)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(sval, "%03x ", values[i]); - rc = mdb_del(txn, dbi, &key, NULL); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); - - mdb_txn_abort(txn); - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/mtest3.c b/libraries/liblmdb/mtest3.c deleted file mode 100644 index c189eaa952..0000000000 --- a/libraries/liblmdb/mtest3.c +++ /dev/null @@ -1,127 +0,0 @@ -/* mtest3.c - memory-mapped database tester/toy */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -/* Tests for sorted duplicate DBs */ -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor; - int count; - int *values; - char sval[32]; - char kval[sizeof(int)]; - - srandom(time(NULL)); - - memset(sval, 0, sizeof(sval)); - - count = (random()%384) + 64; - values = (int *)malloc(count*sizeof(int)); - - for(i = 0;i -1; i-= (random()%5)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(kval, "%03x", values[i & ~0x0f]); - sprintf(sval, "%03x %d foo bar", values[i], values[i]); - key.mv_size = sizeof(int); - key.mv_data = kval; - data.mv_size = sizeof(sval); - data.mv_data = sval; - rc = mdb_del(txn, dbi, &key, &data); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); - - mdb_txn_abort(txn); - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/mtest4.c b/libraries/liblmdb/mtest4.c deleted file mode 100644 index e0ba7e20b6..0000000000 --- a/libraries/liblmdb/mtest4.c +++ /dev/null @@ -1,161 +0,0 @@ -/* mtest4.c - memory-mapped database tester/toy */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -/* Tests for sorted duplicate DBs with fixed-size keys */ -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor; - int count; - int *values; - char sval[8]; - char kval[sizeof(int)]; - - memset(sval, 0, sizeof(sval)); - - count = 510; - values = (int *)malloc(count*sizeof(int)); - - for(i = 0;i -1; i-= (random()%3)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(sval, "%07x", values[i]); - key.mv_size = sizeof(int); - key.mv_data = kval; - data.mv_size = sizeof(sval); - data.mv_data = sval; - rc = mdb_del(txn, dbi, &key, &data); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); - - mdb_txn_abort(txn); - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/mtest5.c b/libraries/liblmdb/mtest5.c deleted file mode 100644 index bc472fa093..0000000000 --- a/libraries/liblmdb/mtest5.c +++ /dev/null @@ -1,129 +0,0 @@ -/* mtest5.c - memory-mapped database tester/toy */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -/* Tests for sorted duplicate DBs using cursor_put */ -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor; - int count; - int *values; - char sval[32]; - char kval[sizeof(int)]; - - srandom(time(NULL)); - - memset(sval, 0, sizeof(sval)); - - count = (random()%384) + 64; - values = (int *)malloc(count*sizeof(int)); - - for(i = 0;i -1; i-= (random()%5)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(kval, "%03x", values[i & ~0x0f]); - sprintf(sval, "%03x %d foo bar", values[i], values[i]); - key.mv_size = sizeof(int); - key.mv_data = kval; - data.mv_size = sizeof(sval); - data.mv_data = sval; - rc = mdb_del(txn, dbi, &key, &data); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(env, dbi); - - mdb_txn_abort(txn); - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/mtest6.c b/libraries/liblmdb/mtest6.c deleted file mode 100644 index 0bf26ccc45..0000000000 --- a/libraries/liblmdb/mtest6.c +++ /dev/null @@ -1,131 +0,0 @@ -/* mtest6.c - memory-mapped database tester/toy */ -/* - * Copyright 2011 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ - -/* Tests for DB splits and merges */ -#define _XOPEN_SOURCE 500 /* srandom(), random() */ -#include -#include -#include -#include -#include "lmdb.h" - -char dkbuf[1024]; - -int main(int argc,char * argv[]) -{ - int i = 0, j = 0, rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_stat mst; - MDB_cursor *cursor; - int count; - int *values; - long kval; - char *sval; - - srandom(time(NULL)); - - rc = mdb_env_create(&env); - rc = mdb_env_set_mapsize(env, 10485760); - rc = mdb_env_set_maxdbs(env, 4); - rc = mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664); - rc = mdb_txn_begin(env, NULL, 0, &txn); - rc = mdb_open(txn, "id2", MDB_CREATE|MDB_INTEGERKEY, &dbi); - rc = mdb_cursor_open(txn, dbi, &cursor); - rc = mdb_stat(txn, dbi, &mst); - - sval = calloc(1, mst.ms_psize / 4); - key.mv_size = sizeof(long); - key.mv_data = &kval; - data.mv_size = mst.ms_psize / 4 - 30; - data.mv_data = sval; - - printf("Adding 12 values, should yield 3 splits\n"); - for (i=0;i<12;i++) { - kval = i*5; - sprintf(sval, "%08x", kval); - rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); - } - printf("Adding 12 more values, should yield 3 splits\n"); - for (i=0;i<12;i++) { - kval = i*5+4; - sprintf(sval, "%08x", kval); - rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); - } - printf("Adding 12 more values, should yield 3 splits\n"); - for (i=0;i<12;i++) { - kval = i*5+1; - sprintf(sval, "%08x", kval); - rc = mdb_cursor_put(cursor, &key, &data, MDB_NOOVERWRITE); - } - rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST); - - do { - printf("key: %p %s, data: %p %.*s\n", - key.mv_data, mdb_dkey(&key, dkbuf), - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - } while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0); - mdb_cursor_close(cursor); - mdb_txn_commit(txn); - -#if 0 - j=0; - - for (i= count - 1; i > -1; i-= (random()%5)) { - j++; - txn=NULL; - rc = mdb_txn_begin(env, NULL, 0, &txn); - sprintf(kval, "%03x", values[i & ~0x0f]); - sprintf(sval, "%03x %d foo bar", values[i], values[i]); - key.mv_size = sizeof(int); - key.mv_data = kval; - data.mv_size = sizeof(sval); - data.mv_data = sval; - rc = mdb_del(txn, dbi, &key, &data); - if (rc) { - j--; - mdb_txn_abort(txn); - } else { - rc = mdb_txn_commit(txn); - } - } - free(values); - printf("Deleted %d values\n", j); - - rc = mdb_env_stat(env, &mst); - rc = mdb_txn_begin(env, NULL, 1, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - printf("Cursor next\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - printf("Cursor prev\n"); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV)) == 0) { - printf("key: %.*s, data: %.*s\n", - (int) key.mv_size, (char *) key.mv_data, - (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_close(txn, dbi); - - mdb_txn_abort(txn); -#endif - mdb_env_close(env); - - return 0; -} diff --git a/libraries/liblmdb/sample-bdb.c b/libraries/liblmdb/sample-bdb.c deleted file mode 100644 index 2c11bb38a0..0000000000 --- a/libraries/liblmdb/sample-bdb.c +++ /dev/null @@ -1,71 +0,0 @@ -/* sample-bdb.c - BerkeleyDB toy/sample - * - * Do a line-by-line comparison of this and sample-mdb.c - */ -/* - * Copyright 2012 Howard Chu, Symas Corp. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . - */ -#include -#include -#include - -int main(int argc,char * argv[]) -{ - int rc; - DB_ENV *env; - DB *dbi; - DBT key, data; - DB_TXN *txn; - DBC *cursor; - char sval[32], kval[32]; - -#define FLAGS (DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_INIT_MPOOL|DB_CREATE|DB_THREAD) - rc = db_env_create(&env, 0); - rc = env->open(env, "./testdb", FLAGS, 0664); - rc = db_create(&dbi, env, 0); - rc = env->txn_begin(env, NULL, &txn, 0); - rc = dbi->open(dbi, txn, "test.bdb", NULL, DB_BTREE, DB_CREATE, 0664); - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - key.size = sizeof(int); - key.data = sval; - data.size = sizeof(sval); - data.data = sval; - - sprintf(sval, "%03x %d foo bar", 32, 3141592); - rc = dbi->put(dbi, txn, &key, &data, 0); - rc = txn->commit(txn, 0); - if (rc) { - fprintf(stderr, "txn->commit: (%d) %s\n", rc, db_strerror(rc)); - goto leave; - } - rc = env->txn_begin(env, NULL, &txn, 0); - rc = dbi->cursor(dbi, txn, &cursor, 0); - key.flags = DB_DBT_USERMEM; - key.data = kval; - key.ulen = sizeof(kval); - data.flags = DB_DBT_USERMEM; - data.data = sval; - data.ulen = sizeof(sval); - while ((rc = cursor->c_get(cursor, &key, &data, DB_NEXT)) == 0) { - printf("key: %p %.*s, data: %p %.*s\n", - key.data, (int) key.size, (char *) key.data, - data.data, (int) data.size, (char *) data.data); - } - rc = cursor->c_close(cursor); - rc = txn->abort(txn); -leave: - rc = dbi->close(dbi, 0); - rc = env->close(env, 0); - return rc; -} diff --git a/libraries/liblmdb/sample-mdb.c b/libraries/liblmdb/sample-mdb.c deleted file mode 100644 index 0b10f47173..0000000000 --- a/libraries/liblmdb/sample-mdb.c +++ /dev/null @@ -1,60 +0,0 @@ -/* sample-mdb.c - MDB toy/sample - * - * Do a line-by-line comparison of this and sample-bdb.c - */ -/* - * Copyright 2012 Howard Chu, Symas Corp. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted only as authorized by the OpenLDAP - * Public License. - * - * A copy of this license is available in the file LICENSE in the - * top-level directory of the distribution or, alternatively, at - * . 
- */ -#include -#include "lmdb.h" - -int main(int argc,char * argv[]) -{ - int rc; - MDB_env *env; - MDB_dbi dbi; - MDB_val key, data; - MDB_txn *txn; - MDB_cursor *cursor; - char sval[32]; - - rc = mdb_env_create(&env); - rc = mdb_env_open(env, "./testdb", 0, 0664); - rc = mdb_txn_begin(env, NULL, 0, &txn); - rc = mdb_open(txn, NULL, 0, &dbi); - - key.mv_size = sizeof(int); - key.mv_data = sval; - data.mv_size = sizeof(sval); - data.mv_data = sval; - - sprintf(sval, "%03x %d foo bar", 32, 3141592); - rc = mdb_put(txn, dbi, &key, &data, 0); - rc = mdb_txn_commit(txn); - if (rc) { - fprintf(stderr, "mdb_txn_commit: (%d) %s\n", rc, mdb_strerror(rc)); - goto leave; - } - rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn); - rc = mdb_cursor_open(txn, dbi, &cursor); - while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)) == 0) { - printf("key: %p %.*s, data: %p %.*s\n", - key.mv_data, (int) key.mv_size, (char *) key.mv_data, - data.mv_data, (int) data.mv_size, (char *) data.mv_data); - } - mdb_cursor_close(cursor); - mdb_txn_abort(txn); -leave: - mdb_close(env, dbi); - mdb_env_close(env); - return 0; -} diff --git a/modules/ripple_mdb/ripple_mdb.c b/modules/ripple_mdb/ripple_mdb.c index 8f66bbcf83..53d03f11fa 100644 --- a/modules/ripple_mdb/ripple_mdb.c +++ b/modules/ripple_mdb/ripple_mdb.c @@ -12,7 +12,7 @@ #if RIPPLE_MDB_AVAILABLE -#include "libraries/liblmdb/mdb.c" -#include "libraries/liblmdb/midl.c" +#include "mdb/libraries/liblmdb/mdb.c" +#include "mdb/libraries/liblmdb/midl.c" #endif diff --git a/modules/ripple_mdb/ripple_mdb.h b/modules/ripple_mdb/ripple_mdb.h index faefb502b3..03efbc4eb7 100644 --- a/modules/ripple_mdb/ripple_mdb.h +++ b/modules/ripple_mdb/ripple_mdb.h @@ -12,7 +12,7 @@ #if ! BEAST_WIN32 #define RIPPLE_MDB_AVAILABLE 1 -#include "libraries/liblmdb/lmdb.h" +#include "mdb/libraries/liblmdb/lmdb.h" #else // mdb is unsupported on Win32 From 061865a5da9ca965a9879037121ad52c53a0962a Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 22 Jul 2013 08:44:27 -0700 Subject: [PATCH 40/50] Move log message up to caller of import --- TODO.txt | 1 + modules/ripple_app/node/ripple_NodeStore.cpp | 19 +++---- modules/ripple_app/node/ripple_NodeStore.h | 52 +++----------------- src/cpp/ripple/ripple_Application.cpp | 6 +++ 4 files changed, 24 insertions(+), 54 deletions(-) diff --git a/TODO.txt b/TODO.txt index 5621072c5a..92fb9272dc 100644 --- a/TODO.txt +++ b/TODO.txt @@ -9,6 +9,7 @@ Vinnie's Short List (Changes day to day) - Import beast::db and use it in SQliteBackend - Finish unit tests and code for Validators - Convert some Ripple boost unit tests to Beast. 
+- Move all code into modules/ -------------------------------------------------------------------------------- diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 47f660a9fb..ea088a07ad 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -222,6 +222,11 @@ public: { } + String getName () const + { + return m_backend->getName (); + } + //------------------------------------------------------------------------------ NodeObject::Ptr fetch (uint256 const& hash) @@ -436,10 +441,6 @@ public: ScopedPointer srcBackend (createBackend (sourceBackendParameters, m_scheduler)); - WriteLog (lsWARNING, NodeObject) << - "Node import from '" << srcBackend->getName() << "' to '" - << m_backend->getName() << "'."; - ImportVisitCallback callback (*m_backend); srcBackend->visitAll (callback); @@ -736,7 +737,7 @@ public: Batch batch3; createPredictableBatch (batch3, 1, numObjectsToTest, seedValue); - expect (! areBatchesEqual (batch1, batch3), "Should be equal"); + expect (! areBatchesEqual (batch1, batch3), "Should not be equal"); } // Checks encoding/decoding blobs @@ -959,13 +960,13 @@ public: testBackend ("leveldb", seedValue); - #if RIPPLE_HYPERLEVELDB_AVAILABLE + #if RIPPLE_HYPERLEVELDB_AVAILABLE testBackend ("hyperleveldb", seedValue); - #endif + #endif - #if RIPPLE_MDB_AVAILABLE + #if RIPPLE_MDB_AVAILABLE testBackend ("mdb", seedValue); - #endif + #endif testBackend ("sqlite", seedValue); } diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h index 0202f60178..78784df66b 100644 --- a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -7,51 +7,6 @@ #ifndef RIPPLE_NODESTORE_H_INCLUDED #define RIPPLE_NODESTORE_H_INCLUDED -// Javadoc comments are added to all public classes, member functions, -// type definitions, data types, and global variables (which we should -// minimize the use of. -// -// A Javadoc comment is introduced with an extra asterisk following the -// beginning of a normal C++ style comment, or by using a triple forward slash. -// -// Structure of a Javadoc comment: - -/** Brief one line description. - - A more detailed description, which may be broken up into multiple - paragraphs. Doxygen commands are prefixed with the at-sign '@'. For - example, here's a formatted code snippet: - - @code - - int main (int argc, char** argv) - { - return 0; - } - - @endcode - - You can also add a note, or document an invariant: - - @note This appears as its own note. - - @invariant This must not be called while holding the lock. - - When documenting functions, you can use these Doxygen commands - to annotate the parameters, return value, template types. - - @param argc The number of arguments to the program. - @param argv An array of strings argc in size, one for each argument. - - @return The return value of the program, passed to to the enclosing process. -*/ - -/** Functions can be documented with just the brief description, like this */ - -/// Here's the alternate form of a brief description. - -//------------------------------------------------------------------------------ - /** Persistency layer for NodeObject A Node is a ledger object which is uniquely identified by a key, which is @@ -379,6 +334,13 @@ public: */ virtual ~NodeStore () { } + /** Retrieve the name associated with this backend. 
+
+        This is used for diagnostics and may not reflect the actual path
+        or paths used by the underlying backend.
+    */
+    virtual String getName () const = 0;
+
     /** Add the specified backend factory to the list of available factories.
 
         The names of available factories are compared against the "type"
diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp
index 42307dc853..bbef763057 100644
--- a/src/cpp/ripple/ripple_Application.cpp
+++ b/src/cpp/ripple/ripple_Application.cpp
@@ -981,7 +981,13 @@ void ApplicationImp::updateTables ()
     }
 
     if (!theConfig.DB_IMPORT.empty())
+    {
+        WriteLog (lsWARNING, NodeObject) <<
+            "Node import from '" << theConfig.DB_IMPORT << "' to '"
+            << getApp().getNodeStore().getName () << "'.";
+
         getApp().getNodeStore().import(theConfig.DB_IMPORT);
+    }
 }
 
 //------------------------------------------------------------------------------

From 3f358efee33bda4f3f3872191c37ad178500ae42 Mon Sep 17 00:00:00 2001
From: Vinnie Falco
Date: Mon, 22 Jul 2013 13:20:27 -0700
Subject: [PATCH 41/50] Update TODO

---
 TODO.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/TODO.txt b/TODO.txt
index 92fb9272dc..4cbeeda45e 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -3,13 +3,16 @@ RIPPLE TODO
 --------------------------------------------------------------------------------
 
 Vinnie's Short List (Changes day to day)
-- Memory NodeStore::Backend for unit tests
+- Refactor Section code into ConfigFile
+- Change NodeStore config file format to multiline key/value pairs
 - Improved Mutex to track deadlocks
-- Work on KeyvaDB
-- Import beast::db and use it in SQliteBackend
+- Memory NodeStore::Backend for unit tests [*]
 - Finish unit tests and code for Validators
-- Convert some Ripple boost unit tests to Beast.
+- Import beast::db and use it in SQliteBackend
+- Convert some Ripple boost unit tests to Beast. [*]
 - Move all code into modules/
+- Work on KeyvaDB
 
+[*] These can be handled by external developers
 
 --------------------------------------------------------------------------------

From 64d45918eb50afa5f33f2730065a3ac324fb890b Mon Sep 17 00:00:00 2001
From: Vinnie Falco
Date: Mon, 22 Jul 2013 13:20:52 -0700
Subject: [PATCH 42/50] Allow external buffers in MemoryOutputStream

---
 .../beast_core/streams/beast_InputStream.h    |  5 +-
 .../streams/beast_MemoryOutputStream.cpp      | 92 ++++++++++++++-----
 .../streams/beast_MemoryOutputStream.h        | 30 ++++--
 3 files changed, 91 insertions(+), 36 deletions(-)

diff --git a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h
index 30df5ac567..081a7b037b 100644
--- a/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h
+++ b/Subtrees/beast/modules/beast_core/streams/beast_InputStream.h
@@ -92,7 +92,7 @@ public:
 
     /** Reads a boolean from the stream.
 
-        The bool is encoded as a single byte - 0 for false, nonzero for true.
+        The bool is encoded as a single byte - non-zero for true, 0 for false.
 
         If the stream is exhausted, this will return false.
 
@@ -117,8 +117,7 @@ public:
 
     /** Reads two bytes from the stream as a little-endian 16-bit value.
 
-        If the next two bytes read are byte1 and byte2, this returns
-        (byte2 | (byte1 << 8)).
+        If the next two bytes read are byte1 and byte2, this returns (byte1 | (byte2 << 8)).
 
         If the stream is exhausted partway through reading the bytes,
         this will return zero.
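
The documentation fix above is worth pinning down, since the old comment had the byte order backwards. A minimal standalone sketch of the little-endian decode the corrected comment describes; this is not Beast's implementation, and the function name decodeLittleEndianShort is illustrative only:

    #include <cstdio>

    // byte1 is the first byte read from the stream and supplies the
    // low-order bits; byte2 is read second and supplies the high-order bits.
    static int decodeLittleEndianShort (unsigned char byte1, unsigned char byte2)
    {
        return byte1 | (byte2 << 8);
    }

    int main ()
    {
        // Reading 0x34 then 0x12 yields 0x1234 regardless of host endianness.
        std::printf ("%04x\n", decodeLittleEndianShort (0x34, 0x12));
        return 0;
    }
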
diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp index 2e2e2f15a9..8df895da1b 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.cpp @@ -22,23 +22,28 @@ //============================================================================== MemoryOutputStream::MemoryOutputStream (const size_t initialSize) - : data (internalBlock), - position (0), - size (0) + : blockToUse (&internalBlock), externalData (nullptr), + position (0), size (0), availableSize (0) { internalBlock.setSize (initialSize, false); } MemoryOutputStream::MemoryOutputStream (MemoryBlock& memoryBlockToWriteTo, const bool appendToExistingBlockContent) - : data (memoryBlockToWriteTo), - position (0), - size (0) + : blockToUse (&memoryBlockToWriteTo), externalData (nullptr), + position (0), size (0), availableSize (0) { if (appendToExistingBlockContent) position = size = memoryBlockToWriteTo.getSize(); } +MemoryOutputStream::MemoryOutputStream (void* destBuffer, size_t destBufferSize) + : blockToUse (nullptr), externalData (destBuffer), + position (0), size (0), availableSize (destBufferSize) +{ + bassert (externalData != nullptr); // This must be a valid pointer. +} + MemoryOutputStream::~MemoryOutputStream() { trimExternalBlockSize(); @@ -51,13 +56,14 @@ void MemoryOutputStream::flush() void MemoryOutputStream::trimExternalBlockSize() { - if (&data != &internalBlock) - data.setSize (size, false); + if (blockToUse != &internalBlock && blockToUse != nullptr) + blockToUse->setSize (size, false); } void MemoryOutputStream::preallocate (const size_t bytesToPreallocate) { - data.ensureSize (bytesToPreallocate + 1); + if (blockToUse != nullptr) + blockToUse->ensureSize (bytesToPreallocate + 1); } void MemoryOutputStream::reset() noexcept @@ -71,10 +77,24 @@ char* MemoryOutputStream::prepareToWrite (size_t numBytes) bassert ((ssize_t) numBytes >= 0); size_t storageNeeded = position + numBytes; - if (storageNeeded >= data.getSize()) - data.ensureSize ((storageNeeded + bmin (storageNeeded / 2, (size_t) (1024 * 1024)) + 32) & ~31u); + char* data; - char* const writePointer = static_cast (data.getData()) + position; + if (blockToUse != nullptr) + { + if (storageNeeded >= blockToUse->getSize()) + blockToUse->ensureSize ((storageNeeded + bmin (storageNeeded / 2, (size_t) (1024 * 1024)) + 32) & ~31u); + + data = static_cast (blockToUse->getData()); + } + else + { + if (storageNeeded > availableSize) + return nullptr; + + data = static_cast (externalData); + } + + char* const writePointer = data + position; position += numBytes; size = bmax (size, position); return writePointer; @@ -82,25 +102,43 @@ char* MemoryOutputStream::prepareToWrite (size_t numBytes) bool MemoryOutputStream::write (const void* const buffer, size_t howMany) { - bassert (buffer != nullptr && ((ssize_t) howMany) >= 0); + bassert (buffer != nullptr); - if (howMany > 0) - memcpy (prepareToWrite (howMany), buffer, howMany); + if (howMany == 0) + return true; - return true; + if (char* dest = prepareToWrite (howMany)) + { + memcpy (dest, buffer, howMany); + return true; + } + + return false; } bool MemoryOutputStream::writeRepeatedByte (uint8 byte, size_t howMany) { - if (howMany > 0) - memset (prepareToWrite (howMany), byte, howMany); + if (howMany == 0) + return true; - return true; + if (char* dest = prepareToWrite (howMany)) + { + memset (dest, byte, howMany); + 
return true; + } + + return false; } -void MemoryOutputStream::appendUTF8Char (beast_wchar c) +bool MemoryOutputStream::appendUTF8Char (beast_wchar c) { - CharPointer_UTF8 (prepareToWrite (CharPointer_UTF8::getBytesRequiredFor (c))).write (c); + if (char* dest = prepareToWrite (CharPointer_UTF8::getBytesRequiredFor (c))) + { + CharPointer_UTF8 (dest).write (c); + return true; + } + + return false; } MemoryBlock MemoryOutputStream::getMemoryBlock() const @@ -110,10 +148,13 @@ MemoryBlock MemoryOutputStream::getMemoryBlock() const const void* MemoryOutputStream::getData() const noexcept { - if (data.getSize() > size) - static_cast (data.getData()) [size] = 0; + if (blockToUse == nullptr) + return externalData; - return data.getData(); + if (blockToUse->getSize() > size) + static_cast (blockToUse->getData()) [size] = 0; + + return blockToUse->getData(); } bool MemoryOutputStream::setPosition (int64 newPosition) @@ -139,7 +180,8 @@ int MemoryOutputStream::writeFromInputStream (InputStream& source, int64 maxNumB if (maxNumBytesToWrite > availableData) maxNumBytesToWrite = availableData; - preallocate (data.getSize() + (size_t) maxNumBytesToWrite); + if (blockToUse != nullptr) + preallocate (blockToUse->getSize() + (size_t) maxNumBytesToWrite); } return OutputStream::writeFromInputStream (source, maxNumBytesToWrite); diff --git a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h index f2e8f7ad8c..be5fd04f28 100644 --- a/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h +++ b/Subtrees/beast/modules/beast_core/streams/beast_MemoryOutputStream.h @@ -28,6 +28,13 @@ #include "../memory/beast_MemoryBlock.h" #include "../memory/beast_ScopedPointer.h" +//============================================================================== +/** + Writes data to an internal memory buffer, which grows as required. + + The data that was written into the stream can then be accessed later as + a contiguous block of memory. +*/ //============================================================================== /** Writes data to an internal memory buffer, which grows as required. @@ -42,7 +49,6 @@ class BEAST_API MemoryOutputStream public: //============================================================================== /** Creates an empty memory stream, ready to be written into. - @param initialSize the intial amount of capacity to allocate for writing into */ MemoryOutputStream (size_t initialSize = 256); @@ -62,6 +68,14 @@ public: MemoryOutputStream (MemoryBlock& memoryBlockToWriteTo, bool appendToExistingBlockContent); + /** Creates a MemoryOutputStream that will write into a user-supplied, fixed-size + block of memory. + + When using this mode, the stream will write directly into this memory area until + it's full, at which point write operations will fail. + */ + MemoryOutputStream (void* destBuffer, size_t destBufferSize); + /** Destructor. This will free any data that was written to it. */ @@ -87,7 +101,7 @@ public: void preallocate (size_t bytesToPreallocate); /** Appends the utf-8 bytes for a unicode character */ - void appendUTF8Char (beast_wchar character); + bool appendUTF8Char (beast_wchar character); /** Returns a String created from the (UTF8) data that has been written to the stream. 
*/ String toUTF8() const; @@ -114,17 +128,17 @@ public: bool writeRepeatedByte (uint8 byte, size_t numTimesToRepeat) override; private: - //============================================================================== - MemoryBlock& data; - MemoryBlock internalBlock; - size_t position, size; - void trimExternalBlockSize(); char* prepareToWrite (size_t); + + //============================================================================== + MemoryBlock* const blockToUse; + MemoryBlock internalBlock; + void* externalData; + size_t position, size, availableSize; }; /** Copies all the data that has been written to a MemoryOutputStream into another stream. */ OutputStream& BEAST_CALLTYPE operator<< (OutputStream& stream, const MemoryOutputStream& streamToRead); - #endif \ No newline at end of file From fd86a3e67f96e4dc4ecf866b0854b0ef466c5a35 Mon Sep 17 00:00:00 2001 From: JoelKatz Date: Mon, 22 Jul 2013 13:34:37 -0700 Subject: [PATCH 43/50] Merge fix. --- src/cpp/ripple/ripple_TransactionAcquire.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/cpp/ripple/ripple_TransactionAcquire.cpp b/src/cpp/ripple/ripple_TransactionAcquire.cpp index ac1accaa6d..9202a5dc23 100644 --- a/src/cpp/ripple/ripple_TransactionAcquire.cpp +++ b/src/cpp/ripple/ripple_TransactionAcquire.cpp @@ -61,14 +61,9 @@ void TransactionAcquire::onTimer (bool progress, boost::recursive_mutex::scoped_ if (getApp().getOPs ().stillNeedTXSet (mHash)) { - boost::recursive_mutex::scoped_lock sl (getApp().getMasterLock ()); - - if (getApp().getOPs ().stillNeedTXSet (mHash)) - { - WriteLog (lsWARNING, TransactionAcquire) << "Still need it"; - mTimeouts = 0; - aggressive = true; - } + WriteLog (lsWARNING, TransactionAcquire) << "Still need it"; + mTimeouts = 0; + aggressive = true; } } psl.lock(); From 20c20fd05617c14463762ce2d9b237f0fedf2778 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 22 Jul 2013 14:06:45 -0700 Subject: [PATCH 44/50] Fix path passed to DoSustain --- src/cpp/ripple/ripple_Main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index 580d75abb4..eaaa83ab62 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -299,7 +299,7 @@ int rippleMain (int argc, char** argv) if (HaveSustain () && !iResult && !vm.count ("parameters") && !vm.count ("fg") && !vm.count ("standalone") && !vm.count ("unittest")) { - std::string logMe = DoSustain (theConfig.DEBUG_LOGFILE.c_str()); + std::string logMe = DoSustain (theConfig.DEBUG_LOGFILE.string()); if (!logMe.empty ()) Log (lsWARNING) << logMe; From 02e55f9794092fc034942d69fd387d93e6382af0 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 22 Jul 2013 16:01:57 -0700 Subject: [PATCH 45/50] Change format of config file for NodeStore --- TODO.txt | 3 +- modules/ripple_app/node/ripple_NodeStore.cpp | 83 +++++++++++++++---- modules/ripple_app/node/ripple_NodeStore.h | 32 ++++++- .../ripple_basics/utility/ripple_IniFile.cpp | 34 +++++++- .../ripple_basics/utility/ripple_IniFile.h | 7 ++ .../utility/ripple_StringUtilities.cpp | 38 --------- .../utility/ripple_StringUtilities.h | 4 - .../ripple_core/functional/ripple_Config.cpp | 4 +- .../ripple_core/functional/ripple_Config.h | 8 +- rippled-example.cfg | 14 ++-- src/cpp/ripple/ripple_Application.cpp | 16 ++-- 11 files changed, 160 insertions(+), 83 deletions(-) diff --git a/TODO.txt b/TODO.txt index 4cbeeda45e..517589b43c 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,7 +3,8 @@ RIPPLE TODO 
-------------------------------------------------------------------------------- Vinnie's Short List (Changes day to day) -- Refactor Section code into COnfigFile +- Add fast backend to the unit test +- Refactor Section code into ConfigFile - Change NodeStore config file format to multiline key/value pairs - Improved Mutex to track deadlocks - Memory NodeStore::Backend for unit tests [*] diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index ea088a07ad..1ffa2a09f9 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -206,12 +206,12 @@ class NodeStoreImp , LeakChecked { public: - NodeStoreImp (String backendParameters, - String fastBackendParameters, + NodeStoreImp (Parameters const& backendParameters, + Parameters const& fastBackendParameters, Scheduler& scheduler) : m_scheduler (scheduler) , m_backend (createBackend (backendParameters, scheduler)) - , m_fastBackend (fastBackendParameters.isNotEmpty () + , m_fastBackend ((fastBackendParameters.size () > 0) ? createBackend (fastBackendParameters, scheduler) : nullptr) , m_cache ("NodeStore", 16384, 300) , m_negativeCache ("NoteStoreNegativeCache", 0, 120) @@ -402,7 +402,7 @@ public: //------------------------------------------------------------------------------ - void import (String sourceBackendParameters) + void import (Parameters const& sourceBackendParameters) { class ImportVisitCallback : public Backend::VisitCallback { @@ -448,13 +448,11 @@ public: //------------------------------------------------------------------------------ - static NodeStore::Backend* createBackend (String const& parameters, Scheduler& scheduler) + static NodeStore::Backend* createBackend (Parameters const& parameters, Scheduler& scheduler) { Backend* backend = nullptr; - StringPairArray keyValues = parseKeyValueParameters (parameters, '|'); - - String const& type = keyValues ["type"]; + String const& type = parameters ["type"]; if (type.isNotEmpty ()) { @@ -471,7 +469,7 @@ public: if (factory != nullptr) { - backend = factory->createInstance (NodeObject::keyBytes, keyValues, scheduler); + backend = factory->createInstance (NodeObject::keyBytes, parameters, scheduler); } else { @@ -486,6 +484,11 @@ public: return backend; } + static NodeStore::Backend* createBackend (String const& parameterString, Scheduler& scheduler) + { + return createBackend (parseDelimitedKeyValueString (parameterString), scheduler); + } + static void addBackendFactory (BackendFactory& factory) { s_factories.add (&factory); @@ -513,13 +516,52 @@ Array NodeStoreImp::s_factories; //------------------------------------------------------------------------------ +NodeStore::Parameters NodeStore::parseDelimitedKeyValueString (String parameters, beast_wchar delimiter) +{ + StringPairArray keyValues; + + while (parameters.isNotEmpty ()) + { + String pair; + + { + int const delimiterPos = parameters.indexOfChar (delimiter); + + if (delimiterPos != -1) + { + pair = parameters.substring (0, delimiterPos); + + parameters = parameters.substring (delimiterPos + 1); + } + else + { + pair = parameters; + + parameters = String::empty; + } + } + + int const equalPos = pair.indexOfChar ('='); + + if (equalPos != -1) + { + String const key = pair.substring (0, equalPos); + String const value = pair.substring (equalPos + 1, pair.length ()); + + keyValues.set (key, value); + } + } + + return keyValues; +} + void NodeStore::addBackendFactory (BackendFactory& factory) { NodeStoreImp::addBackendFactory 
(factory); } -NodeStore* NodeStore::New (String backendParameters, - String fastBackendParameters, +NodeStore* NodeStore::New (Parameters const& backendParameters, + Parameters const& fastBackendParameters, Scheduler& scheduler) { return new NodeStoreImp (backendParameters, @@ -527,6 +569,15 @@ NodeStore* NodeStore::New (String backendParameters, scheduler); } +NodeStore* NodeStore::New (String const& backendParameters, + String const& fastBackendParameters, + Scheduler& scheduler) +{ + return new NodeStoreImp (parseDelimitedKeyValueString (backendParameters), + parseDelimitedKeyValueString (fastBackendParameters), + scheduler); +} + //============================================================================== // Some common code for the unit tests @@ -924,7 +975,8 @@ public: // Open the backend ScopedPointer backend ( - NodeStoreImp::createBackend (params, m_scheduler)); + NodeStoreImp::createBackend ( + NodeStore::parseDelimitedKeyValueString (params), m_scheduler)); Stopwatch t; @@ -1011,7 +1063,7 @@ public: beginTest (String ("import into '") + destBackendType + "' from '" + srcBackendType + "'"); // Do the import - dest->import (srcParams); + dest->import (NodeStore::parseDelimitedKeyValueString (srcParams)); // Get the results of the import NodeStore::Batch copy; @@ -1026,7 +1078,10 @@ public: void testBackend (String type, int64 const seedValue) { - beginTest (String ("NodeStore backend type=") + type); + String s; + s << String ("NodeStore backend type=") + type; + + beginTest (s); String params; params << "type=" << type diff --git a/modules/ripple_app/node/ripple_NodeStore.h b/modules/ripple_app/node/ripple_NodeStore.h index 78784df66b..4bd44cd1ec 100644 --- a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -33,6 +33,8 @@ public: typedef std::vector Batch; + typedef StringPairArray Parameters; + //-------------------------------------------------------------------------- /** Parsed key/value blob into NodeObject components. @@ -299,12 +301,26 @@ public: @return A pointer to the Backend object. */ virtual Backend* createInstance (size_t keyBytes, - StringPairArray const& keyValues, + Parameters const& parameters, Scheduler& scheduler) = 0; }; //-------------------------------------------------------------------------- + /** Create a Parameters from a String. + + Parameter strings have the format: + + =['|'=] + + The key "type" must exist, it defines the choice of backend. + For example + `type=LevelDB|path=/mnt/ephemeral` + + This is a convenience function for unit tests. + */ + static Parameters parseDelimitedKeyValueString (String s, beast_wchar delimiter='|'); + /** Construct a node store. Parameter strings have the format: @@ -323,8 +339,16 @@ public: @return A pointer to the created object. */ - static NodeStore* New (String backendParameters, - String fastBackendParameters, + static NodeStore* New (Parameters const& backendParameters, + Parameters const& fastBackendParameters, + Scheduler& scheduler); + + /** Construct a node store from a pipe delimited parameter string. + + This is used for unit tests. + */ + static NodeStore* New (String const& backendParameters, + String const& fastBackendParameters, Scheduler& scheduler); /** Destroy the node store. @@ -386,7 +410,7 @@ public: The other NodeStore database is constructed using the specified backend parameters. 
*/ - virtual void import (String sourceBackendParameters) = 0; + virtual void import (Parameters const& sourceBackendParameters) = 0; /** Retrieve the estimated number of pending write operations. diff --git a/modules/ripple_basics/utility/ripple_IniFile.cpp b/modules/ripple_basics/utility/ripple_IniFile.cpp index 8f60104d83..795b010f6a 100644 --- a/modules/ripple_basics/utility/ripple_IniFile.cpp +++ b/modules/ripple_basics/utility/ripple_IniFile.cpp @@ -106,7 +106,7 @@ int SectionCount (Section& secSource, const std::string& strSection) { Section::mapped_type* pmtEntries = SectionEntries (secSource, strSection); - return pmtEntries ? -1 : pmtEntries->size (); + return pmtEntries ? pmtEntries->size () : 0; } bool SectionSingleB (Section& secSource, const std::string& strSection, std::string& strValue) @@ -128,4 +128,34 @@ bool SectionSingleB (Section& secSource, const std::string& strSection, std::str return bSingle; } -// vim:ts=4 +StringPairArray parseKeyValueSection (Section& secSource, std::string const& strSection) +{ + StringPairArray result; + + int const count = SectionCount (secSource, strSection); + + typedef Section::mapped_type Entries; + + Entries* const entries = SectionEntries (secSource, strSection); + + if (entries != nullptr) + { + for (Entries::const_iterator iter = entries->begin (); iter != entries->end (); ++iter) + { + String const line (iter->c_str ()); + + int const equalPos = line.indexOfChar ('='); + + if (equalPos != -1) + { + String const key = line.substring (0, equalPos); + String const value = line.substring (equalPos + 1, line.length ()); + + result.set (key, value); + } + } + } + + return result; +} + diff --git a/modules/ripple_basics/utility/ripple_IniFile.h b/modules/ripple_basics/utility/ripple_IniFile.h index fe5327ec89..79e7f546bc 100644 --- a/modules/ripple_basics/utility/ripple_IniFile.h +++ b/modules/ripple_basics/utility/ripple_IniFile.h @@ -20,4 +20,11 @@ bool SectionSingleB (Section& secSource, const std::string& strSection, std::str int SectionCount (Section& secSource, const std::string& strSection); Section::mapped_type* SectionEntries (Section& secSource, const std::string& strSection); +/** Parse a section of lines as a key/value array. + + Each line is in the form =. + Spaces are considered part of the key and value. 
+*/ +StringPairArray parseKeyValueSection (Section& secSource, std::string const& strSection); + #endif diff --git a/modules/ripple_basics/utility/ripple_StringUtilities.cpp b/modules/ripple_basics/utility/ripple_StringUtilities.cpp index cdc438a6ef..bfa42c589e 100644 --- a/modules/ripple_basics/utility/ripple_StringUtilities.cpp +++ b/modules/ripple_basics/utility/ripple_StringUtilities.cpp @@ -271,42 +271,4 @@ std::string addressToString (void const* address) return strHex (static_cast (address) - static_cast (0)); } -StringPairArray parseKeyValueParameters (String parameters, beast_wchar delimiter) -{ - StringPairArray keyValues; - - while (parameters.isNotEmpty ()) - { - String pair; - - { - int const delimiterPos = parameters.indexOfChar (delimiter); - - if (delimiterPos != -1) - { - pair = parameters.substring (0, delimiterPos); - - parameters = parameters.substring (delimiterPos + 1); - } - else - { - pair = parameters; - - parameters = String::empty; - } - } - - int const equalPos = pair.indexOfChar ('='); - - if (equalPos != -1) - { - String const key = pair.substring (0, equalPos); - String const value = pair.substring (equalPos + 1, pair.length ()); - - keyValues.set (key, value); - } - } - - return keyValues; -} diff --git a/modules/ripple_basics/utility/ripple_StringUtilities.h b/modules/ripple_basics/utility/ripple_StringUtilities.h index dd002a2a23..3ddcf75ae9 100644 --- a/modules/ripple_basics/utility/ripple_StringUtilities.h +++ b/modules/ripple_basics/utility/ripple_StringUtilities.h @@ -214,8 +214,4 @@ bool parseUrl (const std::string& strUrl, std::string& strScheme, std::string& s */ extern std::string addressToString (void const* address); -/** Parse a pipe delimited key/value parameter string. -*/ -StringPairArray parseKeyValueParameters (String parameters, beast_wchar delimiter); - #endif diff --git a/modules/ripple_core/functional/ripple_Config.cpp b/modules/ripple_core/functional/ripple_Config.cpp index 896d298280..54d9157a90 100644 --- a/modules/ripple_core/functional/ripple_Config.cpp +++ b/modules/ripple_core/functional/ripple_Config.cpp @@ -373,8 +373,8 @@ void Config::load () (void) SectionSingleB (secConfig, SECTION_RPC_IP, m_rpcIP); (void) SectionSingleB (secConfig, SECTION_RPC_PASSWORD, RPC_PASSWORD); (void) SectionSingleB (secConfig, SECTION_RPC_USER, RPC_USER); - (void) SectionSingleB (secConfig, SECTION_NODE_DB, NODE_DB); - (void) SectionSingleB (secConfig, SECTION_FASTNODE_DB, FASTNODE_DB); + theConfig.nodeDatabase = parseKeyValueSection (secConfig, SECTION_NODE_DB); + theConfig.ephemeralNodeDatabase = parseKeyValueSection (secConfig, SECTION_FASTNODE_DB); if (SectionSingleB (secConfig, SECTION_RPC_PORT, strTemp)) m_rpcPort = boost::lexical_cast (strTemp); diff --git a/modules/ripple_core/functional/ripple_Config.h b/modules/ripple_core/functional/ripple_Config.h index 47ecf9a5b9..05b6a377d4 100644 --- a/modules/ripple_core/functional/ripple_Config.h +++ b/modules/ripple_core/functional/ripple_Config.h @@ -84,8 +84,12 @@ public: boost::filesystem::path DATA_DIR; boost::filesystem::path DEBUG_LOGFILE; boost::filesystem::path VALIDATORS_FILE; // As specifed in rippled.cfg. 
- std::string NODE_DB; // Database to use for nodes - std::string FASTNODE_DB; // Database for temporary storage + + StringPairArray nodeDatabase; + StringPairArray ephemeralNodeDatabase; + //std::string NODE_DB; // Database to use for nodes + //std::string FASTNODE_DB; // Database for temporary storage + std::string DB_IMPORT; // Import from old DB bool ELB_SUPPORT; // Support Amazon ELB diff --git a/rippled-example.cfg b/rippled-example.cfg index 2c89e205ae..6992020723 100644 --- a/rippled-example.cfg +++ b/rippled-example.cfg @@ -230,10 +230,13 @@ # Set the choice of databases for storing Node objects. # # Format (without spaces): -# '=' [ '|' '=' ]... +# One or more lines of key / value pairs: +# '=' +# ... # -# Examples: -# type=HyperLevelDB|path=db/hashnode +# Example: +# type=HyperLevelDB +# path=db/hashnode # # Choices for 'type' (not case-sensitive) # HyperLevelDB Use an improved version of LevelDB (preferred) @@ -327,8 +330,9 @@ [node_size] medium -[node_db] -type=mdb|path=db +#[node_db] +#type=HyperLevelDB +#path=hyperldb [debug_logfile] log/debug.log diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp index bbef763057..ff89b7d47e 100644 --- a/src/cpp/ripple/ripple_Application.cpp +++ b/src/cpp/ripple/ripple_Application.cpp @@ -52,8 +52,8 @@ public: , mJobQueue (mIOService) // VFALCO New stuff , m_nodeStore (NodeStore::New ( - theConfig.NODE_DB, - theConfig.FASTNODE_DB, + theConfig.nodeDatabase, + theConfig.ephemeralNodeDatabase, *this)) , m_validators (Validators::New (this)) , mFeatures (IFeatures::New (2 * 7 * 24 * 60 * 60, 200)) // two weeks, 200/256 @@ -955,15 +955,9 @@ static void addTxnSeqField () void ApplicationImp::updateTables () { - if (theConfig.NODE_DB.empty ()) + if (theConfig.nodeDatabase.size () <= 0) { - Log (lsFATAL) << "The NODE_DB configuration setting MUST be set"; - StopSustain (); - exit (1); - } - else if (theConfig.NODE_DB == "LevelDB" || theConfig.NODE_DB == "SQLite") - { - Log (lsFATAL) << "The NODE_DB setting has been updated, your value is out of date"; + Log (lsFATAL) << "The [node_db] configuration setting has been updated and must be set"; StopSustain (); exit (1); } @@ -986,7 +980,7 @@ void ApplicationImp::updateTables () "Node import from '" << theConfig.DB_IMPORT << "' to '" << getApp().getNodeStore().getName () << "'."; - getApp().getNodeStore().import(theConfig.DB_IMPORT); + getApp().getNodeStore().import(NodeStore::parseDelimitedKeyValueString (theConfig.DB_IMPORT)); } } From 58025fb8efb4d4a9a57aff336be4dce97e5d5ed3 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 23 Jul 2013 09:13:21 -0700 Subject: [PATCH 46/50] Finish NodeStore import config, add ephemeral db to unit tests --- Builds/VisualStudio2012/RippleD.vcxproj | 1 + .../VisualStudio2012/RippleD.vcxproj.filters | 3 + TODO.txt | 14 +- modules/ripple_app/node/ripple_NodeStore.cpp | 266 +++++++++--------- modules/ripple_app/node/ripple_NodeStore.h | 71 +++-- .../ripple_basics/utility/ripple_IniFile.cpp | 9 +- .../ripple_basics/utility/ripple_IniFile.h | 2 +- .../utility/ripple_StringUtilities.cpp | 37 +++ .../utility/ripple_StringUtilities.h | 8 + .../ripple_core/functional/ripple_Config.cpp | 86 ++---- .../ripple_core/functional/ripple_Config.h | 38 ++- .../functional/ripple_ConfigSections.h | 86 ++++++ modules/ripple_core/ripple_core.h | 1 + rippled-example.cfg | 18 +- src/cpp/ripple/ripple_Application.cpp | 8 +- src/cpp/ripple/ripple_Main.cpp | 19 +- 16 files changed, 406 insertions(+), 261 deletions(-) create mode 100644 
modules/ripple_core/functional/ripple_ConfigSections.h diff --git a/Builds/VisualStudio2012/RippleD.vcxproj b/Builds/VisualStudio2012/RippleD.vcxproj index f4e259fdc9..10595bf0b0 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj +++ b/Builds/VisualStudio2012/RippleD.vcxproj @@ -1456,6 +1456,7 @@ + diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index a2d81919ff..63a4e3ae69 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -1689,6 +1689,9 @@ [1] Ripple\ripple_basics\utility + + [1] Ripple\ripple_core\functional + diff --git a/TODO.txt b/TODO.txt index 517589b43c..d2e68f7656 100644 --- a/TODO.txt +++ b/TODO.txt @@ -2,7 +2,10 @@ RIPPLE TODO -------------------------------------------------------------------------------- +Items marked '*' can be handled by third parties. + Vinnie's Short List (Changes day to day) +- Make theConfig a SharedSingleton to prevent leak warnings - Add fast backend to the unit test - Refactor Section code into ConfigFile - Change NodeStore config file format to multiline key/value pairs @@ -17,7 +20,10 @@ Vinnie's Short List (Changes day to day) -------------------------------------------------------------------------------- -- Replace all throw with beast::Throw +* Restyle all the macros in ripple_ConfigSection.h + +* Replace all throw with beast::Throw + Only in the ripple sources, not in Subtrees/ or protobuf or websocket - Replace base_uint and uintXXX with UnsignedInteger * Need to specialize UnsignedInteger to work efficiently with 4 and 8 byte @@ -25,12 +31,6 @@ Vinnie's Short List (Changes day to day) - Rewrite boost program_options in Beast -- Examples for different backend key/value config settings - -- Unit Test attention - -- NodeStore backend unit test - - Validations unit test - Replace endian conversion calls with beast calls: diff --git a/modules/ripple_app/node/ripple_NodeStore.cpp b/modules/ripple_app/node/ripple_NodeStore.cpp index 1ffa2a09f9..b0ddd751d7 100644 --- a/modules/ripple_app/node/ripple_NodeStore.cpp +++ b/modules/ripple_app/node/ripple_NodeStore.cpp @@ -402,7 +402,12 @@ public: //------------------------------------------------------------------------------ - void import (Parameters const& sourceBackendParameters) + void visitAll (Backend::VisitCallback& callback) + { + m_backend->visitAll (callback); + } + + void import (NodeStore& sourceDatabase) { class ImportVisitCallback : public Backend::VisitCallback { @@ -439,16 +444,15 @@ public: //-------------------------------------------------------------------------- - ScopedPointer srcBackend (createBackend (sourceBackendParameters, m_scheduler)); - ImportVisitCallback callback (*m_backend); - srcBackend->visitAll (callback); + sourceDatabase.visitAll (callback); } //------------------------------------------------------------------------------ - static NodeStore::Backend* createBackend (Parameters const& parameters, Scheduler& scheduler) + static NodeStore::Backend* createBackend ( + Parameters const& parameters, Scheduler& scheduler = getSynchronousScheduler ()) { Backend* backend = nullptr; @@ -484,11 +488,6 @@ public: return backend; } - static NodeStore::Backend* createBackend (String const& parameterString, Scheduler& scheduler) - { - return createBackend (parseDelimitedKeyValueString (parameterString), scheduler); - } - static void addBackendFactory (BackendFactory& factory) { s_factories.add (&factory); @@ -516,52 +515,29 @@ Array 
NodeStoreImp::s_factories; //------------------------------------------------------------------------------ -NodeStore::Parameters NodeStore::parseDelimitedKeyValueString (String parameters, beast_wchar delimiter) -{ - StringPairArray keyValues; - - while (parameters.isNotEmpty ()) - { - String pair; - - { - int const delimiterPos = parameters.indexOfChar (delimiter); - - if (delimiterPos != -1) - { - pair = parameters.substring (0, delimiterPos); - - parameters = parameters.substring (delimiterPos + 1); - } - else - { - pair = parameters; - - parameters = String::empty; - } - } - - int const equalPos = pair.indexOfChar ('='); - - if (equalPos != -1) - { - String const key = pair.substring (0, equalPos); - String const value = pair.substring (equalPos + 1, pair.length ()); - - keyValues.set (key, value); - } - } - - return keyValues; -} - void NodeStore::addBackendFactory (BackendFactory& factory) { NodeStoreImp::addBackendFactory (factory); } +NodeStore::Scheduler& NodeStore::getSynchronousScheduler () +{ + // Simple scheduler that performs the task immediately + struct SynchronousScheduler : Scheduler + { + void scheduleTask (Task* task) + { + task->performScheduledTask (); + } + }; + + static SynchronousScheduler scheduler; + + return scheduler; +} + NodeStore* NodeStore::New (Parameters const& backendParameters, - Parameters const& fastBackendParameters, + Parameters fastBackendParameters, Scheduler& scheduler) { return new NodeStoreImp (backendParameters, @@ -569,15 +545,6 @@ NodeStore* NodeStore::New (Parameters const& backendParameters, scheduler); } -NodeStore* NodeStore::New (String const& backendParameters, - String const& fastBackendParameters, - Scheduler& scheduler) -{ - return new NodeStoreImp (parseDelimitedKeyValueString (backendParameters), - parseDelimitedKeyValueString (fastBackendParameters), - scheduler); -} - //============================================================================== // Some common code for the unit tests @@ -598,15 +565,6 @@ public: typedef NodeStore::Backend Backend; typedef NodeStore::Batch Batch; - // Immediately performs the task - struct TestScheduler : NodeStore::Scheduler - { - void scheduleTask (Task* task) - { - task->performScheduledTask (); - } - }; - // Creates predictable objects class PredictableObjectFactory { @@ -846,19 +804,18 @@ public: { beginTest (String ("NodeStore::Backend type=") + type); - String params; - params << "type=" << type - << "|path=" << File::createTempFile ("unittest").getFullPathName (); + StringPairArray params; + File const path (File::createTempFile ("node_db")); + params.set ("type", type); + params.set ("path", path.getFullPathName ()); // Create a batch NodeStore::Batch batch; createPredictableBatch (batch, 0, numObjectsToTest, seedValue); - //createPredictableBatch (batch, 0, 10, seedValue); { // Open the backend - ScopedPointer backend ( - NodeStoreImp::createBackend (params, m_scheduler)); + ScopedPointer backend (NodeStoreImp::createBackend (params)); // Write the batch storeBatch (*backend, batch); @@ -881,8 +838,7 @@ public: { // Re-open the backend - ScopedPointer backend ( - NodeStoreImp::createBackend (params, m_scheduler)); + ScopedPointer backend (NodeStoreImp::createBackend (params)); // Read it back in NodeStore::Batch copy; @@ -894,6 +850,8 @@ public: } } + //-------------------------------------------------------------------------- + void runTest () { int const seedValue = 50; @@ -912,9 +870,6 @@ public: testBackend ("mdb", seedValue); #endif } - -private: - TestScheduler m_scheduler; }; 
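
A note on the getSynchronousScheduler() implementation above: the whole trick is a Scheduler whose scheduleTask() runs the task before returning, which is what makes the backend tests deterministic. A stripped-down sketch of the same pattern; the names Task, Scheduler, and PrintTask are simplified stand-ins, not the actual Beast/Ripple interfaces:

    #include <cstdio>

    struct Task
    {
        virtual ~Task () { }
        virtual void performScheduledTask () = 0;
    };

    struct Scheduler
    {
        virtual ~Scheduler () { }
        virtual void scheduleTask (Task* task) = 0;
    };

    // Running the task inline means there is no background thread to
    // start, flush, or join when a unit test finishes.
    struct SynchronousScheduler : Scheduler
    {
        void scheduleTask (Task* task) { task->performScheduledTask (); }
    };

    struct PrintTask : Task
    {
        void performScheduledTask () { std::puts ("task ran inline"); }
    };

    int main ()
    {
        SynchronousScheduler scheduler;
        PrintTask task;
        scheduler.scheduleTask (&task); // completes before this call returns
        return 0;
    }
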
static NodeStoreBackendTests nodeStoreBackendTests; @@ -957,15 +912,18 @@ public: int64 m_startTime; }; + //-------------------------------------------------------------------------- + void testBackend (String type, int64 const seedValue) { String s; s << "Testing backend '" << type << "' performance"; beginTest (s); - String params; - params << "type=" << type - << "|path=" << File::createTempFile ("unittest").getFullPathName (); + StringPairArray params; + File const path (File::createTempFile ("node_db")); + params.set ("type", type); + params.set ("path", path.getFullPathName ()); // Create batches NodeStore::Batch batch1; @@ -974,9 +932,7 @@ public: createPredictableBatch (batch2, 0, numObjectsToTest, seedValue); // Open the backend - ScopedPointer backend ( - NodeStoreImp::createBackend ( - NodeStore::parseDelimitedKeyValueString (params), m_scheduler)); + ScopedPointer backend (NodeStoreImp::createBackend (params)); Stopwatch t; @@ -1004,6 +960,8 @@ public: logMessage (s); } + //-------------------------------------------------------------------------- + void runTest () { int const seedValue = 50; @@ -1022,11 +980,10 @@ public: testBackend ("sqlite", seedValue); } - -private: - TestScheduler m_scheduler; }; +static NodeStoreTimingTests nodeStoreTimingTests; + //------------------------------------------------------------------------------ class NodeStoreTests : public NodeStoreUnitTest @@ -1038,9 +995,10 @@ public: void testImport (String destBackendType, String srcBackendType, int64 seedValue) { - String srcParams; - srcParams << "type=" << srcBackendType - << "|path=" << File::createTempFile ("unittest").getFullPathName (); + File const node_db (File::createTempFile ("node_db")); + StringPairArray srcParams; + srcParams.set ("type", srcBackendType); + srcParams.set ("path", node_db.getFullPathName ()); // Create a batch NodeStore::Batch batch; @@ -1048,26 +1006,33 @@ public: // Write to source db { - ScopedPointer src (NodeStore::New (srcParams, "", m_scheduler)); + ScopedPointer src (NodeStore::New (srcParams)); storeBatch (*src, batch); } - String destParams; - destParams << "type=" << destBackendType - << "|path=" << File::createTempFile ("unittest").getFullPathName (); - - ScopedPointer dest (NodeStore::New ( - destParams, "", m_scheduler)); - - beginTest (String ("import into '") + destBackendType + "' from '" + srcBackendType + "'"); - - // Do the import - dest->import (NodeStore::parseDelimitedKeyValueString (srcParams)); - - // Get the results of the import NodeStore::Batch copy; - fetchCopyOfBatch (*dest, ©, batch); + + { + // Re-open the db + ScopedPointer src (NodeStore::New (srcParams)); + + // Set up the destination database + File const dest_db (File::createTempFile ("dest_db")); + StringPairArray destParams; + destParams.set ("type", destBackendType); + destParams.set ("path", dest_db.getFullPathName ()); + + ScopedPointer dest (NodeStore::New (destParams)); + + beginTest (String ("import into '") + destBackendType + "' from '" + srcBackendType + "'"); + + // Do the import + dest->import (*src); + + // Get the results of the import + fetchCopyOfBatch (*dest, ©, batch); + } // Canonicalize the source and destination batches std::sort (batch.begin (), batch.end (), NodeObject::LessThan ()); @@ -1076,16 +1041,29 @@ public: } - void testBackend (String type, int64 const seedValue) + //-------------------------------------------------------------------------- + + void testNodeStore (String type, bool const useEphemeralDatabase, int64 const seedValue) { String s; - s << 
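
The NodeStoreTests that follow exercise the two-store arrangement NodeStore::New() accepts: a persistent backend plus an optional ephemeral one. The tests only pin down the write-through half (after a store, the ephemeral database holds the same batch). The read-side ordering in this sketch, consulting the fast store first and warming it on a miss, is an assumption about intent, and std::map stands in for a real backend:

    #include <cstdio>
    #include <map>
    #include <string>

    class TwoTierStore
    {
    public:
        explicit TwoTierStore (bool useEphemeral) : m_useEphemeral (useEphemeral) { }

        void store (std::string const& key, std::string const& value)
        {
            m_persistent [key] = value;
            if (m_useEphemeral)
                m_ephemeral [key] = value;      // write-through, as the tests verify
        }

        bool fetch (std::string const& key, std::string* value)
        {
            typedef std::map <std::string, std::string>::const_iterator iterator;

            if (m_useEphemeral)
            {
                iterator const fast = m_ephemeral.find (key);
                if (fast != m_ephemeral.end ())
                {
                    *value = fast->second;      // fast path (assumed ordering)
                    return true;
                }
            }

            iterator const slow = m_persistent.find (key);
            if (slow == m_persistent.end ())
                return false;

            if (m_useEphemeral)
                m_ephemeral [key] = slow->second;   // warm the fast store

            *value = slow->second;
            return true;
        }

    private:
        bool const m_useEphemeral;
        std::map <std::string, std::string> m_ephemeral;
        std::map <std::string, std::string> m_persistent;
    };

    int main ()
    {
        TwoTierStore db (true);
        db.store ("hash", "node object");

        std::string value;
        if (db.fetch ("hash", &value))
            std::printf ("fetched: %s\n", value.c_str ());
        return 0;
    }
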
String ("NodeStore backend type=") + type; + s << String ("NodeStore backend '") + type + "'"; + if (useEphemeralDatabase) + s << " (with ephemeral database)"; beginTest (s); - String params; - params << "type=" << type - << "|path=" << File::createTempFile ("unittest").getFullPathName (); + File const node_db (File::createTempFile ("node_db")); + StringPairArray nodeParams; + nodeParams.set ("type", type); + nodeParams.set ("path", node_db.getFullPathName ()); + + File const temp_db (File::createTempFile ("temp_db")); + StringPairArray tempParams; + if (useEphemeralDatabase) + { + tempParams.set ("type", type); + tempParams.set ("path", temp_db.getFullPathName ()); + } // Create a batch NodeStore::Batch batch; @@ -1093,7 +1071,7 @@ public: { // Open the database - ScopedPointer db (NodeStore::New (params, "", m_scheduler)); + ScopedPointer db (NodeStore::New (nodeParams, tempParams)); // Write the batch storeBatch (*db, batch); @@ -1115,12 +1093,28 @@ public: } { - // Re-open the database - ScopedPointer db (NodeStore::New (params, "", m_scheduler)); + // Re-open the database without the ephemeral DB + ScopedPointer db (NodeStore::New (nodeParams)); // Read it back in NodeStore::Batch copy; fetchCopyOfBatch (*db, ©, batch); + + // Canonicalize the source and destination batches + std::sort (batch.begin (), batch.end (), NodeObject::LessThan ()); + std::sort (copy.begin (), copy.end (), NodeObject::LessThan ()); + expect (areBatchesEqual (batch, copy), "Should be equal"); + } + + if (useEphemeralDatabase) + { + // Verify the ephemeral db + ScopedPointer db (NodeStore::New (tempParams, StringPairArray ())); + + // Read it back in + NodeStore::Batch copy; + fetchCopyOfBatch (*db, ©, batch); + // Canonicalize the source and destination batches std::sort (batch.begin (), batch.end (), NodeObject::LessThan ()); std::sort (copy.begin (), copy.end (), NodeObject::LessThan ()); @@ -1128,33 +1122,29 @@ public: } } -public: - void runTest () + //-------------------------------------------------------------------------- + + void runBackendTests (bool useEphemeralDatabase, int64 const seedValue) { - int64 const seedValue = 50; + testNodeStore ("keyvadb", useEphemeralDatabase, seedValue); - // - // Backend tests - // + testNodeStore ("leveldb", useEphemeralDatabase, seedValue); - testBackend ("keyvadb", seedValue); - - testBackend ("leveldb", seedValue); - - testBackend ("sqlite", seedValue); + testNodeStore ("sqlite", useEphemeralDatabase, seedValue); #if RIPPLE_HYPERLEVELDB_AVAILABLE - testBackend ("hyperleveldb", seedValue); + testNodeStore ("hyperleveldb", useEphemeralDatabase, seedValue); #endif #if RIPPLE_MDB_AVAILABLE - testBackend ("mdb", seedValue); + testNodeStore ("mdb", useEphemeralDatabase, seedValue); #endif + } - // - // Import tests - // + //-------------------------------------------------------------------------- + void runImportTests (int64 const seedValue) + { //testImport ("keyvadb", "keyvadb", seedValue); testImport ("leveldb", "leveldb", seedValue); @@ -1170,10 +1160,18 @@ public: testImport ("sqlite", "sqlite", seedValue); } -private: - TestScheduler m_scheduler; + //-------------------------------------------------------------------------- + + void runTest () + { + int64 const seedValue = 50; + + runBackendTests (false, seedValue); + + runBackendTests (true, seedValue); + + runImportTests (seedValue); + } }; static NodeStoreTests nodeStoreTests; - -static NodeStoreTimingTests nodeStoreTimingTests; diff --git a/modules/ripple_app/node/ripple_NodeStore.h 
b/modules/ripple_app/node/ripple_NodeStore.h index 4bd44cd1ec..a2c26f72df 100644 --- a/modules/ripple_app/node/ripple_NodeStore.h +++ b/modules/ripple_app/node/ripple_NodeStore.h @@ -102,6 +102,8 @@ public: For improved performance, a backend has the option of performing writes in batches. These writes can be scheduled using the provided scheduler object. + + @see BatchWriter */ class Scheduler { @@ -272,7 +274,7 @@ public: @note This routine will not be called concurrently with itself or other methods. - @see import + @see import, VisitCallback */ virtual void visitAll (VisitCallback& callback) = 0; @@ -307,49 +309,38 @@ public: //-------------------------------------------------------------------------- - /** Create a Parameters from a String. - - Parameter strings have the format: - - <key>=<value>['|'<key>=<value>] - - The key "type" must exist, it defines the choice of backend. - For example - `type=LevelDB|path=/mnt/ephemeral` - - This is a convenience function for unit tests. - */ - static Parameters parseDelimitedKeyValueString (String s, beast_wchar delimiter='|'); - /** Construct a node store. - Parameter strings have the format: + The parameters are key-value pairs passed to the backend. The + 'type' key must exist; it defines the choice of backend. Most + backends also require a 'path' field. + + Some choices for 'type' are: + HyperLevelDB, LevelDB, SQLite, KeyvaDB, MDB - <key>=<value>['|'<key>=<value>] + If fastBackendParameters is omitted or empty, no ephemeral database + is used. If the scheduler parameter is omitted or unspecified, a + synchronous scheduler is used which performs all tasks immediately on + the caller's thread. - The key "type" must exist, it defines the choice of backend. - For example - `type=LevelDB|path=/mnt/ephemeral` + @note If the database cannot be opened or created, an exception is thrown. @param backendParameters The parameter string for the persistent backend. - @param fastBackendParameters The parameter string for the ephemeral backend. - @param cacheSize ? - @param cacheAge ? - @param scheduler The scheduler to use for performing asynchronous tasks. + @param fastBackendParameters [optional] The parameter string for the ephemeral backend. + @param scheduler [optional] The scheduler to use for performing asynchronous tasks. - @return A pointer to the created object. + @return The opened database. */ static NodeStore* New (Parameters const& backendParameters, - Parameters const& fastBackendParameters, - Scheduler& scheduler); + Parameters fastBackendParameters = Parameters (), + Scheduler& scheduler = getSynchronousScheduler ()); - /** Construct a node store from a pipe delimited parameter string. + /** Get the synchronous scheduler. - This is used for unit tests. + The synchronous scheduler performs all tasks immediately, before + returning to the caller, using the caller's thread. */ - static NodeStore* New (String const& backendParameters, - String const& fastBackendParameters, - Scheduler& scheduler); + static Scheduler& getSynchronousScheduler (); /** Destroy the node store. @@ -405,12 +396,20 @@ public: Blob& data, uint256 const& hash) = 0; - /** Import objects from another database. + /** Visit every object in the database. + + This is usually called during import. - The other NodeStore database is constructed using the specified - backend parameters. + @note This routine will not be called concurrently with itself + or other methods. + + @see import */ - virtual void import (Parameters const& sourceBackendParameters) = 0; + virtual void visitAll (Backend::VisitCallback& callback) = 0; + + /** Import objects from another database. */ + virtual void import (NodeStore& sourceDatabase) = 0; + /** Retrieve the estimated number of pending write operations.
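
For illustration, a minimal sketch of how a caller might use the construction API above (the backend type and path values are examples only, not taken from the patch):

    // Open a persistent NodeStore with no ephemeral database,
    // using the default synchronous scheduler.
    StringPairArray params;
    params.set ("type", "LevelDB");
    params.set ("path", "db/hashnode");
    ScopedPointer <NodeStore> db (NodeStore::New (params));
    // An ephemeral look-aside database may be supplied as the
    // second argument: NodeStore::New (params, tempParams);
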
diff --git a/modules/ripple_basics/utility/ripple_IniFile.cpp b/modules/ripple_basics/utility/ripple_IniFile.cpp index 795b010f6a..7c230cb03f 100644 --- a/modules/ripple_basics/utility/ripple_IniFile.cpp +++ b/modules/ripple_basics/utility/ripple_IniFile.cpp @@ -128,15 +128,18 @@ bool SectionSingleB (Section& secSource, const std::string& strSection, std::str return bSingle; } -StringPairArray parseKeyValueSection (Section& secSource, std::string const& strSection) +StringPairArray parseKeyValueSection (Section& secSource, String const& strSection) { StringPairArray result; - int const count = SectionCount (secSource, strSection); + // yuck. + std::string const stdStrSection (strSection.toStdString ()); + + int const count = SectionCount (secSource, stdStrSection); typedef Section::mapped_type Entries; - Entries* const entries = SectionEntries (secSource, strSection); + Entries* const entries = SectionEntries (secSource, stdStrSection); if (entries != nullptr) {
diff --git a/modules/ripple_basics/utility/ripple_IniFile.h b/modules/ripple_basics/utility/ripple_IniFile.h index 79e7f546bc..d8e27abb95 100644 --- a/modules/ripple_basics/utility/ripple_IniFile.h +++ b/modules/ripple_basics/utility/ripple_IniFile.h @@ -25,6 +25,6 @@ Section::mapped_type* SectionEntries (Section& secSource, const std::string& str Each line is in the form <key>=<value>. Spaces are considered part of the key and value. */ -StringPairArray parseKeyValueSection (Section& secSource, std::string const& strSection); +StringPairArray parseKeyValueSection (Section& secSource, String const& strSection); #endif
diff --git a/modules/ripple_basics/utility/ripple_StringUtilities.cpp b/modules/ripple_basics/utility/ripple_StringUtilities.cpp index bfa42c589e..550fab9213 100644 --- a/modules/ripple_basics/utility/ripple_StringUtilities.cpp +++ b/modules/ripple_basics/utility/ripple_StringUtilities.cpp @@ -271,4 +271,41 @@ std::string addressToString (void const* address) return strHex (static_cast <char const*> (address) - static_cast <char const*> (0)); } +StringPairArray parseDelimitedKeyValueString (String parameters, beast_wchar delimiter) +{ + StringPairArray keyValues; + while (parameters.isNotEmpty ()) + { + String pair; + + { + int const delimiterPos = parameters.indexOfChar (delimiter); + + if (delimiterPos != -1) + { + pair = parameters.substring (0, delimiterPos); + + parameters = parameters.substring (delimiterPos + 1); + } + else + { + pair = parameters; + + parameters = String::empty; + } + } + + int const equalPos = pair.indexOfChar ('='); + + if (equalPos != -1) + { + String const key = pair.substring (0, equalPos); + String const value = pair.substring (equalPos + 1, pair.length ()); + + keyValues.set (key, value); + } + } + + return keyValues; +}
diff --git a/modules/ripple_basics/utility/ripple_StringUtilities.h b/modules/ripple_basics/utility/ripple_StringUtilities.h index 3ddcf75ae9..08dc14c545 100644 --- a/modules/ripple_basics/utility/ripple_StringUtilities.h +++ b/modules/ripple_basics/utility/ripple_StringUtilities.h @@ -214,4 +214,12 @@ bool parseUrl (const std::string& strUrl, std::string& strScheme, std::string& s */ extern std::string addressToString (void const* address); +/** Create a Parameters from a String. + + Parameter strings have the format: + + <key>=<value>['|'<key>=<value>] +*/ +extern StringPairArray parseDelimitedKeyValueString (String s, beast_wchar delimiter='|'); + #endif
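
As a quick illustration of the delimited format documented above (the backend values shown are hypothetical, and the `params ["key"]` lookup assumes the usual StringPairArray accessor):

    // Yields two pairs: "type" -> "LevelDB", "path" -> "/mnt/ephemeral"
    StringPairArray const params (
        parseDelimitedKeyValueString ("type=LevelDB|path=/mnt/ephemeral"));
    String const type (params ["type"]);
    String const path (params ["path"]);
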
diff --git a/modules/ripple_core/functional/ripple_Config.cpp b/modules/ripple_core/functional/ripple_Config.cpp index 54d9157a90..2706f12923 100644 --- a/modules/ripple_core/functional/ripple_Config.cpp +++ b/modules/ripple_core/functional/ripple_Config.cpp @@ -8,71 +8,6 @@ // TODO: Check permissions on config file before using it. // -// VFALCO TODO Rename and replace these macros with variables. -#define SECTION_ACCOUNT_PROBE_MAX "account_probe_max" -#define SECTION_CLUSTER_NODES "cluster_nodes" -#define SECTION_DATABASE_PATH "database_path" -#define SECTION_DEBUG_LOGFILE "debug_logfile" -#define SECTION_ELB_SUPPORT "elb_support" -#define SECTION_FEE_DEFAULT "fee_default" -#define SECTION_FEE_NICKNAME_CREATE "fee_nickname_create" -#define SECTION_FEE_OFFER "fee_offer" -#define SECTION_FEE_OPERATION "fee_operation" -#define SECTION_FEE_ACCOUNT_RESERVE "fee_account_reserve" -#define SECTION_FEE_OWNER_RESERVE "fee_owner_reserve" -#define SECTION_NODE_DB "node_db" -#define SECTION_FASTNODE_DB "temp_db" -#define SECTION_LEDGER_HISTORY "ledger_history" -#define SECTION_IPS "ips" -#define SECTION_NETWORK_QUORUM "network_quorum" -#define SECTION_NODE_SEED "node_seed" -#define SECTION_NODE_SIZE "node_size" -#define SECTION_PATH_SEARCH_SIZE "path_search_size" -#define SECTION_PEER_CONNECT_LOW_WATER "peer_connect_low_water" -#define SECTION_PEER_IP "peer_ip" -#define SECTION_PEER_PORT "peer_port" -#define SECTION_PEER_PRIVATE "peer_private" -#define SECTION_PEER_SCAN_INTERVAL_MIN "peer_scan_interval_min" -#define SECTION_PEER_SSL_CIPHER_LIST "peer_ssl_cipher_list" -#define SECTION_PEER_START_MAX "peer_start_max" -#define SECTION_RPC_ALLOW_REMOTE "rpc_allow_remote" -#define SECTION_RPC_ADMIN_ALLOW "rpc_admin_allow" -#define SECTION_RPC_ADMIN_USER "rpc_admin_user" -#define SECTION_RPC_ADMIN_PASSWORD "rpc_admin_password" -#define SECTION_RPC_IP "rpc_ip" -#define SECTION_RPC_PORT "rpc_port" -#define SECTION_RPC_USER "rpc_user" -#define SECTION_RPC_PASSWORD "rpc_password" -#define SECTION_RPC_STARTUP "rpc_startup" -#define SECTION_RPC_SECURE "rpc_secure" -#define SECTION_RPC_SSL_CERT "rpc_ssl_cert" -#define SECTION_RPC_SSL_CHAIN "rpc_ssl_chain" -#define SECTION_RPC_SSL_KEY "rpc_ssl_key" -#define SECTION_SMS_FROM "sms_from" -#define SECTION_SMS_KEY "sms_key" -#define SECTION_SMS_SECRET "sms_secret" -#define SECTION_SMS_TO "sms_to" -#define SECTION_SMS_URL "sms_url" -#define SECTION_SNTP "sntp_servers" -#define SECTION_SSL_VERIFY "ssl_verify" -#define SECTION_SSL_VERIFY_FILE "ssl_verify_file" -#define SECTION_SSL_VERIFY_DIR "ssl_verify_dir" -#define SECTION_VALIDATORS_FILE "validators_file" -#define SECTION_VALIDATION_QUORUM "validation_quorum" -#define SECTION_VALIDATION_SEED "validation_seed" -#define SECTION_WEBSOCKET_PUBLIC_IP "websocket_public_ip" -#define SECTION_WEBSOCKET_PUBLIC_PORT "websocket_public_port" -#define SECTION_WEBSOCKET_PUBLIC_SECURE "websocket_public_secure" -#define SECTION_WEBSOCKET_PING_FREQ "websocket_ping_frequency" -#define SECTION_WEBSOCKET_IP "websocket_ip" -#define SECTION_WEBSOCKET_PORT "websocket_port" -#define SECTION_WEBSOCKET_SECURE "websocket_secure" -#define SECTION_WEBSOCKET_SSL_CERT "websocket_ssl_cert" -#define SECTION_WEBSOCKET_SSL_CHAIN "websocket_ssl_chain" -#define SECTION_WEBSOCKET_SSL_KEY "websocket_ssl_key" -#define SECTION_VALIDATORS "validators" -#define SECTION_VALIDATORS_SITE "validators_site" - // Fees are in XRP. #define DEFAULT_FEE_DEFAULT 10 #define DEFAULT_FEE_ACCOUNT_RESERVE 200*SYSTEM_CURRENCY_PARTS @@ -81,6 +16,8 @@ #define DEFAULT_FEE_OFFER DEFAULT_FEE_DEFAULT #define DEFAULT_FEE_OPERATION 1 +// VFALCO TODO Convert this to a SharedSingleton to prevent exit leaks +// Config theConfig; void Config::setup (const std::string& strConf, bool bTestNet, bool bQuiet) @@ -373,8 +310,23 @@ void Config::load () (void) SectionSingleB (secConfig, SECTION_RPC_IP, m_rpcIP); (void) SectionSingleB (secConfig, SECTION_RPC_PASSWORD, RPC_PASSWORD); (void) SectionSingleB (secConfig, SECTION_RPC_USER, RPC_USER); - theConfig.nodeDatabase = parseKeyValueSection (secConfig, SECTION_NODE_DB); - theConfig.ephemeralNodeDatabase = parseKeyValueSection (secConfig, SECTION_FASTNODE_DB); + + //--------------------------------------- + // + // VFALCO BEGIN CLEAN + // + theConfig.nodeDatabase = parseKeyValueSection ( + secConfig, ConfigSection::nodeDatabase ()); + + theConfig.ephemeralNodeDatabase = parseKeyValueSection ( + secConfig, ConfigSection::tempNodeDatabase ()); + + theConfig.importNodeDatabase = parseKeyValueSection ( + secConfig, ConfigSection::importNodeDatabase ()); + // + // VFALCO END CLEAN + // + //--------------------------------------- if (SectionSingleB (secConfig, SECTION_RPC_PORT, strTemp)) m_rpcPort = boost::lexical_cast <int> (strTemp);
diff --git a/modules/ripple_core/functional/ripple_Config.h b/modules/ripple_core/functional/ripple_Config.h index 05b6a377d4..6f1cfc9bc9 100644 --- a/modules/ripple_core/functional/ripple_Config.h +++ b/modules/ripple_core/functional/ripple_Config.h @@ -85,12 +85,40 @@ public: boost::filesystem::path DEBUG_LOGFILE; boost::filesystem::path VALIDATORS_FILE; // As specified in rippled.cfg. - StringPairArray nodeDatabase; - StringPairArray ephemeralNodeDatabase; - //std::string NODE_DB; // Database to use for nodes - //std::string FASTNODE_DB; // Database for temporary storage + /** Parameters for the main NodeStore database. + + This is one or more strings of the form <key>=<value>. + The 'type' and 'path' keys are required; see rippled-example.cfg. + + @see NodeStore + */ + StringPairArray nodeDatabase; + + /** Parameters for the ephemeral NodeStore database. + + This is an auxiliary database for the NodeStore, usually placed + on a separate faster volume. However, the volume data may not persist + between launches. Use of the ephemeral database is optional. + + The format is the same as that for @ref nodeDatabase. + + @see NodeStore + */ + StringPairArray ephemeralNodeDatabase; + + /** Parameters for importing an old database into the current node database. + + If this is not empty, then it specifies the key/value parameters for + another node database from which to import all data into the current + node database specified by @ref nodeDatabase. + + The format of this string is in the form: + <key>'='<value>['|'<key>'='<value>] + + @see parseDelimitedKeyValueString + */ + StringPairArray importNodeDatabase; - std::string DB_IMPORT; // Import from old DB bool ELB_SUPPORT; // Support Amazon ELB std::string VALIDATORS_SITE; // Where to find validators.txt on the Internet.
diff --git a/modules/ripple_core/functional/ripple_ConfigSections.h b/modules/ripple_core/functional/ripple_ConfigSections.h new file mode 100644 index 0000000000..445137ae49 --- /dev/null +++ b/modules/ripple_core/functional/ripple_ConfigSections.h @@ -0,0 +1,86 @@ +//------------------------------------------------------------------------------ +/* + Copyright (c) 2011-2013, OpenCoin, Inc.
+*/ +//============================================================================== + +#ifndef RIPPLE_CONFIGSECTIONS_H_INCLUDED +#define RIPPLE_CONFIGSECTIONS_H_INCLUDED + +// VFALCO NOTE +// +// Please use this style for all new sections +// And if you're feeling generous, convert all the +// existing macros to this format as well. +// +struct ConfigSection +{ + static String nodeDatabase () { return "node_db"; } + static String tempNodeDatabase () { return "temp_db"; } + static String importNodeDatabase () { return "import_db"; } +}; + +// VFALCO TODO Rename and replace these macros with variables. +#define SECTION_ACCOUNT_PROBE_MAX "account_probe_max" +#define SECTION_CLUSTER_NODES "cluster_nodes" +#define SECTION_DATABASE_PATH "database_path" +#define SECTION_DEBUG_LOGFILE "debug_logfile" +#define SECTION_ELB_SUPPORT "elb_support" +#define SECTION_FEE_DEFAULT "fee_default" +#define SECTION_FEE_NICKNAME_CREATE "fee_nickname_create" +#define SECTION_FEE_OFFER "fee_offer" +#define SECTION_FEE_OPERATION "fee_operation" +#define SECTION_FEE_ACCOUNT_RESERVE "fee_account_reserve" +#define SECTION_FEE_OWNER_RESERVE "fee_owner_reserve" +#define SECTION_LEDGER_HISTORY "ledger_history" +#define SECTION_IPS "ips" +#define SECTION_NETWORK_QUORUM "network_quorum" +#define SECTION_NODE_SEED "node_seed" +#define SECTION_NODE_SIZE "node_size" +#define SECTION_PATH_SEARCH_SIZE "path_search_size" +#define SECTION_PEER_CONNECT_LOW_WATER "peer_connect_low_water" +#define SECTION_PEER_IP "peer_ip" +#define SECTION_PEER_PORT "peer_port" +#define SECTION_PEER_PRIVATE "peer_private" +#define SECTION_PEER_SCAN_INTERVAL_MIN "peer_scan_interval_min" +#define SECTION_PEER_SSL_CIPHER_LIST "peer_ssl_cipher_list" +#define SECTION_PEER_START_MAX "peer_start_max" +#define SECTION_RPC_ALLOW_REMOTE "rpc_allow_remote" +#define SECTION_RPC_ADMIN_ALLOW "rpc_admin_allow" +#define SECTION_RPC_ADMIN_USER "rpc_admin_user" +#define SECTION_RPC_ADMIN_PASSWORD "rpc_admin_password" +#define SECTION_RPC_IP "rpc_ip" +#define SECTION_RPC_PORT "rpc_port" +#define SECTION_RPC_USER "rpc_user" +#define SECTION_RPC_PASSWORD "rpc_password" +#define SECTION_RPC_STARTUP "rpc_startup" +#define SECTION_RPC_SECURE "rpc_secure" +#define SECTION_RPC_SSL_CERT "rpc_ssl_cert" +#define SECTION_RPC_SSL_CHAIN "rpc_ssl_chain" +#define SECTION_RPC_SSL_KEY "rpc_ssl_key" +#define SECTION_SMS_FROM "sms_from" +#define SECTION_SMS_KEY "sms_key" +#define SECTION_SMS_SECRET "sms_secret" +#define SECTION_SMS_TO "sms_to" +#define SECTION_SMS_URL "sms_url" +#define SECTION_SNTP "sntp_servers" +#define SECTION_SSL_VERIFY "ssl_verify" +#define SECTION_SSL_VERIFY_FILE "ssl_verify_file" +#define SECTION_SSL_VERIFY_DIR "ssl_verify_dir" +#define SECTION_VALIDATORS_FILE "validators_file" +#define SECTION_VALIDATION_QUORUM "validation_quorum" +#define SECTION_VALIDATION_SEED "validation_seed" +#define SECTION_WEBSOCKET_PUBLIC_IP "websocket_public_ip" +#define SECTION_WEBSOCKET_PUBLIC_PORT "websocket_public_port" +#define SECTION_WEBSOCKET_PUBLIC_SECURE "websocket_public_secure" +#define SECTION_WEBSOCKET_PING_FREQ "websocket_ping_frequency" +#define SECTION_WEBSOCKET_IP "websocket_ip" +#define SECTION_WEBSOCKET_PORT "websocket_port" +#define SECTION_WEBSOCKET_SECURE "websocket_secure" +#define SECTION_WEBSOCKET_SSL_CERT "websocket_ssl_cert" +#define SECTION_WEBSOCKET_SSL_CHAIN "websocket_ssl_chain" +#define SECTION_WEBSOCKET_SSL_KEY "websocket_ssl_key" +#define SECTION_VALIDATORS "validators" +#define SECTION_VALIDATORS_SITE "validators_site" + +#endif diff --git 
a/modules/ripple_core/ripple_core.h b/modules/ripple_core/ripple_core.h index 4b8b13a488..e3849298c7 100644 --- a/modules/ripple_core/ripple_core.h +++ b/modules/ripple_core/ripple_core.h @@ -30,6 +30,7 @@ namespace ripple // VFALCO NOTE Indentation shows dependency hierarchy // +/***/#include "functional/ripple_ConfigSections.h" /**/#include "functional/ripple_Config.h" /**/#include "functional/ripple_ILoadFeeTrack.h" /*..*/#include "functional/ripple_LoadEvent.h"
diff --git a/rippled-example.cfg b/rippled-example.cfg index 6992020723..7d59091afb 100644 --- a/rippled-example.cfg +++ b/rippled-example.cfg @@ -223,18 +223,22 @@ # shfArahZT9Q9ckTf3s1psJ7C7qzVN # # +#------------------------------------------------------------------------------- # # [node_db] # [temp_db] +# [import_db] # -# Set the choice of databases for storing Node objects. +# Set database options for storing node objects in the primary database, +# caching node objects in the temporary database, or importing node objects +# from a previous database. # # Format (without spaces): # One or more lines of key / value pairs: # <key>'='<value> # ... # -# Example: +# Examples: # type=HyperLevelDB # path=db/hashnode # @@ -252,11 +256,19 @@ # Optional keys: # (none yet) # -# Notes +# Notes: +# # The 'node_db' entry configures the primary, persistent storage. +# # The 'temp_db' configures a look-aside cache for high volume storage # which doesn't necessarily persist between server launches. # +# The 'import_db' is used with the '--import' command line option to +# migrate the specified database into the current database given +# in the [node_db] section. +# +#------------------------------------------------------------------------------- +# # [node_size] # Tunes the servers based on the expected load and available memory. Legal # sizes are "tiny", "small", "medium", "large", and "huge". We recommend
diff --git a/src/cpp/ripple/ripple_Application.cpp b/src/cpp/ripple/ripple_Application.cpp index ff89b7d47e..a4e37d3e0f 100644 --- a/src/cpp/ripple/ripple_Application.cpp +++ b/src/cpp/ripple/ripple_Application.cpp @@ -974,13 +974,15 @@ void ApplicationImp::updateTables () exit (1); } - if (!theConfig.DB_IMPORT.empty()) + if (theConfig.importNodeDatabase.size () > 0) { + ScopedPointer <NodeStore> source (NodeStore::New (theConfig.importNodeDatabase)); + WriteLog (lsWARNING, NodeObject) << - "Node import from '" << theConfig.DB_IMPORT << "' to '" + "Node import from '" << source->getName () << "' to '" << getApp().getNodeStore().getName () << "'."; - getApp().getNodeStore().import(NodeStore::parseDelimitedKeyValueString (theConfig.DB_IMPORT)); + getApp().getNodeStore().import (*source); } }
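
For reference, a configuration fragment that would drive this one-time import path might look like the following (the backend types and paths are examples only):

    [node_db]
    type=LevelDB
    path=db/hashnode

    [import_db]
    type=SQLite
    path=db/old-hashnode

    # then launch with:  rippled --import
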
diff --git a/src/cpp/ripple/ripple_Main.cpp b/src/cpp/ripple/ripple_Main.cpp index eaaa83ab62..598962ebf8 100644 --- a/src/cpp/ripple/ripple_Main.cpp +++ b/src/cpp/ripple/ripple_Main.cpp @@ -216,6 +216,15 @@ int rippleMain (int argc, char** argv) int iResult = 0; po::variables_map vm; // Map of options. + String importDescription; + { + importDescription << + "Import an existing node database (specified in the " << + "[" << ConfigSection::importNodeDatabase () << "] configuration file section) " + "into the current node database (specified in the " << + "[" << ConfigSection::nodeDatabase () << "] configuration file section). "; + } + // VFALCO TODO Replace boost program options with something from Beast. // // Set up option parsing. @@ -240,7 +249,7 @@ int rippleMain (int argc, char** argv) ("start", "Start from a fresh Ledger.") ("net", "Get the initial ledger from the network.") ("fg", "Run in the foreground.") - ("import", po::value <std::string> (), "Import old DB into new DB.") + ("import", importDescription.toStdString ().c_str ()) ; // Interpret positional arguments as --parameters. @@ -354,8 +363,14 @@ int rippleMain (int argc, char** argv) if (vm.count ("start")) theConfig.START_UP = Config::FRESH; + // Handle a one-time import option + // if (vm.count ("import")) - theConfig.DB_IMPORT = vm["import"].as <std::string> (); + { + String const optionString (vm ["import"].as <std::string> ()); + + theConfig.importNodeDatabase = parseDelimitedKeyValueString (optionString); + } if (vm.count ("ledger")) {
From e0bf86d01ef3c8675ecbea162c11bb522db32226 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 23 Jul 2013 13:04:52 -0700 Subject: [PATCH 47/50] Optimised Result::ok() --- .../modules/beast_core/misc/beast_Result.cpp | 7 ++---- .../modules/beast_core/misc/beast_Result.h | 22 +++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/Subtrees/beast/modules/beast_core/misc/beast_Result.cpp b/Subtrees/beast/modules/beast_core/misc/beast_Result.cpp index a61394f08f..a3ad744474 100644 --- a/Subtrees/beast/modules/beast_core/misc/beast_Result.cpp +++ b/Subtrees/beast/modules/beast_core/misc/beast_Result.cpp @@ -21,6 +21,8 @@ */ //============================================================================== +Result::Result() noexcept {} + Result::Result (const String& message) noexcept : errorMessage (message) { @@ -60,11 +62,6 @@ bool Result::operator!= (const Result& other) const noexcept return errorMessage != other.errorMessage; } -Result Result::ok() noexcept -{ - return Result (String::empty); -} - Result Result::fail (const String& errorMessage) noexcept { return Result (errorMessage.isEmpty() ? "Unknown Error" : errorMessage);
diff --git a/Subtrees/beast/modules/beast_core/misc/beast_Result.h b/Subtrees/beast/modules/beast_core/misc/beast_Result.h index 137daa830c..91bd1e1aee 100644 --- a/Subtrees/beast/modules/beast_core/misc/beast_Result.h +++ b/Subtrees/beast/modules/beast_core/misc/beast_Result.h @@ -26,10 +26,7 @@ #include "../text/beast_String.h" - -//============================================================================== -/** - Represents the 'success' or 'failure' of an operation, and holds an associated +/** Represents the 'success' or 'failure' of an operation, and holds an associated error message to describe the error when there's a failure. E.g. @@ -55,12 +52,12 @@ } @endcode */ -class BEAST_API Result +class BEAST_API Result { public: //============================================================================== /** Creates and returns a 'successful' result. */ - static Result ok() noexcept; + static Result ok() noexcept { return Result(); } /** Creates a 'failure' result.
If you pass a blank error message in here, a default "Unknown Error" message @@ -94,12 +91,12 @@ public: const String& getErrorMessage() const noexcept; //============================================================================== - Result (const Result& other); - Result& operator= (const Result& other); + Result (const Result&); + Result& operator= (const Result&); #if BEAST_COMPILER_SUPPORTS_MOVE_SEMANTICS - Result (Result&& other) noexcept; - Result& operator= (Result&& other) noexcept; + Result (Result&&) noexcept; + Result& operator= (Result&&) noexcept; #endif bool operator== (const Result& other) const noexcept; @@ -108,6 +105,9 @@ public: private: String errorMessage; + // The default constructor is not for public use! + // Instead, use Result::ok() or Result::fail() + Result() noexcept; explicit Result (const String&) noexcept; // These casts are private to prevent people trying to use the Result object in numeric contexts @@ -115,5 +115,5 @@ private: operator void*() const; }; +#endif -#endif // BEAST_RESULT_BEASTHEADER From 07dda3eb26841cfb394d0590957ce6aac1a02e7a Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 23 Jul 2013 13:23:07 -0700 Subject: [PATCH 48/50] Import KeyvaDB to Beast --- Builds/QtCreator/rippled.pro | 1 + Builds/VisualStudio2012/RippleD.vcxproj | 1 + .../VisualStudio2012/RippleD.vcxproj.filters | 3 + SConstruct | 1 + .../Builds/VisualStudio2012/beast.vcxproj | 15 +++++ .../VisualStudio2012/beast.vcxproj.filters | 21 +++++++ Subtrees/beast/modules/beast_db/beast_db.cpp | 31 +++++++++++ Subtrees/beast/modules/beast_db/beast_db.h | 52 ++++++++++++++++++ Subtrees/beast/modules/beast_db/beast_db.mm | 20 +++++++ .../beast_db/keyvalue/beast_KeyvaDB.cpp | 24 ++++++-- .../modules/beast_db/keyvalue/beast_KeyvaDB.h | 55 +++++++++++++++++++ TODO.txt | 5 +- modules/ripple_app/node/ripple_KeyvaDB.h | 40 -------------- modules/ripple_app/ripple_app.cpp | 4 +- 14 files changed, 225 insertions(+), 48 deletions(-) create mode 100644 Subtrees/beast/modules/beast_db/beast_db.cpp create mode 100644 Subtrees/beast/modules/beast_db/beast_db.h create mode 100644 Subtrees/beast/modules/beast_db/beast_db.mm rename modules/ripple_app/node/ripple_KeyvaDB.cpp => Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.cpp (96%) create mode 100644 Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.h delete mode 100644 modules/ripple_app/node/ripple_KeyvaDB.h diff --git a/Builds/QtCreator/rippled.pro b/Builds/QtCreator/rippled.pro index 89ebd28df4..9858ac395a 100644 --- a/Builds/QtCreator/rippled.pro +++ b/Builds/QtCreator/rippled.pro @@ -63,6 +63,7 @@ SOURCES += \ ../../Subtrees/beast/modules/beast_basics/beast_basics.cpp \ ../../Subtrees/beast/modules/beast_core/beast_core.cpp \ ../../Subtrees/beast/modules/beast_crypto/beast_crypto.cpp \ + ../../Subtrees/beast/modules/beast_db/beast_db.cpp \ ../../modules/ripple_app/ripple_app_pt1.cpp \ ../../modules/ripple_app/ripple_app_pt2.cpp \ ../../modules/ripple_app/ripple_app_pt3.cpp \ diff --git a/Builds/VisualStudio2012/RippleD.vcxproj b/Builds/VisualStudio2012/RippleD.vcxproj index 10595bf0b0..bdf45f6cc1 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj +++ b/Builds/VisualStudio2012/RippleD.vcxproj @@ -1039,6 +1039,7 @@ + true true diff --git a/Builds/VisualStudio2012/RippleD.vcxproj.filters b/Builds/VisualStudio2012/RippleD.vcxproj.filters index 63a4e3ae69..beaa45d3af 100644 --- a/Builds/VisualStudio2012/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2012/RippleD.vcxproj.filters @@ -903,6 +903,9 @@ [1] Ripple\ripple_app\node + 
+ [0] Subtrees\beast + diff --git a/SConstruct b/SConstruct index 903d5bf877..69461f7c6f 100644 --- a/SConstruct +++ b/SConstruct @@ -122,6 +122,7 @@ COMPILED_FILES = [ 'Subtrees/beast/modules/beast_basics/beast_basics.cpp', 'Subtrees/beast/modules/beast_core/beast_core.cpp', 'Subtrees/beast/modules/beast_crypto/beast_crypto.cpp', + 'Subtrees/beast/modules/beast_db/beast_db.cpp', 'modules/ripple_app/ripple_app_pt1.cpp', 'modules/ripple_app/ripple_app_pt2.cpp', 'modules/ripple_app/ripple_app_pt3.cpp', diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj index b53c3918ec..99c88c4500 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj @@ -78,6 +78,12 @@ true true + + true + true + true + true + @@ -249,6 +255,8 @@ + + @@ -933,6 +941,13 @@ true true
+ + + true + true + true + true + diff --git a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters index 2692e37621..7f02acb998 100644 --- a/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters +++ b/Subtrees/beast/Builds/VisualStudio2012/beast.vcxproj.filters @@ -36,6 +36,9 @@ beast_basics + + beast_db + @@ -125,6 +128,12 @@ {1170f2bc-2456-410a-ab2b-c45f6ed37b9e} + + {4834218f-f13f-41bc-a8a0-50314a3a99a3} + + + {15a98fee-1b52-45eb-9480-514b8750d755} + @@ -632,6 +641,12 @@ beast_core\memory + + beast_db + + + beast_db\keyvalue + @@ -982,6 +997,12 @@ beast_core\diagnostic + + beast_db + + + beast_db\keyvalue + diff --git a/Subtrees/beast/modules/beast_db/beast_db.cpp b/Subtrees/beast/modules/beast_db/beast_db.cpp new file mode 100644 index 0000000000..622c2afdbd --- /dev/null +++ b/Subtrees/beast/modules/beast_db/beast_db.cpp @@ -0,0 +1,31 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include "BeastConfig.h" + +#include "beast_db.h" + +#include "../beast_crypto/beast_crypto.h" + +namespace beast +{ + +#include "keyvalue/beast_KeyvaDB.cpp" + +} diff --git a/Subtrees/beast/modules/beast_db/beast_db.h b/Subtrees/beast/modules/beast_db/beast_db.h new file mode 100644 index 0000000000..1612a178d9 --- /dev/null +++ b/Subtrees/beast/modules/beast_db/beast_db.h @@ -0,0 +1,52 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_BEAST_DB_H_INCLUDED +#define BEAST_BEAST_DB_H_INCLUDED + +//------------------------------------------------------------------------------ + +/* If you fail to make sure that all your compile units are building Beast with + the same set of option flags, then there's a risk that different compile + units will treat the classes as having different memory layouts, leading to + very nasty memory corruption errors when they all get linked together. + That's why it's best to always include the BeastConfig.h file before any + beast headers. +*/ +#ifndef BEAST_BEASTCONFIG_H_INCLUDED +# ifdef _MSC_VER +# pragma message ("Have you included your BeastConfig.h file before including the Beast headers?") +# else +# warning "Have you included your BeastConfig.h file before including the Beast headers?" +# endif +#endif + +#include "../beast_core/beast_core.h" +#include "../beast_basics/beast_basics.h" + +//------------------------------------------------------------------------------ + +namespace beast +{ + +#include "keyvalue/beast_KeyvaDB.h" + +} + +#endif diff --git a/Subtrees/beast/modules/beast_db/beast_db.mm b/Subtrees/beast/modules/beast_db/beast_db.mm new file mode 100644 index 0000000000..2ae0b83c82 --- /dev/null +++ b/Subtrees/beast/modules/beast_db/beast_db.mm @@ -0,0 +1,20 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include "beast_db.cpp" diff --git a/modules/ripple_app/node/ripple_KeyvaDB.cpp b/Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.cpp similarity index 96% rename from modules/ripple_app/node/ripple_KeyvaDB.cpp rename to Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.cpp index 8b8e4652dc..7867292d74 100644 --- a/modules/ripple_app/node/ripple_KeyvaDB.cpp +++ b/Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.cpp @@ -1,8 +1,22 @@ //------------------------------------------------------------------------------ /* - Copyright (c) 2011-2013, OpenCoin, Inc. + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ //============================================================================== + /* TODO @@ -110,7 +124,7 @@ public: }; // Key records are indexed starting at one. - struct KeyRecord + struct KeyRecord : Uncopyable { explicit KeyRecord (void* const keyStorage) : key (keyStorage) @@ -139,7 +153,7 @@ public: // are identical to the format on disk. Therefore it is necessary to // use the serialization routines to extract or update the key records. // - class KeyBlock + class KeyBlock : Uncopyable { public: KeyBlock (int depth, int keyBytes) @@ -177,6 +191,7 @@ public: stream.read (keyRecord->key, m_keyBytes); } +#if 0 void writeKeyRecord (KeyRecord const& keyRecord, int keyIndex) { bassert (keyIndex >=1 && keyIndex <= calcKeysAtDepth (m_depth)); @@ -195,6 +210,7 @@ public: stream.write (keyRecord.key, m_keyBytes); #endif } +#endif private: int const m_depth; @@ -459,7 +475,7 @@ public: //-------------------------------------------------------------------------- - struct FindResult + struct FindResult : Uncopyable { FindResult (void* const keyStorage) : keyRecord (keyStorage)
diff --git a/Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.h b/Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.h new file mode 100644 index 0000000000..20e4185f49 --- /dev/null +++ b/Subtrees/beast/modules/beast_db/keyvalue/beast_KeyvaDB.h @@ -0,0 +1,55 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_KEYVADB_H_INCLUDED +#define BEAST_KEYVADB_H_INCLUDED + +/** Specialized Key/value database + + Once written, a value can never be modified. +*/ +class KeyvaDB : LeakChecked <KeyvaDB> +{ +public: + class GetCallback + { + public: + virtual void* getStorageForValue (int valueBytes) = 0; + }; + + static KeyvaDB* New (int keyBytes, + int keyBlockDepth, + File keyPath, + File valPath); + + virtual ~KeyvaDB () { } + + // VFALCO TODO Make the return value a Result so we can + // detect corruption and errors! + // + virtual bool get (void const* key, GetCallback* callback) = 0; + + // VFALCO TODO Use Result for return value + // + virtual void put (void const* key, void const* value, int valueBytes) = 0; + + virtual void flush () = 0; +}; + +#endif
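
A minimal sketch of how this interface is meant to be driven (the callback struct, buffer handling, and the New() arguments here are illustrative assumptions, not part of the patch):

    // Receives the stored size and hands back a buffer to fill.
    struct BlobCallback : KeyvaDB::GetCallback
    {
        HeapBlock <char> value;
        void* getStorageForValue (int valueBytes)
        {
            value.allocate (valueBytes);
            return value.getData ();
        }
    };

    // keyBytes is fixed at creation; every key passed in must be that size.
    ScopedPointer <KeyvaDB> db (KeyvaDB::New (
        32, 3, File::createTempFile ("test.key"), File::createTempFile ("test.val")));

    char key [32] = { 0 };
    db->put (key, "value", 5);
    db->flush ();

    BlobCallback cb;
    bool const found = db->get (key, &cb); // true once the key is written
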
diff --git a/TODO.txt b/TODO.txt index d2e68f7656..69e5eda3da 100644 --- a/TODO.txt +++ b/TODO.txt @@ -4,6 +4,7 @@ RIPPLE TODO Items marked '*' can be handled by third parties. + Vinnie's Short List (Changes day to day) - Make theConfig a SharedSingleton to prevent leak warnings - Add fast backend to the unit test @@ -20,6 +21,8 @@ Vinnie's Short List (Changes day to day) -------------------------------------------------------------------------------- +- Raise the warning level and fix everything + * Restyle all the macros in ripple_ConfigSection.h * Replace all throw with beast::Throw @@ -139,8 +142,6 @@ Vinnie's Short List (Changes day to day) - Make LevelDB and Ripple code work with both Unicode and non-Unicode Windows APIs -- Raise the warning level and fix everything - - Go searching through VFALCO notes and fix everything - Deal with function-level statics used for SqliteDatabase (like in
diff --git a/modules/ripple_app/node/ripple_KeyvaDB.h b/modules/ripple_app/node/ripple_KeyvaDB.h deleted file mode 100644 index a58a469829..0000000000 --- a/modules/ripple_app/node/ripple_KeyvaDB.h +++ /dev/null @@ -1,40 +0,0 @@ -//------------------------------------------------------------------------------ -/* - Copyright (c) 2011-2013, OpenCoin, Inc. -*/ -//============================================================================== - -#ifndef RIPPLE_KEYVADB_H_INCLUDED -#define RIPPLE_KEYVADB_H_INCLUDED - -/** Key/value database optimized for Ripple usage. -*/ -class KeyvaDB : LeakChecked <KeyvaDB> -{ -public: - class GetCallback - { - public: - virtual void* getStorageForValue (int valueBytes) = 0; - }; - - static KeyvaDB* New (int keyBytes, - int keyBlockDepth, - File keyPath, - File valPath); - - virtual ~KeyvaDB () { } - - // VFALCO TODO Make the return value a Result so we can - // detect corruption and errors! - // - virtual bool get (void const* key, GetCallback* callback) = 0; - - // VFALCO TODO Use Result for return value - // - virtual void put (void const* key, void const* value, int valueBytes) = 0; - - virtual void flush () = 0; -}; - -#endif
diff --git a/modules/ripple_app/ripple_app.cpp b/modules/ripple_app/ripple_app.cpp index 1d5bb112e0..ae84625c3b 100644 --- a/modules/ripple_app/ripple_app.cpp +++ b/modules/ripple_app/ripple_app.cpp @@ -65,6 +65,8 @@ #include "../ripple_core/ripple_core.h" +#include "beast/modules/beast_db/beast_db.h" + // VFALCO TODO fix these warnings!
#ifdef _MSC_VER //#pragma warning (push) // Causes spurious C4503 "decorated name exceeds maximum length" @@ -246,8 +248,6 @@ static const uint64 tenTo17m1 = tenTo17 - 1; #include "node/ripple_NodeObject.cpp" #include "node/ripple_NodeStore.cpp" #include "node/ripple_HyperLevelDBBackendFactory.cpp" -#include "node/ripple_KeyvaDB.h" // private -#include "node/ripple_KeyvaDB.cpp" #include "node/ripple_KeyvaDBBackendFactory.cpp" #include "node/ripple_LevelDBBackendFactory.cpp" #include "node/ripple_NullBackendFactory.cpp" From d7892cc0823fc81372d383c73c87ad5110c34713 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 23 Jul 2013 14:06:06 -0700 Subject: [PATCH 49/50] Put unit test in 'beast' group --- .../modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp index 5d94a0bba0..0a553cffa4 100644 --- a/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp +++ b/Subtrees/beast/modules/beast_core/diagnostic/beast_UnitTestUtilities.cpp @@ -20,7 +20,7 @@ class UnitTestUtilitiesTests : public UnitTest { public: - UnitTestUtilitiesTests () : UnitTest ("UnitTestUtilities") + UnitTestUtilitiesTests () : UnitTest ("UnitTestUtilities", "beast") { } From ff6d855bfb3243cd01dbe3f64e5329c04d9a6bf0 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Tue, 23 Jul 2013 14:06:57 -0700 Subject: [PATCH 50/50] Make ProofOfWork unit tests manually triggered --- src/cpp/ripple/ripple_ProofOfWorkFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp b/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp index 2896d23da4..1df2d79d37 100644 --- a/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp +++ b/src/cpp/ripple/ripple_ProofOfWorkFactory.cpp @@ -236,7 +236,7 @@ IProofOfWorkFactory* IProofOfWorkFactory::New () class ProofOfWorkTests : public UnitTest { public: - ProofOfWorkTests () : UnitTest ("ProofOfWork", "ripple") + ProofOfWorkTests () : UnitTest ("ProofOfWork", "ripple", UnitTest::runManual) { }
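
For comparison, the general shape of a manually-run Beast unit test in a named group follows the pattern used by the two patches above (the class and test names here are arbitrary):

    class ExampleTests : public UnitTest
    {
    public:
        ExampleTests () : UnitTest ("Example", "beast", UnitTest::runManual) { }

        void runTest ()
        {
            beginTest ("basics");
            expect (1 + 1 == 2, "Should be equal");
        }
    };

    static ExampleTests exampleTests;
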