From 8d1cfaabe73a0ceb4b4e0968c5cab7f611404922 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Mon, 12 Jan 2015 07:59:11 -0800 Subject: [PATCH] Add NuDB: A Key/Value Store For Decentralized Systems NuDB is a high performance key/value database optimized for insert-only workloads, with these features: * Low memory footprint * Values are immutable * Value sizes from 1 2^48 bytes (281TB) * All keys are the same size * Performance independent of growth * Optimized for concurrent fetch * Key file can be rebuilt if needed * Inserts are atomic and consistent * Data file may be iterated, index rebuilt. * Key and data files may be on different volumes * Hardened against algorithmic complexity attacks * Header-only, nothing to build or link --- .../core/diagnostic/UnitTestUtilities.h | 3 +- beast/nudb.h | 32 + beast/nudb/README.md | 265 +++++ beast/nudb/create.h | 129 +++ beast/nudb/detail/arena.h | 247 ++++ beast/nudb/detail/bucket.h | 504 ++++++++ beast/nudb/detail/buffers.h | 147 +++ beast/nudb/detail/bulkio.h | 189 +++ beast/nudb/detail/cache.h | 248 ++++ beast/nudb/detail/config.h | 75 ++ beast/nudb/detail/field.h | 272 +++++ beast/nudb/detail/format.h | 507 ++++++++ beast/nudb/detail/gentex.h | 276 +++++ beast/nudb/detail/pool.h | 256 ++++ beast/nudb/detail/posix_file.h | 363 ++++++ beast/nudb/detail/stream.h | 232 ++++ beast/nudb/detail/win32_file.h | 444 +++++++ beast/nudb/error.h | 109 ++ beast/nudb/file.h | 41 + beast/nudb/mode.h | 43 + beast/nudb/nudb.cpp | 25 + beast/nudb/recover.h | 157 +++ beast/nudb/store.h | 1025 +++++++++++++++++ beast/nudb/tests/callgrind_test.cpp | 116 ++ beast/nudb/tests/common.h | 237 ++++ beast/nudb/tests/fail_file.h | 245 ++++ beast/nudb/tests/recover_test.cpp | 160 +++ beast/nudb/tests/store_test.cpp | 142 +++ beast/nudb/tests/verify_test.cpp | 55 + beast/nudb/verify.h | 283 +++++ beast/nudb/visit.h | 110 ++ 31 files changed, 6936 insertions(+), 1 deletion(-) create mode 100644 beast/nudb.h create mode 100644 beast/nudb/README.md 
create mode 100644 beast/nudb/create.h create mode 100644 beast/nudb/detail/arena.h create mode 100644 beast/nudb/detail/bucket.h create mode 100644 beast/nudb/detail/buffers.h create mode 100644 beast/nudb/detail/bulkio.h create mode 100644 beast/nudb/detail/cache.h create mode 100644 beast/nudb/detail/config.h create mode 100644 beast/nudb/detail/field.h create mode 100644 beast/nudb/detail/format.h create mode 100644 beast/nudb/detail/gentex.h create mode 100644 beast/nudb/detail/pool.h create mode 100644 beast/nudb/detail/posix_file.h create mode 100644 beast/nudb/detail/stream.h create mode 100644 beast/nudb/detail/win32_file.h create mode 100644 beast/nudb/error.h create mode 100644 beast/nudb/file.h create mode 100644 beast/nudb/mode.h create mode 100644 beast/nudb/nudb.cpp create mode 100644 beast/nudb/recover.h create mode 100644 beast/nudb/store.h create mode 100644 beast/nudb/tests/callgrind_test.cpp create mode 100644 beast/nudb/tests/common.h create mode 100644 beast/nudb/tests/fail_file.h create mode 100644 beast/nudb/tests/recover_test.cpp create mode 100644 beast/nudb/tests/store_test.cpp create mode 100644 beast/nudb/tests/verify_test.cpp create mode 100644 beast/nudb/verify.h create mode 100644 beast/nudb/visit.h diff --git a/beast/module/core/diagnostic/UnitTestUtilities.h b/beast/module/core/diagnostic/UnitTestUtilities.h index d354f2dfdb..b424282a85 100644 --- a/beast/module/core/diagnostic/UnitTestUtilities.h +++ b/beast/module/core/diagnostic/UnitTestUtilities.h @@ -21,7 +21,8 @@ #define BEAST_UNITTESTUTILITIES_H_INCLUDED #include - +#include + namespace beast { namespace UnitTestUtilities { diff --git a/beast/nudb.h b/beast/nudb.h new file mode 100644 index 0000000000..40b1d49e79 --- /dev/null +++ b/beast/nudb.h @@ -0,0 +1,32 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, 
modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_H_INCLUDED +#define BEAST_NUDB_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/beast/nudb/README.md b/beast/nudb/README.md new file mode 100644 index 0000000000..6cb2747bd7 --- /dev/null +++ b/beast/nudb/README.md @@ -0,0 +1,265 @@ +# NuDB: A Key/Value Store For Decentralized Systems + +The new breed of decentralized systems such as Ripple or Bitcoin +that use embedded key/value databases place different demands on +these database than what is traditional. NuDB provides highly +optimized and concurrent atomic, durable, and isolated fetch and +insert operations to secondary storage, along with these features: + +* Low memory footprint +* Values are immutable +* Value sizes from 1 2^48 bytes (281TB) +* All keys are the same size +* Performance independent of growth +* Optimized for concurrent fetch +* Key file can be rebuilt if needed +* Inserts are atomic and consistent +* Data file may be iterated, index rebuilt. +* Key and data files may be on different volumes +* Hardened against algorithmic complexity attacks +* Header-only, nothing to build or link + +Three files are used. 
The data file holds keys and values stored +sequentially and size-prefixed. The key file holds a series of +fixed-size bucket records forming an on-disk hash table. The log file +stores bookkeeping information used to restore consistency when an +external failure occurs. In typical cases a fetch costs one I/O to +consult the key file and if the key is present, one I/O to read the +value. + +## Usage + +Callers define these parameters when a database is created: + +* KeySize: The size of a key in bytes +* BlockSize: The physical size of a key file record +* LoadFactor: The desired fraction of bucket occupancy + +The ideal block size matches the sector size or block size of the +underlying physical media that holds the key file. Functions are +provided to return a best estimate of this value for a particular +device, but a default of 4096 should work for typical installations. +The implementation tries to fit as many entries as possible in a key +file record, to maximize the amount of useful work performed per I/O. + +The load factor is chosen to make bucket overflows unlikely without +sacrificing bucket occupancy. A value of 0.50 seems to work well with +a good hash function. + +Callers also provide these parameters when a database is opened: + +* Appnum: An application-defined integer constant +* AllocSize: A significant multiple of the average data size + +To improve performance, memory is recycled. NuDB needs a hint about +the average size of the data being inserted. For an average data +size of 1KB (one kilobyte), AllocSize of sixteen megabytes (16MB) is +sufficient. If the AllocSize is too low, the memory recycler will +not make efficient use of allocated blocks. + +Two operations are defined, fetch and insert. + +### Fetch + +The fetch operation retrieves a variable length value given the +key. The caller supplies a factory used to provide a buffer for storing +the value. This interface allows custom memory allocation strategies. 
+ +### Insert + +Insert adds a key/value pair to the store. Value data must contain at +least one byte. Duplicate keys are disallowed. Insertions are serialized. + +## Implementation + +All insertions are buffered in memory, with inserted values becoming +immediately discoverable in subsequent or concurrent calls to fetch. +Periodically, buffered data is safely committed to disk files using +a separate dedicated thread associated with the database. This commit +process takes place at least once per second, or more often during +a detected surge in insertion activity. In the commit process the +key/value pairs receive the following treatment: + +An insertion is performed by appending a value record to the data file. +The value record has some header information including the size of the +data and a copy of the key; the data file is iteratable without the key +file. The value data follows the header. The data file is append-only +and immutable: once written, bytes are never changed. + +Initially the hash table in the key file consists of a single bucket. +After the load factor is exceeded from insertions, the hash table grows +in size by one bucket by doing a "split". The split operation is the +linear hashing algorithm as described by Litwin and Larson: + +http://en.wikipedia.org/wiki/Linear_hashing + +When a bucket is split, each key is rehashed and either remains in the +original bucket or gets moved to the new bucket appended to the end of +the key file. + +An insertion on a full bucket first triggers the "spill" algorithm: +First, a spill record is appended to the data file. The spill record +contains header information followed by the entire bucket record. Then, +the bucket's size is set to zero and the offset of the spill record is +stored in the bucket. At this point the insertion may proceed normally, +since the bucket is empty. Spilled buckets in the data file are always +full. 
+ +Because every bucket holds the offset of the next spill record in the +data file, each bucket forms a linked list. In practice, careful +selection of capacity and load factor will keep the percentage of +buckets with one spill record to a minimum, with no bucket requiring +two spill records. + +The implementation of fetch is straightforward: first the bucket in the +key file is checked, then each spill record in the linked list of +spill records is checked, until the key is found or there are no more +records. As almost all buckets have no spill records, the average +fetch requires one I/O (not including reading the value). + +One complication in the scheme is when a split occurs on a bucket that +has one or more spill records. In this case, both the bucket being split +and the new bucket may overflow. This is handled by performing the +spill algorithm for each overflow that occurs. The new buckets may have +one or more spill records each, depending on the number of keys that +were originally present. + +Because the data file is immutable, a bucket's original spill records +are no longer referenced after the bucket is split. These blocks of data +in the data file are unrecoverable wasted space. Correctly configured +databases can have a typical waste factor of 1%, which is acceptable. +These unused bytes can be removed by visiting each value in the value +file using an off-line process and inserting it into a new database, +then delete the old database and use the new one instead. + +## Recovery + +To provide atomicity and consistency, a log file associated with the +database stores information used to roll back partial commits. + +## Iteration + +Each record in the data file is prefixed with a header identifying +whether it is a value record or a spill record, along with the size of +the record in bytes and a copy of the key if its a value record. +Therefore, values may be iterated. 
A key file can be regenerated from +just the data file by iterating the values and performing the key +insertion algorithm. + +## Concurrency + +Locks are never held during disk reads and writes. Fetches are fully +concurrent, while inserts are serialized. Inserts prevent duplicate +keys. Inserts are atomic, they either succeed immediately or fail. +After an insert, the key is immediately visible to subsequent fetches. + +## Formats + +All integer values are stored as big endian. The uint48_t format +consists of 6 bytes. + +### Key File + +The Key File contains the Header followed by one or more +fixed-length Bucket Records. + +#### Header (104 bytes) + + char[8] Type The characters "nudb.key" + uint16 Version Holds the version number + uint64 Appnum Application defined constant + uint64 Salt A random seed + uint64 Pepper The salt hashed + uint16 KeySize Key size in bytes + uint16 BlockSize Size of a file block in bytes + uint16 LoadFactor Target fraction in 65536ths + uint8[64] Reserved Zeroes + uint8[] Reserved Zero-pad to block size + +The Type identifies the file as belonging to nudb. Salt is +generated when the database is created and helps prevent +complexity attacks; the salt is prepended to the key material +when computing a hash, or used to initialize the state of +the hash function. Appnum is an application defined constant +set when the database is created. It can be used for anything, +for example to distinguish between different data formats. + +Pepper is computed by hashing the salt using a hash function +seeded with the salt. This is used to fingerprint the hash +function used. If a database is opened and the fingerprint +does not match the hash calculation performed using the template +argument provided when constructing the store, an exception +is thrown. + +The header for the key file contains the File Header followed by +the information above. The Capacity is the number of keys per +bucket, and defines the size of a bucket record. 
The load factor +is the target fraction of bucket occupancy. + +None of the information in the key file header or the data file +header may be changed after the database is created. + +#### Bucket Record (fixed-length) + + uint16 Count Number of keys in this bucket + uint48 Spill Offset of the next spill record or 0 + BucketEntry[] Entries The bucket entries + +#### Bucket Entry + + uint48 Offset Offset in data file of the data + uint48 Size The size of the value in bytes + uint8[KeySize] Key The key + +### Data File + +The Data File contains the Header followed by zero or more +variable-length Value Records and Spill Records. + +#### Header (92 bytes) + + char[8] Type The characters "nudb.dat" + uint16 Version Holds the version number + uint64 Appnum Application defined constant + uint64 Salt A random seed + uint16 KeySize Key size in bytes + uint8[64] Reserved Zeroes + +Salt contains the same value as the salt in the corresponding +key file. This is placed in the data file so that key and value +files belonging to the same database can be identified. + +#### Data Record (variable-length) + + uint48 Size Size of the value in bytes + uint8[KeySize] Key The key. + uint8[Size] Data The value data. + +#### Spill Record (fixed-length) + + uint48 Zero All zero, identifies a spill record + uint16 Size Bytes in spill bucket (for skipping) + Bucket SpillBucket Bucket Record + +### Log File + +The Log file contains the Header followed by zero or more fixed size + +#### Header (44 bytes) + + char[8] Type The characters "nudb.log" + uint16 Version Holds the version number + uint64 Appnum Application defined constant + uint64 Salt A random seed. + uint64 Pepper The salt hashed + uint16 KeySize Key size in bytes + uint64 KeyFileSize Size of key file. + uint64 DataFileSize Size of data file. + +#### Log Record + + uint64_t Index Bucket index (0-based) + Bucket Bucket Compact Bucket record + +Compact buckets include only Size entries. 
These are primarily +used to minimize the volume of writes to the log file. diff --git a/beast/nudb/create.h b/beast/nudb/create.h new file mode 100644 index 0000000000..c7ccd37eb0 --- /dev/null +++ b/beast/nudb/create.h @@ -0,0 +1,129 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_CREATE_H_INCLUDED +#define BEAST_NUDB_CREATE_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +/** Create a new database. + Preconditions: + The files must not exist + Throws: + + @param args Arguments passed to File constructors + @return `false` if any file could not be created. 
+*/ +template +bool +create ( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::uint64_t appnum, + std::uint64_t salt, + std::size_t key_size, + std::size_t block_size, + float load_factor) +{ + using namespace detail; + using File = native_file; + if (key_size < 1) + throw std::domain_error( + "invalid key size"); + if (block_size > field::max) + throw std::domain_error( + "nudb: block size too large"); + if (load_factor <= 0.f) + throw std::domain_error( + "nudb: load factor too small"); + if (load_factor >= 1.f) + throw std::domain_error( + "nudb: load factor too large"); + auto const capacity = + bucket_capacity(key_size, block_size); + if (capacity < 1) + throw std::domain_error( + "nudb: block size too small"); + File df; + File kf; + File lf; + for(;;) + { + if (df.create( + file_mode::append, dat_path)) + { + if (kf.create ( + file_mode::append, key_path)) + { + if (lf.create( + file_mode::append, log_path)) + break; + File::erase (dat_path); + } + File::erase (key_path); + } + return false; + } + + dat_file_header dh; + dh.version = currentVersion; + dh.appnum = appnum; + dh.salt = salt; + dh.key_size = key_size; + + key_file_header kh; + kh.version = currentVersion; + kh.appnum = appnum; + kh.salt = salt; + kh.pepper = pepper(salt); + kh.key_size = key_size; + kh.block_size = block_size; + // VFALCO Should it be 65536? + // How do we set the min? + kh.load_factor = std::min( + 65536.0 * load_factor, 65535); + write (df, dh); + write (kf, kh); + buffer buf(block_size); + std::memset(buf.get(), 0, block_size); + bucket b (key_size, block_size, + buf.get(), empty); + b.write (kf, block_size); + // VFALCO Leave log file empty? 
+ df.sync(); + kf.sync(); + lf.sync(); + return true; +} + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/arena.h b/beast/nudb/detail/arena.h new file mode 100644 index 0000000000..832a2de669 --- /dev/null +++ b/beast/nudb/detail/arena.h @@ -0,0 +1,247 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_ARENA_H_INCLUDED +#define BEAST_NUDB_ARENA_H_INCLUDED + +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace detail { + +/* Custom memory manager that allocates in large blocks. + + No limit is placed on the size of an allocation but + alloc_size should be tuned upon construction to be a + significant multiple of the average allocation size. + + When the arena is cleared, allocated memory is placed + on a free list for re-use, avoiding future system calls. 
+*/ +template +class arena_t +{ +private: + class element; + + std::size_t alloc_size_; + element* used_ = nullptr; + element* free_ = nullptr; + +public: + arena_t (arena_t const&); + arena_t& operator= (arena_t const&); + + ~arena_t(); + + explicit + arena_t (std::size_t alloc_size); + + arena_t& operator= (arena_t&& other); + + // Makes used blocks free + void + clear(); + + // deletes free blocks + void + shrink_to_fit(); + + std::uint8_t* + alloc (std::size_t n); + + template + friend + void + swap (arena_t& lhs, arena_t& rhs); + +private: + void + dealloc (element*& list); +}; + +//------------------------------------------------------------------------------ + +template +class arena_t<_>::element +{ +private: + std::size_t const capacity_; + std::size_t used_ = 0; + +public: + element* next = nullptr; + + explicit + element (std::size_t alloc_size) + : capacity_ ( + alloc_size - sizeof(*this)) + { + } + + void + clear() + { + used_ = 0; + } + + std::size_t + remain() const + { + return capacity_ - used_; + } + + std::size_t + capacity() const + { + return capacity_; + } + + std::uint8_t* + alloc (std::size_t n); +}; + +template +std::uint8_t* +arena_t<_>::element::alloc (std::size_t n) +{ + if (n > capacity_ - used_) + return nullptr; + auto const p = const_cast( + reinterpret_cast(this + 1) + ) + used_; + used_ += n; + return p; +} + +//------------------------------------------------------------------------------ + +template +arena_t<_>::arena_t (std::size_t alloc_size) + : alloc_size_ (alloc_size) +{ + if (alloc_size <= sizeof(element)) + throw std::domain_error( + "arena: bad alloc size"); +} + +template +arena_t<_>::~arena_t() +{ + dealloc (used_); + dealloc (free_); +} + +template +arena_t<_>& +arena_t<_>::operator= (arena_t&& other) +{ + dealloc (used_); + dealloc (free_); + alloc_size_ = other.alloc_size_; + used_ = other.used_; + free_ = other.free_; + other.used_ = nullptr; + other.free_ = nullptr; + return *this; +} + +template +void 
+arena_t<_>::clear() +{ + while (used_) + { + auto const e = used_; + used_ = used_->next; + e->clear(); + e->next = free_; + free_ = e; + } +} + +template +void +arena_t<_>::shrink_to_fit() +{ + dealloc (free_); +} + +template +std::uint8_t* +arena_t<_>::alloc (std::size_t n) +{ + // Undefined behavior: Zero byte allocations + assert(n != 0); + n = 8 * ((n + 7) / 8); + if (used_ && used_->remain() >= n) + return used_->alloc(n); + if (free_ && free_->remain() >= n) + { + auto const e = free_; + free_ = free_->next; + e->next = used_; + used_ = e; + return used_->alloc(n); + } + std::size_t const size = std::max( + alloc_size_, sizeof(element) + n); + element* const e = reinterpret_cast( + new std::uint8_t[size]); + ::new(e) element(size); + e->next = used_; + used_ = e; + return used_->alloc(n); +} + +template +void +swap (arena_t<_>& lhs, arena_t<_>& rhs) +{ + using std::swap; + swap(lhs.alloc_size_, rhs.alloc_size_); + swap(lhs.used_, rhs.used_); + swap(lhs.free_, rhs.free_); +} + +template +void +arena_t<_>::dealloc (element*& list) +{ + while (list) + { + auto const e = list; + list = list->next; + e->~element(); + delete[] reinterpret_cast(e); + } +} + +using arena = arena_t<>; + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/bucket.h b/beast/nudb/detail/bucket.h new file mode 100644 index 0000000000..1b49b85b24 --- /dev/null +++ b/beast/nudb/detail/bucket.h @@ -0,0 +1,504 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_BUCKET_H_INCLUDED +#define BEAST_NUDB_BUCKET_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace detail { + +// Key, hash, and bucket calculations: + +// Returns the hash of a key given the salt +// +template +inline +typename Hasher::result_type +hash (void const* key, + std::size_t key_size, std::size_t salt) +{ + Hasher h (salt); + h.append (key, key_size); + return static_cast< + typename Hasher::result_type>(h); +} + +// Returns bucket index given hash, buckets, and modulus +// +inline +std::size_t +bucket_index (std::size_t h, + std::size_t buckets, std::size_t modulus) +{ + std::size_t n = h % modulus; + if (n >= buckets) + n -= modulus / 2; + return n; +} + +// Returns the bucket index of a key +// +template +inline +std::size_t +bucket_index (void const* key, std::size_t key_size, + std::size_t salt, std::size_t buckets, + std::size_t modulus) +{ + return bucket_index (hash + (key, key_size, salt), buckets, modulus); +} + +// Returns the bucket index of a key +// given the key file header +template +inline +std::size_t +bucket_index (void const* key, key_file_header const& kh) +{ + return bucket_index(key, kh.key_size, + kh.salt, kh.buckets, kh.modulus); +} + +//------------------------------------------------------------------------------ + +// Tag for constructing 
empty buckets +struct empty_t { }; +static empty_t empty; + +// Allows inspection and manipulation of bucket blobs in memory +template +class bucket_t +{ +private: + std::size_t key_size_; // Size of key in bytes + std::size_t block_size_; // Size of a key file block + std::size_t count_; // Current key count + std::size_t spill_; // Offset of next spill record or 0 + std::uint8_t* p_; // Pointer to the bucket blob + +public: + struct value_type + { + std::size_t offset; + std::size_t size; + void const* key; + }; + + bucket_t (bucket_t const&) = default; + bucket_t& operator= (bucket_t const&) = default; + + bucket_t (std::size_t key_size, + std::size_t block_size, void* p); + + bucket_t (std::size_t key_size, + std::size_t block_size, void* p, empty_t); + + std::size_t + key_size() const + { + return key_size_; + } + + std::size_t + block_size() const + { + return block_size_; + } + + std::size_t + compact_size() const + { + return detail::compact_size( + key_size_, count_); + } + + bool + empty() const + { + return count_ == 0; + } + + bool + full() const + { + return count_ >= detail::bucket_capacity( + key_size_, block_size_); + } + + std::size_t + size() const + { + return count_; + } + + // Returns offset of next spill record or 0 + std::size_t + spill() const + { + return spill_; + } + + // Clear contents of the bucket + void + clear(); + + // Set offset of next spill record + void + spill (std::size_t offset); + + // Returns the record for a key + // entry without bounds checking. + // + value_type const + at (std::size_t i) const; + + value_type const + operator[] (std::size_t i) const + { + return at(i); + } + + std::pair + find (void const* key) const; + + void + insert (std::size_t offset, + std::size_t size, void const* key); + + // Erase an element by index + // + void + erase (std::size_t i); + + // Read a full bucket from the + // file at the specified offset. 
+ // + template + void + read (File& f, std::size_t offset); + + // Read a compact bucket + // + template + void + read (bulk_reader& r); + + // Write a compact bucket to the stream. + // This only writes entries that are not empty. + // + void + write (ostream& os) const; + + // Write a bucket to the file at the specified offset. + // The full block_size() bytes are written. + // + template + void + write (File& f, std::size_t offset) const; + +private: + // Update size and spill in the blob + void + update(); + + std::pair + lower_bound (void const* key) const; +}; + +//------------------------------------------------------------------------------ + +template +bucket_t<_>::bucket_t (std::size_t key_size, + std::size_t block_size, void* p) + : key_size_ (key_size) + , block_size_ (block_size) + , p_ (reinterpret_cast(p)) +{ + // Bucket Record + istream is(p_, block_size); + detail::read(is, count_); // Count + detail::read(is, spill_); // Spill +} + +template +bucket_t<_>::bucket_t (std::size_t key_size, + std::size_t block_size, void* p, empty_t) + : key_size_ (key_size) + , block_size_ (block_size) + , count_ (0) + , spill_ (0) + , p_ (reinterpret_cast(p)) +{ + update(); +} + +template +void +bucket_t<_>::clear() +{ + count_ = 0; + spill_ = 0; + update(); +} + +template +void +bucket_t<_>::spill (std::size_t offset) +{ + spill_ = offset; + update(); +} + +template +auto +bucket_t<_>::at (std::size_t i) const -> + value_type const +{ + value_type result; + // Bucket Entry + std::size_t const w = + field::size + // Offset + field::size + // Size + key_size_; // Key + // Bucket Record + detail::istream is(p_ + + field::size + // Count + field::size + // Spill + i * w, w); + // Bucket Entry + detail::read( + is, result.offset); // Offset + detail::read( + is, result.size); // Size + result.key = is.data(key_size_); // Key + return result; +} + +template +auto +bucket_t<_>::find (void const* key) const -> + std::pair +{ + std::pair result; + std::size_t i; + 
std::tie(i, result.second) = lower_bound(key); + if (result.second) + result.first = at(i); + return result; +} + +template +void +bucket_t<_>::insert (std::size_t offset, + std::size_t size, void const* key) +{ + bool found; + std::size_t i; + std::tie(i, found) = lower_bound(key); + (void)found; + assert(! found); + // Bucket Record + auto const p = p_ + + field::size + // Count + field::size; // Spill + // Bucket Entry + std::size_t const w = + field::size + // Offset + field::size + // Size + key_size_; // Key + std::memmove ( + p + (i + 1) * w, + p + i * w, + (count_ - i) * w); + count_++; + update(); + // Bucket Entry + ostream os (p + i * w, w); + detail::write(os, offset); // Offset + detail::write(os, size); // Size + std::memcpy (os.data(key_size_), + key, key_size_); // Key +} + +template +void +bucket_t<_>::erase (std::size_t i) +{ + // Bucket Record + auto const p = p_ + + field::size + // Count + field::size; // Spill + auto const w = + field::size + // Offset + field::size + // Size + key_size_; // Key + --count_; + if (i != count_) + std::memmove( + p + i * w, + p + (i + 1) * w, + (count_ - i) * w); + update(); +} + +template +template +void +bucket_t<_>::read (File& f, std::size_t offset) +{ + auto const cap = bucket_capacity ( + key_size_, block_size_); + // Excludes padding to block size + f.read (offset, p_, bucket_size( + key_size_, bucket_capacity( + key_size_, block_size_))); + istream is(p_, block_size_); + detail::read< + std::uint16_t>(is, count_); // Count + detail::read< + uint48_t>(is, spill_); // Spill + if (count_ > cap) + throw store_corrupt_error( + "bad bucket size"); +} + +template +template +void +bucket_t<_>::read (bulk_reader& r) +{ + // Bucket Record (compact) + auto is = r.prepare( + detail::field::size + + detail::field::size); + detail::read< + std::uint16_t>(is, count_); // Count + detail::read(is, spill_); // Spill + update(); + // Excludes empty bucket entries + auto const w = count_ * ( + field::size + // Offset + 
field::size + // Size + key_size_); // Key + is = r.prepare (w); + std::memcpy(p_ + + field::size + // Count + field::size, // Spill + is.data(w), w); // Entries +} + +template +void +bucket_t<_>::write (ostream& os) const +{ + // Does not pad up to the block size. This + // is called to write to the data file. + auto const size = compact_size(); + // Bucket Record + std::memcpy (os.data(size), p_, size); +} + +template +template +void +bucket_t<_>::write (File& f, std::size_t offset) const +{ + // Includes zero pad up to the block + // size, to make the key file size always + // a multiple of the block size. + auto const size = compact_size(); + std::memset (p_ + size, 0, + block_size_ - size); + // Bucket Record + f.write (offset, p_, block_size_); +} + +template +void +bucket_t<_>::update() +{ + // Bucket Record + ostream os(p_, block_size_); + detail::write< + std::uint16_t>(os, count_); // Count + detail::write< + uint48_t>(os, spill_); // Spill +} + +// bool is true if key matches index +template +std::pair +bucket_t<_>::lower_bound ( + void const* key) const +{ + // Bucket Entry + auto const w = + field::size + // Offset + field::size + // Size + key_size_; // Key + // Bucket Record + auto const p = p_ + + field::size + // Count + field::size + // Spill + // Bucket Entry + field::size + // Offset + field::size; // Size + std::size_t step; + std::size_t first = 0; + std::size_t count = count_; + while (count > 0) + { + step = count / 2; + auto const i = first + step; + auto const c = std::memcmp ( + p + i * w, key, key_size_); + if (c < 0) + { + first = i + 1; + count -= step + 1; + } + else if (c > 0) + { + count = step; + } + else + { + return std::make_pair (i, true); + } + } + return std::make_pair (first, false); +} +using bucket = bucket_t<>; + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/buffers.h b/beast/nudb/detail/buffers.h new file mode 100644 index 0000000000..40cdb5af01 --- /dev/null +++ 
b/beast/nudb/detail/buffers.h @@ -0,0 +1,147 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_BUFFERS_H_INCLUDED +#define BEAST_NUDB_BUFFERS_H_INCLUDED + +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace detail { + +// Thread safe pool of temp buffers, +// to avoid needless calls to malloc. 
+template +class buffers_t +{ +private: + struct element + { + element* next; + }; + + std::size_t const block_size_; + std::mutex m_; + element* h_ = nullptr; + +public: + class value_type + { + private: + buffers_t& b_; + element* e_; + + public: + value_type (value_type const&) = delete; + value_type& operator= (value_type const&) = delete; + + explicit + value_type (buffers_t& b) + : b_ (b) + , e_ (b.acquire()) + { + } + + ~value_type() + { + b_.release(e_); + } + + std::uint8_t* + get() const + { + return const_cast ( + reinterpret_cast< + std::uint8_t const*>(e_ + 1)); + } + }; + + explicit + buffers_t (std::size_t block_size); + + ~buffers_t(); + +private: + element* + acquire(); + + void + release (element* e); +}; + +template +buffers_t<_>::buffers_t (std::size_t block_size) + : block_size_ (block_size) + , h_ (nullptr) +{ +} + +template +buffers_t<_>::~buffers_t() +{ + for (element* e = h_; e;) + { + element* const next = e->next; + e->~element(); + delete[] reinterpret_cast< + std::uint8_t*>(e); + e = next; + } +} + +template +auto +buffers_t<_>::acquire() -> + element* +{ + { + std::lock_guard m(m_); + element* e = h_; + if (e) + { + h_ = e->next; + return e; + } + } + return ::new( + new std::uint8_t[ + sizeof(element) + block_size_] + ) element; +} + +template +void +buffers_t<_>::release (element* e) +{ + std::lock_guard m(m_); + e->next = h_; + h_ = e; +} + +using buffers = buffers_t<>; + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/bulkio.h b/beast/nudb/detail/bulkio.h new file mode 100644 index 0000000000..e7ea76a445 --- /dev/null +++ b/beast/nudb/detail/bulkio.h @@ -0,0 +1,189 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + 
copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_BULKIO_H_INCLUDED +#define BEAST_NUDB_BULKIO_H_INCLUDED + +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace detail { + +// Scans a file in sequential large reads +template +class bulk_reader +{ +private: + File& f_; + buffer buf_; + std::size_t last_; // size of file + std::size_t offset_; // current position + std::size_t avail_; // bytes left to read in buf + std::size_t used_; // bytes consumed in buf + +public: + bulk_reader (File& f, std::size_t offset, + std::size_t last, std::size_t buffer_size); + + bool + eof() const + { + return offset_ - avail_ == last_; + } + + istream + prepare (std::size_t needed); +}; + +template +bulk_reader::bulk_reader (File& f, std::size_t offset, + std::size_t last, std::size_t buffer_size) + : f_ (f) + , last_ (last) + , offset_ (offset) + , avail_ (0) + , used_ (0) +{ + buf_.reserve (buffer_size); +} + + +template +istream +bulk_reader::prepare (std::size_t needed) +{ + if (needed > avail_) + { + if (offset_ + needed - avail_ > last_) + throw file_short_read_error(); + if (needed > buf_.size()) + { + buffer buf; + buf.reserve (needed); + std::memcpy (buf.get(), + buf_.get() + used_, avail_); + buf_ = std::move(buf); + } + else + { + std::memmove (buf_.get(), + buf_.get() + used_, avail_); + } + + auto const n = std::min( + 
buf_.size() - avail_, last_ - offset_); + f_.read(offset_, buf_.get() + avail_, n); + offset_ += n; + avail_ += n; + used_ = 0; + } + istream is(buf_.get() + used_, needed); + used_ += needed; + avail_ -= needed; + return is; +} + +//------------------------------------------------------------------------------ + +// Buffers file writes +// Caller must call flush manually at the end +template +class bulk_writer +{ +private: + File& f_; + buffer buf_; + std::size_t offset_; // current position + std::size_t used_; // bytes written to buf + +public: + bulk_writer (File& f, std::size_t offset, + std::size_t buffer_size); + + ostream + prepare (std::size_t needed); + + // Returns the number of bytes buffered + std::size_t + size() + { + return used_; + } + + // Return current offset in file. This + // is advanced with each call to prepare. + std::size_t + offset() const + { + return offset_ + used_; + } + + // flush cannot be called from the destructor + // since it can throw, so callers must do it manually. 
+ void + flush(); +}; + +template +bulk_writer::bulk_writer (File& f, + std::size_t offset, std::size_t buffer_size) + : f_ (f) + , offset_ (offset) + , used_ (0) + +{ + buf_.reserve (buffer_size); +} + +template +ostream +bulk_writer::prepare (std::size_t needed) +{ + if (used_ + needed > buf_.size()) + flush(); + if (needed > buf_.size()) + buf_.reserve (needed); + ostream os (buf_.get() + used_, needed); + used_ += needed; + return os; +} + +template +void +bulk_writer::flush() +{ + if (used_) + { + auto const offset = offset_; + auto const used = used_; + offset_ += used_; + used_ = 0; + f_.write (offset, buf_.get(), used); + } +} + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/cache.h b/beast/nudb/detail/cache.h new file mode 100644 index 0000000000..7c2a397bda --- /dev/null +++ b/beast/nudb/detail/cache.h @@ -0,0 +1,248 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_CACHE_H_INCLUDED +#define BEAST_NUDB_CACHE_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace detail { + +// Associative container storing +// bucket blobs keyed by bucket index. +template +class cache_t +{ +public: + using value_type = std::pair< + std::size_t, bucket>; + +private: + enum + { + // The arena's alloc size will be this + // multiple of the block size. + factor = 64 + }; + + using map_type = std::unordered_map < + std::size_t, void*>; + + struct transform + { + using argument_type = + typename map_type::value_type; + using result_type = value_type; + + cache_t* cache_; + + transform() + : cache_ (nullptr) + { + } + + explicit + transform (cache_t& cache) + : cache_ (&cache) + { + } + + value_type + operator() (argument_type const& e) const + { + return std::make_pair(e.first, + bucket (cache_->key_size_, + cache_->block_size_, e.second)); + } + }; + + std::size_t key_size_; + std::size_t block_size_; + arena arena_; + map_type map_; + +public: + using iterator = boost::transform_iterator< + transform, typename map_type::iterator, + value_type, value_type>; + + cache_t (cache_t const&) = delete; + cache_t& operator= (cache_t const&) = delete; + + cache_t(); + + explicit + cache_t (std::size_t key_size, + std::size_t block_size); + + cache_t& operator= (cache_t&& other); + + iterator + begin() + { + return iterator(map_.begin(), + transform(*this)); + } + + iterator + end() + { + return iterator(map_.end(), + transform(*this)); + } + + bool + empty() const + { + return map_.empty(); + } + + void + clear(); + + void + shrink_to_fit(); + + iterator + find (std::size_t n); + + // Create an empty bucket + // + bucket + create (std::size_t n); + + // Insert a copy of a bucket. 
+ // + iterator + insert (std::size_t n, bucket const& b); + + template + friend + void + swap (cache_t& lhs, cache_t& rhs); +}; + +// Constructs a cache that will never have inserts +template +cache_t<_>::cache_t() + : key_size_ (0) + , block_size_ (0) + , arena_ (32) // arbitrary small number +{ +} + +template +cache_t<_>::cache_t (std::size_t key_size, + std::size_t block_size) + : key_size_ (key_size) + , block_size_ (block_size) + , arena_ (block_size * factor) +{ +} + +template +cache_t<_>& +cache_t<_>::operator=(cache_t&& other) +{ + arena_ = std::move(other.arena_); + map_ = std::move(other.map_); + return *this; +} + +template +void +cache_t<_>::clear() +{ + arena_.clear(); + map_.clear(); +} + +template +void +cache_t<_>::shrink_to_fit() +{ + arena_.shrink_to_fit(); +} + +template +auto +cache_t<_>::find (std::size_t n) -> + iterator +{ + auto const iter = map_.find(n); + if (iter == map_.end()) + return iterator (map_.end(), + transform(*this)); + return iterator (iter, + transform(*this)); +} + +template +bucket +cache_t<_>::create (std::size_t n) +{ + auto const p = arena_.alloc (block_size_); + map_.emplace (n, p); + return bucket (key_size_, block_size_, + p, detail::empty); +} + +template +auto +cache_t<_>::insert (std::size_t n, + bucket const& b) -> + iterator +{ + void* const p = arena_.alloc( + b.block_size()); + ostream os(p, b.block_size()); + b.write(os); + auto const result = map_.emplace(n, p); + return iterator(result.first, + transform(*this)); +} + +template +void +swap (cache_t& lhs, cache_t& rhs) +{ + using std::swap; + swap(lhs.key_size_, rhs.key_size_); + swap(lhs.block_size_, rhs.block_size_); + swap(lhs.arena_, rhs.arena_); + swap(lhs.map_, rhs.map_); +} + +using cache = cache_t<>; + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/config.h b/beast/nudb/detail/config.h new file mode 100644 index 0000000000..8d8de463f0 --- /dev/null +++ b/beast/nudb/detail/config.h @@ -0,0 +1,75 @@ 
+//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_CONFIG_H_INCLUDED +#define BEAST_NUDB_CONFIG_H_INCLUDED + +#include + +// Compiles out domain checks +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK +# ifdef NDEBUG +# define BEAST_NUDB_NO_DOMAIN_CHECK 1 +# else +# define BEAST_NUDB_NO_DOMAIN_CHECK 0 +# endif +#endif + +namespace beast { +namespace nudb { + +// xxhasher is the fastest and the best choice +// when keys are already uniformly distributed +using default_hash = xxhasher; + +namespace detail { + +// Returns the closest power of 2 not less than x +template +std::size_t +ceil_pow2 (unsigned long long x) +{ + static const unsigned long long t[6] = { + 0xFFFFFFFF00000000ull, + 0x00000000FFFF0000ull, + 0x000000000000FF00ull, + 0x00000000000000F0ull, + 0x000000000000000Cull, + 0x0000000000000002ull + }; + + int y = (((x & (x - 1)) == 0) ? 0 : 1); + int j = 32; + int i; + + for(i = 0; i < 6; i++) { + int k = (((x & t[i]) == 0) ? 
0 : j); + y += k; + x >>= k; + j >>= 1; + } + + return std::size_t(1)< + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_FIELD_H_INCLUDED +#define BEAST_NUDB_FIELD_H_INCLUDED + +#include +#include +#include // for BEAST_CONSTEXPR +#include +#include +#include +#include // + +namespace beast { +namespace nudb { +namespace detail { + +// A 24-bit integer +struct uint24_t; + +// A 48-bit integer +struct uint48_t; + +// These metafunctions describe the binary format of fields on disk + +template +struct field; + +template <> +struct field +{ + static std::size_t BEAST_CONSTEXPR size = 1; + static std::size_t BEAST_CONSTEXPR max = 0xff; +}; + +template <> +struct field +{ + static std::size_t BEAST_CONSTEXPR size = 2; + static std::size_t BEAST_CONSTEXPR max = 0xffff; +}; + +template <> +struct field +{ + static std::size_t BEAST_CONSTEXPR size = 3; + static std::size_t BEAST_CONSTEXPR max = 0xffffff; +}; + +template <> +struct field +{ + static std::size_t BEAST_CONSTEXPR size = 4; + static std::size_t BEAST_CONSTEXPR max = 0xffffffff; +}; + +template <> +struct field +{ + static std::size_t BEAST_CONSTEXPR size = 6; + static std::size_t BEAST_CONSTEXPR max = 0x0000ffffffffffff; +}; + +template <> +struct 
field +{ + static std::size_t BEAST_CONSTEXPR size = 8; + static std::size_t BEAST_CONSTEXPR max = 0xffffffffffffffff; +}; + +// read field from istream + +template ::value>* = nullptr> +void +read (istream& is, U& u) +{ + T t; + std::uint8_t const* p = + is.data(field::size); + t = T(*p++)<< 8; + t = T(*p ) | t; + u = t; +} + +template ::value>* = nullptr> +void +read (istream& is, U& u) +{ + T t; + std::uint8_t const* p = + is.data(field::size); + t = (T(*p++)<<16) | t; + t = (T(*p++)<< 8) | t; + t = T(*p ) | t; + u = t; +} + +template ::value>* = nullptr> +void +read (istream& is, U& u) +{ + T t; + std::uint8_t const* p = + is.data(field::size); + t = T(*p++)<<24; + t = (T(*p++)<<16) | t; + t = (T(*p++)<< 8) | t; + t = T(*p ) | t; + u = t; +} + +template ::value>* = nullptr> +void +read (istream& is, U& u) +{ + std::uint64_t t; + std::uint8_t const* p = + is.data(field::size); + t = (std::uint64_t(*p++)<<40); + t = (std::uint64_t(*p++)<<32) | t; + t = (std::uint64_t(*p++)<<24) | t; + t = (std::uint64_t(*p++)<<16) | t; + t = (std::uint64_t(*p++)<< 8) | t; + t = std::uint64_t(*p ) | t; + u = t; +} + +template ::value>* = nullptr> +void +read (istream& is, U& u) +{ + T t; + std::uint8_t const* p = + is.data(field::size); + t = T(*p++)<<56; + t = (T(*p++)<<48) | t; + t = (T(*p++)<<40) | t; + t = (T(*p++)<<32) | t; + t = (T(*p++)<<24) | t; + t = (T(*p++)<<16) | t; + t = (T(*p++)<< 8) | t; + t = T(*p ) | t; + u = t; +} + +// write field to ostream + +template ::value>* = nullptr> +void +write (ostream& os, U const& u) +{ +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK + if (u > field::max) + throw std::logic_error( + "nudb: field max exceeded"); +#endif + T t = u; + std::uint8_t* p = + os.data(field::size); + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template ::value>* = nullptr> +void +write (ostream& os, U const& u) +{ +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK + if (u > field::max) + throw std::logic_error( + "nudb: field max exceeded"); +#endif + T t = u; + std::uint8_t* p = + 
os.data(field::size); + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template ::value>* = nullptr> +void +write (ostream& os, U const& u) +{ +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK + if (u > field::max) + throw std::logic_error( + "nudb: field max exceeded"); +#endif + T t = u; + std::uint8_t* p = + os.data(field::size); + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template ::value>* = nullptr> +void +write (ostream& os, U const& u) +{ +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK + if (u > field::max) + throw std::logic_error( + "nudb: field max exceeded"); +#endif + std::uint64_t const t = u; + std::uint8_t* p = + os.data(field::size); + *p++ = (t>>40)&0xff; + *p++ = (t>>32)&0xff; + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template ::value>* = nullptr> +void +write (ostream& os, U const& u) +{ +#ifndef BEAST_NUDB_NO_DOMAIN_CHECK + if (u > field::max) + throw std::logic_error( + "nudb: field max exceeded"); +#endif + T t = u; + std::uint8_t* p = + os.data(field::size); + *p++ = (t>>56)&0xff; + *p++ = (t>>48)&0xff; + *p++ = (t>>40)&0xff; + *p++ = (t>>32)&0xff; + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/format.h b/beast/nudb/detail/format.h new file mode 100644 index 0000000000..8e41e3b100 --- /dev/null +++ b/beast/nudb/detail/format.h @@ -0,0 +1,507 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_FORMAT_H_INCLUDED +#define BEAST_NUDB_FORMAT_H_INCLUDED + +#include +#include +#include +#include // for BEAST_CONSTEXPR +#include +#include +#include +#include +#include +#include +#include // + +namespace beast { +namespace nudb { +namespace detail { + +// Format of the nudb files: + +static std::size_t BEAST_CONSTEXPR currentVersion = 1; + +struct dat_file_header +{ + static std::size_t BEAST_CONSTEXPR size = + 8 + // Type + 2 + // Version + 8 + // Appnum + 8 + // Salt + 2 + // KeySize + 64; // (Reserved) + + char type[8]; + std::size_t version; + std::uint64_t appnum; + std::uint64_t salt; + std::size_t key_size; +}; + +struct key_file_header +{ + static std::size_t BEAST_CONSTEXPR size = + 8 + // Type + 2 + // Version + 8 + // Appnum + 8 + // Salt + 8 + // Pepper + 2 + // KeySize + 2 + // BlockSize + 2 + // LoadFactor + 64; // (Reserved) + + char type[8]; + std::size_t version; + std::uint64_t appnum; + std::uint64_t salt; + std::uint64_t pepper; + std::size_t key_size; + std::size_t block_size; + std::size_t load_factor; + + // Computed values + std::size_t capacity; + std::size_t bucket_size; + std::size_t buckets; + std::size_t modulus; +}; + +struct log_file_header +{ + static std::size_t BEAST_CONSTEXPR size = + 8 + // Type + 2 + // Version + 8 + // Appnum + 8 + // Salt + 8 + // Pepper + 2 + // KeySize + 8 + // KeyFileSize + 8; // DataFileSize + + char 
type[8]; + std::size_t version; + std::uint64_t appnum; + std::uint64_t salt; + std::uint64_t pepper; + std::size_t key_size; + std::size_t key_file_size; + std::size_t dat_file_size; +}; + +// Computes pepper from salt +// +template +std::size_t +pepper (std::size_t salt) +{ + Hasher h (salt); + h.append (&salt, sizeof(salt)); + return static_cast(h); +} + +// Returns the actual size of a bucket. +// This can be smaller than the block size. +// +template +std::size_t +bucket_size (std::size_t key_size, + std::size_t capacity) +{ + // Bucket Record + return + field::size + // Count + field::size + // Spill + capacity * ( + field::size + // Offset + field::size + // Size + key_size); // Key +} + +// Returns the size of a bucket large enough to +// hold size keys of length key_size. +// +inline +std::size_t +compact_size(std::size_t key_size, + std::size_t size) +{ + // Bucket Record + return + field::size + // Size + field::size + // Spill + size * ( + field::size + // Offset + field::size + // Size + key_size); // Key +} + +// Returns: number of keys that fit in a bucket +// +template +std::size_t +bucket_capacity (std::size_t key_size, + std::size_t block_size) +{ + // Bucket Record + auto const size = + field::size + // Count + field::size; // Spill + auto const entry_size = + field::size + // Offset + field::size + // Size + key_size; // Key + if (block_size < key_file_header::size || + block_size < size) + return 0; + return (block_size - size) / entry_size; +} + +// returns the number of bytes occupied by a value record +inline +std::size_t +data_size (std::size_t size, std::size_t key_size) +{ + // Data Record + return + field::size + // Size + key_size + // Key + size; // Data +} + +//------------------------------------------------------------------------------ + +// Read data file header from stream +template +void +read (istream& is, dat_file_header& dh) +{ + read (is, dh.type, sizeof(dh.type)); + read(is, dh.version); + read(is, dh.appnum); + read(is, 
dh.salt); + read(is, dh.key_size); + std::array zero; + read (is, zero.data(), zero.size()); +} + +// Read data file header from file +template +void +read (File& f, dat_file_header& dh) +{ + std::array buf; + try + { + f.read(0, buf.data(), buf.size()); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short data file header"); + } + istream is(buf); + read (is, dh); +} + +// Write data file header to stream +template +void +write (ostream& os, dat_file_header const& dh) +{ + write (os, "nudb.dat", 8); + write(os, dh.version); + write(os, dh.appnum); + write(os, dh.salt); + write(os, dh.key_size); + std::array zero; + zero.fill(0); + write (os, zero.data(), zero.size()); +} + +// Write data file header to file +template +void +write (File& f, dat_file_header const& dh) +{ + std::array buf; + ostream os(buf); + write(os, dh); + f.write (0, buf.data(), buf.size()); +} + +// Read key file header from stream +template +void +read (istream& is, std::size_t file_size, + key_file_header& kh) +{ + read(is, kh.type, sizeof(kh.type)); + read(is, kh.version); + read(is, kh.appnum); + read(is, kh.salt); + read(is, kh.pepper); + read(is, kh.key_size); + read(is, kh.block_size); + read(is, kh.load_factor); + std::array zero; + read (is, zero.data(), zero.size()); + + // VFALCO These need to be checked to handle + // when the file size is too small + kh.capacity = bucket_capacity( + kh.key_size, kh.block_size); + kh.bucket_size = bucket_size( + kh.key_size, kh.capacity); + if (file_size > kh.block_size) + { + // VFALCO This should be handled elsewhere. + // we shouldn't put the computed fields in this header. 
+ if (kh.block_size > 0) + kh.buckets = (file_size - kh.bucket_size) + / kh.block_size; + else + // VFALCO Corruption or logic error + kh.buckets = 0; + } + else + { + kh.buckets = 0; + } + kh.modulus = ceil_pow2(kh.buckets); +} + +// Read key file header from file +template +void +read (File& f, key_file_header& kh) +{ + std::array buf; + try + { + f.read(0, buf.data(), buf.size()); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short key file header"); + } + istream is(buf); + read (is, f.actual_size(), kh); +} + +// Write key file header to stream +template +void +write (ostream& os, key_file_header const& kh) +{ + write (os, "nudb.key", 8); + write(os, kh.version); + write(os, kh.appnum); + write(os, kh.salt); + write(os, kh.pepper); + write(os, kh.key_size); + write(os, kh.block_size); + write(os, kh.load_factor); + std::array zero; + zero.fill (0); + write (os, zero.data(), zero.size()); +} + +// Write key file header to file +template +void +write (File& f, key_file_header const& kh) +{ + buffer buf; + buf.reserve (kh.block_size); + if (kh.block_size < key_file_header::size) + throw std::logic_error( + "nudb: block size too small"); + std::fill(buf.get(), buf.get() + buf.size(), 0); + ostream os (buf.get(), buf.size()); + write (os, kh); + f.write (0, buf.get(), buf.size()); +} + +// Read log file header from stream +template +void +read (istream& is, log_file_header& lh) +{ + read (is, lh.type, sizeof(lh.type)); + read(is, lh.version); + read(is, lh.appnum); + read(is, lh.salt); + read(is, lh.pepper); + read(is, lh.key_size); + read(is, lh.key_file_size); + read(is, lh.dat_file_size); +} + +// Read log file header from file +template +void +read (File& f, log_file_header& lh) +{ + std::array buf; + // Can throw file_short_read_error to callers + f.read (0, buf.data(), buf.size()); + istream is(buf); + read (is, lh); +} + +// Write log file header to stream +template +void +write (ostream& os, log_file_header const& lh) +{ + 
write (os, "nudb.log", 8); + write(os, lh.version); + write(os, lh.appnum); + write(os, lh.salt); + write(os, lh.pepper); + write(os, lh.key_size); + write(os, lh.key_file_size); + write(os, lh.dat_file_size); +} + +// Write log file header to file +template +void +write (File& f, log_file_header const& lh) +{ + std::array buf; + ostream os (buf); + write (os, lh); + f.write (0, buf.data(), buf.size()); +} + +template +void +verify (key_file_header const& kh) +{ + std::string const type (kh.type, 8); + if (type != "nudb.key") + throw store_corrupt_error ( + "bad type in key file"); + if (kh.version != currentVersion) + throw store_corrupt_error ( + "bad version in key file"); + if (kh.pepper != pepper(kh.salt)) + throw store_corrupt_error( + "wrong hash function for key file"); + if (kh.key_size < 1) + throw store_corrupt_error ( + "bad key size in key file"); + if (kh.load_factor < 1) + throw store_corrupt_error ( + "bad load factor in key file"); + if (kh.capacity < 1) + throw store_corrupt_error ( + "bad capacity in key file"); + if (kh.buckets < 1) + throw store_corrupt_error ( + "bad key file size"); +} + +template +void +verify (dat_file_header const& dh) +{ + std::string const type (dh.type, 8); + if (type != "nudb.dat") + throw store_corrupt_error ( + "bad type in data file"); + if (dh.version != currentVersion) + throw store_corrupt_error ( + "bad version in data file"); + if (dh.key_size < 1) + throw store_corrupt_error ( + "bad key size in data file"); +} + +template +void +verify (log_file_header const& lh) +{ + std::string const type (lh.type, 8); + if (type != "nudb.log") + throw store_corrupt_error ( + "bad type in log file"); + if (lh.version != currentVersion) + throw store_corrupt_error ( + "bad version in log file"); + if (lh.pepper != pepper(lh.salt)) + throw store_corrupt_error( + "wrong hash function for log file"); + if (lh.key_size < 1) + throw store_corrupt_error ( + "bad key size in log file"); +} + +// Make sure key file and value file 
headers match +template +void +verify (dat_file_header const& dh, + key_file_header const& kh) +{ + verify (dh); + verify (kh); + if (kh.salt != dh.salt) + throw store_corrupt_error( + "salt mismatch"); + if (kh.key_size != dh.key_size) + throw store_corrupt_error( + "key size mismatch"); + if (kh.appnum != dh.appnum) + throw store_corrupt_error( + "appnum mismatch"); +} + +template +void +verify (key_file_header const& kh, + log_file_header const& lh) +{ + verify(lh); + if (kh.salt != lh.salt) + throw store_corrupt_error ( + "salt mismatch in log file"); + if (kh.key_size != lh.key_size) + throw store_corrupt_error ( + "key size mismatch in log file"); + if (kh.appnum != lh.appnum) + throw store_corrupt_error( + "appnum mismatch"); +} + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/gentex.h b/beast/nudb/detail/gentex.h new file mode 100644 index 0000000000..bbd21d49d7 --- /dev/null +++ b/beast/nudb/detail/gentex.h @@ -0,0 +1,276 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
namespace beast {
namespace nudb {
namespace detail {

// Generation counting mutex
//
// The exclusive side (lock/unlock) begins a new generation, then
// waits until every participant of earlier generations has left.
// The shared side (lock_gen/unlock_gen) joins the current
// generation and later leaves it, identified by the generation
// number handed out at join time.
//
template <class = void>
class gentex_t
{
private:
    std::mutex m_;
    std::size_t gen_ = 0;   // current generation number
    std::size_t cur_ = 0;   // participants in the current generation
    std::size_t prev_ = 0;  // participants in earlier generations
    std::condition_variable cond_;

public:
    gentex_t() = default;
    gentex_t (gentex_t const&) = delete;
    gentex_t& operator= (gentex_t const&) = delete;

    // Begin a new generation. Participants already in
    // flight are charged to the earlier generations.
    void
    lock();

    // Block until all participants of earlier generations leave.
    void
    unlock();

    // Join the current generation.
    // @return The generation number joined.
    std::size_t
    lock_gen();

    // Leave the generation identified by gen.
    void
    unlock_gen (std::size_t gen);
};

template <class _>
void
gentex_t<_>::lock()
{
    std::lock_guard<
        std::mutex> l(m_);
    prev_ += cur_;
    cur_ = 0;
    ++gen_;
}

template <class _>
void
gentex_t<_>::unlock()
{
    std::unique_lock<
        std::mutex> l(m_);
    while (prev_ > 0)
        cond_.wait(l);
}

template <class _>
std::size_t
gentex_t<_>::lock_gen()
{
    std::lock_guard<
        std::mutex> l(m_);
    ++cur_;
    return gen_;
}

template <class _>
void
gentex_t<_>::unlock_gen (
    std::size_t gen)
{
    std::lock_guard<
        std::mutex> l(m_);
    if (gen == gen_)
    {
        --cur_;
    }
    else
    {
        --prev_;
        if (prev_ == 0)
            cond_.notify_all();
    }
}

using gentex = gentex_t<>;

//------------------------------------------------------------------------------

// Scoped participation in a gentex generation,
// in the style of std::unique_lock.
template <class GenerationLockable>
class genlock
{
private:
    bool owned_ = false;
    GenerationLockable* g_ = nullptr;
    std::size_t gen_ = 0;   // meaningful only while owned_ == true

public:
    using mutex_type = GenerationLockable;

    genlock() = default;
    genlock (genlock const&) = delete;
    genlock& operator= (genlock const&) = delete;

    genlock (genlock&& other);

    genlock& operator= (genlock&& other);

    explicit
    genlock (mutex_type& g);

    genlock (mutex_type& g, std::defer_lock_t);

    ~genlock();

    mutex_type*
    mutex() noexcept
    {
        return g_;
    }

    bool
    owns_lock() const noexcept
    {
        return g_ && owned_;
    }

    explicit
    operator bool() const noexcept
    {
        return owns_lock();
    }

    void
    lock();

    void
    unlock();

    // Disassociate from the mutex without unlocking.
    mutex_type*
    release() noexcept;

    template <class G>
    friend
    void
    swap (genlock<G>& lhs, genlock<G>& rhs) noexcept;
};

template <class G>
genlock<G>::genlock (genlock&& other)
    : owned_ (other.owned_)
    , g_ (other.g_)
    // FIX: the generation number must travel with ownership,
    // otherwise a moved-to lock would later call unlock_gen()
    // with an indeterminate generation and corrupt the counters.
    , gen_ (other.gen_)
{
    other.owned_ = false;
    other.g_ = nullptr;
}

template <class G>
genlock<G>&
genlock<G>::operator= (genlock&& other)
{
    if (owns_lock())
        unlock();
    owned_ = other.owned_;
    g_ = other.g_;
    gen_ = other.gen_;  // FIX: transfer the generation number
    other.owned_ = false;
    other.g_ = nullptr;
    return *this;
}

template <class G>
genlock<G>::genlock (mutex_type& g)
    : g_ (&g)
{
    lock();
}

template <class G>
genlock<G>::genlock (
    mutex_type& g, std::defer_lock_t)
    : g_ (&g)
{
}

template <class G>
genlock<G>::~genlock()
{
    if (owns_lock())
        unlock();
}

template <class G>
void
genlock<G>::lock()
{
    if (! g_)
        throw std::system_error(std::make_error_code(
            std::errc::operation_not_permitted),
                "genlock: no associated mutex");
    if (owned_)
        throw std::system_error(std::make_error_code(
            std::errc::resource_deadlock_would_occur),
                "genlock: already owned");
    gen_ = g_->lock_gen();
    owned_ = true;
}

template <class G>
void
genlock<G>::unlock()
{
    if (! g_)
        throw std::system_error(std::make_error_code(
            std::errc::operation_not_permitted),
                "genlock: no associated mutex");
    if (! owned_)
        throw std::system_error(std::make_error_code(
            std::errc::operation_not_permitted),
                "genlock: not owned");
    g_->unlock_gen (gen_);
    owned_ = false;
}

template <class G>
auto
genlock<G>::release() noexcept ->
    mutex_type*
{
    mutex_type* const g = g_;
    g_ = nullptr;
    return g;
}

template <class G>
void
swap (genlock<G>& lhs, genlock<G>& rhs) noexcept
{
    using namespace std;
    swap (lhs.owned_, rhs.owned_);
    swap (lhs.g_, rhs.g_);
    swap (lhs.gen_, rhs.gen_);  // FIX: keep gen_ paired with ownership
}

} // detail
} // nudb
} // beast
+template +class pool_t +{ +public: + struct value_type; + class compare; + +private: + using map_type = std::map< + value_type, std::size_t, compare>; + + arena arena_; + std::size_t key_size_; + std::size_t data_size_ = 0; + map_type map_; + +public: + using iterator = + typename map_type::iterator; + + pool_t (pool_t const&) = delete; + pool_t& operator= (pool_t const&) = delete; + + explicit + pool_t (std::size_t key_size, + std::size_t alloc_size); + + pool_t& operator= (pool_t&& other); + + iterator + begin() + { + return map_.begin(); + } + + iterator + end() + { + return map_.end(); + } + + bool + empty() + { + return map_.size() == 0; + } + + // Returns the number of elements in the pool + std::size_t + size() const + { + return map_.size(); + } + + // Returns the sum of data sizes in the pool + std::size_t + data_size() const + { + return data_size_; + } + + void + clear(); + + void + shrink_to_fit(); + + iterator + find (void const* key); + + // Insert a value + // @param h The hash of the key + void + insert (std::size_t h, void const* key, + void const* buffer, std::size_t size); + + template + friend + void + swap (pool_t& lhs, pool_t& rhs); +}; + +template +struct pool_t<_>::value_type +{ + std::size_t hash; + std::size_t size; + void const* key; + void const* data; + + value_type (value_type const&) = default; + value_type& operator= (value_type const&) = default; + + value_type (std::size_t hash_, std::size_t size_, + void const* key_, void const* data_) + : hash (hash_) + , size (size_) + , key (key_) + , data (data_) + { + } +}; + +template +class pool_t<_>::compare +{ +private: + std::size_t key_size_; + +public: + using result_type = bool; + using first_argument_type = value_type; + using second_argument_type = value_type; + + compare (compare const&) = default; + compare& operator= (compare const&) = default; + + compare (std::size_t key_size) + : key_size_ (key_size) + { + } + + bool + operator()(value_type const& lhs, + value_type const& 
rhs) const + { + return std::memcmp( + lhs.key, rhs.key, key_size_) < 0; + } +}; + +//------------------------------------------------------------------------------ + +template +pool_t<_>::pool_t (std::size_t key_size, + std::size_t alloc_size) + : arena_ (alloc_size) + , key_size_ (key_size) + , map_ (compare(key_size)) +{ +} + +template +pool_t<_>& +pool_t<_>::operator= (pool_t&& other) +{ + arena_ = std::move(other.arena_); + key_size_ = other.key_size_; + data_size_ = other.data_size_; + map_ = std::move(other.map_); + return *this; +} + +template +void +pool_t<_>::clear() +{ + arena_.clear(); + data_size_ = 0; + map_.clear(); +} + +template +void +pool_t<_>::shrink_to_fit() +{ + arena_.shrink_to_fit(); +} + +template +auto +pool_t<_>::find (void const* key) -> + iterator +{ + // VFALCO need is_transparent here + value_type tmp (0, 0, key, nullptr); + auto const iter = map_.find(tmp); + return iter; +} + +template +void +pool_t<_>::insert (std::size_t h, + void const* key, void const* data, + std::size_t size) +{ + auto const k = arena_.alloc(key_size_); + auto const d = arena_.alloc(size); + std::memcpy(k, key, key_size_); + std::memcpy(d, data, size); + auto const result = map_.emplace( + std::piecewise_construct, + std::make_tuple(h, size, k, d), + std::make_tuple(0)); + (void)result.second; + // Must not already exist! 
+ assert(result.second); + data_size_ += size; +} + +template +void +swap (pool_t<_>& lhs, pool_t<_>& rhs) +{ + using std::swap; + swap(lhs.arena_, rhs.arena_); + swap(lhs.key_size_, rhs.key_size_); + swap(lhs.data_size_, rhs.data_size_); + swap(lhs.map_, rhs.map_); +} + +using pool = pool_t<>; + +} // detail +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/posix_file.h b/beast/nudb/detail/posix_file.h new file mode 100644 index 0000000000..81061e5d91 --- /dev/null +++ b/beast/nudb/detail/posix_file.h @@ -0,0 +1,363 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_POSIX_FILE_H_INCLUDED +#define BEAST_NUDB_POSIX_FILE_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include + +#ifndef BEAST_NUDB_POSIX_FILE +# ifdef _MSC_VER +# define BEAST_NUDB_POSIX_FILE 0 +# else +# define BEAST_NUDB_POSIX_FILE 1 +# endif +#endif + +#if BEAST_NUDB_POSIX_FILE +# include +# include +# include +# include +# include +#endif + +namespace beast { +namespace nudb { + +#if BEAST_NUDB_POSIX_FILE + +namespace detail { + +class file_posix_error : public file_error +{ +public: + explicit + file_posix_error (char const* m, + int errnum = errno) + : file_error (std::string("nudb: ") + m + + ", " + text(errnum)) + { + } + + explicit + file_posix_error (std::string const& m, + int errnum = errno) + : file_error (std::string("nudb: ") + m + + ", " + text(errnum)) + { + } + +private: + static + std::string + text (int errnum) + { + return ::strerror(errnum); + } +}; + +//------------------------------------------------------------------------------ + +template +class posix_file +{ +private: + int fd_ = -1; + +public: + posix_file() = default; + posix_file (posix_file const&) = delete; + posix_file& operator= (posix_file const&) = delete; + + ~posix_file(); + + posix_file (posix_file&&); + + posix_file& + operator= (posix_file&& other); + + bool + is_open() const + { + return fd_ != -1; + } + + void + close(); + + bool + create (file_mode mode, path_type const& path); + + bool + open (file_mode mode, path_type const& path); + + static + bool + erase (path_type const& path); + + std::size_t + actual_size() const; + + void + read (std::size_t offset, + void* buffer, std::size_t bytes); + + void + write (std::size_t offset, + void const* buffer, std::size_t bytes); + + void + sync(); + + void + trunc (std::size_t length); + +private: + static + std::pair + flags (file_mode mode); +}; + +template +posix_file<_>::~posix_file() +{ + 
close(); +} + +template +posix_file<_>::posix_file (posix_file &&other) + : fd_ (other.fd_) +{ + other.fd_ = -1; +} + +template +posix_file<_>& +posix_file<_>::operator= (posix_file&& other) +{ + if (&other == this) + return *this; + close(); + fd_ = other.fd_; + other.fd_ = -1; + return *this; +} + +template +void +posix_file<_>::close() +{ + if (fd_ != -1) + { + if (::close(fd_) != 0) + throw file_posix_error( + "close file"); + fd_ = -1; + } +} + +template +bool +posix_file<_>::create (file_mode mode, + path_type const& path) +{ + auto const result = flags(mode); + assert(! is_open()); + fd_ = ::open(path.c_str(), result.first); + if (fd_ != -1) + { + ::close(fd_); + fd_ = -1; + return false; + } + int errnum = errno; + if (errnum != ENOENT) + throw file_posix_error( + "open file", errnum); + fd_ = ::open(path.c_str(), + result.first | O_CREAT, 0644); + if (fd_ == -1) + throw file_posix_error( + "create file"); +#ifndef __APPLE__ + if (::posix_fadvise(fd_, 0, 0, result.second) != 0) + throw file_posix_error( + "fadvise"); +#endif + return true; +} + +template +bool +posix_file<_>::open (file_mode mode, + path_type const& path) +{ + assert(! 
is_open()); + auto const result = flags(mode); + fd_ = ::open(path.c_str(), result.first); + if (fd_ == -1) + { + int errnum = errno; + if (errnum == ENOENT) + return false; + throw file_posix_error( + "open file", errnum); + } +#ifndef __APPLE__ + if (::posix_fadvise(fd_, 0, 0, result.second) != 0) + throw file_posix_error( + "fadvise"); +#endif + return true; +} + +template +bool +posix_file<_>::erase (path_type const& path) +{ + if (::unlink(path.c_str()) != 0) + { + int const ec = errno; + if (ec != ENOENT) + throw file_posix_error( + "unlink", ec); + return false; + } + return true; +} + +template +std::size_t +posix_file<_>::actual_size() const +{ + struct stat st; + if (::fstat(fd_, &st) != 0) + throw file_posix_error( + "fstat"); + return st.st_size; +} + +template +void +posix_file<_>::read (std::size_t offset, + void* buffer, std::size_t bytes) +{ + auto const n = ::pread ( + fd_, buffer, bytes, offset); + // VFALCO end of file should throw short_read + if (n == -1) + throw file_posix_error( + "pread"); + if (n < bytes) + throw file_short_read_error(); +} + +template +void +posix_file<_>::write (std::size_t offset, + void const* buffer, std::size_t bytes) +{ + auto const n = ::pwrite ( + fd_, buffer, bytes, offset); + if (n == -1) + throw file_posix_error( + "pwrite"); + if (n < bytes) + throw file_short_write_error(); +} + +template +void +posix_file<_>::sync() +{ + if (::fsync(fd_) != 0) + throw file_posix_error( + "fsync"); +} + +template +void +posix_file<_>::trunc (std::size_t length) +{ + if (::ftruncate(fd_, length) != 0) + throw file_posix_error( + "ftruncate"); +} + +template +std::pair +posix_file<_>::flags (file_mode mode) +{ + std::pair result; + switch(mode) + { + case file_mode::scan: + result.first = + O_RDONLY; +#ifndef __APPLE__ + result.second = + POSIX_FADV_SEQUENTIAL; +#endif + break; + case file_mode::read: + result.first = + O_RDONLY; +#ifndef __APPLE__ + result.second = + POSIX_FADV_RANDOM; +#endif + break; + case file_mode::append: 
+ result.first = + O_RDWR | + O_APPEND; +#ifndef __APPLE__ + result.second = + POSIX_FADV_RANDOM; +#endif + break; + case file_mode::write: + result.first = + O_RDWR; +#ifndef __APPLE__ + result.second = + POSIX_FADV_NORMAL; +#endif + break; + } + return result; +} + +} // detail + +using posix_file = detail::posix_file<>; + +#endif + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/detail/stream.h b/beast/nudb/detail/stream.h new file mode 100644 index 0000000000..93862ee37d --- /dev/null +++ b/beast/nudb/detail/stream.h @@ -0,0 +1,232 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
namespace beast {
namespace nudb {
namespace detail {

// Simple growable memory buffer
class buffer
{
private:
    std::size_t size_ = 0;
    std::unique_ptr<std::uint8_t[]> buf_;

public:
    buffer() = default;
    buffer (buffer const&) = delete;
    buffer& operator= (buffer const&) = delete;

    // Allocate a buffer of n uninitialized bytes.
    explicit
    buffer (std::size_t n)
        : size_ (n)
        , buf_ (new std::uint8_t[n])
    {
    }

    buffer (buffer&& other)
        : size_ (other.size_)
        , buf_ (std::move(other.buf_))
    {
        other.size_ = 0;
    }

    buffer&
    operator= (buffer&& other)
    {
        size_ = other.size_;
        buf_ = std::move(other.buf_);
        other.size_ = 0;
        return *this;
    }

    std::size_t
    size() const
    {
        return size_;
    }

    std::uint8_t*
    get() const
    {
        return buf_.get();
    }

    // Ensure at least n bytes of storage. Existing
    // contents are NOT preserved when growing.
    void
    reserve (std::size_t n)
    {
        if (size_ < n)
            buf_.reset (new std::uint8_t[n]);
        size_ = n;
    }
};

//------------------------------------------------------------------------------

// Input stream from bytes
template <class = void>
class istream_t
{
private:
    std::uint8_t const* next_;
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
    std::size_t remain_;    // bytes left in the underlying range
#endif

public:
    istream_t (istream_t const&) = default;
    istream_t& operator= (istream_t const&) = default;

    istream_t (void const* data, std::size_t
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        bytes
    #endif
    )
        : next_ (reinterpret_cast<
            std::uint8_t const*>(data))
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        , remain_ (bytes)
    #endif
    {
    }

    template <std::size_t N>
    istream_t (std::array<std::uint8_t, N> const& a)
        : next_ (a.data())
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        , remain_ (a.size())
    #endif
    {
    }

    // Consume the next bytes of input and
    // return a pointer to their start.
    std::uint8_t const*
    data (std::size_t bytes)
    {
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        if (bytes > remain_)
            throw std::logic_error(
                "nudb: istream");
        remain_ -= bytes;
    #endif
        std::uint8_t const* const p = next_;
        next_ += bytes;
        return p;
    }
};

using istream = istream_t<>;

//------------------------------------------------------------------------------

// Output stream to bytes
template <class = void>
class ostream_t
{
private:
    std::uint8_t* next_;
    std::size_t written_ = 0;
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
    std::size_t remain_;    // bytes left in the underlying range
#endif

public:
    ostream_t (ostream_t const&) = default;
    ostream_t& operator= (ostream_t const&) = default;

    ostream_t (void* data, std::size_t
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        bytes
    #endif
    )
        : next_ (reinterpret_cast<std::uint8_t*>(data))
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        , remain_ (bytes)
    #endif
    {
    }

    template <std::size_t N>
    ostream_t (std::array<std::uint8_t, N>& a)
        : next_ (a.data())
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        , remain_ (a.size())
    #endif
    {
    }

    // Returns the number of bytes written
    std::size_t
    size() const
    {
        return written_;
    }

    // Reserve the next bytes of output and
    // return a pointer to their start.
    std::uint8_t*
    data (std::size_t bytes)
    {
    #if ! BEAST_NUDB_NO_DOMAIN_CHECK
        if (bytes > remain_)
            throw std::logic_error(
                "nudb: ostream");
        remain_ -= bytes;
    #endif
        std::uint8_t* const p = next_;
        next_ += bytes;
        written_ += bytes;
        return p;
    }
};

using ostream = ostream_t<>;

//------------------------------------------------------------------------------

// read blob
inline
void
read (istream& is,
    void* buffer, std::size_t bytes)
{
    std::memcpy (buffer, is.data(bytes), bytes);
}

// write blob
inline
void
write (ostream& os,
    void const* buffer, std::size_t bytes)
{
    std::memcpy (os.data(bytes), buffer, bytes);
}

} // detail
} // nudb
} // beast
#ifndef BEAST_NUDB_WIN32_FILE
# ifdef _MSC_VER
#  define BEAST_NUDB_WIN32_FILE 1
# else
#  define BEAST_NUDB_WIN32_FILE 0
# endif
#endif

#if BEAST_NUDB_WIN32_FILE
# ifndef NOMINMAX
#  define NOMINMAX
# endif
# ifndef UNICODE
#  define UNICODE
# endif
# ifndef STRICT
#  define STRICT
# endif
# include <Windows.h>
# undef NOMINMAX
# undef UNICODE
# undef STRICT
#endif

namespace beast {
namespace nudb {

#if BEAST_NUDB_WIN32_FILE

namespace detail {

// Win32 error code wrapped as a file_error, with the
// system-provided message text appended to the message.
class file_win32_error
    : public file_error
{
public:
    explicit
    file_win32_error (char const* m,
        DWORD dwError = ::GetLastError())
        : file_error (std::string("nudb: ") + m +
            ", " + text(dwError))
    {
    }

    explicit
    file_win32_error (std::string const& m,
        DWORD dwError = ::GetLastError())
        : file_error (std::string("nudb: ") + m +
            ", " + text(dwError))
    {
    }

private:
    template <class = void>
    static
    std::string
    text (DWORD dwError);
};

template <class _>
std::string
file_win32_error::text (DWORD dwError)
{
    LPSTR buf = nullptr;
    size_t const size = FormatMessageA (
        FORMAT_MESSAGE_ALLOCATE_BUFFER |
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dwError,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPSTR)&buf,
        0,
        NULL);
    std::string s;
    if (size)
    {
        s.append(buf, size);
        LocalFree (buf);
    }
    else
    {
        s = "error " + std::to_string(dwError);
    }
    return s;
}

//------------------------------------------------------------------------------

template <class = void>
class win32_file
{
private:
    HANDLE hf_ = INVALID_HANDLE_VALUE;

public:
    win32_file() = default;
    win32_file (win32_file const&) = delete;
    win32_file& operator= (win32_file const&) = delete;

    ~win32_file();

    win32_file (win32_file&&);

    win32_file&
    operator= (win32_file&& other);

    bool
    is_open() const
    {
        return hf_ != INVALID_HANDLE_VALUE;
    }

    void
    close();

    // Returns:
    //  `false` if the file already exists
    //  `true` on success, else throws
    //
    bool
    create (file_mode mode, std::string const& path);

    // Returns:
    //  `false` if the file doesnt exist
    //  `true` on success, else throws
    //
    bool
    open (file_mode mode, std::string const& path);

    // Effects:
    //  Removes the file from the file system.
    //
    // Throws:
    //  Throws is an error occurs.
    //
    // Returns:
    //  `true` if the file was erased
    //  `false` if the file was not present
    //
    static
    bool
    erase (path_type const& path);

    // Returns:
    //  Current file size in bytes measured by operating system
    // Requires:
    //  is_open() == true
    //
    std::size_t
    actual_size() const;

    void
    read (std::size_t offset,
        void* buffer, std::size_t bytes);

    void
    write (std::size_t offset,
        void const* buffer, std::size_t bytes);

    void
    sync();

    void
    trunc (std::size_t length);

private:
    // Maps a file_mode to (desired access, flags and attributes).
    static
    std::pair<DWORD, DWORD>
    flags (file_mode mode);
};

template <class _>
win32_file<_>::~win32_file()
{
    close();
}

template <class _>
win32_file<_>::win32_file (win32_file&& other)
    : hf_ (other.hf_)
{
    other.hf_ = INVALID_HANDLE_VALUE;
}

template <class _>
win32_file<_>&
win32_file<_>::operator= (win32_file&& other)
{
    if (&other == this)
        return *this;
    close();
    hf_ = other.hf_;
    other.hf_ = INVALID_HANDLE_VALUE;
    return *this;
}

template <class _>
void
win32_file<_>::close()
{
    if (hf_ != INVALID_HANDLE_VALUE)
    {
        ::CloseHandle(hf_);
        hf_ = INVALID_HANDLE_VALUE;
    }
}

template <class _>
bool
win32_file<_>::create (file_mode mode,
    std::string const& path)
{
    assert(! is_open());
    auto const f = flags(mode);
    hf_ = ::CreateFileA (path.c_str(),
        f.first,
        0,
        NULL,
        CREATE_NEW,
        f.second,
        NULL);
    if (hf_ == INVALID_HANDLE_VALUE)
    {
        DWORD const dwError = ::GetLastError();
        if (dwError != ERROR_FILE_EXISTS)
            throw file_win32_error(
                "create file", dwError);
        return false;
    }
    return true;
}

template <class _>
bool
win32_file<_>::open (file_mode mode,
    std::string const& path)
{
    assert(! is_open());
    auto const f = flags(mode);
    hf_ = ::CreateFileA (path.c_str(),
        f.first,
        0,
        NULL,
        OPEN_EXISTING,
        f.second,
        NULL);
    if (hf_ == INVALID_HANDLE_VALUE)
    {
        DWORD const dwError = ::GetLastError();
        if (dwError != ERROR_FILE_NOT_FOUND &&
                dwError != ERROR_PATH_NOT_FOUND)
            throw file_win32_error(
                "open file", dwError);
        return false;
    }
    return true;
}

template <class _>
bool
win32_file<_>::erase (path_type const& path)
{
    BOOL const bSuccess =
        ::DeleteFileA(path.c_str());
    if (! bSuccess)
    {
        DWORD dwError = ::GetLastError();
        if (dwError != ERROR_FILE_NOT_FOUND &&
                dwError != ERROR_PATH_NOT_FOUND)
            // FIX: pass the saved error code; calling the
            // default-argument GetLastError() again could
            // report a stale value.
            throw file_win32_error(
                "erase file", dwError);
        return false;
    }
    return true;
}

// Return: Current file size in bytes measured by operating system
template <class _>
std::size_t
win32_file<_>::actual_size() const
{
    assert(is_open());
    LARGE_INTEGER fileSize;
    if (! ::GetFileSizeEx(hf_, &fileSize))
        throw file_win32_error(
            "size file");
    return static_cast<std::size_t>(fileSize.QuadPart);
}

template <class _>
void
win32_file<_>::read (std::size_t offset,
    void* buffer, std::size_t bytes)
{
    DWORD bytesRead;
    LARGE_INTEGER li;
    li.QuadPart = static_cast<LONGLONG>(offset);
    OVERLAPPED ov;
    ov.Offset = li.LowPart;
    ov.OffsetHigh = li.HighPart;
    ov.hEvent = NULL;
    BOOL const bSuccess = ::ReadFile(
        hf_, buffer, static_cast<DWORD>(bytes),
            &bytesRead, &ov);
    if (! bSuccess)
    {
        DWORD const dwError = ::GetLastError();
        if (dwError != ERROR_HANDLE_EOF)
            throw file_win32_error(
                "read file", dwError);
        throw file_short_read_error();
    }
    if (bytesRead != bytes)
        throw file_short_read_error();
}

template <class _>
void
win32_file<_>::write (std::size_t offset,
    void const* buffer, std::size_t bytes)
{
    LARGE_INTEGER li;
    li.QuadPart = static_cast<LONGLONG>(offset);
    OVERLAPPED ov;
    ov.Offset = li.LowPart;
    ov.OffsetHigh = li.HighPart;
    ov.hEvent = NULL;
    DWORD bytesWritten;
    BOOL const bSuccess = ::WriteFile(
        hf_, buffer, static_cast<DWORD>(bytes),
            &bytesWritten, &ov);
    if (! bSuccess)
        throw file_win32_error(
            "write file");
    if (bytesWritten != bytes)
        throw file_short_write_error();
}

template <class _>
void
win32_file<_>::sync()
{
    BOOL const bSuccess =
        ::FlushFileBuffers(hf_);
    if (! bSuccess)
        throw file_win32_error(
            "sync file");
}

template <class _>
void
win32_file<_>::trunc (std::size_t length)
{
    LARGE_INTEGER li;
    li.QuadPart = length;
    BOOL bSuccess;
    bSuccess = ::SetFilePointerEx(
        hf_, li, NULL, FILE_BEGIN);
    if (bSuccess)
        bSuccess = SetEndOfFile(hf_);
    if (! bSuccess)
        throw file_win32_error(
            "trunc file");
}

template <class _>
std::pair<DWORD, DWORD>
win32_file<_>::flags (file_mode mode)
{
    // FIX: removed a stray `mode = file_mode::write;` that
    // clobbered the caller's requested mode; it forced every
    // open to request GENERIC_WRITE access (breaking read-only
    // opens) and discarded the sequential-scan hint.
    std::pair<DWORD, DWORD> result(0, 0);
    switch (mode)
    {
    case file_mode::scan:
        result.first =
            GENERIC_READ;
        result.second =
            FILE_FLAG_SEQUENTIAL_SCAN;
        break;

    case file_mode::read:
        result.first =
            GENERIC_READ;
        result.second =
            FILE_FLAG_RANDOM_ACCESS;
        break;

    case file_mode::append:
        result.first =
            GENERIC_READ | GENERIC_WRITE;
        result.second =
            FILE_FLAG_RANDOM_ACCESS
            //| FILE_FLAG_NO_BUFFERING
            //| FILE_FLAG_WRITE_THROUGH
            ;
        break;

    case file_mode::write:
        result.first =
            GENERIC_READ | GENERIC_WRITE;
        result.second =
            FILE_FLAG_RANDOM_ACCESS;
        break;
    }
    return result;
}

} // detail

using win32_file = detail::win32_file<>;

#endif

} // nudb
} // beast
namespace beast {
namespace nudb {

// All exceptions thrown by nudb are derived
// from std::exception except for fail_error

/** Base class for all errors thrown by file classes. */
struct file_error : std::runtime_error
{
    explicit
    file_error (char const* s)
        : std::runtime_error (s)
    {
    }

    explicit
    file_error (std::string const& s)
        : std::runtime_error (s)
    {
    }
};

/** Thrown when file bytes read are less than requested. */
struct file_short_read_error : file_error
{
    file_short_read_error()
        : file_error ("nudb: short read")
    {
    }
};

/** Thrown when file bytes written are less than requested. */
struct file_short_write_error : file_error
{
    file_short_write_error()
        : file_error ("nudb: short write")
    {
    }
};

/** Base class for all exceptions thrown by store. */
class store_error : public std::runtime_error
{
public:
    explicit
    store_error (std::string const& m)
        : std::runtime_error ("nudb: " + m)
    {
    }

    explicit
    store_error (char const* m)
        : store_error (std::string(m))
    {
    }
};

/** Thrown when corruption in a file is detected. */
class store_corrupt_error : public store_error
{
public:
    explicit
    store_corrupt_error (char const* m)
        : store_error (m)
    {
    }

    explicit
    store_corrupt_error (std::string const& m)
        : store_error (m)
    {
    }
};

} // nudb
} // beast
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_FILE_H_INCLUDED +#define BEAST_NUDB_FILE_H_INCLUDED + +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +using native_file = +#ifdef _MSC_VER + win32_file; +#else + posix_file; +#endif + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/mode.h b/beast/nudb/mode.h new file mode 100644 index 0000000000..b4b42e18f1 --- /dev/null +++ b/beast/nudb/mode.h @@ -0,0 +1,43 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_MODE_H_INCLUDED +#define BEAST_NUDB_MODE_H_INCLUDED + +#include +#include + +namespace beast { +namespace nudb { + +enum class file_mode +{ + scan, // read sequential + read, // read random + append, // read random, write append + write // read random, write random +}; + +// This sort of doesn't belong here +using path_type = std::string; + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/nudb.cpp b/beast/nudb/nudb.cpp new file mode 100644 index 0000000000..9fb8cb182f --- /dev/null +++ b/beast/nudb/nudb.cpp @@ -0,0 +1,25 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#include + +#include +#include +#include +#include diff --git a/beast/nudb/recover.h b/beast/nudb/recover.h new file mode 100644 index 0000000000..031fcd764c --- /dev/null +++ b/beast/nudb/recover.h @@ -0,0 +1,157 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_RECOVER_H_INCLUDED +#define BEAST_NUDB_RECOVER_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +/** Perform recovery on a database. + This implements the recovery algorithm by rolling back + any partially committed data. +*/ +template < + class Hasher = default_hash, + class File = native_file> +bool +recover ( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::size_t read_size = 16 * 1024 * 1024) +{ + using namespace detail; + File df; + File lf; + File kf; + if (! 
df.open (file_mode::append, dat_path)) + return false; + if (! kf.open (file_mode::write, key_path)) + return false; + if (! lf.open (file_mode::append, log_path)) + return true; + dat_file_header dh; + key_file_header kh; + log_file_header lh; + try + { + read (kf, kh); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short key file header"); + } + // VFALCO should the number of buckets be based on the + // file size in the log record instead? + verify(kh); + try + { + read (df, dh); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short data file header"); + } + verify(dh, kh); + auto const lf_size = lf.actual_size(); + if (lf_size == 0) + { + lf.close(); + File::erase (log_path); + return true; + } + try + { + read (lf, lh); + verify(kh, lh); + auto const df_size = df.actual_size(); + buffer buf(kh.block_size); + bucket b (kh.key_size, + kh.block_size, buf.get()); + bulk_reader r(lf, log_file_header::size, + lf_size, read_size); + while(! r.eof()) + { + std::size_t n; + try + { + // Log Record + auto is = r.prepare(field< + std::uint64_t>::size); + read(is, n); // Index + b.read(r); // Bucket + } + catch (store_corrupt_error const&) + { + throw store_corrupt_error( + "corrupt log record"); + } + catch (file_short_read_error const&) + { + // This means that the log file never + // got fully synced. In which case, there + // were no changes made to the key file. + // So we can recover by just truncating. + break; + } + if (b.spill() && + b.spill() + kh.bucket_size > df_size) + throw store_corrupt_error( + "bad spill in log record"); + // VFALCO is this the right condition? 
+ if (n > kh.buckets) + throw store_corrupt_error( + "bad index in log record"); + b.write (kf, (n + 1) * kh.block_size); + } + kf.trunc(lh.key_file_size); + df.trunc(lh.dat_file_size); + kf.sync(); + df.sync(); + } + catch (file_short_read_error const&) + { + // key and data files should be consistent here + } + + lf.trunc(0); + lf.sync(); + lf.close(); + File::erase (log_path); + return true; +} + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/store.h b/beast/nudb/store.h new file mode 100644 index 0000000000..d5befef302 --- /dev/null +++ b/beast/nudb/store.h @@ -0,0 +1,1025 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_STORE_H_INCLUDED +#define BEAST_NUDB_STORE_H_INCLUDED + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // +#include +#include +#include +#include +#include +#include + +#if DOXYGEN +#include +#endif + +#ifndef BEAST_NUDB_DEBUG_CHECKS +# ifndef NDEBUG +# define BEAST_NUDB_DEBUG_CHECKS 0 +# else +# define BEAST_NUDB_DEBUG_CHECKS 0 +# endif +#endif + +namespace beast { +namespace nudb { + +namespace detail { + +// Holds state variables of the open database. +template +struct state +{ + File df; + File kf; + File lf; + path_type dp; + path_type kp; + path_type lp; + buffers b; + pool p0; + pool p1; + cache c0; + cache c1; + key_file_header const kh; + + // pool commit high water mark + std::size_t pool_thresh = 0; + + state (state const&) = delete; + state& operator= (state const&) = delete; + + state (File&& df_, File&& kf_, File&& lf_, + path_type const& dp_, path_type const& kp_, + path_type const& lp_, key_file_header const& kh_, + std::size_t arena_alloc_size); +}; + +template +state::state ( + File&& df_, File&& kf_, File&& lf_, + path_type const& dp_, path_type const& kp_, + path_type const& lp_, key_file_header const& kh_, + std::size_t arena_alloc_size) + : df (std::move(df_)) + , kf (std::move(kf_)) + , lf (std::move(lf_)) + , dp (dp_) + , kp (kp_) + , lp (lp_) + , b (kh_.block_size) + , p0 (kh_.key_size, arena_alloc_size) + , p1 (kh_.key_size, arena_alloc_size) + , c0 (kh_.key_size, kh_.block_size) + , c1 (kh_.key_size, kh_.block_size) + , kh (kh_) +{ +} + +} // detail + +/* + + TODO + + - fingerprint / checksum on log records + + - size field at end of data records + allows walking 
backwards + + - timestamp every so often on data records + allows knowing the age of the data + +*/ + +/** A simple key/value database + @tparam File The type of File object to use. + @tparam Hash The hash function to use on key +*/ +template +class basic_store +{ +public: + using file_type = File; + using hash_type = Hasher; + +private: + // requires 64-bit integers or better + static_assert(sizeof(std::size_t)>=8, ""); + + enum + { + // Size of bulk writes + bulk_write_size = 16 * 1024 * 1024, + + // Size of bulk reads during recover + recover_read_size = 16 * 1024 * 1024 + }; + + using clock_type = + std::chrono::steady_clock; + using shared_lock_type = + boost::shared_lock; + using unique_lock_type = + boost::unique_lock; + using blockbuf = + typename detail::buffers::value_type; + + bool open_ = false; + // VFALCO Make consistency checks optional? + //bool safe_ = true; // Do consistency checks + // VFALCO Unfortunately boost::optional doesn't support + // move construction so we use unique_ptr instead. + std::unique_ptr < + detail::state> s_; // State of an open database + + std::size_t frac_; // accumulates load + std::size_t thresh_; // split threshold + std::size_t buckets_; // number of buckets + std::size_t modulus_; // hash modulus + + std::mutex u_; // serializes insert() + detail::gentex g_; + boost::shared_mutex m_; + std::thread thread_; + std::condition_variable_any cond_; + + std::atomic epb_; // `true` when ep_ set + std::exception_ptr ep_; + +public: + basic_store() = default; + basic_store (basic_store const&) = delete; + basic_store& operator= (basic_store const&) = delete; + + /** Destroy the database. + + Files are closed, memory is freed, and data that has not been + committed is discarded. To ensure that all inserted data is + written, it is necessary to call close() before destroying the + store. 
+ + This function catches all exceptions thrown by callees, so it + will be necessary to call close() before destroying the store + if callers want to catch exceptions. + + Throws: + None + */ + ~basic_store(); + + /** Returns `true` if the database is open. */ + bool + is_open() const + { + return open_; + } + + path_type const& + dat_path() const + { + return s_->dp; + } + + path_type const& + key_path() const + { + return s_->kp; + } + + path_type const& + log_path() const + { + return s_->lp; + } + + std::uint64_t + appnum() const + { + return s_->kh.appnum; + } + + /** Close the database. + + All data is committed before closing. + + Throws: + store_error + */ + void + close(); + + /** Open a database. + + @param args Arguments passed to File constructors + @return `true` if each file could be opened + */ + template + bool + open ( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::size_t arena_alloc_size, + Args&&... args); + + /** Fetch a value. + + If key is found, BufferFactory will be called as: + `(void*)()(std::size_t bytes)` + + where bytes is the size of the value, and the returned pointer + points to a buffer of at least bytes size. + + @return `true` if the key exists. + */ + template + bool + fetch (void const* key, BufferFactory&& bf); + + /** Insert a value. 
+ + Returns: + `true` if the key was inserted, + `false` if the key already existed + */ + bool + insert (void const* key, void const* data, + std::size_t bytes); + +private: + void + rethrow() + { + if (epb_.load()) + std::rethrow_exception(ep_); + } + + std::pair + find (void const* key, detail::bucket& b); + + void + maybe_spill (detail::bucket& b, + detail::bulk_writer& w); + + void + split (detail::bucket& b1, detail::bucket& b2, + detail::bucket& tmp, std::size_t n1, std::size_t n2, + std::size_t buckets, std::size_t modulus, + detail::bulk_writer& w); + + void + check (std::size_t n, detail::bucket& b, + std::size_t buckets, std::size_t modulus); + + detail::bucket + load (std::size_t n, detail::cache& c1, + detail::cache& c0, void* buf); + + bool check(); + + void + commit(); + + void + run(); +}; + +//------------------------------------------------------------------------------ + +template +basic_store::~basic_store() +{ + try + { + close(); + } + catch (...) + { + // If callers want to see the exceptions + // they have to call close manually. + } +} + +template +template +bool +basic_store::open ( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::size_t arena_alloc_size, + Args&&... args) +{ + using namespace detail; + if (is_open()) + throw std::logic_error("nudb: already open"); + epb_.store(false); + recover (dat_path, key_path, log_path, + recover_read_size); + File df(std::forward(args)...); + File kf(std::forward(args)...); + File lf(std::forward(args)...); + if (! df.open (file_mode::append, dat_path)) + return false; + if (! kf.open (file_mode::write, key_path)) + return false; + if (! 
lf.create (file_mode::append, log_path)) + return false; + dat_file_header dh; + key_file_header kh; + read (df, dh); + read (kf, kh); + verify (dh); + verify (kh); + verify (dh, kh); + auto s = std::make_unique>( + std::move(df), std::move(kf), std::move(lf), + dat_path, key_path, log_path, kh, + arena_alloc_size); + thresh_ = std::max(65536UL, + kh.load_factor * kh.capacity); + frac_ = thresh_ / 2; + buckets_ = kh.buckets; + modulus_ = ceil_pow2(kh.buckets); + // VFALCO TODO This could be better + if (buckets_ < 1) + throw store_corrupt_error ( + "bad key file length"); + s_ = std::move(s); + open_ = true; + thread_ = std::thread( + &basic_store::run, this); + return true; +} + +template +void +basic_store::close() +{ + if (open_) + { + // Set this first otherwise a + // throw can cause another close(). + open_ = false; + cond_.notify_all(); + thread_.join(); + rethrow(); + s_->lf.close(); + File::erase(s_->lp); + s_.reset(); + } +} + +template +template +bool +basic_store::fetch ( + void const* key, BufferFactory&& bf) +{ + using namespace detail; + rethrow(); + std::size_t offset; + std::size_t size; + blockbuf buf(s_->b); + bucket tmp (s_->kh.key_size, + s_->kh.block_size, buf.get()); + { + auto const h = hash( + key, s_->kh.key_size, s_->kh.salt); + shared_lock_type m (m_, + boost::defer_lock); + m.lock(); + { + typename pool::iterator iter; + iter = s_->p1.find(key); + if (iter != s_->p1.end()) + { + void* const b = bf( + iter->first.size); + if (b == nullptr) + return false; + std::memcpy (b, + iter->first.data, + iter->first.size); + return true; + } + iter = s_->p0.find(key); + if (iter != s_->p0.end()) + { + void* const b = bf( + iter->first.size); + if (b == nullptr) + return false; + std::memcpy (b, + iter->first.data, + iter->first.size); + return true; + } + } + auto const n = bucket_index( + h, buckets_, modulus_); + auto const iter = s_->c1.find(n); + if (iter != s_->c1.end()) + { + auto const result = + iter->second.find(key); + if (result.second) 
+ { + offset = result.first.offset; + size = result.first.size; + goto found; + } + // VFALCO Audit for concurrency + auto spill = iter->second.spill(); + m.unlock(); + while (spill) + { + tmp.read(s_->df, spill); + auto const result = tmp.find(key); + if (result.second) + { + offset = result.first.offset; + size = result.first.size; + goto found; + } + spill = tmp.spill(); + } + return false; + } + // VFALCO Audit for concurrency + genlock g (g_); + m.unlock(); + tmp.read (s_->kf, + (n + 1) * tmp.block_size()); + auto const result = find(key, tmp); + if (! result.second) + return false; + offset = result.first.offset; + size = result.first.size; + } +found: + void* const b = bf(size); + if (b == nullptr) + return false; + // Data Record + s_->df.read (offset + + field::size + // Size + s_->kh.key_size, // Key + b, size); + return true; +} + +template +bool +basic_store::insert (void const* key, + void const* data, std::size_t size) +{ + using namespace detail; + rethrow(); +#if ! BEAST_NUDB_NO_DOMAIN_CHECK + if (size > field::max) + throw std::logic_error( + "nudb: size too large"); +#endif + blockbuf buf (s_->b); + bucket tmp (s_->kh.key_size, + s_->kh.block_size, buf.get()); + auto const h = hash( + key, s_->kh.key_size, s_->kh.salt); + std::lock_guard u (u_); + shared_lock_type m (m_, boost::defer_lock); + m.lock(); + if (s_->p1.find(key) != s_->p1.end()) + return false; + if (s_->p0.find(key) != s_->p0.end()) + return false; + auto const n = bucket_index( + h, buckets_, modulus_); + auto const iter = s_->c1.find(n); + if (iter != s_->c1.end()) + { + if (iter->second.find(key).second) + return false; + // VFALCO Audit for concurrency + auto spill = iter->second.spill(); + m.unlock(); + while (spill) + { + tmp.read (s_->df, spill); + if (tmp.find(key).second) + return false; + spill = tmp.spill(); + } + } + else + { + genlock g (g_); + m.unlock(); + // VFALCO Audit for concurrency + tmp.read (s_->kf, + (n + 1) * s_->kh.block_size); + if (find(key, tmp).second) + 
return false; + } + { + unique_lock_type m (m_); + s_->p1.insert (h, key, data, size); + bool const full = + s_->p1.data_size() >= s_->pool_thresh; + m.unlock(); + if (full) + cond_.notify_all(); + } + return true; +} + +// Find key in loaded bucket b or its spills. +// +template +std::pair +basic_store::find ( + void const* key, detail::bucket& b) +{ + auto result = b.find(key); + if (result.second) + return result; + auto spill = b.spill(); + while (spill) + { + b.read (s_->df, spill); + result = b.find(key); + if (result.second) + return result; + spill = b.spill(); + } + return result; +} + +// Spill bucket if full +// +template +void +basic_store::maybe_spill( + detail::bucket& b, detail::bulk_writer& w) +{ + using namespace detail; + if (b.full()) + { + // Spill Record + auto const offset = w.offset(); + auto os = w.prepare( + field::size + // Zero + field::size + // Size + b.compact_size()); + write (os, 0); // Zero + write ( + os, b.compact_size()); // Size + auto const spill = + offset + os.size(); + b.write (os); // Bucket + // Update bucket + b.clear(); + b.spill (spill); + } +} + +// Split the bucket in b1 to b2 +// b1 must be loaded +// tmp is used as a temporary buffer +// splits are written but not the new buckets +// +template +void +basic_store::split (detail::bucket& b1, + detail::bucket& b2, detail::bucket& tmp, + std::size_t n1, std::size_t n2, + std::size_t buckets, std::size_t modulus, + detail::bulk_writer& w) +{ + using namespace detail; + // Trivial case: split empty bucket + if (b1.empty()) + return; + // Split + for (std::size_t i = 0; i < b1.size();) + { + auto e = b1[i]; + auto const h = hash( + e.key, s_->kh.key_size, s_->kh.salt); + auto const n = bucket_index( + h, buckets, modulus); + assert(n==n1 || n==n2); + if (n == n2) + { + b2.insert (e.offset, e.size, e.key); + b1.erase (i); + } + else + { + ++i; + } + } + std::size_t spill = b1.spill(); + if (spill) + { + b1.spill (0); + do + { + // If any part of the spill record is + // in 
the write buffer then flush first + // VFALCO Needs audit + if (spill + bucket_size(s_->kh.key_size, + s_->kh.capacity) > w.offset() - w.size()) + w.flush(); + tmp.read (s_->df, spill); + for (std::size_t i = 0; i < tmp.size(); ++i) + { + auto e = tmp[i]; + auto const n = bucket_index( + e.key, s_->kh.key_size, s_->kh.salt, + buckets, modulus); + assert(n==n1 || n==n2); + if (n == n2) + { + maybe_spill (b2, w); + b2.insert (e.offset, e.size, e.key); + } + else + { + maybe_spill (b1, w); + b1.insert (e.offset, e.size, e.key); + } + } + spill = tmp.spill(); + } + while (spill); + } +} + +// Effects: +// +// Returns a bucket from caches or the key file +// +// If the bucket is found in c1, returns the +// bucket from c1. +// Else if the bucket number is greater than buckets(), +// throws. +// Else, If the bucket is found in c2, inserts the +// bucket into c1 and returns the bucket from c1. +// Else, reads the bucket from the key file, inserts +// the bucket into c0 and c1, and returns +// the bucket from c1. 
+// +// Preconditions: +// buf points to a buffer of at least block_size() bytes +// +// Postconditions: +// c1, and c0, and the memory pointed to by buf may be modified +// +template +detail::bucket +basic_store::load ( + std::size_t n, detail::cache& c1, + detail::cache& c0, void* buf) +{ + using namespace detail; + auto iter = c1.find(n); + if (iter != c1.end()) + return iter->second; +#if BEAST_NUDB_DEBUG_CHECKS + if (n >= buckets_) + throw std::logic_error( + "nudb: missing bucket in cache"); +#endif + iter = c0.find(n); + if (iter != c0.end()) + return c1.insert (n, + iter->second)->second; + bucket tmp (s_->kh.key_size, + s_->kh.block_size, buf); + tmp.read (s_->kf, (n + 1) * + s_->kh.block_size); + c0.insert (n, tmp); + return c1.insert (n, tmp)->second; +} + +template +void +basic_store::check ( + std::size_t n, detail::bucket& b, + std::size_t buckets, std::size_t modulus) +{ + using namespace detail; + for (std::size_t i = 0; i < b.size(); ++i) + { + auto const e = b[i]; + auto const h = hash( + e.key, s_->kh.key_size, s_->kh.salt); + auto const n1 = bucket_index( + h, buckets, modulus); + assert(n1 == n); + } +} + +// Commit the memory pool to disk, then sync. 
+// +// Preconditions: +// +// Effects: +// +template +void +basic_store::commit() +{ + using namespace detail; + blockbuf buf1 (s_->b); + blockbuf buf2 (s_->b); + bucket tmp (s_->kh.key_size, + s_->kh.block_size, buf1.get()); + // Empty cache put in place temporarily + // so we can reuse the memory from s_->c1 + cache c1; + { + unique_lock_type m (m_); + if (s_->p1.empty()) + return; + swap (s_->c1, c1); + swap (s_->p0, s_->p1); + s_->pool_thresh = std::max( + s_->pool_thresh, s_->p0.data_size()); + m.unlock(); + } + // Prepare rollback information + // Log File Header + log_file_header lh; + lh.version = currentVersion; // Version + lh.appnum = s_->kh.appnum; // Appnum + lh.salt = s_->kh.salt; // Salt + lh.pepper = pepper( + lh.salt); // Pepper + lh.key_size = s_->kh.key_size; // Key Size + lh.key_file_size = + s_->kf.actual_size(); // Key File Size + lh.dat_file_size = + s_->df.actual_size(); // Data File Size + write (s_->lf, lh); + s_->lf.sync(); + // Append data and spills to data file + auto modulus = modulus_; + auto buckets = buckets_; + { + // Bulk write to avoid write amplification + bulk_writer w (s_->df, + s_->df.actual_size(), bulk_write_size); + // Write inserted data to the data file + for (auto& e : s_->p0) + { + #if BEAST_NUDB_DEBUG_CHECKS + assert (e.first.hash == hash( + e.first.key, s_->kh.key_size, s_->kh.salt)); + #endif + // VFALCO This could be UB since other + // threads are reading other data members + // of this object in memory + e.second = w.offset(); + auto os = w.prepare (data_size( + e.first.size, s_->kh.key_size)); + // Data Record + write (os, + e.first.size); // Size + write (os, e.first.key, + s_->kh.key_size); // Key + write (os, e.first.data, + e.first.size); // Data + } + // Do inserts, splits, and build view + // of original and modified buckets + for (auto const e : s_->p0) + { + #if BEAST_NUDB_DEBUG_CHECKS + assert (e.first.hash == hash( + e.first.key, s_->kh.key_size, s_->kh.salt)); + #endif + // VFALCO Should this be >= 
or > ? + if ((frac_ += 65536) >= thresh_) + { + // split + frac_ -= thresh_; + if (buckets == modulus) + modulus *= 2; + auto const n1 = buckets - (modulus / 2); + auto const n2 = buckets++; + auto b1 = load (n1, c1, s_->c0, buf2.get()); + #if BEAST_NUDB_DEBUG_CHECKS + check(n1, b1, buckets, modulus); + #endif + auto b2 = c1.create (n2); + // If split spills, the writer is + // flushed which can amplify writes. + split (b1, b2, tmp, n1, n2, + buckets, modulus, w); + #if BEAST_NUDB_DEBUG_CHECKS + check(n1, b1, buckets, modulus); + check(n2, b2, buckets, modulus); + #endif + } + // insert + auto const n = bucket_index( + e.first.hash, buckets, modulus); + auto b = load (n, c1, s_->c0, buf2.get()); + // This can amplify writes if it spills. + #if BEAST_NUDB_DEBUG_CHECKS + check(n, b, buckets, modulus); + #endif + maybe_spill (b, w); + #if BEAST_NUDB_DEBUG_CHECKS + check(n, b, buckets, modulus); + #endif + b.insert (e.second, e.first.size, e.first.key); + #if BEAST_NUDB_DEBUG_CHECKS + check(n, b, buckets, modulus); + #endif + } + w.flush(); + } + // Give readers a view of the new buckets. + // This might be slightly better than the old + // view since there could be fewer spills. + { + unique_lock_type m (m_); + swap(c1, s_->c1); + s_->p0.clear(); + buckets_ = buckets; + modulus_ = modulus; + } + // Write clean buckets to log file + // VFALCO Should the bulk_writer buffer size be tunable? 
+ { + bulk_writer w(s_->lf, + s_->lf.actual_size(), bulk_write_size); + for (auto const e : s_->c0) + { + // Log Record + auto os = w.prepare( + field::size + // Index + e.second.compact_size()); // Bucket + // Log Record + write(os, e.first); // Index + e.second.write(os); // Bucket + } + s_->c0.clear(); + w.flush(); + s_->lf.sync(); + } + // VFALCO Audit for concurrency + { + std::lock_guard g (g_); + // Write new buckets to key file + for (auto const e : s_->c1) + e.second.write (s_->kf, + (e.first + 1) * s_->kh.block_size); + } + // Finalize the commit + s_->df.sync(); + s_->kf.sync(); + s_->lf.trunc(0); + s_->lf.sync(); + // Cache is no longer needed, all fetches will go straight + // to disk again. Do this after the sync, otherwise readers + // might get blocked longer due to the extra I/O. + // VFALCO is this correct? + { + unique_lock_type m (m_); + s_->c1.clear(); + } +} + +template +void +basic_store::run() +{ + try + { + while (open_) + { + auto when = clock_type::now() + + std::chrono::seconds(1); + for(;;) + { + unique_lock_type m (m_); + bool const timeout = + cond_.wait_until (m, when) == + std::cv_status::timeout; + if (! open_) + break; + if (timeout || + s_->p1.data_size() >= + s_->pool_thresh) + { + m.unlock(); + commit(); + } + // Reclaim some memory if + // we get a spare moment. + if (timeout) + { + m.lock(); + s_->pool_thresh /= 2; + s_->p1.shrink_to_fit(); + s_->p0.shrink_to_fit(); + s_->c1.shrink_to_fit(); + s_->c0.shrink_to_fit(); + m.unlock(); + when = clock_type::now() + + std::chrono::seconds(1); + } + } + } + commit(); + } + catch(...) + { + ep_ = std::current_exception(); // must come first + epb_.store(true); + } +} + +//------------------------------------------------------------------------------ + +using store = basic_store ; + +/** Generate a random salt. 
*/ +template +std::uint64_t +make_salt() +{ + std::random_device rng; + std::mt19937_64 gen {rng()}; + std::uniform_int_distribution dist; + return dist(gen); +} + +/** Returns the best guess at the volume's block size. */ +inline +std::size_t +block_size(std::string const& /*path*/) +{ + return 4096; +} + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/tests/callgrind_test.cpp b/beast/nudb/tests/callgrind_test.cpp new file mode 100644 index 0000000000..b7c70296a1 --- /dev/null +++ b/beast/nudb/tests/callgrind_test.cpp @@ -0,0 +1,116 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace test { + +// This test is designed for callgrind runs to find hotspots +class callgrind_test : public unit_test::suite +{ +public: + // Creates and opens a database, performs a bunch + // of inserts, then alternates fetching all the keys + // with keys not present. 
+ void + do_test (std::size_t count, + nudb::path_type const& path) + { + auto const dp = path + ".dat"; + auto const kp = path + ".key"; + auto const lp = path + ".log"; + nudb::create (dp, kp, lp, + appnum, + salt, + sizeof(nudb::test::key_type), + nudb::block_size(path), + 0.50); + nudb::store db; + if (! expect (db.open(dp, kp, lp, + arena_alloc_size), "open")) + return; + expect (db.appnum() == appnum, "appnum"); + Sequence seq; + for (std::size_t i = 0; i < count; ++i) + { + auto const v = seq[i]; + expect (db.insert(&v.key, v.data, v.size), + "insert"); + } + storage s; + for (std::size_t i = 0; i < count * 2; ++i) + { + if (! (i%2)) + { + auto const v = seq[i/2]; + expect (db.fetch (&v.key, s), "fetch"); + expect (s.size() == v.size, "size"); + expect (std::memcmp(s.get(), + v.data, v.size) == 0, "data"); + } + else + { + auto const v = seq[count + i/2]; + expect (! db.fetch (&v.key, s), + "fetch missing"); + } + } + db.close(); + nudb::native_file::erase (dp); + nudb::native_file::erase (kp); + nudb::native_file::erase (lp); + } + + void + run() override + { + enum + { + // higher numbers, more pain + N = 100000 + }; + + testcase (abort_on_fail); + path_type const path = + beast::UnitTestUtilities::TempDirectory( + "nudb").getFullPathName().toStdString(); + do_test (N, path); + } +}; + +BEAST_DEFINE_TESTSUITE_MANUAL(callgrind,nudb,beast); + +} // test +} // nudb +} // beast + diff --git a/beast/nudb/tests/common.h b/beast/nudb/tests/common.h new file mode 100644 index 0000000000..bf5a745755 --- /dev/null +++ b/beast/nudb/tests/common.h @@ -0,0 +1,237 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_TEST_COMMON_H_INCLUDED +#define BEAST_NUDB_TEST_COMMON_H_INCLUDED + +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace test { + +using key_type = std::size_t; + +using fail_store = nudb::basic_store< + beast::nudb::default_hash, nudb::fail_file < + nudb::native_file>>; + +static std::size_t BEAST_CONSTEXPR arena_alloc_size = 16 * 1024 * 1024; + +static std::uint64_t BEAST_CONSTEXPR appnum = 1337; + +static std::uint64_t BEAST_CONSTEXPR salt = 42; + +//------------------------------------------------------------------------------ + +// Meets the requirements of BufferFactory +class storage +{ +private: + std::size_t size_ = 0; + std::size_t capacity_ = 0; + std::unique_ptr buf_; + +public: + storage() = default; + storage (storage const&) = delete; + storage& operator= (storage const&) = delete; + + std::size_t + size() const + { + return size_; + } + + std::uint8_t* + get() const + { + return buf_.get(); + } + + std::uint8_t* + operator()(std::size_t n) + { + if (capacity_ < n) + { + capacity_ = detail::ceil_pow2(n); + buf_.reset ( + new std::uint8_t[capacity_]); + } + size_ = n; + return buf_.get(); + } +}; + +struct value_type +{ + value_type() = default; + value_type (value_type const&) = default; + value_type& operator= (value_type const&) = default; + + key_type key; + std::size_t size; + uint8_t* data; 
+}; + +//------------------------------------------------------------------------------ + +template +static +void +rngcpy (void* buffer, std::size_t bytes, + Generator& g) +{ + using result_type = + typename Generator::result_type; + while (bytes >= sizeof(result_type)) + { + auto const v = g(); + memcpy(buffer, &v, sizeof(v)); + buffer = reinterpret_cast< + std::uint8_t*>(buffer) + sizeof(v); + bytes -= sizeof(v); + } + if (bytes > 0) + { + auto const v = g(); + memcpy(buffer, &v, bytes); + } +} + +//------------------------------------------------------------------------------ + +class Sequence +{ +public: + using key_type = test::key_type; + +private: + enum + { + minSize = 250, + maxSize = 1250 + }; + + storage s_; + beast::xor_shift_engine gen_; + std::uniform_int_distribution d_size_; + +public: + Sequence() + : d_size_ (minSize, maxSize) + { + } + + // Returns the n-th key + key_type + key (std::size_t n) + { + gen_.seed(n+1); + key_type result; + rngcpy (&result, sizeof(result), gen_); + return result; + } + + // Returns the n-th value + value_type + operator[] (std::size_t n) + { + gen_.seed(n+1); + value_type v; + rngcpy (&v.key, sizeof(v.key), gen_); + v.size = d_size_(gen_); + v.data = s_(v.size); + rngcpy (v.data, v.size, gen_); + return v; + } +}; + +template +static +std::string +num (T t) +{ + std::string s = std::to_string(t); + std::reverse(s.begin(), s.end()); + std::string s2; + s2.reserve(s.size() + (s.size()+2)/3); + int n = 0; + for (auto c : s) + { + if (n == 3) + { + n = 0; + s2.insert (s2.begin(), ','); + } + ++n; + s2.insert(s2.begin(), c); + } + return s2; +} + +template +void +print (Log log, + beast::nudb::verify_info const& info) +{ + log << "avg_fetch: " << std::fixed << std::setprecision(3) << + info.avg_fetch; + log << "waste: " << std::fixed << std::setprecision(3) << + info.waste * 100 << "%"; + log << "overhead: " << std::fixed << std::setprecision(1) << + info.overhead * 100 << "%"; + log << "actual_load: " << std::fixed << 
std::setprecision(0) << + info.actual_load * 100 << "%"; + log << "version: " << num(info.version); + log << "salt: " << std::showbase << std::hex << info.salt; + log << "key_size: " << num(info.key_size); + log << "block_size: " << num(info.block_size); + log << "bucket_size: " << num(info.bucket_size); + log << "load_factor: " << std::fixed << std::setprecision(0) << + info.load_factor * 100 << "%"; + log << "capacity: " << num(info.capacity); + log << "buckets: " << num(info.buckets); + log << "value_count: " << num(info.value_count); + log << "value_bytes: " << num(info.value_bytes); + log << "spill_count: " << num(info.spill_count); + log << "spill_count_tot: " << num(info.spill_count_tot); + log << "spill_bytes: " << num(info.spill_bytes); + log << "spill_bytes_tot: " << num(info.spill_bytes_tot); + log << "key_file_size: " << num(info.key_file_size); + log << "dat_file_size: " << num(info.dat_file_size); + + std::string s; + for (int i = 0; i < info.hist.size(); ++i) + s += (i==0) ? + std::to_string(info.hist[i]) : + (", " + std::to_string(info.hist[i])); + log << "hist: " << s; +} + +} // test +} // nudb +} // beast + +#endif diff --git a/beast/nudb/tests/fail_file.h b/beast/nudb/tests/fail_file.h new file mode 100644 index 0000000000..401cfe525c --- /dev/null +++ b/beast/nudb/tests/fail_file.h @@ -0,0 +1,245 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2013, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef BEAST_NUDB_FAIL_FILE_H_INCLUDED +#define BEAST_NUDB_FAIL_FILE_H_INCLUDED + +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +/** Thrown when a test failure mode occurs. */ +struct fail_error : std::exception +{ + char const* + what() const noexcept override + { + return "test failure"; + } +}; + +/** Countdown to test failure modue. */ +class fail_counter +{ +private: + std::size_t target_; + std::atomic count_; + +public: + fail_counter (fail_counter const&) = delete; + fail_counter& operator= (fail_counter const&) = delete; + + explicit + fail_counter (std::size_t target = 0) + { + reset (target); + } + + /** Reset the counter to fail at the nth step, or 0 for no failure. */ + void + reset (std::size_t n = 0) + { + target_ = n; + count_.store(0); + } + + bool + fail() + { + return target_ && (++count_ >= target_); + } +}; + +/** Wrapper to simulate file system failures. 
*/ +template +class fail_file +{ +private: + File f_; + fail_counter* c_ = nullptr; + +public: + fail_file() = default; + fail_file (fail_file const&) = delete; + fail_file& operator= (fail_file const&) = delete; + ~fail_file() = default; + + fail_file (fail_file&&); + + fail_file& + operator= (fail_file&& other); + + explicit + fail_file (fail_counter& c); + + bool + is_open() const + { + return f_.is_open(); + } + + path_type const& + path() const + { + return f_.path(); + } + + std::size_t + actual_size() const + { + return f_.actual_size(); + } + + void + close() + { + f_.close(); + } + + bool + create (file_mode mode, + path_type const& path) + { + return f_.create(mode, path); + } + + bool + open (file_mode mode, + path_type const& path) + { + return f_.open(mode, path); + } + + static + void + erase (path_type const& path) + { + File::erase(path); + } + + void + read (std::size_t offset, + void* buffer, std::size_t bytes) + { + f_.read(offset, buffer, bytes); + } + + void + write (std::size_t offset, + void const* buffer, std::size_t bytes); + + void + sync(); + + void + trunc (std::size_t length); + +private: + bool + fail(); + + void + do_fail(); +}; + +template +fail_file::fail_file (fail_file&& other) + : f_ (std::move(other.f_)) + , c_ (other.c_) +{ + other.c_ = nullptr; +} + +template +fail_file& +fail_file::operator= (fail_file&& other) +{ + f_ = std::move(other.f_); + c_ = other.c_; + other.c_ = nullptr; + return *this; +} + +template +fail_file::fail_file (fail_counter& c) + : c_ (&c) +{ +} + +template +void +fail_file::write (std::size_t offset, + void const* buffer, std::size_t bytes) +{ + if (fail()) + do_fail(); + if (fail()) + { + f_.write(offset, buffer, (bytes + 1) / 2); + do_fail(); + } + f_.write(offset, buffer, bytes); +} + +template +void +fail_file::sync() +{ + if (fail()) + do_fail(); + // We don't need a real sync for + // testing, it just slows things down. 
+ //f_.sync(); +} + +template +void +fail_file::trunc (std::size_t length) +{ + if (fail()) + do_fail(); + f_.trunc(length); +} + +template +bool +fail_file::fail() +{ + if (c_) + return c_->fail(); + return false; +} + +template +void +fail_file::do_fail() +{ + throw fail_error(); +} + +} +} + +#endif + diff --git a/beast/nudb/tests/recover_test.cpp b/beast/nudb/tests/recover_test.cpp new file mode 100644 index 0000000000..84a603b07b --- /dev/null +++ b/beast/nudb/tests/recover_test.cpp @@ -0,0 +1,160 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace test { + +class basic_recover_test : public unit_test::suite +{ +public: + // Creates and opens a database, performs a bunch + // of inserts, then fetches all of them to make sure + // they are there. Uses a fail_file that causes the n-th + // I/O to fail, causing an exception. 
+ void + do_work (std::size_t n, std::size_t count, + float load_factor, nudb::path_type const& path) + { + auto const dp = path + ".dat"; + auto const kp = path + ".key"; + auto const lp = path + ".log"; + nudb::fail_counter c(0); + nudb::create (dp, kp, lp, appnum, salt, + sizeof(key_type), block_size(path), + load_factor); + fail_store db; + if (! expect(db.open(dp, kp, lp, + arena_alloc_size, c), "open")) + { + // VFALCO open should never fail here, we need + // to report this and terminate the test. + } + expect (db.appnum() == appnum, "appnum"); + c.reset(n); + Sequence seq; + for (std::size_t i = 0; i < count; ++i) + { + auto const v = seq[i]; + db.insert(&v.key, v.data, v.size); + } + storage s; + for (std::size_t i = 0; i < count; ++i) + { + auto const v = seq[i]; + if (! expect(db.fetch (&v.key, s), + "fetch")) + break; + if (! expect(s.size() == v.size, "size")) + break; + if (! expect(std::memcmp(s.get(), + v.data, v.size) == 0, "data")) + break; + } + db.close(); + #ifndef NDEBUG + print(log, verify(dp, kp)); + verify(dp, kp); + #endif + nudb::native_file::erase (dp); + nudb::native_file::erase (kp); + nudb::native_file::erase (lp); + } + + void + do_recover (path_type const& path) + { + auto const dp = path + ".dat"; + auto const kp = path + ".key"; + auto const lp = path + ".log"; + recover(dp, kp, lp); + verify(dp, kp); + nudb::native_file::erase (dp); + nudb::native_file::erase (kp); + nudb::native_file::erase (lp); + } + + void + test_recover (float load_factor, std::size_t count) + { + testcase << count << " inserts"; + path_type const path = + beast::UnitTestUtilities::TempDirectory( + "nudb").getFullPathName().toStdString(); + for (std::size_t n = 1;;++n) + { + try + { + do_work (n, count, load_factor, path); + break; + } + catch (nudb::fail_error const&) + { + do_recover (path); + } + } + } +}; + +class recover_test : public basic_recover_test +{ +public: + void + run() override + { + float lf = 0.75f; + test_recover (lf, 0); + test_recover 
(lf, 10); + test_recover (lf, 100); + test_recover (lf, 1000); + } +}; + +BEAST_DEFINE_TESTSUITE(recover,nudb,beast); + +class recover_big_test : public basic_recover_test +{ +public: + void + run() override + { + float lf = 0.90f; + test_recover (lf, 100000); + } +}; + +BEAST_DEFINE_TESTSUITE_MANUAL(recover_big,nudb,beast); + +} // test +} // nudb +} // beast + diff --git a/beast/nudb/tests/store_test.cpp b/beast/nudb/tests/store_test.cpp new file mode 100644 index 0000000000..4a858bbc2b --- /dev/null +++ b/beast/nudb/tests/store_test.cpp @@ -0,0 +1,142 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { +namespace test { + +// Basic, single threaded test that verifies the +// correct operation of the store. Load factor is +// set high to ensure that spill records are created, +// exercised, and split. 
+// +class store_test : public unit_test::suite +{ +public: + void + do_test (std::size_t N, + std::size_t block_size, float load_factor) + { + testcase (abort_on_fail); + std::string const path = + beast::UnitTestUtilities::TempDirectory( + "test_db").getFullPathName().toStdString(); + auto const dp = path + ".dat"; + auto const kp = path + ".key"; + auto const lp = path + ".log"; + Sequence seq; + nudb::store db; + try + { + expect (nudb::create (dp, kp, lp, appnum, + salt, sizeof(key_type), block_size, + load_factor), "create"); + expect (db.open(dp, kp, lp, + arena_alloc_size), "open"); + storage s; + // insert + for (std::size_t i = 0; i < N; ++i) + { + auto const v = seq[i]; + expect (db.insert( + &v.key, v.data, v.size), "insert 1"); + } + // fetch + for (std::size_t i = 0; i < N; ++i) + { + auto const v = seq[i]; + bool const found = db.fetch (&v.key, s); + expect (found, "not found"); + expect (s.size() == v.size, "wrong size"); + expect (std::memcmp(s.get(), + v.data, v.size) == 0, "not equal"); + } + // insert duplicates + for (std::size_t i = 0; i < N; ++i) + { + auto const v = seq[i]; + expect (! db.insert(&v.key, + v.data, v.size), "insert duplicate"); + } + // insert/fetch + for (std::size_t i = 0; i < N; ++i) + { + auto v = seq[i]; + bool const found = db.fetch (&v.key, s); + expect (found, "missing"); + expect (s.size() == v.size, "wrong size"); + expect (memcmp(s.get(), + v.data, v.size) == 0, "wrong data"); + v = seq[i + N]; + expect (db.insert(&v.key, v.data, v.size), + "insert 2"); + } + db.close(); + auto const stats = nudb::verify (dp, kp); + expect (stats.hist[1] > 0, "no splits"); + print (log, stats); + } + catch (nudb::store_error const& e) + { + fail (e.what()); + } + catch (std::exception const& e) + { + fail (e.what()); + } + expect (native_file::erase(dp)); + expect (native_file::erase(kp)); + expect (! 
native_file::erase(lp)); + } + + void + run() override + { + enum + { + N = 50000 + ,block_size = 256 + }; + + float const load_factor = 0.95f; + + do_test (N, block_size, load_factor); + } +}; + +BEAST_DEFINE_TESTSUITE(store,nudb,beast); + +} // test +} // nudb +} // beast + diff --git a/beast/nudb/tests/verify_test.cpp b/beast/nudb/tests/verify_test.cpp new file mode 100644 index 0000000000..a934de7c63 --- /dev/null +++ b/beast/nudb/tests/verify_test.cpp @@ -0,0 +1,55 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#include +#include +#include + +namespace beast { +namespace nudb { +namespace test { + +class verify_test : public unit_test::suite +{ +public: + // Runs verify on the database and reports statistics + void + do_verify (nudb::path_type const& path) + { + auto const dp = path + ".dat"; + auto const kp = path + ".key"; + print(log, verify(dp, kp)); + } + + void + run() override + { + if (arg().empty()) + return fail("missing unit test argument"); + do_verify(arg()); + pass(); + } +}; + +BEAST_DEFINE_TESTSUITE_MANUAL(verify,nudb,beast); + +} // test +} // nudb +} // beast + diff --git a/beast/nudb/verify.h b/beast/nudb/verify.h new file mode 100644 index 0000000000..2bd749f310 --- /dev/null +++ b/beast/nudb/verify.h @@ -0,0 +1,283 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_VERIFY_H_INCLUDED +#define BEAST_NUDB_VERIFY_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +/** Reports database information during verify mode. */ +struct verify_info +{ + // Configured + std::size_t version = 0; // API version + std::size_t salt = 0; // Salt or database ID + std::size_t key_size = 0; // Size of a key in bytes + std::size_t block_size = 0; // Block size in bytes + float load_factor = 0; // Target bucket fill fraction + + // Calculated + std::size_t capacity = 0; // Max keys per bucket + std::size_t buckets = 0; // Number of buckets + std::size_t bucket_size = 0; // Size of bucket in bytes + + // Measured + std::size_t key_file_size = 0; // Key file size in bytes + std::size_t dat_file_size = 0; // Data file size in bytes + std::size_t key_count = 0; // Keys in buckets and active spills + std::size_t value_count = 0; // Count of values in the data file + std::size_t value_bytes = 0; // Sum of value bytes in the data file + std::size_t spill_count = 0; // used number of spill records + std::size_t spill_count_tot = 0; // Number of spill records in data file + std::size_t spill_bytes = 0; // used byte of spill records + std::size_t spill_bytes_tot = 0; // Sum of spill record bytes in data file + + // Performance + float avg_fetch = 0; // average reads per fetch (excluding value) + float waste = 0; // fraction of data file bytes wasted (0..100) + float overhead = 0; // percent of extra bytes per byte of value + float actual_load = 0; // actual bucket fill fraction + + // number of buckets having n spills + std::array hist; + + verify_info() + { + hist.fill(0); + } +}; + +/** Verify consistency of the key and data files. + Effects: + Opens the key and data files in read-only mode. + Throws file_error if a file can't be opened. 
+ Iterates the key and data files, throws store_corrupt_error + on broken invariants. +*/ +template +verify_info +verify ( + path_type const& dat_path, + path_type const& key_path, + std::size_t read_size = 16 * 1024 * 1024) +{ + using namespace detail; + using File = native_file; + File df; + File kf; + if (! df.open (file_mode::scan, dat_path)) + throw store_corrupt_error( + "no data file"); + if (! kf.open (file_mode::read, key_path)) + throw store_corrupt_error( + "no key file"); + key_file_header kh; + dat_file_header dh; + read (df, dh); + read (kf, kh); + verify(dh, kh); + + verify_info info; + info.version = dh.version; + info.salt = dh.salt; + info.key_size = dh.key_size; + info.block_size = kh.block_size; + info.load_factor = kh.load_factor / 65536.f; + info.capacity = kh.capacity; + info.buckets = kh.buckets; + info.bucket_size = kh.bucket_size; + info.key_file_size = kf.actual_size(); + info.dat_file_size = df.actual_size(); + + buffer buf (kh.block_size); + bucket b (kh.key_size, + kh.block_size, buf.get()); + + // Iterate Data File + { + bulk_reader r(df, + dat_file_header::size, + df.actual_size(), read_size); + while (! 
r.eof()) + { + // Data Record or Spill Record + std::size_t size; + auto is = r.prepare( + field::size); // Size + read(is, size); + if (size > 0) + { + // Data Record + is = r.prepare( + kh.key_size + // Key + size); // Data + std::uint8_t const* const key = + is.data(kh.key_size); + std::uint8_t const* const data = + is.data(size); + (void)data; + // Check bucket and spills + try + { + b.read (kf, (bucket_index( + key, kh) + 1) * kh.block_size); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short bucket"); + } + for(;;) + { + if (b.find(key).second) + break; + if (b.spill() != 0) + { + try + { + b.read (df, b.spill()); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "short spill"); + } + } + else + { + throw store_corrupt_error( + "orphaned value"); + } + } + // Update + ++info.value_count; + info.value_bytes += size; + } + else + { + // Spill Record + is = r.prepare( + field::size); + read(is, size); // Size + if (size != kh.bucket_size) + throw store_corrupt_error( + "bad spill size"); + b.read(r); // Bucket + ++info.spill_count_tot; + info.spill_bytes_tot += + field::size + // Zero + field::size + // Size + b.compact_size(); // Bucket + + } + } + } + + // Iterate Key File + { + // Data Record (header) + buffer buf ( + field::size + // Size + kh.key_size); // Key Size + for (std::size_t n = 0; n < kh.buckets; ++n) + { + std::size_t nspill = 0; + b.read (kf, (n + 1) * kh.block_size); + for(;;) + { + info.key_count += b.size(); + for (std::size_t i = 0; i < b.size(); ++i) + { + auto const e = b[i]; + try + { + df.read (e.offset, + buf.get(), buf.size()); + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "missing value"); + } + // Data Record + istream is(buf.get(), buf.size()); + std::size_t size; + read(is, size); // Size + if (size != e.size) + throw store_corrupt_error( + "wrong size"); + if (std::memcmp(is.data(kh.key_size), + e.key, kh.key_size) != 0) + throw 
store_corrupt_error( + "wrong key"); + } + if (! b.spill()) + break; + try + { + b.read (df, b.spill()); + ++nspill; + ++info.spill_count; + info.spill_bytes += + field::size + // Zero + field::size + // Size + b.compact_size(); // SpillBucket + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "missing spill"); + } + } + if (nspill >= info.hist.size()) + nspill = info.hist.size() - 1; + ++info.hist[nspill]; + } + } + + float sum = 0; + for (int i = 0; i < info.hist.size(); ++i) + sum += info.hist[i] * (i + 1); + info.avg_fetch = sum / info.buckets; + info.waste = (info.spill_bytes_tot - info.spill_bytes) / + float(info.dat_file_size); + info.overhead = + float(info.key_file_size + info.dat_file_size) / + (info.value_bytes + info.key_count * info.key_size) - 1; + info.actual_load = info.key_count / float( + info.capacity * info.buckets); + return info; +} + +} // nudb +} // beast + +#endif diff --git a/beast/nudb/visit.h b/beast/nudb/visit.h new file mode 100644 index 0000000000..ed343c9cc4 --- /dev/null +++ b/beast/nudb/visit.h @@ -0,0 +1,110 @@ +//------------------------------------------------------------------------------ +/* + This file is part of Beast: https://github.com/vinniefalco/Beast + Copyright 2014, Vinnie Falco + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef BEAST_NUDB_VISIT_H_INCLUDED +#define BEAST_NUDB_VISIT_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace beast { +namespace nudb { + +/** Visit each key/data pair in a database file. + + Function will be called with this signature: + bool(void const* key, std::size_t key_size, + void const* data, std::size_t size) + + If Function returns false, the visit is terminated. + + @return `true` if the visit completed + This only requires the data file. +*/ +template +bool +visit( + path_type const& path, + Function f, + std::size_t read_size = 16 * 1024 * 1024) +{ + using namespace detail; + using File = native_file; + File df; + df.open (file_mode::scan, path); + dat_file_header dh; + read (df, dh); + verify (dh); + // Iterate Data File + bulk_reader r( + df, dat_file_header::size, + df.actual_size(), read_size); + try + { + while (! r.eof()) + { + // Data Record or Spill Record + std::size_t size; + auto is = r.prepare( + field::size); // Size + read(is, size); + if (size > 0) + { + // Data Record + is = r.prepare( + dh.key_size + // Key + size); // Data + std::uint8_t const* const key = + is.data(dh.key_size); + std::uint8_t const* const data = + is.data(size); + if (! f(key, dh.key_size, + data, size)) + return false; + } + else + { + // Spill Record + is = r.prepare( + field::size); + read(is, size); // Size + r.prepare(size); // skip bucket + } + } + } + catch (file_short_read_error const&) + { + throw store_corrupt_error( + "nudb: data short read"); + } + + return true; +} + +} // nudb +} // beast + +#endif