commit 79159ffd87bf86e92ab5af6fffd5cc93c205a630 Author: Vinnie Falco Date: Thu Sep 29 19:24:12 2016 -0400 Squashed 'src/nudb/' content from commit 00adc6a git-subtree-dir: src/nudb git-subtree-split: 00adc6a4f16679a376f40c967f77dfa544c179c1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..99f984bdaa --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin/ +bin64/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..0115ab4e41 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "extras/beast"] + path = extras/beast + url = https://github.com/vinniefalco/Beast.git +[submodule "extras/rocksdb"] + path = extras/rocksdb + url = https://github.com/facebook/rocksdb.git +[submodule "doc/docca"] + path = doc/docca + url = https://github.com/vinniefalco/docca.git diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..1900451f96 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,89 @@ +language: cpp + +env: + global: + - LLVM_VERSION=3.8.0 + # Maintenance note: to move to a new version + # of boost, update both BOOST_ROOT and BOOST_URL. + # Note that for simplicity, BOOST_ROOT's final + # namepart must match the folder name internal + # to boost's .tar.gz. + - LCOV_ROOT=$HOME/lcov + - VALGRIND_ROOT=$HOME/valgrind-install + - BOOST_ROOT=$HOME/boost_1_60_0 + - BOOST_URL='http://downloads.sourceforge.net/project/boost/boost/1.60.0/boost_1_60_0.tar.gz?r=https%3A%2F%2Fsourceforge.net%2Fprojects%2Fboost%2Ffiles%2Fboost%2F1.60.0%2Fboost_1_60_0.tar.gz&ts=1460417589&use_mirror=netix' +packages: &gcc5_pkgs + - gcc-5 + - g++-5 + - python-software-properties + - libssl-dev + - libffi-dev + - libstdc++6 + - binutils-gold + # Provides a backtrace if the unittests crash + - gdb + # Needed for installing valgrind + - subversion + - automake + - autotools-dev + - libc6-dbg + # Needed to build rocksdb + - libsnappy-dev + +matrix: + include: + # GCC/Coverage/Autobahn + - compiler: gcc + env: + - GCC_VER=5 + - VARIANT=coverage + - ADDRESS_MODEL=64 + - BUILD_SYSTEM=cmake + - PATH=$PWD/cmake/bin:$PATH + addons: &ao_gcc5 + apt: + sources: ['ubuntu-toolchain-r-test'] + packages: *gcc5_pkgs + + # Clang/UndefinedBehaviourSanitizer + - compiler: clang + env: + - GCC_VER=5 + - VARIANT=usan + - CLANG_VER=3.8 + - ADDRESS_MODEL=64 + - UBSAN_OPTIONS='print_stacktrace=1' + - BUILD_SYSTEM=cmake + - PATH=$PWD/cmake/bin:$PATH + - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH + addons: *ao_gcc5 + + # Clang/AddressSanitizer + - compiler: clang + env: + - GCC_VER=5 + - VARIANT=asan + - CLANG_VER=3.8 + - ADDRESS_MODEL=64 + - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH + addons: *ao_gcc5 + +cache: + directories: + - $BOOST_ROOT + - $VALGRIND_ROOT + - llvm-$LLVM_VERSION + - cmake + +before_install: + - scripts/install-dependencies.sh + +script: + - scripts/build-and-test.sh + +after_script: + - cat nohup.out || echo "nohup.out already deleted" + +notifications: + email: + false diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..d7d65c3fe0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,58 @@ +1.0.0-b6 + +* Fix incorrect file deletion in create() + +--- + +1.0.0-b5 + +* fail_file also fails on reads +* Fix bug in rekey where an error code wasn't checked +* Increase coverage +* Add buffer unit test +* Add is_File concept and checks +* Update documentation +* Add example program +* Demote exceptions to asserts in gentex +* Improved commit process +* Dynamic block size in custom allocator + +--- + +1.0.0-b4 + +* Improved test coverage +* Use master branch for 
codecov badge +* Throw on API calls when no database open +* Benchmarks vs. RocksDB + +### API Changes: + +* `insert` sets `error::key_exists` instead of returning `false` +* `fetch` sets `error::key_not_found` instead of returning `false` + +--- + +1.0.0-b3 + +* Tune buffer sizes for performance +* Fix large POSIX and Win32 writes +* Adjust progress indicator for nudb tool +* Document link requirements +* Add visit test +* Improved coverage + +--- + +1.0.0-b2 + +* Minor documentation and tidying +* Add CHANGELOG + +--- + +1.0.0-b1 + +* Initial source tree + + diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..af1bfc33a3 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,87 @@ +cmake_minimum_required (VERSION 3.2) + +project (nudb) + +set_property (GLOBAL PROPERTY USE_FOLDERS ON) + +if (WIN32) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /W4 /wd4100 /D _WIN32_WINNT=0x0600 /D_SCL_SECURE_NO_WARNINGS=1 /D_CRT_SECURE_NO_WARNINGS=1") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO") +else () + set (Boost_USE_STATIC_LIBS ON) + set (Boost_USE_MULTITHREADED ON) + find_package (Boost REQUIRED COMPONENTS filesystem program_options system thread) + include_directories (SYSTEM ${Boost_INCLUDE_DIRS}) + link_directories (${Boost_LIBRARY_DIR}) + + set (THREADS_PREFER_PTHREAD_FLAG ON) + find_package (Threads) + + set (CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wpedantic") +endif () + +if ("${VARIANT}" STREQUAL "coverage") + set (CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") + set (CMAKE_BUILD_TYPE RELWITHDEBINFO) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov") +elseif ("${VARIANT}" STREQUAL "asan") + set (CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") + set (CMAKE_BUILD_TYPE RELWITHDEBINFO) +elseif ("${VARIANT}" STREQUAL "usan") + set (CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") + set (CMAKE_BUILD_TYPE RELWITHDEBINFO) +elseif ("${VARIANT}" STREQUAL "debug") + set (CMAKE_BUILD_TYPE DEBUG) +elseif ("${VARIANT}" STREQUAL "release") + set (CMAKE_BUILD_TYPE RELEASE) +endif () + +function (DoGroupSources curdir rootdir folder) + file (GLOB children RELATIVE ${PROJECT_SOURCE_DIR}/${curdir} ${PROJECT_SOURCE_DIR}/${curdir}/*) + foreach (child ${children}) + if (IS_DIRECTORY ${PROJECT_SOURCE_DIR}/${curdir}/${child}) + DoGroupSources (${curdir}/${child} ${rootdir} ${folder}) + elseif (${child} STREQUAL "CMakeLists.txt") + source_group ("" FILES ${PROJECT_SOURCE_DIR}/${curdir}/${child}) + else () + string (REGEX REPLACE ^${rootdir} ${folder} groupname ${curdir}) + #set (groupname ${curdir}) + string (REPLACE "/" "\\" groupname ${groupname}) + source_group (${groupname} FILES ${PROJECT_SOURCE_DIR}/${curdir}/${child}) + endif () + endforeach () +endfunction () + +function (GroupSources curdir folder) + DoGroupSources (${curdir} ${curdir} ${folder}) +endfunction () + +include_directories ( + include + extras + extras/beast/extras + ) + +file (GLOB_RECURSE BEAST_INCLUDES + ${PROJECT_SOURCE_DIR}/extras/beast/extras/beast/unit_test/*.hpp + ${PROJECT_SOURCE_DIR}/extras/beast/extras/beast/unit_test/*.ipp +) + +file (GLOB_RECURSE EXTRAS_INCLUDES + ${PROJECT_SOURCE_DIR}/extras/nudb/* +) + +file (GLOB_RECURSE NUDB_INCLUDES + ${PROJECT_SOURCE_DIR}/include/nudb/* +) + +add_subdirectory (bench) 
+add_subdirectory (examples) +add_subdirectory (test) +add_subdirectory (tools) diff --git a/Jamroot b/Jamroot new file mode 100644 index 0000000000..7c140b811c --- /dev/null +++ b/Jamroot @@ -0,0 +1,93 @@ +# +# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +# +# Distributed under the Boost Software License, Version 1.0. (See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +# + +import os ; +import feature ; +import boost ; + +boost.use-project ; + +variant coverage + : + debug + : + "-fprofile-arcs -ftest-coverage" + "-lgcov" + ; + +variant asan + : + release + : + "-fsanitize=address -fno-omit-frame-pointer" + "-fsanitize=address" + ; + +variant msan + : + debug + : + "-fsanitize=memory -fno-omit-frame-pointer -fsanitize-memory-track-origins=2 -fsanitize-memory-use-after-dtor" + "-fsanitize=memory" + ; + +variant usan + : + debug + : + "-fsanitize=undefined -fno-omit-frame-pointer" + "-fsanitize=undefined" + ; + +project nudb + : requirements + ./extras + ./extras/beast/extras + ./include + #/boost//headers + /boost/system//boost_system + /boost/thread//boost_thread + /boost/filesystem//boost_filesystem + /boost/program_options//boost_program_options + BOOST_ALL_NO_LIB=1 + BOOST_SYSTEM_NO_DEPRECATED=1 + multi + static + shared + on + gcc:-std=c++11 + gcc:-Wno-unused-variable + clang:-std=c++11 + msvc:_SCL_SECURE_NO_WARNINGS=1 + msvc:_CRT_SECURE_NO_WARNINGS=1 + msvc:-bigobj + LINUX:_XOPEN_SOURCE=600 + LINUX:_GNU_SOURCE=1 + SOLARIS:_XOPEN_SOURCE=500 + SOLARIS:__EXTENSIONS__ + SOLARIS:socket + SOLARIS:nsl + NT:_WIN32_WINNT=0x0601 + NT,cw:ws2_32 + NT,cw:mswsock + NT,gcc:ws2_32 + NT,gcc:mswsock + NT,gcc-cygwin:__USE_W32_SOCKETS + HPUX,gcc:_XOPEN_SOURCE_EXTENDED + HPUX:ipv6 + QNXNTO:socket + HAIKU:network + : usage-requirements + . + : + build-dir bin + ; + +build-project bench ; +build-project examples ; +build-project test ; +build-project tools ; diff --git a/LICENSE_1_0.txt b/LICENSE_1_0.txt new file mode 100644 index 0000000000..36b7cd93cd --- /dev/null +++ b/LICENSE_1_0.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..4d3b0d7e8b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,466 @@
+NuDB
+
+[![Join the chat at https://gitter.im/vinniefalco/NuDB](https://badges.gitter.im/vinniefalco/NuDB.svg)](https://gitter.im/vinniefalco/NuDB?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.org/vinniefalco/NuDB.svg?branch=master)](https://travis-ci.org/vinniefalco/NuDB) [![codecov](https://codecov.io/gh/vinniefalco/NuDB/branch/master/graph/badge.svg)](https://codecov.io/gh/vinniefalco/NuDB) [![coveralls](https://coveralls.io/repos/github/vinniefalco/NuDB/badge.svg?branch=master)](https://coveralls.io/github/vinniefalco/NuDB?branch=master) [![Documentation](https://img.shields.io/badge/documentation-master-brightgreen.svg)](http://vinniefalco.github.io/nudb/) [![License](https://img.shields.io/badge/license-boost-brightgreen.svg)](LICENSE_1_0.txt)
+
+# A Key/Value Store For SSDs
+
+---
+
+## Contents
+
+- [Introduction](#introduction)
+- [Description](#description)
+- [Requirements](#requirements)
+- [Example](#example)
+- [Building](#building)
+- [Algorithm](#algorithm)
+- [License](#license)
+- [Contact](#contact)
+
+---
+
+## Introduction
+
+NuDB is an append-only, key/value store specifically optimized for random
+read performance on modern SSDs or equivalent high-IOPS devices. The most
+common application for NuDB is content addressable storage, where a
+cryptographic digest of the data is used as the key. The read performance
+and memory usage are independent of the size of the database. Other
+features include:
+
+* Low memory footprint
+* Database size up to 281TB
+* All keys are the same size
+* Append-only, no update or delete
+* Value sizes from 1 to 2^32 bytes (4GB)
+* Performance independent of growth
+* Optimized for concurrent fetch
+* Key file can be rebuilt if needed
+* Inserts are atomic and consistent
+* Data file may be efficiently iterated
+* Key and data files may be on different devices
+* Hardened against algorithmic complexity attacks
+* Header-only, no separate library to build
+
+## Description
+
+This software is close to final. Interfaces are stable.
+For recent changes see the [CHANGELOG](CHANGELOG.md).
+
+NuDB has been in use for over a year on production servers
+running [rippled](https://github.com/ripple/rippled), with
+database sizes over 3 terabytes.
+
+* [Repository](https://github.com/vinniefalco/NuDB)
+* [Documentation](http://vinniefalco.github.io/nudb/)
+
+## Requirements
+
+* Boost 1.58 or higher
+* C++11 or greater
+* SSD drive, or equivalent device with high IOPS
+
+## Example
+
+This complete program creates a database, opens the database,
+inserts several key/value pairs, fetches the key/value pairs,
+closes the database, then erases the database files. Source
+code for this program is located in the examples directory.
+
+```C++
+#include <nudb/nudb.hpp>
+#include <cstddef>
+#include <cstdint>
+
+int main()
+{
+    using namespace nudb;
+    std::size_t constexpr N = 1000;
+    using key_type = std::uint32_t;
+    error_code ec;
+    auto const dat_path = "db.dat";
+    auto const key_path = "db.key";
+    auto const log_path = "db.log";
+    create<xxhasher>(
+        dat_path, key_path, log_path,
+        1,
+        make_salt(),
+        sizeof(key_type),
+        block_size("."),
+        0.5f,
+        ec);
+    store db;
+    db.open(dat_path, key_path, log_path, ec);
+    char data = 0;
+    // Insert
+    for(key_type i = 0; i < N; ++i)
+        db.insert(&i, &data, sizeof(data), ec);
+    // Fetch
+    for(key_type i = 0; i < N; ++i)
+        db.fetch(&i,
+            [&](void const* buffer, std::size_t size)
+            {
+                // do something with buffer, size
+            }, ec);
+    db.close(ec);
+    erase_file(dat_path);
+    erase_file(key_path);
+    erase_file(log_path);
+}
+```
+
+## Building
+
+NuDB is header-only, so there are no libraries to build. To use it in your
+project, simply copy the NuDB sources to your project's source tree
+(alternatively, bring NuDB into your Git repository using the
+`git subtree` or `git submodule` commands). Then, edit your build scripts
+to add the `include/` directory to the list of paths checked by the C++
+compiler when searching for includes. NuDB `#include` lines will look
+like this:
+
+```
+#include <nudb/nudb.hpp>
+```
+
+To link your program successfully, you'll need to link with the
+Boost.Thread and Boost.System libraries. Please visit the Boost
+documentation for instructions on how to do this for your particular
+build system.
+
+NuDB tests require Beast, and the benchmarks require RocksDB. These projects
+are linked to the repository using git submodules. Before building the tests
+or benchmarks, these commands should be issued at the root of the repository:
+
+```
+git submodule init
+git submodule update
+```
+
+For the examples and tests, NuDB provides build scripts for Boost.Build (b2)
+and CMake. To generate build scripts using CMake, execute these commands at
+the root of the repository (project and solution files will be generated
+for Visual Studio users):
+
+```
+cd bin
+cmake ..                                    # for 32-bit Windows builds
+
+cd ../bin64
+cmake ..                                    # for Linux/Mac builds, OR
+cmake -G"Visual Studio 14 2015 Win64" ..    # for 64-bit Windows builds
+```
+
+To build with Boost.Build, it is necessary to have the b2 executable
+in your path, and b2 needs to know how to find the Boost sources. The
+easiest way to do this is to make sure that the version of b2 in your path
+is the one at the root of the Boost source tree, which is built when
+running `bootstrap.sh` (or `bootstrap.bat` on Windows).
+
+Once b2 is in your path, simply run b2 in the root of the NuDB
+repository to automatically build the required Boost libraries if they
+are not already built, build the examples, then build and run the unit
+tests.
+
+On OSX it may be necessary to pass "toolset=clang" on the b2 command line.
+Alternatively, this may be set in site-config.jam or user-config.jam.
+
+The files in the repository are laid out thusly:
+
+```
+./
+    bench/      Holds the benchmark sources and scripts
+    bin/        Holds executables and project files
+    bin64/      Holds 64-bit Windows executables and project files
+    examples/   Holds example program source code
+    extras/     Additional APIs, may change
+    include/    Add this to your compiler includes
+        nudb/
+    test/       Unit tests and benchmarks
+    tools/      Holds the command line tool sources
+```
+
+## Algorithm
+
+Three files are used.
+
+* The data file holds keys and values stored sequentially and size-prefixed.
+* The key file holds a series of fixed-size bucket records forming an on-disk
+  hash table.
+* The log file stores bookkeeping information used to restore consistency when
+  an external failure occurs.
+
+In typical cases a fetch costs one I/O cycle to consult the key file, and if the
+key is present, one I/O cycle to read the value.
+
+### Usage
+
+Callers must define these parameters when _creating_ a database:
+
+* `KeySize`: The size of a key in bytes.
+* `BlockSize`: The physical size of a key file record.
+
+The ideal block size matches the sector size or block size of the
+underlying physical media that holds the key file. Functions are
+provided to return a best estimate of this value for a particular
+device, but a default of 4096 should work for typical installations.
+The implementation tries to fit as many entries as possible in a key
+file record, to maximize the amount of useful work performed per I/O.
+
+* `LoadFactor`: The desired fraction of bucket occupancy.
+
+`LoadFactor` is chosen to make bucket overflows unlikely without
+sacrificing bucket occupancy. A value of 0.50 seems to work well with
+a good hash function.
+
+Callers must also provide these parameters when a database is _opened_:
+
+* `Appnum`: An application-defined integer constant which can be retrieved
+later from the database [TODO].
+* `AllocSize`: A significant multiple of the average data size.
+
+Memory is recycled to improve performance, so NuDB needs `AllocSize` as a
+hint about the average size of the data being inserted. For an average data size
+of 1KB (one kilobyte), an `AllocSize` of sixteen megabytes (16MB) is sufficient. If
+the `AllocSize` is too low, the memory recycler will not make efficient use of
+allocated blocks.
+
+Two operations are defined: `fetch` and `insert`.
+
+#### `fetch`
+
+The `fetch` operation retrieves a variable length value given the
+key. The caller supplies a factory used to provide a buffer for storing
+the value. This interface allows custom memory allocation strategies.
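+
+For example, a caller that wants each value delivered in storage it owns can
+allocate inside the callback. This is a minimal sketch building on the example
+program above; it assumes the `db` object, a `key` variable, and `ec` are
+already set up as shown there:
+
+```C++
+std::string value; // caller-owned buffer
+db.fetch(&key,
+    [&](void const* buffer, std::size_t size)
+    {
+        // The allocation strategy is up to the caller;
+        // here the value is simply copied into a std::string.
+        value.assign(static_cast<char const*>(buffer), size);
+    }, ec);
+```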
+
+#### `insert`
+
+`insert` adds a key/value pair to the store. Value data must contain at least
+one byte. Duplicate keys are disallowed. Insertions are serialized, which means
+[TODO].
+
+### Implementation
+
+All insertions are buffered in memory, with inserted values becoming
+immediately discoverable in subsequent or concurrent calls to fetch.
+Periodically, buffered data is safely committed to disk files using
+a separate dedicated thread associated with the database. This commit
+process takes place at least once per second, or more often during
+a detected surge in insertion activity. In the commit process the
+key/value pairs receive the following treatment:
+
+An insertion is performed by appending a value record to the data file.
+The value record has some header information including the size of the
+data and a copy of the key; the data file is iterable without the key
+file. The value data follows the header. The data file is append-only
+and immutable: once written, bytes are never changed.
+
+Initially the hash table in the key file consists of a single bucket.
+After the load factor is exceeded from insertions, the hash table grows
+in size by one bucket by doing a "split". The split operation is the
+[linear hashing algorithm](http://en.wikipedia.org/wiki/Linear_hashing)
+as described by Litwin and Larson.
+
+When a bucket is split, each key is rehashed, and either remains in the
+original bucket or gets moved to a new bucket appended to the end of
+the key file.
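+
+To make the split rule concrete, the following is a minimal sketch of the
+linear hashing address calculation. It is illustrative only, not the NuDB
+sources; the names `bucket_index`, `buckets`, and `modulus` are ours. Here
+`modulus` is the smallest power of two greater than or equal to the current
+number of buckets:
+
+```C++
+#include <cstdint>
+
+// Map a key's hash to a bucket index using linear hashing.
+// buckets: number of buckets currently in the key file
+// modulus: smallest power of two >= buckets
+std::uint64_t
+bucket_index(std::uint64_t h, std::uint64_t buckets, std::uint64_t modulus)
+{
+    auto n = h % modulus;
+    if(n >= buckets)
+        n -= modulus / 2; // bucket n does not exist yet; use its ancestor
+    return n;
+}
+```
+
+When the table grows by one bucket, keys in the bucket being split whose hash
+now maps to the new last bucket move there; this is why each key must be
+rehashed during a split.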
+ +An insertion on a full bucket first triggers the "spill" algorithm. + +First, a spill record is appended to the data file, containing header +information followed by the entire bucket record. Then the bucket's size is set +to zero and the offset of the spill record is stored in the bucket. At this +point the insertion may proceed normally, since the bucket is empty. Spilled +buckets in the data file are always full. + +Because every bucket holds the offset of the next spill record in the +data file, the buckets form a linked list. In practice, careful +selection of capacity and load factor will keep the percentage of +buckets with one spill record to a minimum, with no bucket requiring +two spill records. + +The implementation of fetch is straightforward: first the bucket in the +key file is checked, then each spill record in the linked list of +spill records is checked, until the key is found or there are no more +records. As almost all buckets have no spill records, the average +fetch requires one I/O (not including reading the value). + +One complication in the scheme is when a split occurs on a bucket that +has one or more spill records. In this case, both the bucket being split +and the new bucket may overflow. This is handled by performing the +spill algorithm for each overflow that occurs. The new buckets may have +one or more spill records each, depending on the number of keys that +were originally present. + +Because the data file is immutable, a bucket's original spill records +are no longer referenced after the bucket is split. These blocks of data +in the data file are unrecoverable wasted space. Correctly configured +databases can have a typical waste factor of 1%, which is acceptable. +These unused bytes can be removed by visiting each value in the value +file using an off-line process and inserting it into a new database, +then delete the old database and use the new one instead. + +### Recovery + +To provide atomicity and consistency, a log file associated with the +database stores information used to roll back partial commits. + +### Iteration + +Each record in the data file is prefixed with a header identifying +whether it is a value record or a spill record, along with the size of +the record in bytes and a copy of the key if it's a value record, so values can +be iterated by incrementing a byte counter. A key file can be regenerated from +just the data file by iterating the values and performing the key +insertion algorithm. + +### Concurrency + +Locks are never held during disk reads and writes. Fetches are fully +concurrent, while inserts are serialized. Inserts fail on duplicate +keys, and are atomic: they either succeed immediately or fail. +After an insert, the key is immediately visible to subsequent fetches. + +### Formats + +All integer values are stored as big endian. The uint48_t format +consists of 6 bytes. + +#### Key File + +The Key File contains the Header followed by one or more +fixed-length Bucket Records. + +#### Header (104 bytes) + + char[8] Type The characters "nudb.key" + uint16 Version Holds the version number + uint64 UID Unique ID generated on creation + uint64 Appnum Application defined constant + uint16 KeySize Key size in bytes + + uint64 Salt A random seed + uint64 Pepper The salt hashed + uint16 BlockSize Size of a file block in bytes + + uint16 LoadFactor Target fraction in 65536ths + + uint8[56] Reserved Zeroes + uint8[] Reserved Zero-pad to block size + +`Type` identifies the file as belonging to nudb. 
`UID` is +generated randomly when the database is created, and this value +is stored in the data and log files as well - it's used +to determine if files belong to the same database. `Salt` is +generated when the database is created and helps prevent +complexity attacks; it is prepended to the key material +when computing a hash, or used to initialize the state of +the hash function. `Appnum` is an application defined constant +set when the database is created. It can be used for anything, +for example to distinguish between different data formats. + +`Pepper` is computed by hashing `Salt` using a hash function +seeded with the salt. This is used to fingerprint the hash +function used. If a database is opened and the fingerprint +does not match the hash calculation performed using the template +argument provided when constructing the store, an exception +is thrown. + +The header for the key file contains the File Header followed by +the information above. The Capacity is the number of keys per +bucket, and defines the size of a bucket record. The load factor +is the target fraction of bucket occupancy. + +None of the information in the key file header or the data file +header may be changed after the database is created, including +the Appnum. + +#### Bucket Record (fixed-length) + + uint16 Count Number of keys in this bucket + uint48 Spill Offset of the next spill record or 0 + BucketEntry[] Entries The bucket entries + +#### Bucket Entry + + uint48 Offset Offset in data file of the data + uint48 Size The size of the value in bytes + uint48 Hash The hash of the key + +### Data File + +The Data File contains the Header followed by zero or more +variable-length Value Records and Spill Records. + +#### Header (92 bytes) + + char[8] Type The characters "nudb.dat" + uint16 Version Holds the version number + uint64 UID Unique ID generated on creation + uint64 Appnum Application defined constant + uint16 KeySize Key size in bytes + uint8[64] (reserved) Zeroes + +UID contains the same value as the salt in the corresponding key +file. This is placed in the data file so that key and value files +belonging to the same database can be identified. + +#### Data Record (variable-length) + + uint48 Size Size of the value in bytes + uint8[KeySize] Key The key. + uint8[Size] Data The value data. + +#### Spill Record (fixed-length) + + uint48 Zero All zero, identifies a spill record + uint16 Size Bytes in spill bucket (for skipping) + Bucket SpillBucket Bucket Record + +#### Log File + +The Log file contains the Header followed by zero or more fixed size +log records. Each log record contains a snapshot of a bucket. When a +database is not closed cleanly, the recovery process applies the log +records to the key file, overwriting data that may be only partially +updated with known good information. After the log records are applied, +the data and key files are truncated to the last known good size. + +#### Header (62 bytes) + + char[8] Type The characters "nudb.log" + uint16 Version Holds the version number + uint64 UID Unique ID generated on creation + uint64 Appnum Application defined constant + uint16 KeySize Key size in bytes + + uint64 Salt A random seed. + uint64 Pepper The salt hashed + uint16 BlockSize Size of a file block in bytes + + uint64 KeyFileSize Size of key file. + uint64 DataFileSize Size of data file. + +#### Log Record + + uint64_t Index Bucket index (0-based) + Bucket Bucket Compact Bucket record + +Compact buckets include only Size entries. 
These are primarily +used to minimize the volume of writes to the log file. + +## License + +Distributed under the Boost Software License, Version 1.0. +(See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at +http://www.boost.org/LICENSE_1_0.txt) + +## Contact + +Please report issues or questions here: +https://github.com/vinniefalco/NuDB/issues diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000000..432f762120 --- /dev/null +++ b/TODO.txt @@ -0,0 +1,2 @@ +* Support 32/64-bit + -- xxhasher specialization for 4/8 byte size-t diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt new file mode 100644 index 0000000000..ab96d1df1b --- /dev/null +++ b/bench/CMakeLists.txt @@ -0,0 +1,363 @@ +cmake_minimum_required (VERSION 3.2) + +GroupSources(bench /) +GroupSources(include/nudb nudb) +GroupSources(extras/nudb extras) +GroupSources(extras/beast/include/beast beast) +GroupSources(extras/beast/extras/beast beast) +GroupSources(extras/rocksdb rocksdb) + +if (WIN32) + set(CMAKE_CONFIGURATION_TYPES Release) +endif () + +project (bench) + +############################################################ + +macro(append_flags name) + foreach (arg ${ARGN}) + set(${name} "${${name}} ${arg}") + endforeach() +endmacro() + +############################################################ + +set (DEPS "${PROJECT_SOURCE_DIR}/../extras") + +set (DEFAULT_WITH_ROCKSDB true) + +set (WITH_ROCKSDB ${DEFAULT_WITH_ROCKSDB} CACHE BOOL "Runs benchmarks against rocksdb") + +if (WITH_ROCKSDB) + set(ROCKSDB ${DEPS}/rocksdb) + + set(ROCKSDB_SRC + ${ROCKSDB}/db/auto_roll_logger.cc + ${ROCKSDB}/db/builder.cc + ${ROCKSDB}/db/c.cc + ${ROCKSDB}/db/column_family.cc + ${ROCKSDB}/db/compacted_db_impl.cc + ${ROCKSDB}/db/compaction.cc + ${ROCKSDB}/db/compaction_iterator.cc + ${ROCKSDB}/db/compaction_job.cc + ${ROCKSDB}/db/compaction_picker.cc + ${ROCKSDB}/db/convenience.cc + ${ROCKSDB}/db/db_filesnapshot.cc + ${ROCKSDB}/db/dbformat.cc + ${ROCKSDB}/db/db_impl.cc + ${ROCKSDB}/db/db_impl_debug.cc + ${ROCKSDB}/db/db_impl_readonly.cc + ${ROCKSDB}/db/db_impl_experimental.cc + ${ROCKSDB}/db/db_impl_add_file.cc + ${ROCKSDB}/db/db_info_dumper.cc + ${ROCKSDB}/db/db_iter.cc + ${ROCKSDB}/db/experimental.cc + ${ROCKSDB}/db/event_helpers.cc + ${ROCKSDB}/db/file_indexer.cc + ${ROCKSDB}/db/filename.cc + ${ROCKSDB}/db/flush_job.cc + ${ROCKSDB}/db/flush_scheduler.cc + ${ROCKSDB}/db/forward_iterator.cc + ${ROCKSDB}/db/internal_stats.cc + ${ROCKSDB}/db/log_reader.cc + ${ROCKSDB}/db/log_writer.cc + ${ROCKSDB}/db/managed_iterator.cc + ${ROCKSDB}/db/memtable_allocator.cc + ${ROCKSDB}/db/memtable.cc + ${ROCKSDB}/db/memtable_list.cc + ${ROCKSDB}/db/merge_helper.cc + ${ROCKSDB}/db/merge_operator.cc + ${ROCKSDB}/db/repair.cc + ${ROCKSDB}/db/snapshot_impl.cc + ${ROCKSDB}/db/table_cache.cc + ${ROCKSDB}/db/table_properties_collector.cc + ${ROCKSDB}/db/transaction_log_impl.cc + ${ROCKSDB}/db/version_builder.cc + ${ROCKSDB}/db/version_edit.cc + ${ROCKSDB}/db/version_set.cc + ${ROCKSDB}/db/wal_manager.cc + ${ROCKSDB}/db/write_batch.cc + ${ROCKSDB}/db/write_batch_base.cc + ${ROCKSDB}/db/write_controller.cc + ${ROCKSDB}/db/write_thread.cc + ${ROCKSDB}/db/xfunc_test_points.cc + ${ROCKSDB}/memtable/hash_cuckoo_rep.cc + ${ROCKSDB}/memtable/hash_linklist_rep.cc + ${ROCKSDB}/memtable/hash_skiplist_rep.cc + ${ROCKSDB}/memtable/skiplistrep.cc + ${ROCKSDB}/memtable/vectorrep.cc + ${ROCKSDB}/port/stack_trace.cc + ${ROCKSDB}/table/adaptive_table_factory.cc + ${ROCKSDB}/table/block_based_filter_block.cc + ${ROCKSDB}/table/block_based_table_builder.cc + 
${ROCKSDB}/table/block_based_table_factory.cc + ${ROCKSDB}/table/block_based_table_reader.cc + ${ROCKSDB}/table/block_builder.cc + ${ROCKSDB}/table/block.cc + ${ROCKSDB}/table/block_prefix_index.cc + ${ROCKSDB}/table/bloom_block.cc + ${ROCKSDB}/table/cuckoo_table_builder.cc + ${ROCKSDB}/table/cuckoo_table_factory.cc + ${ROCKSDB}/table/cuckoo_table_reader.cc + ${ROCKSDB}/table/flush_block_policy.cc + ${ROCKSDB}/table/format.cc + ${ROCKSDB}/table/full_filter_block.cc + ${ROCKSDB}/table/get_context.cc + ${ROCKSDB}/table/iterator.cc + ${ROCKSDB}/table/merger.cc + ${ROCKSDB}/table/meta_blocks.cc + ${ROCKSDB}/table/sst_file_writer.cc + ${ROCKSDB}/table/plain_table_builder.cc + ${ROCKSDB}/table/plain_table_factory.cc + ${ROCKSDB}/table/plain_table_index.cc + ${ROCKSDB}/table/plain_table_key_coding.cc + ${ROCKSDB}/table/plain_table_reader.cc + ${ROCKSDB}/table/persistent_cache_helper.cc + ${ROCKSDB}/table/table_properties.cc + ${ROCKSDB}/table/two_level_iterator.cc + ${ROCKSDB}/tools/dump/db_dump_tool.cc + ${ROCKSDB}/util/arena.cc + ${ROCKSDB}/util/bloom.cc + # ${ROCKSDB}/util/build_version.cc + ${ROCKSDB}/util/coding.cc + ${ROCKSDB}/util/comparator.cc + ${ROCKSDB}/util/compaction_job_stats_impl.cc + ${ROCKSDB}/util/concurrent_arena.cc + ${ROCKSDB}/util/crc32c.cc + ${ROCKSDB}/util/delete_scheduler.cc + ${ROCKSDB}/util/dynamic_bloom.cc + ${ROCKSDB}/util/env.cc + ${ROCKSDB}/util/env_chroot.cc + ${ROCKSDB}/util/env_hdfs.cc + ${ROCKSDB}/util/file_util.cc + ${ROCKSDB}/util/file_reader_writer.cc + ${ROCKSDB}/util/filter_policy.cc + ${ROCKSDB}/util/hash.cc + ${ROCKSDB}/util/histogram.cc + ${ROCKSDB}/util/histogram_windowing.cc + ${ROCKSDB}/util/instrumented_mutex.cc + ${ROCKSDB}/util/iostats_context.cc + ${ROCKSDB}/util/lru_cache.cc + ${ROCKSDB}/util/threadpool.cc + ${ROCKSDB}/util/transaction_test_util.cc + ${ROCKSDB}/util/sharded_cache.cc + ${ROCKSDB}/util/sst_file_manager_impl.cc + ${ROCKSDB}/utilities/backupable/backupable_db.cc + ${ROCKSDB}/utilities/blob_db/blob_db.cc + ${ROCKSDB}/utilities/convenience/info_log_finder.cc + ${ROCKSDB}/utilities/checkpoint/checkpoint.cc + ${ROCKSDB}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + ${ROCKSDB}/utilities/document/document_db.cc + ${ROCKSDB}/utilities/document/json_document_builder.cc + ${ROCKSDB}/utilities/document/json_document.cc + ${ROCKSDB}/utilities/env_mirror.cc + ${ROCKSDB}/utilities/env_registry.cc + ${ROCKSDB}/utilities/flashcache/flashcache.cc + ${ROCKSDB}/utilities/geodb/geodb_impl.cc + ${ROCKSDB}/utilities/leveldb_options/leveldb_options.cc + ${ROCKSDB}/utilities/memory/memory_util.cc + ${ROCKSDB}/utilities/merge_operators/put.cc + ${ROCKSDB}/utilities/merge_operators/max.cc + ${ROCKSDB}/utilities/merge_operators/string_append/stringappend2.cc + ${ROCKSDB}/utilities/merge_operators/string_append/stringappend.cc + ${ROCKSDB}/utilities/merge_operators/uint64add.cc + ${ROCKSDB}/utilities/option_change_migration/option_change_migration.cc + ${ROCKSDB}/utilities/options/options_util.cc + ${ROCKSDB}/utilities/persistent_cache/persistent_cache_tier.cc + ${ROCKSDB}/utilities/persistent_cache/volatile_tier_impl.cc + ${ROCKSDB}/utilities/persistent_cache/block_cache_tier_file.cc + ${ROCKSDB}/utilities/persistent_cache/block_cache_tier_metadata.cc + ${ROCKSDB}/utilities/persistent_cache/block_cache_tier.cc + ${ROCKSDB}/utilities/redis/redis_lists.cc + ${ROCKSDB}/utilities/simulator_cache/sim_cache.cc + ${ROCKSDB}/utilities/spatialdb/spatial_db.cc + ${ROCKSDB}/utilities/table_properties_collectors/compact_on_deletion_collector.cc + 
${ROCKSDB}/utilities/transactions/optimistic_transaction_impl.cc + ${ROCKSDB}/utilities/transactions/optimistic_transaction_db_impl.cc + ${ROCKSDB}/utilities/transactions/transaction_base.cc + ${ROCKSDB}/utilities/transactions/transaction_db_impl.cc + ${ROCKSDB}/utilities/transactions/transaction_db_mutex_impl.cc + ${ROCKSDB}/utilities/transactions/transaction_lock_mgr.cc + ${ROCKSDB}/utilities/transactions/transaction_impl.cc + ${ROCKSDB}/utilities/transactions/transaction_util.cc + ${ROCKSDB}/utilities/ttl/db_ttl_impl.cc + ${ROCKSDB}/utilities/date_tiered/date_tiered_db_impl.cc + ${ROCKSDB}/utilities/write_batch_with_index/write_batch_with_index.cc + ${ROCKSDB}/utilities/write_batch_with_index/write_batch_with_index_internal.cc + ${ROCKSDB}/util/event_logger.cc + ${ROCKSDB}/util/log_buffer.cc + ${ROCKSDB}/util/logging.cc + ${ROCKSDB}/util/memenv.cc + ${ROCKSDB}/util/murmurhash.cc + ${ROCKSDB}/util/mutable_cf_options.cc + ${ROCKSDB}/util/options.cc + ${ROCKSDB}/util/options_helper.cc + ${ROCKSDB}/util/options_parser.cc + ${ROCKSDB}/util/options_sanity_check.cc + ${ROCKSDB}/util/perf_context.cc + ${ROCKSDB}/util/perf_level.cc + ${ROCKSDB}/util/random.cc + ${ROCKSDB}/util/rate_limiter.cc + ${ROCKSDB}/util/slice.cc + ${ROCKSDB}/util/statistics.cc + ${ROCKSDB}/util/status.cc + ${ROCKSDB}/util/status_message.cc + ${ROCKSDB}/util/string_util.cc + ${ROCKSDB}/util/sync_point.cc + ${ROCKSDB}/util/thread_local.cc + ${ROCKSDB}/util/thread_status_impl.cc + ${ROCKSDB}/util/thread_status_updater.cc + ${ROCKSDB}/util/thread_status_updater_debug.cc + ${ROCKSDB}/util/thread_status_util.cc + ${ROCKSDB}/util/thread_status_util_debug.cc + ${ROCKSDB}/util/xfunc.cc + ${ROCKSDB}/util/xxhash.cc + ) + + if (WIN32) + add_definitions( + -DOS_WIN + ) + set(ROCKSDB_SRC ${ROCKSDB_SRC} + ${ROCKSDB}/port/win/io_win.cc + ${ROCKSDB}/port/win/env_default.cc + ${ROCKSDB}/port/win/env_win.cc + ${ROCKSDB}/port/win/port_win.cc + ${ROCKSDB}/port/win/win_logger.cc + ${ROCKSDB}/port/win/xpress_win.cc + ) + else () + #if (${CMAKE_SYSTEM_NAME} MATCHES Linux) + add_definitions( + -DOS_LINUX + -DROCKSDB_PLATFORM_POSIX + -DROCKSDB_LIB_IO_POSIX + ) + set(ROCKSDB_SRC ${ROCKSDB_SRC} + ${ROCKSDB}/util/io_posix.cc + ${ROCKSDB}/util/env_posix.cc + ${ROCKSDB}/port/port_posix.cc + ) + endif () + + include_directories( + SYSTEM + ${ROCKSDB} + ${ROCKSDB}/include + ${ROCKSDB}/third-party/gtest-1.7.0/fused-src + ) + + add_definitions( + -DWITH_ROCKSDB + ) +endif(WITH_ROCKSDB) + +if (NOT WIN32) + append_flags(CMAKE_CXX_FLAGS -std=c++11) +endif () + +if(WIN32) + add_compile_options( + /bigobj # Increase object file max size + /EHa # ExceptionHandling all + /fp:precise # Floating point behavior + /Gd # __cdecl calling convention + /Gm- # Minimal rebuild: disabled + /GR # Enable RTTI + /Gy- # Function level linking: disabled + /FS + /MP # Multiprocessor compilation + /openmp- # pragma omp: disabled + /Zc:forScope # Language extension: for scope + /Zi # Generate complete debug info + /errorReport:none # No error reporting to Internet + /nologo # Suppress login banner + /W3 # Warning level 3 + /WX- # Disable warnings as errors + /wd"4018" + /wd"4244" + /wd"4267" + /wd"4800" # Disable C4800(int to bool performance) + /wd"4503" # Decorated name length exceeded, name was truncated + ) + + add_definitions( + -D_WIN32_WINNT=0x6000 + -D_ITERATOR_DEBUG_LEVEL=0 + -D_SCL_SECURE_NO_WARNINGS + -D_CRT_SECURE_NO_WARNINGS + -DWIN32_CONSOLE + -DNOMINMAX) + + append_flags(CMAKE_EXE_LINKER_FLAGS + /DEBUG + /DYNAMICBASE + /ERRORREPORT:NONE + /MACHINE:X64 + /MANIFEST + 
+    /nologo
+    /NXCOMPAT
+    /SUBSYSTEM:CONSOLE
+    /TLBID:1)
+
+  # There seems to be an issue using generator expressions with multiple values,
+  # split the expression
+  # /GS Buffers security check: enable
+  add_compile_options($<$<CONFIG:Debug>:/GS>)
+  # /MTd Language: Multi-threaded Debug CRT
+  add_compile_options($<$<CONFIG:Debug>:/MTd>)
+  # /Od Optimization: Disabled
+  add_compile_options($<$<CONFIG:Debug>:/Od>)
+  # /RTC1 Run-time error checks:
+  add_compile_options($<$<CONFIG:Debug>:/RTC1>)
+
+  # Generator expressions are not supported in add_definitions, use set_property instead
+  set_property(
+    DIRECTORY
+    APPEND
+    PROPERTY COMPILE_DEFINITIONS
+    $<$<CONFIG:Debug>:_CRTDBG_MAP_ALLOC>)
+
+  # /MT Language: Multi-threaded CRT
+  add_compile_options($<$<CONFIG:Release>:/MT>)
+  # /Ox Optimization: Full
+  add_compile_options($<$<CONFIG:Release>:/Ox>)
+
+endif (WIN32)
+
+
+include_directories(
+  ../include
+  ../test
+  .
+  ${DEPS}
+  )
+
+add_executable(bench
+  ${BEAST_INCLUDES}
+  ${EXTRAS_INCLUDES}
+  ${NUDB_INCLUDES}
+  ${ROCKSDB_SRC}
+  bench.cpp
+  )
+
+target_link_libraries(bench
+  ${Boost_LIBRARIES}
+  )
+
+if (WITH_ROCKSDB)
+  if (WIN32)
+    target_link_libraries(bench
+      Rpcrt4
+      )
+  else ()
+    target_link_libraries(bench
+      rt
+      Threads::Threads
+      z
+      snappy
+      )
+  endif ()
+endif ()
+
diff --git a/bench/Jamfile b/bench/Jamfile
new file mode 100644
index 0000000000..42e863e120
--- /dev/null
+++ b/bench/Jamfile
@@ -0,0 +1,226 @@
+#
+# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+#
+# Distributed under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#
+
+# Properties:
+#
+#   with-rocksdb=no|yes     Select building with rocksdb support (not supported on Windows)
+
+import feature : feature ;
+
+path-constant ROCKSDB : ../extras/rocksdb ;
+
+feature with-rocksdb : no yes : propagated optional ;
+
+path-constant ROCKSDB_SRC :
+    $(ROCKSDB)/db/auto_roll_logger.cc
+    $(ROCKSDB)/db/builder.cc
+    $(ROCKSDB)/db/c.cc
+    $(ROCKSDB)/db/column_family.cc
+    $(ROCKSDB)/db/compacted_db_impl.cc
+    $(ROCKSDB)/db/compaction.cc
+    $(ROCKSDB)/db/compaction_iterator.cc
+    $(ROCKSDB)/db/compaction_job.cc
+    $(ROCKSDB)/db/compaction_picker.cc
+    $(ROCKSDB)/db/convenience.cc
+    $(ROCKSDB)/db/db_filesnapshot.cc
+    $(ROCKSDB)/db/dbformat.cc
+    $(ROCKSDB)/db/db_impl.cc
+    $(ROCKSDB)/db/db_impl_debug.cc
+    $(ROCKSDB)/db/db_impl_readonly.cc
+    $(ROCKSDB)/db/db_impl_experimental.cc
+    $(ROCKSDB)/db/db_impl_add_file.cc
+    $(ROCKSDB)/db/db_info_dumper.cc
+    $(ROCKSDB)/db/db_iter.cc
+    $(ROCKSDB)/db/experimental.cc
+    $(ROCKSDB)/db/event_helpers.cc
+    $(ROCKSDB)/db/file_indexer.cc
+    $(ROCKSDB)/db/filename.cc
+    $(ROCKSDB)/db/flush_job.cc
+    $(ROCKSDB)/db/flush_scheduler.cc
+    $(ROCKSDB)/db/forward_iterator.cc
+    $(ROCKSDB)/db/internal_stats.cc
+    $(ROCKSDB)/db/log_reader.cc
+    $(ROCKSDB)/db/log_writer.cc
+    $(ROCKSDB)/db/managed_iterator.cc
+    $(ROCKSDB)/db/memtable_allocator.cc
+    $(ROCKSDB)/db/memtable.cc
+    $(ROCKSDB)/db/memtable_list.cc
+    $(ROCKSDB)/db/merge_helper.cc
+    $(ROCKSDB)/db/merge_operator.cc
+    $(ROCKSDB)/db/repair.cc
+    $(ROCKSDB)/db/snapshot_impl.cc
+    $(ROCKSDB)/db/table_cache.cc
+    $(ROCKSDB)/db/table_properties_collector.cc
+    $(ROCKSDB)/db/transaction_log_impl.cc
+    $(ROCKSDB)/db/version_builder.cc
+    $(ROCKSDB)/db/version_edit.cc
+    $(ROCKSDB)/db/version_set.cc
+    $(ROCKSDB)/db/wal_manager.cc
+    $(ROCKSDB)/db/write_batch.cc
+    $(ROCKSDB)/db/write_batch_base.cc
+    $(ROCKSDB)/db/write_controller.cc
+    $(ROCKSDB)/db/write_thread.cc
+    $(ROCKSDB)/db/xfunc_test_points.cc
+    $(ROCKSDB)/memtable/hash_cuckoo_rep.cc
+    $(ROCKSDB)/memtable/hash_linklist_rep.cc
$(ROCKSDB)/memtable/hash_skiplist_rep.cc + $(ROCKSDB)/memtable/skiplistrep.cc + $(ROCKSDB)/memtable/vectorrep.cc + $(ROCKSDB)/port/stack_trace.cc + $(ROCKSDB)/table/adaptive_table_factory.cc + $(ROCKSDB)/table/block_based_filter_block.cc + $(ROCKSDB)/table/block_based_table_builder.cc + $(ROCKSDB)/table/block_based_table_factory.cc + $(ROCKSDB)/table/block_based_table_reader.cc + $(ROCKSDB)/table/block_builder.cc + $(ROCKSDB)/table/block.cc + $(ROCKSDB)/table/block_prefix_index.cc + $(ROCKSDB)/table/bloom_block.cc + $(ROCKSDB)/table/cuckoo_table_builder.cc + $(ROCKSDB)/table/cuckoo_table_factory.cc + $(ROCKSDB)/table/cuckoo_table_reader.cc + $(ROCKSDB)/table/flush_block_policy.cc + $(ROCKSDB)/table/format.cc + $(ROCKSDB)/table/full_filter_block.cc + $(ROCKSDB)/table/get_context.cc + $(ROCKSDB)/table/iterator.cc + $(ROCKSDB)/table/merger.cc + $(ROCKSDB)/table/meta_blocks.cc + $(ROCKSDB)/table/sst_file_writer.cc + $(ROCKSDB)/table/plain_table_builder.cc + $(ROCKSDB)/table/plain_table_factory.cc + $(ROCKSDB)/table/plain_table_index.cc + $(ROCKSDB)/table/plain_table_key_coding.cc + $(ROCKSDB)/table/plain_table_reader.cc + $(ROCKSDB)/table/persistent_cache_helper.cc + $(ROCKSDB)/table/table_properties.cc + $(ROCKSDB)/table/two_level_iterator.cc + $(ROCKSDB)/tools/dump/db_dump_tool.cc + $(ROCKSDB)/util/arena.cc + $(ROCKSDB)/util/bloom.cc + # $(ROCKSDB)/util/build_version.cc + $(ROCKSDB)/util/coding.cc + $(ROCKSDB)/util/comparator.cc + $(ROCKSDB)/util/compaction_job_stats_impl.cc + $(ROCKSDB)/util/concurrent_arena.cc + $(ROCKSDB)/util/crc32c.cc + $(ROCKSDB)/util/delete_scheduler.cc + $(ROCKSDB)/util/dynamic_bloom.cc + $(ROCKSDB)/util/env.cc + $(ROCKSDB)/util/env_chroot.cc + $(ROCKSDB)/util/env_hdfs.cc + $(ROCKSDB)/util/file_util.cc + $(ROCKSDB)/util/file_reader_writer.cc + $(ROCKSDB)/util/filter_policy.cc + $(ROCKSDB)/util/hash.cc + $(ROCKSDB)/util/histogram.cc + $(ROCKSDB)/util/histogram_windowing.cc + $(ROCKSDB)/util/instrumented_mutex.cc + $(ROCKSDB)/util/iostats_context.cc + $(ROCKSDB)/util/lru_cache.cc + $(ROCKSDB)/util/threadpool.cc + $(ROCKSDB)/util/transaction_test_util.cc + $(ROCKSDB)/util/sharded_cache.cc + $(ROCKSDB)/util/sst_file_manager_impl.cc + $(ROCKSDB)/utilities/backupable/backupable_db.cc + $(ROCKSDB)/utilities/blob_db/blob_db.cc + $(ROCKSDB)/utilities/convenience/info_log_finder.cc + $(ROCKSDB)/utilities/checkpoint/checkpoint.cc + $(ROCKSDB)/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + $(ROCKSDB)/utilities/document/document_db.cc + $(ROCKSDB)/utilities/document/json_document_builder.cc + $(ROCKSDB)/utilities/document/json_document.cc + $(ROCKSDB)/utilities/env_mirror.cc + $(ROCKSDB)/utilities/env_registry.cc + $(ROCKSDB)/utilities/flashcache/flashcache.cc + $(ROCKSDB)/utilities/geodb/geodb_impl.cc + $(ROCKSDB)/utilities/leveldb_options/leveldb_options.cc + $(ROCKSDB)/utilities/memory/memory_util.cc + $(ROCKSDB)/utilities/merge_operators/put.cc + $(ROCKSDB)/utilities/merge_operators/max.cc + $(ROCKSDB)/utilities/merge_operators/string_append/stringappend2.cc + $(ROCKSDB)/utilities/merge_operators/string_append/stringappend.cc + $(ROCKSDB)/utilities/merge_operators/uint64add.cc + $(ROCKSDB)/utilities/option_change_migration/option_change_migration.cc + $(ROCKSDB)/utilities/options/options_util.cc + $(ROCKSDB)/utilities/persistent_cache/persistent_cache_tier.cc + $(ROCKSDB)/utilities/persistent_cache/volatile_tier_impl.cc + $(ROCKSDB)/utilities/persistent_cache/block_cache_tier_file.cc + $(ROCKSDB)/utilities/persistent_cache/block_cache_tier_metadata.cc + 
+    $(ROCKSDB)/utilities/persistent_cache/block_cache_tier.cc
+    $(ROCKSDB)/utilities/redis/redis_lists.cc
+    $(ROCKSDB)/utilities/simulator_cache/sim_cache.cc
+    $(ROCKSDB)/utilities/spatialdb/spatial_db.cc
+    $(ROCKSDB)/utilities/table_properties_collectors/compact_on_deletion_collector.cc
+    $(ROCKSDB)/utilities/transactions/optimistic_transaction_impl.cc
+    $(ROCKSDB)/utilities/transactions/optimistic_transaction_db_impl.cc
+    $(ROCKSDB)/utilities/transactions/transaction_base.cc
+    $(ROCKSDB)/utilities/transactions/transaction_db_impl.cc
+    $(ROCKSDB)/utilities/transactions/transaction_db_mutex_impl.cc
+    $(ROCKSDB)/utilities/transactions/transaction_lock_mgr.cc
+    $(ROCKSDB)/utilities/transactions/transaction_impl.cc
+    $(ROCKSDB)/utilities/transactions/transaction_util.cc
+    $(ROCKSDB)/utilities/ttl/db_ttl_impl.cc
+    $(ROCKSDB)/utilities/date_tiered/date_tiered_db_impl.cc
+    $(ROCKSDB)/utilities/write_batch_with_index/write_batch_with_index.cc
+    $(ROCKSDB)/utilities/write_batch_with_index/write_batch_with_index_internal.cc
+    $(ROCKSDB)/util/event_logger.cc
+    $(ROCKSDB)/util/log_buffer.cc
+    $(ROCKSDB)/util/logging.cc
+    $(ROCKSDB)/util/memenv.cc
+    $(ROCKSDB)/util/murmurhash.cc
+    $(ROCKSDB)/util/mutable_cf_options.cc
+    $(ROCKSDB)/util/options.cc
+    $(ROCKSDB)/util/options_helper.cc
+    $(ROCKSDB)/util/options_parser.cc
+    $(ROCKSDB)/util/options_sanity_check.cc
+    $(ROCKSDB)/util/perf_context.cc
+    $(ROCKSDB)/util/perf_level.cc
+    $(ROCKSDB)/util/random.cc
+    $(ROCKSDB)/util/rate_limiter.cc
+    $(ROCKSDB)/util/slice.cc
+    $(ROCKSDB)/util/statistics.cc
+    $(ROCKSDB)/util/status.cc
+    $(ROCKSDB)/util/status_message.cc
+    $(ROCKSDB)/util/string_util.cc
+    $(ROCKSDB)/util/sync_point.cc
+    $(ROCKSDB)/util/thread_local.cc
+    $(ROCKSDB)/util/thread_status_impl.cc
+    $(ROCKSDB)/util/thread_status_updater.cc
+    $(ROCKSDB)/util/thread_status_updater_debug.cc
+    $(ROCKSDB)/util/thread_status_util.cc
+    $(ROCKSDB)/util/thread_status_util_debug.cc
+    $(ROCKSDB)/util/xfunc.cc
+    $(ROCKSDB)/util/xxhash.cc
+    ;
+
+path-constant ROCKSDB_POSIX_SRC :
+    # Posix only
+    $(ROCKSDB)/util/io_posix.cc
+    $(ROCKSDB)/util/env_posix.cc
+    $(ROCKSDB)/port/port_posix.cc
+    ;
+
+project bench
+    : requirements
+      yes:$(ROCKSDB_SRC)
+      yes:WITH_ROCKSDB
+      yes:$(ROCKSDB)
+      yes:$(ROCKSDB)/include
+      yes:$(ROCKSDB)/third-party/gtest-1.7.0/fused-src
+      # Posix only
+      LINUX,yes:ROCKSDB_PLATFORM_POSIX
+      LINUX,yes:ROCKSDB_LIB_IO_POSIX
+      # LINUX Only
+      LINUX,yes:OS_LINUX
+      LINUX,yes:/boost/thread//boost_thread
+      LINUX,yes:$(ROCKSDB_POSIX_SRC)
+    ;
+
+exe bench :
+    bench.cpp
+    ;
\ No newline at end of file
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 0000000000..94d0dbe9b8
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,102 @@
+# Benchmarks for NuDB
+
+These benchmarks time two operations:
+
+1. The time to insert N values into a database. The inserted keys and values are
+   pseudo-randomly generated. The random number generator is always seeded with
+   the same value for each run, so the same values are always inserted.
+2. The time to fetch M existing values from a database with N values. The order
+   in which the keys are fetched is pseudo-randomly generated. The random number
+   generator is always seeded with the same value on each run, so the keys are
+   always looked up in the same order.
+
+At the end of a run, the program outputs a table of operations per second. The
+tables have a row for each database size, and a column for each database (in
+cases where NuDB is compared against other databases).
+A cell in the table is the number of operations per second for that trial. For
+example, in the table below NuDB had 337300 Ops/Sec when fetching from an
+existing database with 10,000,000 values. This is a summary report, and it only
+reports samples at database sizes that are powers of ten.
+
+A sample output:
+
+```
+insert (per second)
+ num_db_keys          nudb       rocksdb
+      100000        406598        231937
+     1000000        374330        258519
+    10000000            NA            NA
+
+fetch (per second)
+ num_db_keys          nudb       rocksdb
+      100000        325228        697158
+     1000000        333443         34557
+    10000000        337300         20835
+```
+
+In addition to the summary report, the benchmark can collect detailed samples.
+The `--raw_out` command line option is used to specify a file to output the raw
+samples. The Python 3 script `plot_bench.py` may be used to plot the result. For
+example, if bench was run as `bench --raw_out=samples.txt`, then the Python
+script can be run as `python plot_bench.py -i samples.txt`. The Python script
+requires the `pandas` and `seaborn` packages (Anaconda Python is a good way to
+install and manage Python if these packages are not already
+installed: [Anaconda download](https://www.continuum.io/downloads)).
+
+# Building
+
+## Building with CMake
+
+Note: Building with RocksDB is currently not supported on Windows.
+
+1. The benchmark requires Boost. If building with RocksDB, it also requires zlib
+   and snappy. These are popular libraries and should be available through the
+   package manager.
+2. The benchmark and test programs require some submodules that are not
+   installed by default. Get these submodules by running:
+   `git submodule update --init`
+3. From the main nudb directory, create a directory for the build and change to
+   that directory: `mkdir bench_build; cd bench_build`
+4. Generate a project file or makefile.
+   * If building on Linux, generate a makefile. If building with RocksDB
+     support, use: `cmake -DCMAKE_BUILD_TYPE=Release ../bench`. If building
+     without RocksDB support, use:
+     `cmake -DCMAKE_BUILD_TYPE=Release ../bench -DWITH_ROCKSDB=false`.
+     Replace `../bench` with the path to the `bench` directory if the build
+     directory is not in the suggested location.
+   * If building on Windows, generate a project file. The CMake GUI program is
+     useful for this. Use the `bench` directory as the `source` directory and
+     the `bench_build` directory as the `binaries` directory. Press the `Add
+     Entry` button and add a `BOOST_ROOT` variable that points to the `boost`
+     directory. Hit `configure`. A dialog box will pop up. Select the generator
+     for Win64. Select `generate` to generate the Visual Studio project.
+5. Compile the program.
+   * If building on Linux, run: `make`
+   * If building on Windows, open the project file generated above in Visual
+     Studio.
+
+## Test the build
+
+Try running the benchmark with a small database: `./bench --num_batches=10`. A
+report similar to the sample above should appear after a few seconds.
+
+# Command Line Options
+
+* `--batch_size arg` : Number of elements to insert or fetch per batch. If not
+  specified, it defaults to 20000.
+* `--num_batches arg` : Number of batches to run. If not specified, it defaults
+  to 500.
+* `--db_dir arg` : Directory to place the databases. If not specified, it
+  defaults to boost::filesystem::temp_directory_path (likely `/tmp` on Linux).
+* `--raw_out arg` : File to record the raw measurements. This is useful for
+  plotting. If not specified, the raw measurements will not be output.
+* `--dbs arg` : Databases to run the benchmark on. Currently, only `nudb` and
+  `rocksdb` are supported.
+  Building with `rocksdb` is optional on Linux, and
+  only `nudb` is supported on Windows. The argument may be a list. If `dbs` is
+  not specified, it defaults to all the databases the build supports (either
+  `nudb` or `nudb rocksdb`).
+* `--key_size arg` : nudb key size. If not specified, the default is 64.
+* `--block_size arg` : nudb block size. This is an advanced argument. If not
+  specified, the default is 4096.
+* `--load_factor arg` : nudb load factor. This is an advanced argument. If not
+  specified, the default is 0.5.
+
diff --git a/bench/bench.cpp b/bench/bench.cpp
new file mode 100644
index 0000000000..ed2c79655d
--- /dev/null
+++ b/bench/bench.cpp
@@ -0,0 +1,535 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#include
+#include
+#include
+
+#if WITH_ROCKSDB
+#include "rocksdb/db.h"
+
+char const* rocksdb_build_git_sha="Benchmark Dummy Sha";
+char const* rocksdb_build_compile_date="Benchmark Dummy Compile Date";
+#endif
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nudb {
+namespace test {
+
+beast::unit_test::dstream dout{std::cout};
+beast::unit_test::dstream derr{std::cerr};
+
+struct stop_watch
+{
+    using clock = std::chrono::steady_clock;
+    using time_point = clock::time_point;
+    time_point start_;
+
+    stop_watch() : start_(clock::now())
+    {
+    }
+
+    std::chrono::duration<double>
+    elapsed() const
+    {
+        return std::chrono::duration_cast<std::chrono::duration<double>>(
+            clock::now() - start_);
+    }
+};
+
+class bench_progress
+{
+    progress p_;
+    std::uint64_t const total_ = 0;
+    std::uint64_t batch_start_ = 0;
+
+public:
+    bench_progress(std::ostream& os, std::uint64_t total)
+        : p_(os), total_(total)
+    {
+        p_(0, total);
+    }
+    void
+    update(std::uint64_t batch_amount)
+    {
+        p_(batch_start_ + batch_amount, total_);
+        batch_start_ += batch_amount;
+    }
+};
+
+class gen_key_value
+{
+    test_store& ts_;
+    std::uint64_t cur_;
+
+public:
+    gen_key_value(test_store& ts, std::uint64_t cur)
+        : ts_(ts),
+          cur_(cur)
+    {
+    }
+    item_type
+    operator()()
+    {
+        return ts_[cur_++];
+    }
+};
+
+class rand_existing_key
+{
+    xor_shift_engine rng_;
+    std::uniform_int_distribution<std::uint64_t> dist_;
+    test_store& ts_;
+
+public:
+    rand_existing_key(test_store& ts,
+        std::uint64_t max_index,
+        std::uint64_t seed = 1337)
+        : dist_(0, max_index),
+          ts_(ts)
+    {
+        rng_.seed(seed);
+    }
+    item_type
+    operator()()
+    {
+        return ts_[dist_(rng_)];
+    }
+};
+
+template <class Generator, class F>
+std::chrono::duration<double>
+time_block(std::uint64_t n, Generator&& g, F&& f)
+{
+    stop_watch timer;
+    for (std::uint64_t i = 0; i < n; ++i)
+    {
+        f(g());
+    }
+    return timer.elapsed();
+}
+
+template <class Inserter, class Fetcher, class AddSample, class PreFetchHook>
+void
+time_fetch_insert_interleaved(
+    std::uint64_t batch_size,
+    std::uint64_t num_batches,
+    test_store& ts,
+    Inserter&& inserter,
+    Fetcher&& fetcher,
+    AddSample&& add_sample,
+    PreFetchHook&& pre_fetch_hook,
+    bench_progress& progress)
+{
+    std::uint64_t next_insert_index = 0;
+    for (auto b = 0ull; b < num_batches; ++b)
+    {
+        auto const insert_time = time_block(
+            batch_size, gen_key_value{ts, next_insert_index}, inserter);
+        add_sample(
+            "insert", next_insert_index, batch_size / insert_time.count());
+        next_insert_index += batch_size;
+        progress.update(batch_size);
+        pre_fetch_hook();
+        auto const fetch_time = time_block(
+            batch_size, rand_existing_key{ts, next_insert_index - 1},
+            fetcher);
+        add_sample("fetch", next_insert_index, batch_size / fetch_time.count());
+        progress.update(batch_size);
+    }
+}
+
+#if WITH_ROCKSDB
+template <class AddSample>
+void
+do_timings_rocks(
+    std::string const& db_dir,
+    std::uint64_t batch_size,
+    std::uint64_t num_batches,
+    std::uint32_t key_size,
+    AddSample&& add_sample,
+    bench_progress& progress)
+{
+    temp_dir td{db_dir};
+    std::unique_ptr<rocksdb::DB> pdb = [&td] {
+        rocksdb::DB* db = nullptr;
+        rocksdb::Options options;
+        options.create_if_missing = true;
+        auto const status = rocksdb::DB::Open(options, td.path(), &db);
+        if (!status.ok())
+            db = nullptr;
+        return std::unique_ptr<rocksdb::DB>{db};
+    }();
+
+    if (!pdb)
+    {
+        derr << "Failed to open rocks db.\n";
+        return;
+    }
+
+    auto inserter = [key_size, &pdb](item_type const& v) {
+        auto const s = pdb->Put(rocksdb::WriteOptions(),
+            rocksdb::Slice(reinterpret_cast<char const*>(v.key), key_size),
+            rocksdb::Slice(reinterpret_cast<char const*>(v.data), v.size));
+        if (!s.ok())
+            throw std::runtime_error("Rocks Insert: " + s.ToString());
+    };
+
+    auto fetcher = [key_size, &pdb](item_type const& v) {
+        std::string value;
+        auto const s = pdb->Get(rocksdb::ReadOptions(),
+            rocksdb::Slice(reinterpret_cast<char const*>(v.key), key_size),
+            &value);
+        if (!s.ok())
+            throw std::runtime_error("Rocks Fetch: " + s.ToString());
+    };
+
+    test_store ts{key_size, 0, 0};
+    try
+    {
+        time_fetch_insert_interleaved(batch_size, num_batches, ts,
+            std::move(inserter), std::move(fetcher),
+            std::forward<AddSample>(add_sample), [] {}, progress);
+    }
+    catch (std::exception const& e)
+    {
+        derr << "Error: " << e.what() << '\n';
+    }
+}
+#endif
+
+template <class AddSample>
+void
+do_timings(std::string const& db_dir,
+    std::uint64_t batch_size,
+    std::uint64_t num_batches,
+    std::uint32_t key_size,
+    std::size_t block_size,
+    float load_factor,
+    AddSample&& add_sample,
+    bench_progress& progress)
+{
+    boost::system::error_code ec;
+
+    try
+    {
+        test_store ts{db_dir, key_size, block_size, load_factor};
+        ts.create(ec);
+        if (ec)
+            goto fail;
+        ts.open(ec);
+        if (ec)
+            goto fail;
+
+        auto inserter = [&ts, &ec](item_type const& v) {
+            ts.db.insert(v.key, v.data, v.size, ec);
+            if (ec)
+                throw boost::system::system_error(ec);
+        };
+
+        auto fetcher = [&ts, &ec](item_type const& v) {
+            ts.db.fetch(v.key, [&](void const* data, std::size_t size) {}, ec);
+            if (ec)
+                throw boost::system::system_error(ec);
+        };
+
+        auto pre_fetch_hook = [&ts, &ec]() {
+            // Close then open the db, otherwise the
+            // commit thread confounds the timings
+            ts.close(ec);
+            if (ec)
+                throw boost::system::system_error(ec);
+            ts.open(ec);
+            if (ec)
+                throw boost::system::system_error(ec);
+        };
+
+        time_fetch_insert_interleaved(batch_size, num_batches, ts,
+            std::move(inserter), std::move(fetcher),
+            std::forward<AddSample>(add_sample), std::move(pre_fetch_hook),
+            progress);
+    }
+    catch (boost::system::system_error const& e)
+    {
+        ec = e.code();
+    }
+    catch (std::exception const& e)
+    {
+        derr << "Error: " << e.what() << '\n';
+    }
+
+fail:
+    if (ec)
+        derr << "Error: " << ec.message() << '\n';
+
+    return;
+}
+
+namespace po = boost::program_options;
+
+void
+print_help(std::string const& prog_name, const po::options_description& desc)
+{
+    derr << prog_name << ' ' << desc;
+}
+
+po::variables_map
+parse_args(int argc, char** argv, po::options_description& desc)
+{
+
+#if WITH_ROCKSDB
+    std::vector<std::string> const default_dbs = {"nudb", "rocksdb"};
+#else
+    std::vector<std::string> const default_dbs = {"nudb"};
+#endif
+    std::vector<std::uint64_t> const default_ops({100000, 1000000});
+
+    desc.add_options()
+        ("help,h", "Display this message.")
+        ("batch_size",
+            po::value<std::uint64_t>(),
Default: 20000)") + ("num_batches", + po::value(), + "Num Batches Default: 500)") + ("dbs", + po::value>()->multitoken(), + "databases (Default: nudb rocksdb)") + ("block_size", po::value(), + "nudb block size (default: 4096)") + ("key_size", po::value(), + "key size (default: 64)") + ("load_factor", po::value(), + "nudb load factor (default: 0.5)") + ("db_dir", po::value(), + "Directory to place the databases" + " (default: boost::filesystem::temp_directory_path)") + ("raw_out", po::value(), + "File to record the raw measurements (useful for plotting)" + " (default: no output)") + ; + + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); + po::notify(vm); + + return vm; +} + +template +T +get_opt(po::variables_map const& vm, std::string const& key, T const& default_value) +{ + return vm.count(key) ? vm[key].as() : default_value; +} + +} // test +} // nudb + +int +main(int argc, char** argv) +{ + using namespace nudb::test; + + po::variables_map vm; + + { + po::options_description desc{"Benchmark Options"}; + bool parse_error = false; + try + { + vm = parse_args(argc, argv, desc); + } + catch (std::exception const& e) + { + derr << "Incorrect command line syntax.\n"; + derr << "Exception: " << e.what() << '\n'; + parse_error = true; + } + + if (vm.count("help") || parse_error) + { + auto prog_name = boost::filesystem::path(argv[0]).stem().string(); + print_help(prog_name, desc); + return 0; + } + } + + auto const batch_size = get_opt(vm, "batch_size", 20000); + auto const num_batches = get_opt(vm, "num_batches", 500); + auto const block_size = get_opt(vm, "block_size", 4096); + auto const load_factor = get_opt(vm, "load_factor", 0.5f); + auto const key_size = get_opt(vm, "key_size", 64); + auto const db_dir = [&vm]() -> std::string { + auto r = get_opt(vm, "db_dir", ""); + if (!r.empty() && r.back() != '/' && r.back() != '\\') + { + r += '/'; + } + return r; + }(); + auto const raw_out = get_opt(vm, "raw_out", ""); +#if WITH_ROCKSDB + std::vector const default_dbs({"nudb", "rocksdb"}); +#else + std::vector const default_dbs({"nudb"}); +#endif + auto to_set = [](std::vector const& v) { + return std::set(v.begin(), v.end()); + }; + auto const dbs = to_set(get_opt>(vm, "dbs", default_dbs)); + + for (auto const& db : dbs) + { + if (db == "rocksdb") + { +#if !WITH_ROCKSDB + derr << "Benchmark was not built with rocksdb support\n"; + exit(1); +#endif + continue; + } + + if (db != "nudb" && db != "rocksdb") + { + derr << "Unsupported database: " << db << '\n'; + exit(1); + } + } + + bool const with_rocksdb = dbs.count("rocksdb") != 0; + (void) with_rocksdb; + bool const with_nudb = dbs.count("nudb") != 0; + std::uint64_t const num_db = int(with_nudb) + int(with_rocksdb); + std::uint64_t const total_ops = num_db * batch_size * num_batches * 2; + bench_progress progress(derr, total_ops); + + enum + { + db_nudb, + db_rocks, + db_last + }; + enum + { + op_insert, + op_fetch, + op_last + }; + std::array db_names{{"nudb", "rocksdb"}}; + std::array op_names{{"insert", "fetch"}}; + using result_dict = boost::container::flat_multimap; + result_dict ops_per_sec[db_last][op_last]; + // Reserve up front to database that run later don't have less memory + for (int i = 0; i < db_last; ++i) + for (int j = 0; j < op_last; ++j) + ops_per_sec[i][j].reserve(num_batches); + + std::ofstream raw_out_stream; + bool const record_raw_out = !raw_out.empty(); + if (record_raw_out) + { + raw_out_stream.open(raw_out, std::ios::trunc); + raw_out_stream << 
"num_db_items,db,op,ops/sec\n"; + } + for (int i = 0; i < db_last; ++i) + { + auto result = [&] + (std::string const& op_name, std::uint64_t num_items, + double sample) { + auto op_idx = op_name == "insert" ? op_insert : op_fetch; + ops_per_sec[i][op_idx].emplace(num_items, sample); + if (record_raw_out) + raw_out_stream << num_items << ',' << db_names[i] << ',' + << op_name << ',' << std::fixed << sample + << std::endl; // flush + + }; + if (with_nudb && i == db_nudb) + do_timings(db_dir, batch_size, num_batches, key_size, block_size, + load_factor, result, progress); +#if WITH_ROCKSDB + if (with_rocksdb && i == db_rocks) + do_timings_rocks( + db_dir, batch_size, num_batches, key_size, result, progress); +#endif + } + + // Write summary by sampling raw data at powers of 10 + auto const col_w = 14; + auto const iter_w = 15; + + for (int op_idx = 0; op_idx < op_last; ++op_idx) + { + auto const& t = op_names[op_idx]; + dout << '\n' << t << " (per second)\n"; + dout << std::setw(iter_w) << "num_db_keys"; + if (with_nudb) + dout << std::setw(col_w) << "nudb"; +#if WITH_ROCKSDB + if (with_rocksdb) + dout << std::setw(col_w) << "rocksdb"; +#endif + dout << '\n'; + auto const max_sample = [&ops_per_sec] { + std::uint64_t r = 0; + for (auto i = 0; i < db_last; ++i) + for (auto j = 0; j < op_last; ++j) + if (!ops_per_sec[i][j].empty()) + r = std::max(r, ops_per_sec[i][j].rbegin()->first); // no `back()` + return r; + }(); + auto const min_sample = batch_size; + + auto write_val = [&]( + result_dict const& dict, std::uint64_t key) { + dout << std::setw(col_w) << std::fixed << std::setprecision(2); + // Take the average of all the values, or "NA" if none collected + auto l = dict.lower_bound(key); + auto u = dict.upper_bound(key); + if (l == u) + dout << "NA"; + else + { + auto const total = std::accumulate(l, u, 0, + [](double a, std::pair const& b) { + return a + b.second; + }); + dout << total / std::distance(l, u); + } + }; + for (std::uint64_t n = 100; n <= max_sample; n *= 10) + { + if (n=500000], hue='db', col='op') + plt.show(p) + return d # for testing + + +def parse_args(): + parser = argparse.ArgumentParser( + description=('Plot the benchmark results')) + parser.add_argument( + '--input', + '-i', + help=('input'), ) + return parser.parse_args() + + +if __name__ == '__main__': + args = parse_args() + result_filename = args.input + if not result_filename: + print('No result file specified. Exiting') + else: + run_main(result_filename) diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000000..fc40be018b --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,5 @@ +bin +html +temp +reference.qbk +out.txt diff --git a/doc/Jamfile.v2 b/doc/Jamfile.v2 new file mode 100644 index 0000000000..6b3a8e0d2e --- /dev/null +++ b/doc/Jamfile.v2 @@ -0,0 +1,77 @@ +# +# Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +# +# Distributed under the Boost Software License, Version 1.0. (See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +# + +import os ; + +local broot = [ os.environ BOOST_ROOT ] ; + +project nudb/doc ; + +using boostbook ; +using quickbook ; +using doxygen ; + +path-constant out : . 
; + +install stylesheets + : + $(broot)/doc/src/boostbook.css + : + $(out)/html + ; + +explicit stylesheets ; + +install images + : + [ glob $(broot)/doc/src/images/*.png ] + images/logo.png + : + $(out)/html/images + ; + +explicit images ; + +install callouts + : + [ glob $(broot)/doc/src/images/callouts/*.png ] + : + $(out)/html/images/callouts + ; + +explicit callout ; + +xml doc + : + main.qbk + : + temp + $(broot)/tools/boostbook/dtd + ; + +boostbook boostdoc + : + doc + : + chapter.autolabel=0 + boost.image.src=images/logo.png + boost.image.alt="NuDB Logo" + boost.image.w=1270 + boost.image.h=80 + boost.root=$(broot) + chapter.autolabel=0 + chunk.first.sections=1 # Chunk the first top-level section? + chunk.section.depth=8 # Depth to which sections should be chunked + generate.section.toc.level=1 # Control depth of TOC generation in sections + toc.max.depth=2 # How many levels should be created for each TOC? + toc.section.depth=2 # How deep should recursive sections appear in the TOC? + generate.toc="chapter nop section nop" + : + temp + stylesheets + images + ; diff --git a/doc/boostbook.dtd b/doc/boostbook.dtd new file mode 100644 index 0000000000..bd4c3f871e --- /dev/null +++ b/doc/boostbook.dtd @@ -0,0 +1,439 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +%DocBook; diff --git a/doc/docca b/doc/docca new file mode 160000 index 0000000000..335dbf9c36 --- /dev/null +++ b/doc/docca @@ -0,0 +1 @@ +Subproject commit 335dbf9c3613e997ed56d540cc8c5ff2e28cab2d diff --git a/doc/images/logo.png b/doc/images/logo.png new file mode 100644 index 0000000000..8636189ae9 Binary files /dev/null and b/doc/images/logo.png differ diff --git a/doc/images/logo.psd b/doc/images/logo.psd new file mode 100644 index 0000000000..8a4669aab0 Binary files /dev/null and b/doc/images/logo.psd differ diff --git a/doc/images/readme2.png b/doc/images/readme2.png new file mode 100644 index 0000000000..fc130b6156 Binary files /dev/null and b/doc/images/readme2.png differ diff --git a/doc/index.xml b/doc/index.xml new file mode 100644 index 0000000000..0a0e816515 --- /dev/null +++ b/doc/index.xml @@ -0,0 +1,14 @@ + + + + + +
+ Index + +
diff --git a/doc/main.qbk b/doc/main.qbk
new file mode 100644
index 0000000000..8ab0d4eb04
--- /dev/null
+++ b/doc/main.qbk
@@ -0,0 +1,342 @@
+[/
+ Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+
+ Distributed under the Boost Software License, Version 1.0. (See accompanying
+ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+[library NuDB
+ [quickbook 1.6]
+ [copyright 2015 - 2016 Vinnie Falco]
+ [purpose C++ Library]
+ [license
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ [@http://www.boost.org/LICENSE_1_0.txt])
+ ]
+ [authors [Falco, Vinnie]]
+ [category template]
+ [category generic]
+]
+
+[template mdash[] '''&#8212; ''']
+[template indexterm1[term1] '''<indexterm><primary>'''[term1]'''</primary></indexterm>''']
+[template indexterm2[term1 term2] '''<indexterm><primary>'''[term1]'''</primary><secondary>'''[term2]'''</secondary></indexterm>''']
+
+[variablelist
+ [[
+ [link nudb.overview Overview]
+ ][
+ An overview of features, requirements, and credits, plus
+ rationale and design information.
+ ]]
+ [[
+ [link nudb.example Example]
+ ][
+ An example that illustrates the use of NuDB.
+ ]]
+ [[
+ [link nudb.usage Usage]
+ ][
+ An explanation of operations on the database.
+ ]]
+ [[
+ [link nudb.ref Reference]
+ ][
+ Detailed class and function reference.
+ ]]
+ [[
+ [link nudb.index Index]
+ ][
+ Book-style text index of the documentation.
+ ]]
+]
+
+[section:overview Overview]
+
+NuDB is an append-only key/value store specifically optimized for random
+read performance on modern SSDs or equivalent high-IOPS devices. The most
+common application for NuDB is content addressable storage, where a
+cryptographic digest of the data is used as the key. The read performance
+and memory usage are independent of the size of the database. Several other
+features are described in the sections that follow.
+
+[heading History]
+
+The first versions of rippled, the application behind the Ripple consensus
+network, used SQLite as their back end for unstructured data. The
+performance quickly became a limiting factor.
+
+rippled then went through a series of back ends including LMDB, LevelDB, and
+RocksDB. Each of these databases performed well at first, but as the data
+size increased, memory usage increased and performance dropped off
+drastically.
+
+The problem is caching. Each of these databases relies on some O(n) data
+structure, such as a Bloom filter, to improve its performance. These work
+well until the structures no longer fit in memory. In addition, many virtual
+machines are memory constrained.
+
+To address this issue, the developers performed a thought experiment: if
+you assume the data size is so large that no O(n) caching is effective, what
+is the best read performance you could expect? They reached the following
+conclusions:
+
+1) Writes should not block reads.
+2) Reads should be limited only by the SSD's IOPS limit.
+3) A read for a non-present key should require one IOP.
+4) A read for a present key whose data can be read in a single IOP should
+only require two IOPs, one to figure out where it is and one to read it in.
+
+NuDB is designed to come as close to this ideal as possible.
+
+[heading Design]
+
+NuDB uses three files to hold the data and indexes. The data file is append
+only and contains sufficient information to rebuild the index. The index
+file is random access and contains hash buckets. When an update is in
+progress, a temporary journal file is used to roll the update back if
+needed.
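+
+For example, a process restarted after a crash does not need to do anything
+special: as described in the Usage section, opening the database replays the
+journal if one is present. A minimal sketch, using the `store` type
+introduced later in this document:
+
+```
+store db;
+error_code ec;
+db.open("nudb.dat", "nudb.key", "nudb.log", ec);
+if(ec)
+{
+    // The database could not be opened or recovered; inspect ec.
+}
+```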
+
+NuDB uses linear hashing to dynamically increase the number of buckets in
+the index file as the data size grows. Bucket overflows are handled by
+adding "overflow" records to the data file. Bucket overflows can be
+minimized by increasing the number of buckets, leading to a size/speed
+tradeoff. Typical databases keep the average bucket half full (or half
+empty, depending on your point of view), resulting in spill records
+accounting for less than 1% of reads.
+
+Inserts are buffered in memory and appended to the data file immediately.
+Updates to the index file are performed as an atomic operation. Fetch
+operations retrieve records in the process of being modified from memory
+during the update operation, so that writes do not block fetches.
+
+Before the index file is modified, a journal file is created to recover
+consistency in the event of a crash during the update. The recovery process
+indexes all records written to the data file, so deferring index updates
+does not widen the window in which a crash could cause data loss.
+
+Iteration can be performed on the data file directly. Since it is append
+only, there is no risk of other operations corrupting an iteration in
+progress.
+
+[heading Performance]
+
+Writes do not block reads. Read rates are typically around 90% of the SSD's
+IOPS limit. An average fetch for a non-present key typically requires fewer
+than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01
+IOPs plus however many IOPs it takes to read the data.
+
+[heading Applications]
+
+Content addressable storage associates data with its cryptographic digest.
+This type of storage is commonly used in decentralized blockchain applications.
+
+Often these applications require following hash chains -- where one object
+contains the hash of another object that ultimately leads to the object
+desired. NuDB's low latency and high speed are particularly advantageous
+in these kinds of applications.
+
+NuDB is append only and does not support a delete operation. To support
+retaining limited historical information, NuDB is often used in a dual
+database configuration. One database is older and read only, the other is
+newer and read/write. Periodically, the older database is discarded, the
+newer database becomes the new read only database, and a new read/write
+database is created.
+
+[endsect]
+
+
+
+[section:example Example]
+
+This complete program creates a database, opens the database, inserts several
+key/value pairs, fetches the key/value pairs, closes the database, then erases
+the database files. Source code for this program is located in the examples
+directory.
+
+```
+#include <nudb/nudb.hpp>
+#include <cstddef>
+#include <cstdint>
+
+int main()
+{
+    using namespace nudb;
+    std::size_t constexpr N = 1000;
+    using key_type = std::uint32_t;
+    error_code ec;
+    auto const dat_path = "db.dat";
+    auto const key_path = "db.key";
+    auto const log_path = "db.log";
+    create<xxhasher>(
+        dat_path, key_path, log_path,
+        1,
+        make_salt(),
+        sizeof(key_type),
+        block_size("."),
+        0.5f,
+        ec);
+    store db;
+    db.open(dat_path, key_path, log_path,
+        16 * 1024 * 1024, ec);
+    char data = 0;
+    // Insert
+    for(key_type i = 0; i < N; ++i)
+        db.insert(&i, &data, sizeof(data), ec);
+    // Fetch
+    for(key_type i = 0; i < N; ++i)
+        db.fetch(&i,
+            [&](void const* buffer, std::size_t size)
+            {
+                // do something with buffer, size
+            }, ec);
+    db.close(ec);
+    erase_file(dat_path);
+    erase_file(key_path);
+    erase_file(log_path);
+}
+```
+
+[endsect]
+
+
+
+[section:usage Usage]
+
+[heading Files]
+
+A database is represented by three files: the data file, the key file,
+and the log file. Each file has a distinct header in a well known format.
+The data file holds all of the key/value pairs and is serially iterable. The
+key file holds a hash table indexing all of the contents in the data file.
+The log file holds information used to roll the database back in the event
+of a failure.
+
+[heading Create/Open]
+
+The [link nudb.ref.nudb__create create] function creates a new data file and key
+file for a database with the specified parameters. The caller specifies
+the hash function to use as a template argument, the file paths,
+and the database constants:
+
+[note
+ Sample code and identifiers mentioned in this section are written
+ as if the following declarations are in effect:
+ ```
+ #include <nudb/nudb.hpp>
+ using namespace nudb;
+ error_code ec;
+ ```
+]
+
+```
+create<xxhasher>(
+    "nudb.dat",         // Path to data file
+    "nudb.key",         // Path to key file
+    "nudb.log",         // Path to log file
+    1,                  // Application-defined constant
+    make_salt(),        // A random integer
+    4,                  // The size of keys
+    block_size("."),    // Block size in key file
+    0.5f,               // The load factor
+    ec);
+```
+
+The application-defined constant is a 64-bit unsigned integer which the
+caller may set to any value. This value can be retrieved from an open
+database, where it will be equal to the value used at creation time. This
+constant can be used for any purpose; for example, to inform the application
+of which application-specific version was used to create the database.
+
+The salt is a 64-bit unsigned integer used to prevent algorithmic complexity
+attacks. Hash functions used during database operations are constructed with
+the salt, providing an opportunity to permute the hash function. This feature
+is useful when inserted database keys come from untrusted sources, such as the
+network.
+
+The key size is specified when the database is created, and cannot be changed.
+All key files indexing the same data file will use the key size of the data
+file.
+
+The block size indicates the size of buckets in the key file. The best choice
+for the block size is the natural sector size of the device. For most SSDs
+in production today this is 4096, or less often 8192 or 16384. The function
+[link nudb.ref.nudb__block_size block_size] returns the best guess of the block
+size used by the device mounted at the specified path.
+
+The load factor determines the target bucket occupancy fraction. There is
+almost never a need to specify anything other than the recommended value of
+0.5, which strikes a good balance between space efficiency and fast lookup.
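+
+The creation-time constants can be read back from an open database. A minimal
+sketch, using the `store` type described next; the accessor names below follow
+the reference section, and `appnum()` in particular is an assumption to be
+checked against the headers:
+
+```
+store db;
+db.open("nudb.dat", "nudb.key", "nudb.log", ec);
+if(! ec)
+{
+    auto const appnum = db.appnum();     // application-defined constant (assumed accessor)
+    auto const ksize = db.key_size();    // fixed key size chosen at creation
+    auto const bsize = db.block_size();  // key file block size
+}
+```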
+
+An open database is represented by objects of type
+[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type
+alias [link nudb.ref.nudb__store store] represents a database using
+[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open
+a database, declare a database object and then call the
+[link nudb.ref.nudb__basic_store.open open] member function:
+
+```
+store db;
+db.open("nudb.dat", "nudb.key", "nudb.log", ec);
+```
+
+When opening a database that was previously opened by a program that was
+terminated abnormally, the implementation automatically invokes the
+recovery process. This process restores the integrity of the database by
+replaying the log file if it is present.
+
+[heading Insert/Fetch]
+
+Once a database is open, it becomes possible to insert new key/value pairs
+and look them up. Insertions are straightforward:
+
+```
+db.insert(key, data, bytes, ec);
+```
+
+If the key already exists, the error is set to
+[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB
+database must be unique. Multiple threads can call insert at the same time.
+Internally, however, insertions are serialized to present a consistent view
+of the database to callers.
+
+Retrieving a key/value pair, if it exists, is similarly straightforward:
+
+```
+db.fetch(key,
+    [&](void const* buffer, std::size_t size)
+    {
+        ...
+    }, ec);
+```
+
+To give callers control over memory allocation strategies, the fetch
+function takes a callback object as a parameter. The callback is invoked
+with a pointer to the data and its size if the item exists in the database.
+The callback can decide how to store this information, if at all.
+
+[endsect]
+
+[section Command Line Tool]
+
+To allow administration, NuDB comes with the "nudb" command line tool,
+which may be built using b2 or CMake. Files for the tool are located in
+the "tools" directory. Once the tool is built and located in your path,
+execute this command for additional instructions:
+
+```
+nudb help
+```
+
+[endsect]
+
+[section:ref Reference]
+[xinclude quickref.xml]
+[include types/File.qbk]
+[include types/Hasher.qbk]
+[include types/Progress.qbk]
+[include reference.qbk]
+[endsect]
+
+[xinclude index.xml]
diff --git a/doc/makeqbk.sh b/doc/makeqbk.sh
new file mode 100644
index 0000000000..3da8c0790f
--- /dev/null
+++ b/doc/makeqbk.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/bash
+
+# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+#
+# Distributed under the Boost Software License, Version 1.0.
(See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +mkdir -p temp +doxygen source.dox +cd temp +xsltproc combine.xslt index.xml > all.xml +xsltproc ../reference.xsl all.xml > ../reference.qbk diff --git a/doc/quickref.xml b/doc/quickref.xml new file mode 100644 index 0000000000..243e57cb94 --- /dev/null +++ b/doc/quickref.xml @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + NuDB + + + + + + + Classes + + basic_store + native_file + no_progress + posix_file + store + win32_file + xxhasher + + Constants + + errc + error + file_mode + + + + Functions + + block_size + create + erase_file + make_error_code + recover + rekey + verify + visit + + Type Traits + + is_File + is_Hasher + is_Progress + + + + Types + + error_category + error_code + error_condition + path_type + system_error + verify_info + + Concepts + + File + Progress + Hasher + + + + + + diff --git a/doc/reference.xsl b/doc/reference.xsl new file mode 100644 index 0000000000..ec2a22d30e --- /dev/null +++ b/doc/reference.xsl @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/doc/source.dox b/doc/source.dox new file mode 100644 index 0000000000..73bff7b030 --- /dev/null +++ b/doc/source.dox @@ -0,0 +1,333 @@ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "NuDB" +PROJECT_NUMBER = +PROJECT_BRIEF = C++ Library +PROJECT_LOGO = +OUTPUT_DIRECTORY = +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = YES +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = ../include/ +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = YES +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +TCL_SUBST = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_INCLUDE_FILES = NO +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = NO +SORT_MEMBER_DOCS = NO +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = YES +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = NO +GENERATE_TESTLIST = NO +GENERATE_BUGLIST = NO +GENERATE_DEPRECATEDLIST= NO +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = NO +SHOW_FILES = NO +SHOW_NAMESPACES = NO +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = + 
+#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = ../include/nudb/ +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_OPTIONS = + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = NO +HTML_OUTPUT = dhtm +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +LATEX_OUTPUT = latex 
+LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_FILES = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +LATEX_BIB_STYLE = plain +LATEX_TIMESTAMP = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +RTF_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_SUBDIR = +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = YES +XML_OUTPUT = temp/ +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- +GENERATE_DOCBOOK = NO +DOCBOOK_OUTPUT = docbook +DOCBOOK_PROGRAMLISTING = NO + +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES +SEARCH_INCLUDES = YES +INCLUDE_PATH = ../ +INCLUDE_FILE_PATTERNS = +PREDEFINED = DOXYGEN \ + GENERATING_DOCS \ + _MSC_VER \ + NUDB_POSIX_FILE=1 + +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = NO +MSCGEN_PATH = +DIA_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 
+MAX_DOT_GRAPH_DEPTH = 0
+DOT_TRANSPARENT = NO
+DOT_MULTI_TARGETS = NO
+GENERATE_LEGEND = YES
+DOT_CLEANUP = YES
diff --git a/doc/types/File.qbk b/doc/types/File.qbk
new file mode 100644
index 0000000000..f36259cdd4
--- /dev/null
+++ b/doc/types/File.qbk
@@ -0,0 +1,159 @@
+[/
+ Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+
+ Distributed under the Boost Software License, Version 1.0. (See accompanying
+ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+[section:File File]
+
+The [*File] concept abstracts access to files in the underlying file system.
+Two implementations are provided: one for the Win32 API and one for
+POSIX compliant systems. The [link nudb.ref.nudb__native_file native_file] type
+alias is automatically set to either [link nudb.ref.nudb__win32_file win32_file]
+or [link nudb.ref.nudb__posix_file posix_file] as appropriate.
+
+To support interfaces other than Win32 or POSIX, callers may provide their
+own [*File] type that meets these requirements. The unit test code also provides
+its own [*File] type, which causes simulated operating system file failures
+to exercise all failure paths in the implementation.
+
+In the table below:
+
+* `X` denotes a [*File] type
+* `a` and `b` denote values of type `X`
+* `c` denotes a (possibly const) value of type `X`
+* `m` denotes a value of type [link nudb.ref.nudb__file_mode file_mode]
+* `f` denotes a value of type [link nudb.ref.nudb__path_type path_type]
+* `q` denotes a value of type `void*`
+* `p` denotes a value of type `void const*`
+* `ec` denotes a value of type [link nudb.ref.nudb__error_code error_code]
+* `o` denotes a value of type `std::uint64_t`
+* `n` denotes a value of type `std::size_t`
+
+[table File requirements
+[[operation] [type] [semantics, pre/post-conditions]]
+[
+ [`X a{std::move(b)}`]
+ [ ]
+ [
+ `X` is `MoveConstructible`
+ ]
+]
+[
+ [`c.is_open()`]
+ [`bool`]
+ [
+ Returns `true` if `c` refers to an open file.
+ ]
+]
+[
+ [`a.close()`]
+ [ ]
+ [
+ If `a` refers to an open file, closes the file. Does nothing if
+ `a` does not refer to an open file. After this call, `a.is_open()`
+ will return `false`.
+ ]
+]
+[
+ [`a.create(m,f,ec)`]
+ [ ]
+ [
+ Attempts to create a file at the path specified by `f`, and
+ open it with the mode specified by `m`. If an error occurs,
+ `ec` is set to the system specific error code. If no error
+ occurs, a subsequent call to `a.is_open()` will return `true`.
+ Undefined behavior if `a` already refers to an open file.
+ ]
+]
+[
+ [`a.open(m,f,ec)`]
+ [ ]
+ [
+ Attempts to open the file at the path specified by `f`. If
+ an error occurs, `ec` is set to the system specific error
+ code. If no error occurs, a subsequent call to `a.is_open()`
+ will return `true`. Undefined behavior if `a` already refers
+ to an open file.
+ ]
+]
+[
+ [`X::erase(f,ec)`]
+ [ ]
+ [
+ Attempts to delete the file at the path specified by `f`.
+ If an error occurs, `ec` is set to the system specific error
+ code.
+ ]
+]
+[
+ [`c.size(ec)`]
+ [ `std::uint64_t` ]
+ [
+ Returns the size of the file in bytes. This value is also equal to
+ the lowest byte offset for which a read will always return a
+ [link nudb.ref.nudb__error short_read] error. Undefined
+ behavior if `c` does not refer to an open file.
+ ]
+]
+[
+ [`a.read(o,q,n,ec)`]
+ [ ]
+ [
+ Attempts to read `n` bytes from the open file referred to by `a`,
+ starting at offset `o`, and storing the results in the memory
+ pointed to by `q`, which must be at least of size `n` bytes.
+ If an error occurs, `ec` is set to the system specific error
+ code. Undefined behavior if `a` does not refer to an open file.
+ ]
+]
+[
+ [`a.write(o,p,n,ec)`]
+ [ ]
+ [
+ Attempts to write `n` bytes to the open file referred to by `a`,
+ which must have been opened with a write mode, starting at offset
+ `o` and reading the data from the memory pointed to by `p`, which
+ must be at least of size `n` bytes. If an error occurs, `ec` is set
+ to the system specific error code. Undefined behavior if `a` does
+ not refer to an open file.
+ ]
+]
+[
+ [`a.sync(ec)`]
+ [ ]
+ [
+ Attempts to synchronize the file on disk. This instructs the
+ operating system to ensure that any data which resides in caches
+ or buffers is fully written to the underlying storage device
+ before this call returns. If an error occurs, `ec` is set to the
+ system specific error code. Undefined behavior if `a` does not
+ refer to an open file.
+
+ NuDB's database integrity guarantees are only valid if the
+ implementation of `sync` assures that all data is fully written
+ to the underlying file before the call returns.
+ ]
+]
+[
+ [`a.trunc(o,ec)`]
+ [ ]
+ [
+ Attempts to change the size of the open file referred to by `a`,
+ which must have been opened with a write mode, to the size in
+ bytes specified by `o`. If an error occurs, `ec` is set to the
+ system specific error code. Undefined behavior if `a` does not
+ refer to an open file. After a successful call, `a.size(ec)` will
+ return `o`.
+
+ NuDB's database integrity guarantees are only valid if the
+ implementation of `trunc` assures that subsequent calls to
+ `size` will return `o`, even if the program is terminated or the
+ device is taken offline before calling `size`.
+ ]
+]
+]
+
+[endsect]
diff --git a/doc/types/Hasher.qbk b/doc/types/Hasher.qbk
new file mode 100644
index 0000000000..e80955b9ee
--- /dev/null
+++ b/doc/types/Hasher.qbk
@@ -0,0 +1,56 @@
+[/
+ Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+
+ Distributed under the Boost Software License, Version 1.0. (See accompanying
+ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+[section:Hasher Hasher]
+
+A [*Hasher] implements a hash algorithm. This is used to compute the small
+digests NuDB needs to effectively implement a hash table. NuDB provides
+the default implementation [link nudb.ref.nudb__xxhasher xxhasher], which is
+suitable for most use cases. For advanced applications, a user-supplied
+hash function meeting these requirements may be used instead.
+
+In the table below:
+
+* `X` denotes a hasher class
+* `a` denotes a value of type `X const`
+* `s` denotes a value of type `std::uint64_t`
+* `p` denotes a value of type `void const*`
+* `n` denotes a value of type `std::size_t`
+
+[table Hasher requirements
+[[operation] [type] [semantics, pre/post-conditions]]
+[
+ [`X a{s}`]
+ [ ]
+ [
+ `a` is constructed with a seed value integer. To achieve resistance
+ to algorithmic complexity attacks, an implementation of [*Hasher]
+ should ensure that values returned from the hash function will be
+ distinctly different for different values of `s` given the same
+ inputs. If algorithmic complexity attack resistance is not a
+ requirement, the seed may be ignored upon construction.
+ ]
+]
+[
+ [`a(p,n)`]
+ [ `std::uint64_t` ]
+ [
+ Returns the digest of the memory `n` bytes in size and pointed
+ to by `p`. `n` will never be zero.
A good hash function will
+ return values with these qualities:
+
+* Values are uniformly distributed in the full range
+
+* Values for the same input are distinctly different for different seeds
+
+* Small changes in the input produce unpredictable output values
+
+ ]
+]
+]
+
+[endsect]
diff --git a/doc/types/Progress.qbk b/doc/types/Progress.qbk
new file mode 100644
index 0000000000..44a76ad51d
--- /dev/null
+++ b/doc/types/Progress.qbk
@@ -0,0 +1,40 @@
+[/
+ Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+
+ Distributed under the Boost Software License, Version 1.0. (See accompanying
+ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+]
+
+[section:Progress Progress]
+
+A [*Progress] object provides feedback to callers on the progress of
+long running operations, such as calls to [link nudb.ref.nudb__verify verify] or
+[link nudb.ref.nudb__rekey rekey], which can take days or weeks for databases
+that measure in the terabytes. These objects are used by passing them as
+parameters to the appropriate functions, where they will be called periodically
+with numbers that indicate the amount of work completed versus the total
+amount of work required.
+
+In the table below:
+
+* `X` denotes a progress class
+* `a` denotes a value of type `X`
+* `p` and `q` denote values of type `std::uint64_t`
+
+[table Progress requirements
+[[operation] [type] [semantics, pre/post-conditions]]
+[
+ [`a(p, q)`]
+ [ ]
+ [
+ Indicates to the progress object that work has been performed and
+ intermediate results calculated. `p` represents the amount of work
+ completed since the beginning of the operation. `q` represents the
+ total amount of work required. The fraction of completed work is
+ therefore `p/q`, with zero representing no work complete and one
+ representing all work complete. `p` and `q` are unitless.
+ ]
+]
+]
+
+[endsect]
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000000..ab7e10061a
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Part of nudb
+
+GroupSources (include/nudb nudb)
+GroupSources (extras/nudb extras)
+GroupSources (examples/ "/")
+
+add_executable (example
+  ${NUDB_INCLUDES}
+  ${EXTRAS_INCLUDES}
+  example.cpp
+)
+
+if (WIN32)
+  target_link_libraries (example ${Boost_LIBRARIES})
+else ()
+  target_link_libraries (example ${Boost_LIBRARIES} rt Threads::Threads)
+endif ()
diff --git a/examples/Jamfile b/examples/Jamfile
new file mode 100644
index 0000000000..d165cc9145
--- /dev/null
+++ b/examples/Jamfile
@@ -0,0 +1,12 @@
+#
+# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+#
+# Distributed under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#
+
+import os ;
+
+exe example :
+    example.cpp
+    ;
diff --git a/examples/example.cpp b/examples/example.cpp
new file mode 100644
index 0000000000..c9a96961cf
--- /dev/null
+++ b/examples/example.cpp
@@ -0,0 +1,46 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include +#include + +int main() +{ + using namespace nudb; + std::size_t constexpr N = 1000; + using key_type = std::uint32_t; + error_code ec; + auto const dat_path = "db.dat"; + auto const key_path = "db.key"; + auto const log_path = "db.log"; + create( + dat_path, key_path, log_path, + 1, + make_salt(), + sizeof(key_type), + block_size("."), + 0.5f, + ec); + store db; + db.open(dat_path, key_path, log_path, ec); + char data = 0; + // Insert + for(key_type i = 0; i < N; ++i) + db.insert(&i, &data, sizeof(data), ec); + // Fetch + for(key_type i = 0; i < N; ++i) + db.fetch(&i, + [&](void const* buffer, std::size_t size) + { + // do something with buffer, size + }, ec); + db.close(ec); + erase_file(dat_path); + erase_file(key_path); + erase_file(log_path); +} diff --git a/extras/README.md b/extras/README.md new file mode 100644 index 0000000000..eb458c1692 --- /dev/null +++ b/extras/README.md @@ -0,0 +1,5 @@ +This directory contains: + +* Additional interfaces not strictly part of NuDB's public APIs + +* Git submodules of dependencies used to build the tests and benchmarks diff --git a/extras/beast b/extras/beast new file mode 160000 index 0000000000..2f9a8440c2 --- /dev/null +++ b/extras/beast @@ -0,0 +1 @@ +Subproject commit 2f9a8440c2432d8a196571d6300404cb76314125 diff --git a/extras/nudb/basic_seconds_clock.hpp b/extras/nudb/basic_seconds_clock.hpp new file mode 100644 index 0000000000..61d004cffa --- /dev/null +++ b/extras/nudb/basic_seconds_clock.hpp @@ -0,0 +1,200 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef BASIC_SECONDS_CLOCK_HPP +#define BASIC_SECONDS_CLOCK_HPP + +#include "chrono_util.hpp" + +#include +#include +#include +#include +#include +#include + +namespace detail { + +class seconds_clock_worker +{ +public: + virtual void sample() = 0; +}; + +//------------------------------------------------------------------------------ + +// Updates the clocks +class seconds_clock_thread +{ +public: + using mutex = std::mutex; + using cond_var = std::condition_variable; + using lock_guard = std::lock_guard ; + using unique_lock = std::unique_lock ; + using clock_type = std::chrono::steady_clock; + using seconds = std::chrono::seconds; + using thread = std::thread; + using workers = std::vector ; + + bool stop_; + mutex m_; + cond_var cond_; + workers workers_; + thread thread_; + + seconds_clock_thread() + : stop_(false) + { + thread_ = thread{ + &seconds_clock_thread::run, this}; + } + + ~seconds_clock_thread() + { + stop(); + } + + void add(seconds_clock_worker& w) + { + lock_guard lock{m_}; + workers_.push_back(&w); + } + + void remove(seconds_clock_worker& w) + { + lock_guard lock{m_}; + workers_.erase(std::find( + workers_.begin(), workers_.end(), &w)); + } + + void stop() + { + if(thread_.joinable()) + { + { + lock_guard lock{m_}; + stop_ = true; + } + cond_.notify_all(); + thread_.join(); + } + } + + void run() + { + unique_lock lock{m_}; + for(;;) + { + for(auto iter : workers_) + iter->sample(); + + using namespace std::chrono; + clock_type::time_point const when( + floor ( + clock_type::now().time_since_epoch()) + + milliseconds(900)); + + if(cond_.wait_until(lock, when, [this]{ return stop_; })) + return; + } + } + + static seconds_clock_thread& instance() + { 
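+        // Meyers singleton: the function-local static below is constructed
+        // on first use, and C++11 guarantees its initialization is thread-safe.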
+ static seconds_clock_thread singleton; + return singleton; + } +}; + +} // detail + +//------------------------------------------------------------------------------ + +/** Called before main exits to terminate the utility thread. + This is a workaround for Visual Studio 2013: + http://connect.microsoft.com/VisualStudio/feedback/details/786016/creating-a-global-c-object-that-used-thread-join-in-its-destructor-causes-a-lockup + http://stackoverflow.com/questions/10915233/stdthreadjoin-hangs-if-called-after-main-exits-when-using-vs2012-rc +*/ +inline +void +basic_seconds_clock_main_hook() +{ +#ifdef _MSC_VER + detail::seconds_clock_thread::instance().stop(); +#endif +} + +/** A clock whose minimum resolution is one second. + + The purpose of this class is to optimize the performance of the now() + member function call. It uses a dedicated thread that wakes up at least + once per second to sample the requested trivial clock. + + @tparam Clock A type meeting these requirements: + http://en.cppreference.com/w/cpp/concept/Clock +*/ +template +class basic_seconds_clock +{ +public: + using rep = typename Clock::rep; + using period = typename Clock::period; + using duration = typename Clock::duration; + using time_point = typename Clock::time_point; + + static bool const is_steady = Clock::is_steady; + + static time_point now() + { + // Make sure the thread is constructed before the + // worker otherwise we will crash during destruction + // of objects with static storage duration. + struct initializer + { + initializer() + { + detail::seconds_clock_thread::instance(); + } + }; + static initializer init; + + struct worker : detail::seconds_clock_worker + { + time_point m_now; + std::mutex m_; + + worker() + : m_now(Clock::now()) + { + detail::seconds_clock_thread::instance().add(*this); + } + + ~worker() + { + detail::seconds_clock_thread::instance().remove(*this); + } + + time_point now() + { + std::lock_guard lock{m_}; + return m_now; + } + + void sample() + { + std::lock_guard lock{m_}; + m_now = Clock::now(); + } + }; + + static worker w; + + return w.now(); + } +}; + +#endif diff --git a/extras/nudb/chrono_util.hpp b/extras/nudb/chrono_util.hpp new file mode 100644 index 0000000000..582407aac8 --- /dev/null +++ b/extras/nudb/chrono_util.hpp @@ -0,0 +1,58 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef CHRONO_UTIL_HPP +#define CHRONO_UTIL_HPP + +#include + +// From Howard Hinnant +// http://home.roadrunner.com/~hinnant/duration_io/chrono_util.html + +#if !defined(_MSC_FULL_VER) || (_MSC_FULL_VER <= 190023506) +// round down +template +To floor(std::chrono::duration const& d) +{ + To t = std::chrono::duration_cast(d); + if (t > d) + --t; + return t; +} + +// round to nearest, to even on tie +template +To round (std::chrono::duration const& d) +{ + To t0 = std::chrono::duration_cast(d); + To t1 = t0; + ++t1; + auto diff0 = d - t0; + auto diff1 = t1 - d; + if (diff0 == diff1) + { + if (t0.count() & 1) + return t1; + return t0; + } + else if (diff0 < diff1) + return t0; + return t1; +} + +// round up +template +To ceil (std::chrono::duration const& d) +{ + To t = std::chrono::duration_cast(d); + if (t < d) + ++t; + return t; +} +#endif + +#endif diff --git a/extras/nudb/test/fail_file.hpp b/extras/nudb/test/fail_file.hpp new file mode 100644 index 0000000000..252f2feed2 --- /dev/null +++ b/extras/nudb/test/fail_file.hpp @@ -0,0 +1,343 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_TEST_FAIL_FILE_HPP +#define NUDB_TEST_FAIL_FILE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace test { + +/// Test error codes. +enum class test_error +{ + /// No error + success = 0, + + /// Simulated failure + failure +}; + +/// Returns the error category used for test error codes. +inline +error_category const& +test_category() +{ + struct cat_t : public error_category + { + char const* + name() const noexcept override + { + return "nudb"; + } + + std::string + message(int ev) const override + { + switch(static_cast(ev)) + { + case test_error::failure: + return "test failure"; + + default: + return "test error"; + } + } + + error_condition + default_error_condition(int ev) const noexcept override + { + return error_condition{ev, *this}; + } + + bool + equivalent(int ev, + error_condition const& ec) const noexcept override + { + return ec.value() == ev && &ec.category() == this; + } + + bool + equivalent(error_code const& ec, int ev) const noexcept override + { + return ec.value() == ev && &ec.category() == this; + } + }; + static cat_t const cat{}; + return cat; +} + +/// Returns a test error code. +inline +error_code +make_error_code(test_error ev) +{ + return error_code{static_cast(ev), test_category()}; +} + +} // test +} // nudb + +namespace boost { +namespace system { +template<> +struct is_error_code_enum +{ + static bool const value = true; +}; +} // system +} // boost + +namespace nudb { +namespace test { + +/** Countdown to test failure mode. + + The counter is constructed with a target ordinal and decremented + by callers. When the count reaches zero, a simulated test failure + is generated. +*/ +class fail_counter +{ + std::size_t target_; + std::atomic count_; + +public: + fail_counter(fail_counter const&) = delete; + fail_counter& operator=(fail_counter const&) = delete; + + /// Construct the counter with a target ordinal. + explicit + fail_counter(std::size_t target = 0) + { + reset(target); + } + + /// Reset the counter to fail at the nth step, or 0 for no failure. 
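+    /// Passing zero clears the target, so subsequent calls to fail()
+    /// will return `false`.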
+ void + reset(std::size_t n = 0) + { + target_ = n; + count_.store(0); + } + + /// Returns `true` if a simulated failure should be generated. + bool + fail() + { + return target_ && (++count_ >= target_); + } +}; + +/** A file wrapper to simulate file system failures. + + This wraps an object meeting the requirements of File. On each call, + the fail counter is decremented. When the counter reaches zero, a simulated + failure is generated. +*/ +template +class fail_file +{ + static_assert(is_File::value, + "File requirements not met"); + + File f_; + fail_counter* c_ = nullptr; + +public: + fail_file() = default; + fail_file(fail_file const&) = delete; + fail_file& operator=(fail_file const&) = delete; + ~fail_file() = default; + + fail_file(fail_file&&); + + fail_file& + operator=(fail_file&& other); + + explicit + fail_file(fail_counter& c); + + bool + is_open() const + { + return f_.is_open(); + } + + path_type const& + path() const + { + return f_.path(); + } + + std::uint64_t + size(error_code& ec) const + { + return f_.size(ec); + } + + void + close() + { + f_.close(); + } + + void + create(file_mode mode, path_type const& path, error_code& ec) + { + return f_.create(mode, path, ec); + } + + void + open(file_mode mode, path_type const& path, error_code& ec) + { + return f_.open(mode, path, ec); + } + + static + void + erase(path_type const& path, error_code& ec) + { + File::erase(path, ec); + } + + void + read(std::uint64_t offset, + void* buffer, std::size_t bytes, error_code& ec); + + void + write(std::uint64_t offset, + void const* buffer, std::size_t bytes, error_code& ec); + + void + sync(error_code& ec); + + void + trunc(std::uint64_t length, error_code& ec); + +private: + bool + fail(); + + void + do_fail(error_code& ec) + { + ec = test_error::failure; + } +}; + +template +fail_file:: +fail_file(fail_file&& other) + : f_(std::move(other.f_)) + , c_(other.c_) +{ + other.c_ = nullptr; +} + +template +fail_file& +fail_file:: +operator=(fail_file&& other) +{ + f_ = std::move(other.f_); + c_ = other.c_; + other.c_ = nullptr; + return *this; +} + +template +fail_file:: +fail_file(fail_counter& c) + : c_(&c) +{ +} + +template +void +fail_file:: +read(std::uint64_t offset, + void* buffer, std::size_t bytes, error_code& ec) +{ + if(fail()) + { + do_fail(ec); + return; + } + f_.read(offset, buffer, bytes, ec); +} + +template +void +fail_file:: +write(std::uint64_t offset, + void const* buffer, std::size_t bytes, error_code& ec) +{ + if(fail()) + { + do_fail(ec); + return; + } + if(fail()) + { + // partial write + f_.write(offset, buffer,(bytes + 1) / 2, ec); + if(ec) + return; + do_fail(ec); + return; + } + f_.write(offset, buffer, bytes, ec); +} + +template +void +fail_file:: +sync(error_code& ec) +{ + if(fail()) + do_fail(ec); + // We don't need a real sync for + // testing, it just slows things down. + //f_.sync(); +} + +template +void +fail_file:: +trunc(std::uint64_t length, error_code& ec) +{ + if(fail()) + { + do_fail(ec); + return; + } + f_.trunc(length, ec); +} + +template +bool +fail_file:: +fail() +{ + if(c_) + return c_->fail(); + return false; +} + +} // test +} // nudb + +#endif + diff --git a/extras/nudb/test/temp_dir.hpp b/extras/nudb/test/temp_dir.hpp new file mode 100644 index 0000000000..470d6603b8 --- /dev/null +++ b/extras/nudb/test/temp_dir.hpp @@ -0,0 +1,73 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_TEST_TEMP_DIR_HPP +#define NUDB_TEST_TEMP_DIR_HPP + +#include +#include + +namespace nudb { +namespace test { + +/** RAII temporary directory path. + + The directory and all its contents are deleted when + the instance of `temp_dir` is destroyed. +*/ +class temp_dir +{ + boost::filesystem::path path_; + +public: + temp_dir(const temp_dir&) = delete; + temp_dir& operator=(const temp_dir&) = delete; + + /// Construct a temporary directory. + explicit + temp_dir(boost::filesystem::path dir) + { + if (dir.empty()) + dir = boost::filesystem::temp_directory_path(); + + do + { + path_ = + dir / boost::filesystem::unique_path(); + } + while(boost::filesystem::exists(path_)); + boost::filesystem::create_directory(path_); + } + + /// Destroy a temporary directory. + ~temp_dir() + { + boost::filesystem::remove_all(path_); + } + + /// Get the native path for the temporary directory + std::string + path() const + { + return path_.string(); + } + + /** Get the native path for the a file. + + The file does not need to exist. + */ + std::string + file(std::string const& name) const + { + return (path_ / name).string(); + } +}; + +} // test +} // nudb + +#endif diff --git a/extras/nudb/test/test_store.hpp b/extras/nudb/test/test_store.hpp new file mode 100644 index 0000000000..a43cdda93a --- /dev/null +++ b/extras/nudb/test/test_store.hpp @@ -0,0 +1,451 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_TEST_TEST_STORE_HPP +#define NUDB_TEST_TEST_STORE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace test { + +template +class Buffer_t +{ + std::size_t size_ = 0; + std::size_t capacity_ = 0; + std::unique_ptr p_; + +public: + Buffer_t() = default; + + Buffer_t(Buffer_t&& other); + + Buffer_t(Buffer_t const& other); + + Buffer_t& operator=(Buffer_t&& other); + + Buffer_t& operator=(Buffer_t const& other); + + bool + empty() const + { + return size_ == 0; + } + + std::size_t + size() const + { + return size_; + } + + std::uint8_t* + data() + { + return p_.get(); + } + + std::uint8_t const* + data() const + { + return p_.get(); + } + + void + clear(); + + void + shrink_to_fit(); + + std::uint8_t* + resize(std::size_t size); + + std::uint8_t* + operator()(void const* data, std::size_t size); +}; + +template +Buffer_t<_>:: +Buffer_t(Buffer_t&& other) + : size_(other.size_) + , capacity_(other.capacity_) + , p_(std::move(other.p_)) +{ + other.size_ = 0; + other.capacity_ = 0; +} + +template +Buffer_t<_>:: +Buffer_t(Buffer_t const& other) +{ + if(! 
other.empty()) + std::memcpy(resize(other.size()), + other.data(), other.size()); +} + +template +auto +Buffer_t<_>:: +operator=(Buffer_t&& other) -> + Buffer_t& +{ + if(&other != this) + { + size_ = other.size_; + capacity_ = other.capacity_; + p_ = std::move(other.p_); + other.size_ = 0; + other.capacity_ = 0; + } + return *this; +} + +template +auto +Buffer_t<_>:: +operator=(Buffer_t const& other) -> + Buffer_t& +{ + if(&other != this) + { + if(other.empty()) + size_ = 0; + else + std::memcpy(resize(other.size()), + other.data(), other.size()); + } + return *this; +} + +template +void +Buffer_t<_>:: +clear() +{ + size_ = 0; + capacity_ = 0; + p_.reset(); +} + +template +void +Buffer_t<_>:: +shrink_to_fit() +{ + if(empty() || size_ == capacity_) + return; + std::unique_ptr p{ + new std::uint8_t[size_]}; + capacity_ = size_; + std::memcpy(p.get(), p_.get(), size_); + std::swap(p, p_); +} + +template +std::uint8_t* +Buffer_t<_>:: +resize(std::size_t size) +{ + if(capacity_ < size) + { + p_.reset(new std::uint8_t[size]); + capacity_ = size; + } + size_ = size; + return p_.get(); +} + +template +std::uint8_t* +Buffer_t<_>:: +operator()(void const* data, std::size_t size) +{ + if(data == nullptr || size == 0) + return resize(0); + return reinterpret_cast( + std::memcpy(resize(size), data, size)); +} + +using Buffer = Buffer_t<>; + +//------------------------------------------------------------------------------ + +/// Describes a test generated key/value pair +struct item_type +{ + std::uint8_t* key; + std::uint8_t* data; + std::size_t size; +}; + +/// Interface to facilitate tests +template +class basic_test_store +{ + using Hasher = xxhasher; + + temp_dir td_; + std::uniform_int_distribution sizef_; + std::function createf_; + std::function openf_; + Buffer buf_; + +public: + path_type const dp; + path_type const kp; + path_type const lp; + std::size_t const keySize; + std::size_t const blockSize; + float const loadFactor; + static std::uint64_t constexpr appnum = 1; + static std::uint64_t constexpr salt = 42; + basic_store db; + + template + basic_test_store(std::size_t keySize, + std::size_t blockSize, float loadFactor, + Args&&... args); + + template + basic_test_store( + boost::filesystem::path const& temp_dir, + std::size_t keySize, std::size_t blockSize, float loadFactor, + Args&&... args); + + ~basic_test_store(); + + item_type + operator[](std::uint64_t i); + + void + create(error_code& ec); + + void + open(error_code& ec); + + void + close(error_code& ec) + { + db.close(ec); + } + + void + erase(); + +private: + template + static + void + rngfill( + void* dest, std::size_t size, Generator& g); +}; + +template +template +basic_test_store::basic_test_store( + boost::filesystem::path const& temp_dir, + std::size_t keySize_, std::size_t blockSize_, + float loadFactor_, Args&&... args) + : td_(temp_dir) + , sizef_(250, 750) + , createf_( + [this, args...](error_code& ec) + { + nudb::create( + dp, kp, lp, appnum, salt, + keySize, blockSize, loadFactor, ec, + args...); + }) + , openf_( + [this, args...](error_code& ec) + { + db.open(dp, kp, lp, ec, args...); + }) + , dp(td_.file("nudb.dat")) + , kp(td_.file("nudb.key")) + , lp(td_.file("nudb.log")) + , keySize(keySize_) + , blockSize(blockSize_) + , loadFactor(loadFactor_) +{ +} + +template +template +basic_test_store::basic_test_store(std::size_t keySize_, + std::size_t blockSize_, float loadFactor_, + Args&&... args) + : basic_test_store(boost::filesystem::path{}, + keySize_, + blockSize_, + loadFactor_, + std::forward(args)...) 
+{ +} + +template +basic_test_store:: +~basic_test_store() +{ + erase(); +} + +template +auto +basic_test_store:: +operator[](std::uint64_t i) -> + item_type +{ + xor_shift_engine g{i + 1}; + item_type item; + item.size = sizef_(g); + auto const needed = keySize + item.size; + rngfill(buf_.resize(needed), needed, g); + // put key last so we can get some unaligned + // keys, this increases coverage of xxhash. + item.data = buf_.data(); + item.key = buf_.data() + item.size; + return item; +} + +template +void +basic_test_store:: +create(error_code& ec) +{ + createf_(ec); +} + +template +void +basic_test_store:: +open(error_code& ec) +{ + openf_(ec); + if(ec) + return; + if(db.key_size() != keySize) + ec = error::invalid_key_size; + else if(db.block_size() != blockSize) + ec = error::invalid_block_size; +} + +template +void +basic_test_store:: +erase() +{ + erase_file(dp); + erase_file(kp); + erase_file(lp); +} + +template +template +void +basic_test_store:: +rngfill( + void* dest, std::size_t size, Generator& g) +{ + using result_type = + typename Generator::result_type; + while(size >= sizeof(result_type)) + { + auto const v = g(); + std::memcpy(dest, &v, sizeof(v)); + dest = reinterpret_cast< + std::uint8_t*>(dest) + sizeof(v); + size -= sizeof(v); + } + if(size > 0) + { + auto const v = g(); + std::memcpy(dest, &v, size); + } +} + +using test_store = basic_test_store; + +//------------------------------------------------------------------------------ + +template +static +std::string +num (T t) +{ + std::string s = std::to_string(t); + std::reverse(s.begin(), s.end()); + std::string s2; + s2.reserve(s.size() + (s.size()+2)/3); + int n = 0; + for (auto c : s) + { + if (n == 3) + { + n = 0; + s2.insert (s2.begin(), ','); + } + ++n; + s2.insert(s2.begin(), c); + } + return s2; +} + +template +std::ostream& +operator<<(std::ostream& os, verify_info const& info) +{ + os << + "avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" << + "waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" << + "overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" << + "actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" << + "version: " << num(info.version) << "\n" << + "uid: " << fhex(info.uid) << "\n" << + "appnum: " << info.appnum << "\n" << + "key_size: " << num(info.key_size) << "\n" << + "salt: " << fhex(info.salt) << "\n" << + "pepper: " << fhex(info.pepper) << "\n" << + "block_size: " << num(info.block_size) << "\n" << + "bucket_size: " << num(info.bucket_size) << "\n" << + "load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" << + "capacity: " << num(info.capacity) << "\n" << + "buckets: " << num(info.buckets) << "\n" << + "key_count: " << num(info.key_count) << "\n" << + "value_count: " << num(info.value_count) << "\n" << + "value_bytes: " << num(info.value_bytes) << "\n" << + "spill_count: " << num(info.spill_count) << "\n" << + "spill_count_tot: " << num(info.spill_count_tot) << "\n" << + "spill_bytes: " << num(info.spill_bytes) << "\n" << + "spill_bytes_tot: " << num(info.spill_bytes_tot) << "\n" << + "key_file_size: " << num(info.key_file_size) << "\n" << + "dat_file_size: " << num(info.dat_file_size) << std::endl; + + std::string s; + for (size_t i = 0; i < info.hist.size(); ++i) + s += (i==0) ? 
+ std::to_string(info.hist[i]) : + (", " + std::to_string(info.hist[i])); + os << "hist: " << s << std::endl; + return os; +} + +} // test +} // nudb + +#endif + diff --git a/extras/nudb/test/xor_shift_engine.hpp b/extras/nudb/test/xor_shift_engine.hpp new file mode 100644 index 0000000000..a6529933e5 --- /dev/null +++ b/extras/nudb/test/xor_shift_engine.hpp @@ -0,0 +1,105 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_TEST_XOR_SHIFT_ENGINE_HPP +#define NUDB_TEST_XOR_SHIFT_ENGINE_HPP + +#include +#include +#include + +namespace nudb { +namespace test { + +/** XOR-shift Generator. + + Meets the requirements of UniformRandomNumberGenerator. + + Simple and fast RNG based on: + http://xorshift.di.unimi.it/xorshift128plus.c + does not accept seed==0 +*/ +class xor_shift_engine +{ +public: + using result_type = std::uint64_t; + + xor_shift_engine(xor_shift_engine const&) = default; + xor_shift_engine& operator=(xor_shift_engine const&) = default; + + explicit + xor_shift_engine(result_type val = 1977u) + { + seed(val); + } + + void + seed(result_type seed); + + result_type + operator()(); + + static + result_type constexpr + min() + { + return std::numeric_limits::min(); + } + + static + result_type constexpr + max() + { + return std::numeric_limits::max(); + } + +private: + result_type s_[2]; + + static + result_type + murmurhash3(result_type x); +}; + +inline +void +xor_shift_engine::seed(result_type seed) +{ + if(seed == 0) + throw std::domain_error("invalid seed"); + s_[0] = murmurhash3(seed); + s_[1] = murmurhash3(s_[0]); +} + +inline +auto +xor_shift_engine::operator()() -> + result_type +{ + result_type s1 = s_[0]; + result_type const s0 = s_[1]; + s_[0] = s0; + s1 ^= s1<< 23; + return(s_[1] =(s1 ^ s0 ^(s1 >> 17) ^(s0 >> 26))) + s0; +} + +inline +auto +xor_shift_engine::murmurhash3(result_type x) + -> result_type +{ + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + return x ^= x >> 33; +} + +} // test +} // nudb + +#endif diff --git a/extras/nudb/util.hpp b/extras/nudb/util.hpp new file mode 100644 index 0000000000..c34f574075 --- /dev/null +++ b/extras/nudb/util.hpp @@ -0,0 +1,288 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef UTIL_HPP +#define UTIL_HPP + +#include "basic_seconds_clock.hpp" + +#include +#include +#include +#include + +namespace nudb { + +template +int +log2(std::uint64_t n) +{ + int i = -(n == 0); + + auto const S = + [&](int k) + { + if(n >=(std::uint64_t{1} << k)) + { + i += k; + n >>= k; + } + }; + S(32); S(16); S(8); S(4); S(2); S(1); + return i; +} + +// Format a decimal integer with comma separators +template +std::string +fdec(T t) +{ + std::string s = std::to_string(t); + std::reverse(s.begin(), s.end()); + std::string s2; + s2.reserve(s.size() +(s.size()+2)/3); + int n = 0; + for(auto c : s) + { + if(n == 3) + { + n = 0; + s2.insert(s2.begin(), ','); + } + ++n; + s2.insert(s2.begin(), c); + } + return s2; +} + +// format 64-bit unsigned as fixed width, 0 padded hex +template +std::string +fhex(T v) +{ + std::string s{"0x0000000000000000"}; + auto it = s.end(); + for(it = s.end(); v; v >>= 4) + *--it = "0123456789abcdef"[v & 0xf]; + return s; +} + +// Format an array of integers as a comma separated list +template +static +std::string +fhist(std::array const& hist) +{ + std::size_t n; + for(n = hist.size() - 1; n > 0; --n) + if(hist[n]) + break; + std::string s = std::to_string(hist[0]); + for(std::size_t i = 1; i <= n; ++i) + s += ", " + std::to_string(hist[i]); + return s; +} + +class save_stream_state +{ + std::ostream& os_; + std::streamsize precision_; + std::ios::fmtflags flags_; + std::ios::char_type fill_; + +public: + ~save_stream_state() + { + os_.precision(precision_); + os_.flags(flags_); + os_.fill(fill_); + } + save_stream_state(save_stream_state const&) = delete; + save_stream_state& operator=(save_stream_state const&) = delete; + explicit save_stream_state(std::ostream& os) + : os_(os) + , precision_(os.precision()) + , flags_(os.flags()) + , fill_(os.fill()) + { + } +}; + +template +std::ostream& +pretty_time(std::ostream& os, std::chrono::duration d) +{ + save_stream_state _(os); + using namespace std::chrono; + if(d < microseconds{1}) + { + // use nanoseconds + if(d < nanoseconds{100}) + { + // use floating + using ns = duration; + os << std::fixed << std::setprecision(1) << ns(d).count(); + } + else + { + // use integral + os << round(d).count(); + } + os << "ns"; + } + else if(d < milliseconds{1}) + { + // use microseconds + if(d < microseconds{100}) + { + // use floating + using ms = duration; + os << std::fixed << std::setprecision(1) << ms(d).count(); + } + else + { + // use integral + os << round(d).count(); + } + os << "us"; + } + else if(d < seconds{1}) + { + // use milliseconds + if(d < milliseconds{100}) + { + // use floating + using ms = duration; + os << std::fixed << std::setprecision(1) << ms(d).count(); + } + else + { + // use integral + os << round(d).count(); + } + os << "ms"; + } + else if(d < minutes{1}) + { + // use seconds + if(d < seconds{100}) + { + // use floating + using s = duration; + os << std::fixed << std::setprecision(1) << s(d).count(); + } + else + { + // use integral + os << round(d).count(); + } + os << "s"; + } + else + { + // use minutes + if(d < minutes{100}) + { + // use floating + using m = duration>; + os << std::fixed << std::setprecision(1) << m(d).count(); + } + else + { + // use integral + os << round(d).count(); + } + os << "min"; + } + return os; +} + +template +std::string +fmtdur(std::chrono::duration const& d) +{ + std::stringstream ss; + pretty_time(ss, d); + return ss.str(); +} + 
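+// A usage sketch (illustrative only; not used by the library):
+// the formatting helpers above compose with standard streams.
+//
+//  std::cout << fdec(1234567);                             // "1,234,567"
+//  std::cout << fhex(std::uint64_t{0x2a});                 // "0x000000000000002a"
+//  std::cout << fmtdur(std::chrono::milliseconds{90500});  // "1.5min"
+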
+//------------------------------------------------------------------------------ + +class progress +{ + using clock_type = basic_seconds_clock; + + std::ostream& os_; + clock_type::time_point start_; + clock_type::time_point now_; + clock_type::time_point report_; + std::uint64_t prev_; + bool estimate_; + +public: + explicit + progress(std::ostream& os) + : os_(os) + { + } + + void + operator()(std::uint64_t amount, std::uint64_t total) + { + using namespace std::chrono; + auto const now = clock_type::now(); + if(amount == 0) + { + now_ = clock_type::now(); + start_ = now_; + report_ = now_; + prev_ = 0; + estimate_ = false; + return; + } + if(now == now_) + return; + now_ = now; + auto const elapsed = now - start_; + if(! estimate_) + { + // Wait a bit before showing the first estimate + if(elapsed < seconds{30}) + return; + estimate_ = true; + } + else if(now - report_ < seconds{60}) + { + // Only show estimates periodically + return; + } + auto const rate = double(amount) / elapsed.count(); + auto const remain = clock_type::duration{ + static_cast( + (total - amount) / rate)}; + os_ << + "Remaining: " << fmtdur(remain) << + " (" << fdec(amount) << " of " << fdec(total) << + " in " << fmtdur(elapsed) << + ", " << fdec(amount - prev_) << + " in " << fmtdur(now - report_) << + ")\n"; + report_ = now; + prev_ = amount; + } + + clock_type::duration + elapsed() const + { + using namespace std::chrono; + return now_ - start_; + } +}; + +} // nudb + +#endif diff --git a/extras/rocksdb b/extras/rocksdb new file mode 160000 index 0000000000..a297643f2e --- /dev/null +++ b/extras/rocksdb @@ -0,0 +1 @@ +Subproject commit a297643f2e327a8bc7061bfc838fdf11935a2cf2 diff --git a/include/nudb/basic_store.hpp b/include/nudb/basic_store.hpp new file mode 100644 index 0000000000..0e69195fbb --- /dev/null +++ b/include/nudb/basic_store.hpp @@ -0,0 +1,436 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_BASIC_STORE_HPP +#define NUDB_BASIC_STORE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +/** A high performance, insert-only key/value database for SSDs. + + To create a database first call the @ref create + free function. Then construct a @ref basic_store and + call @ref open on it: + + @code + error_code ec; + create( + "db.dat", "db.key", "db.log", + 1, make_salt(), 8, 4096, 0.5f, ec); + basic_store db; + db.open("db.dat", "db.key", "db.log", ec); + @endcode + + @tparam Hasher The hash function to use. This type + must meet the requirements of @b Hasher. + + @tparam File The type of File object to use. This type + must meet the requirements of @b File. 
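+
+    Once the database is open, values may be inserted and
+    fetched. A brief usage sketch, reusing `db` and `ec` from
+    the example above (keys are 8 bytes, matching the create()
+    call):
+
+    @code
+    char key[8] = {};
+    char const value[] = "hello";
+    db.insert(key, value, sizeof(value) - 1, ec);
+    db.fetch(key,
+        [&](void const* buffer, std::size_t size)
+        {
+            // buffer is valid only until the callback returns
+        }, ec);
+    @endcode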
+*/ +template +class basic_store +{ +public: + using hash_type = Hasher; + using file_type = File; + +private: + using clock_type = + std::chrono::steady_clock; + + using time_point = + typename clock_type::time_point; + + struct state + { + File df; + File kf; + File lf; + path_type dp; + path_type kp; + path_type lp; + Hasher hasher; + detail::pool p0; + detail::pool p1; + detail::cache c1; + detail::key_file_header kh; + + std::size_t rate = 0; + time_point when = clock_type::now(); + + state(state const&) = delete; + state& operator=(state const&) = delete; + + state(state&&) = default; + state& operator=(state&&) = default; + + state(File&& df_, File&& kf_, File&& lf_, + path_type const& dp_, path_type const& kp_, + path_type const& lp_, + detail::key_file_header const& kh_); + }; + + bool open_ = false; + + // Use optional because some + // members cannot be default-constructed. + // + boost::optional s_; // State of an open database + + std::size_t frac_; // accumulates load + std::size_t thresh_; // split threshold + nbuck_t buckets_; // number of buckets + nbuck_t modulus_; // hash modulus + + std::mutex u_; // serializes insert() + detail::gentex g_; + boost::shared_mutex m_; + std::thread t_; + std::condition_variable_any cv_; + + error_code ec_; + std::atomic ecb_; // `true` when ec_ set + + std::size_t dataWriteSize_; + std::size_t logWriteSize_; + +public: + /** Default constructor. + + A default constructed database is initially closed. + */ + basic_store() = default; + + /// Copy constructor (disallowed) + basic_store(basic_store const&) = delete; + + /// Copy assignment (disallowed) + basic_store& operator=(basic_store const&) = delete; + + /** Destroy the database. + + Files are closed, memory is freed, and data that has not been + committed is discarded. To ensure that all inserted data is + written, it is necessary to call @ref close before destroying + the @ref basic_store. + + This function ignores errors returned by @ref close; to receive + those errors it is necessary to call @ref close before the + @ref basic_store is destroyed. + */ + ~basic_store(); + + /** Returns `true` if the database is open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + */ + bool + is_open() const + { + return open_; + } + + /** Return the path to the data file. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + + @return The data file path. + */ + path_type const& + dat_path() const; + + /** Return the path to the key file. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + + @return The key file path. + */ + path_type const& + key_path() const; + + /** Return the path to the log file. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + + @return The log file path. + */ + path_type const& + log_path() const; + + /** Return the appnum associated with the database. + + This is an unsigned 64-bit integer associated with the + database and defined by the application. It is set + once when the database is created in a call to + @ref create. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. 
+ + @return The appnum. + */ + std::uint64_t + appnum() const; + + /** Return the key size associated with the database. + + The key size is defined by the application when the + database is created in a call to @ref create. The + key size cannot be changed on an existing database. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + + @return The size of keys in the database. + */ + std::size_t + key_size() const; + + /** Return the block size associated with the database. + + The block size is defined by the application when the + database is created in a call to @ref create or when a + key file is regenerated in a call to @ref rekey. The + block size cannot be changed on an existing key file. + Instead, a new key file may be created with a different + block size. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function + except @ref open or @ref close. + + @return The size of blocks in the key file. + */ + std::size_t + block_size() const; + + /** Close the database. + + All data is committed before closing. + + If an error occurs, the database is still closed. + + @par Requirements + + The database must be open. + + @par Thread safety + + Not thread safe. The caller is responsible for + ensuring that no other member functions are + called concurrently. + + @param ec Set to the error, if any occurred. + */ + void + close(error_code& ec); + + /** Open a database. + + The database identified by the specified data, key, and + log file paths is opened. If a log file is present, the + recovery mechanism is invoked to restore database integrity + before the function returns. + + @par Requirements + + The database must be not be open. + + @par Thread safety + + Not thread safe. The caller is responsible for + ensuring that no other member functions are + called concurrently. + + @param dat_path The path to the data file. + + @param key_path The path to the key file. + + @param log_path The path to the log file. + + @param ec Set to the error, if any occurred. + + @param args Optional arguments passed to @b File constructors. + + */ + template + void + open( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + error_code& ec, + Args&&... args); + + /** Fetch a value. + + The function checks the database for the specified + key, and invokes the callback if it is found. If + the key is not found, `ec` is set to @ref error::key_not_found. + If any other errors occur, `ec` is set to the + corresponding error. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function except + @ref close. + + @note If the implementation encounters an error while + committing data to the database, this function will + immediately return with `ec` set to the error which + occurred. All subsequent calls to @ref fetch will + return the same error until the database is closed. + + @param key A pointer to a memory buffer of at least + @ref key_size() bytes, containing the key to be searched + for. + + @param callback A function which will be called with the + value data if the fetch is successful. 
The equivalent + signature must be: + @code + void callback( + void const* buffer, // A buffer holding the value + std::size_t size // The size of the value in bytes + ); + @endcode + The buffer provided to the callback remains valid + until the callback returns, ownership is not transferred. + + @param ec Set to the error, if any occurred. + */ + template + void + fetch(void const* key, Callback && callback, error_code& ec); + + /** Insert a value. + + This function attempts to insert the specified key/value + pair into the database. If the key already exists, + `ec` is set to @ref error::key_exists. If an error + occurs, `ec` is set to the corresponding error. + + @par Requirements + + The database must be open. + + @par Thread safety + + Safe to call concurrently with any function except + @ref close. + + @note If the implementation encounters an error while + committing data to the database, this function will + immediately return with `ec` set to the error which + occurred. All subsequent calls to @ref insert will + return the same error until the database is closed. + + @param key A buffer holding the key to be inserted. The + size of the buffer should be at least the `key_size` + associated with the open database. + + @param data A buffer holding the value to be inserted. + + @param bytes The size of the buffer holding the value + data. This value must be greater than 0 and no more + than 0xffffffff. + + @param ec Set to the error, if any occurred. + */ + void + insert(void const* key, void const* data, + nsize_t bytes, error_code& ec); + +private: + template + void + fetch(detail::nhash_t h, void const* key, + detail::bucket b, Callback && callback, error_code& ec); + + bool + exists(detail::nhash_t h, void const* key, + detail::shared_lock_type* lock, detail::bucket b, error_code& ec); + + void + split(detail::bucket& b1, detail::bucket& b2, + detail::bucket& tmp, nbuck_t n1, nbuck_t n2, + nbuck_t buckets, nbuck_t modulus, + detail::bulk_writer& w, error_code& ec); + + detail::bucket + load(nbuck_t n, detail::cache& c1, + detail::cache& c0, void* buf, error_code& ec); + + void + commit(detail::unique_lock_type& m, + std::size_t& work, error_code& ec); + + void + run(); +}; + +} // nudb + +#include + +#endif diff --git a/include/nudb/concepts.hpp b/include/nudb/concepts.hpp new file mode 100644 index 0000000000..3d01d36a90 --- /dev/null +++ b/include/nudb/concepts.hpp @@ -0,0 +1,205 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_CONCEPTS_HPP +#define NUDB_CONCEPTS_HPP + +#include +#include +#include +#include +#include + +namespace nudb { + +namespace detail { + +template +class check_is_File +{ + template().is_open()), + bool>> + static R check1(int); + template + static std::false_type check1(...); + using type1 = decltype(check1(0)); + + template().close(), + std::true_type{})> + static R check2(int); + template + static std::false_type check2(...); + using type2 = decltype(check2(0)); + + template().create( + std::declval(), + std::declval(), + std::declval()), + std::true_type{})> + static R check3(int); + template + static std::false_type check3(...); + using type3 = decltype(check3(0)); + + template().open( + std::declval(), + std::declval(), + std::declval()), + std::true_type{})> + static R check4(int); + template + static std::false_type check4(...); + using type4 = decltype(check4(0)); + + template(), + std::declval()), + std::true_type{})> + static R check5(int); + template + static std::false_type check5(...); + using type5 = decltype(check5(0)); + + template().size( + std::declval())), + std::uint64_t>> + static R check6(int); + template + static std::false_type check6(...); + using type6 = decltype(check6(0)); + + template().read( + std::declval(), + std::declval(), + std::declval(), + std::declval()), + std::true_type{})> + static R check7(int); + template + static std::false_type check7(...); + using type7 = decltype(check7(0)); + + template().write( + std::declval(), + std::declval(), + std::declval(), + std::declval()), + std::true_type{})> + static R check8(int); + template + static std::false_type check8(...); + using type8 = decltype(check8(0)); + + template().sync( + std::declval()), + std::true_type{})> + static R check9(int); + template + static std::false_type check9(...); + using type9 = decltype(check9(0)); + + template().trunc( + std::declval(), + std::declval()), + std::true_type{})> + static R check10(int); + template + static std::false_type check10(...); + using type10 = decltype(check10(0)); + +public: + using type = std::integral_constant::value && + type1::value && type2::value && type3::value && + type4::value && type5::value && type6::value && + type7::value && type8::value && type9::value && + type10::value + >; +}; + +template +class check_is_Hasher +{ + template> + static R check1(int); + template + static std::false_type check1(...); + using type1 = decltype(check1(0)); + + template().operator()( + std::declval(), + std::declval())), + std::uint64_t>> + static R check2(int); + template + static std::false_type check2(...); + using type2 = decltype(check2(0)); +public: + using type = std::integral_constant; +}; + +template +class check_is_Progress +{ + template().operator()( + std::declval(), + std::declval()), + std::true_type{})> + static R check1(int); + template + static std::false_type check1(...); +public: + using type = decltype(check1(0)); +}; + +} // detail + +/// Determine if `T` meets the requirements of @b `File` +template +#if GENERATING_DOCS +struct is_File : std::integral_constant{}; +#else +using is_File = typename detail::check_is_File::type; +#endif + + +/// Determine if `T` meets the requirements of @b `Hasher` +template +#if GENERATING_DOCS +struct is_Hasher : std::integral_constant{}; +#else +using is_Hasher = typename detail::check_is_Hasher::type; +#endif + +/// Determine if `T` meets the requirements of @b `Progress` +template +#if 
GENERATING_DOCS
+struct is_Progress : std::integral_constant<bool, ...>{};
+#else
+using is_Progress = typename detail::check_is_Progress<T>::type;
+#endif
+
+} // nudb
+
+#endif
diff --git a/include/nudb/create.hpp b/include/nudb/create.hpp
new file mode 100644
index 0000000000..cb37ecbd91
--- /dev/null
+++ b/include/nudb/create.hpp
@@ -0,0 +1,117 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_CREATE_HPP
+#define NUDB_CREATE_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nudb {
+
+/** Return a random salt.
+
+    This function will use the system provided random
+    number device to generate a uniformly distributed
+    64-bit unsigned value suitable for use as the salt
+    value in a call to @ref create.
+*/
+template<class = void>
+std::uint64_t
+make_salt();
+
+/** Create a new database.
+
+    This function creates a set of new database files with
+    the given parameters. The files must not already exist,
+    or else an error is returned.
+
+    If an error occurs while the files are being created,
+    the function attempts to remove the files before
+    returning.
+
+    @par Example
+    @code
+    error_code ec;
+    create<xxhasher>(
+        "db.dat", "db.key", "db.log",
+        1, make_salt(), 8, 4096, 0.5f, ec);
+    @endcode
+
+    @par Template Parameters
+
+    @tparam Hasher The hash function to use. This type must
+    meet the requirements of @b Hasher. The same hash
+    function must be used every time the database is opened,
+    or else an error is returned. The provided @ref xxhasher
+    is a suitable general purpose hash function.
+
+    @tparam File The type of file to use. Use the default of
+    @ref native_file unless customizing the file behavior.
+
+    @param dat_path The path to the data file.
+
+    @param key_path The path to the key file.
+
+    @param log_path The path to the log file.
+
+    @param appnum A caller-defined value stored in the file
+    headers. When opening the database, the same value is
+    preserved and returned to the caller.
+
+    @param salt A random unsigned integer used to permute
+    the hash function to make it unpredictable. @ref make_salt
+    returns a suitable value.
+
+    @param key_size The number of bytes in each key.
+
+    @param blockSize The size of a key file block. Larger
+    blocks hold more keys but require more I/O cycles per
+    operation. The ideal block size is the largest size that
+    may be read in a single I/O cycle, and is device dependent.
+    @ref block_size returns a suitable value for the volume
+    of a given path.
+
+    @param load_factor A number between zero and one
+    representing the average bucket occupancy (number of
+    items). A value of 0.5 is perfect. Lower numbers
+    waste space, and higher numbers produce negligible
+    savings at the cost of increased I/O cycles.
+
+    @param ec Set to the error, if any occurred.
+
+    @param args Optional arguments passed to @b File constructors.
+*/
+template<
+    class Hasher,
+    class File = native_file,
+    class... Args
+>
+void
+create(
+    path_type const& dat_path,
+    path_type const& key_path,
+    path_type const& log_path,
+    std::uint64_t appnum,
+    std::uint64_t salt,
+    nsize_t key_size,
+    nsize_t blockSize,
+    float load_factor,
+    error_code& ec,
+    Args&&...
args); + +} // nudb + +#include + +#endif diff --git a/include/nudb/detail/arena.hpp b/include/nudb/detail/arena.hpp new file mode 100644 index 0000000000..e033140e7c --- /dev/null +++ b/include/nudb/detail/arena.hpp @@ -0,0 +1,296 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_ARENA_HPP +#define NUDB_DETAIL_ARENA_HPP + +#include +#include +#include +#include +#include +#include +#include + +#if NUDB_DEBUG_ARENA +#include +#include +#endif + +namespace nudb { +namespace detail { + +/* Custom memory manager that allocates in large blocks. + + The implementation measures the rate of allocations in + bytes per second and tunes the large block size to fit + one second's worth of allocations. +*/ +template +class arena_t +{ + using clock_type = + std::chrono::steady_clock; + + using time_point = + typename clock_type::time_point; + + class element; + + char const* label_; // diagnostic + std::size_t alloc_ = 0; // block size + std::size_t used_ = 0; // bytes allocated + element* list_ = nullptr; // list of blocks + time_point when_ = clock_type::now(); + +public: + arena_t(arena_t const&) = delete; + arena_t& operator=(arena_t&&) = delete; + arena_t& operator=(arena_t const&) = delete; + + ~arena_t(); + + explicit + arena_t(char const* label = ""); + + arena_t(arena_t&& other); + + // Set the allocation size + void + hint(std::size_t alloc) + { + alloc_ = alloc; + } + + // Free all memory + void + clear(); + + void + periodic_activity(); + + std::uint8_t* + alloc(std::size_t n); + + template + friend + void + swap(arena_t& lhs, arena_t& rhs); +}; + +//------------------------------------------------------------------------------ + +template +class arena_t<_>::element +{ + std::size_t const capacity_; + std::size_t used_ = 0; + element* next_; + +public: + element(std::size_t capacity, element* next) + : capacity_(capacity) + , next_(next) + { + } + + element* + next() const + { + return next_; + } + + void + clear() + { + used_ = 0; + } + + std::size_t + remain() const + { + return capacity_ - used_; + } + + std::size_t + capacity() const + { + return capacity_; + } + + std::uint8_t* + alloc(std::size_t n); +}; + +template +std::uint8_t* +arena_t<_>::element:: +alloc(std::size_t n) +{ + if(n > capacity_ - used_) + return nullptr; + auto const p = const_cast( + reinterpret_cast(this + 1) + ) + used_; + used_ += n; + return p; +} + +//------------------------------------------------------------------------------ + +template +arena_t<_>:: +arena_t(char const* label) + : label_(label) +{ +} + +template +arena_t<_>:: +~arena_t() +{ + clear(); +} + +template +arena_t<_>:: +arena_t(arena_t&& other) + : label_(other.label_) + , alloc_(other.alloc_) + , used_(other.used_) + , list_(other.list_) + , when_(other.when_) +{ + other.used_ = 0; + other.list_ = nullptr; + other.when_ = clock_type::now(); + other.alloc_ = 0; +} + +template +void +arena_t<_>:: +clear() +{ + used_ = 0; + while(list_) + { + auto const e = list_; + list_ = list_->next(); + e->~element(); + delete[] reinterpret_cast(e); + } +} + +template +void +arena_t<_>:: +periodic_activity() +{ + using namespace std::chrono; + auto const now = clock_type::now(); + auto const elapsed = now - when_; + if(elapsed < milliseconds{500}) + return; + when_ = now; + auto const rate = static_cast(std::ceil( + used_ / 
duration_cast>(elapsed).count())); +#if NUDB_DEBUG_ARENA + beast::unit_test::dstream dout{std::cout}; + auto const size = + [](element* e) + { + std::size_t n = 0; + while(e) + { + ++n; + e = e->next(); + } + return n; + }; +#endif + if(rate >= alloc_ * 2) + { + // adjust up + alloc_ = std::max(rate, alloc_ * 2); + #if NUDB_DEBUG_ARENA + dout << label_ << ": " + "rate=" << rate << + ", alloc=" << alloc_ << " UP" + ", nused=" << used_ << + ", used=" << size(list_) << + "\n"; + #endif + } + else if(rate <= alloc_ / 2) + { + // adjust down + alloc_ /= 2; + #if NUDB_DEBUG_ARENA + dout << label_ << ": " + "rate=" << rate << + ", alloc=" << alloc_ << " DOWN" + ", nused=" << used_ << + ", used=" << size(list_) << + "\n"; + #endif + } + else + { + #if NUDB_DEBUG_ARENA + dout << label_ << ": " + "rate=" << rate << + ", alloc=" << alloc_ << + ", nused=" << used_ << + ", used=" << size(list_) << + "\n"; + #endif + } +} + +template +std::uint8_t* +arena_t<_>:: +alloc(std::size_t n) +{ + // Undefined behavior: Zero byte allocations + BOOST_ASSERT(n != 0); + n = 8 *((n + 7) / 8); + std::uint8_t* p; + if(list_) + { + p = list_->alloc(n); + if(p) + { + used_ += n; + return p; + } + } + auto const size = std::max(alloc_, n); + auto const e = reinterpret_cast( + new std::uint8_t[sizeof(element) + size]); + list_ = ::new(e) element{size, list_}; + used_ += n; + return list_->alloc(n); +} + +template +void +swap(arena_t<_>& lhs, arena_t<_>& rhs) +{ + using std::swap; + swap(lhs.used_, rhs.used_); + swap(lhs.list_, rhs.list_); + // don't swap alloc_ or when_ +} + +using arena = arena_t<>; + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/bucket.hpp b/include/nudb/detail/bucket.hpp new file mode 100644 index 0000000000..8a8efbf610 --- /dev/null +++ b/include/nudb/detail/bucket.hpp @@ -0,0 +1,473 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_BUCKET_HPP +#define NUDB_DETAIL_BUCKET_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Returns bucket index given hash, buckets, and modulus +// +inline +nbuck_t +bucket_index(nhash_t h, nbuck_t buckets, std::uint64_t modulus) +{ + BOOST_ASSERT(modulus <= 0x100000000ULL); + auto n = h % modulus; + if(n >= buckets) + n -= modulus / 2; + return static_cast(n); +} + +//------------------------------------------------------------------------------ + +// Tag for constructing empty buckets +struct empty_t +{ + constexpr empty_t() = default; +}; + +static empty_t constexpr empty{}; + +// Allows inspection and manipulation of bucket blobs in memory +template +class bucket_t +{ + nsize_t block_size_; // Size of a key file block + nkey_t size_; // Current key count + noff_t spill_; // Offset of next spill record or 0 + std::uint8_t* p_; // Pointer to the bucket blob + +public: + struct value_type + { + noff_t offset; + nhash_t hash; + nsize_t size; + }; + + bucket_t() = default; + bucket_t(bucket_t const&) = default; + bucket_t& operator=(bucket_t const&) = default; + + bucket_t(nsize_t block_size, void* p); + + bucket_t(nsize_t block_size, void* p, empty_t); + + nsize_t + block_size() const + { + return block_size_; + } + + // Serialized bucket size. 
+ // Excludes empty + nsize_t + actual_size() const + { + return bucket_size(size_); + } + + bool + empty() const + { + return size_ == 0; + } + + bool + full() const + { + return size_ >= + detail::bucket_capacity(block_size_); + } + + nkey_t + size() const + { + return size_; + } + + // Returns offset of next spill record or 0 + // + noff_t + spill() const + { + return spill_; + } + + // Set offset of next spill record + // + void + spill(noff_t offset); + + // Clear contents of the bucket + // + void + clear(); + + // Returns the record for a key + // entry without bounds checking. + // + value_type const + at(nkey_t i) const; + + value_type const + operator[](nkey_t i) const + { + return at(i); + } + + // Returns index of entry with prefix + // equal to or greater than the given prefix. + // + nkey_t + lower_bound(nhash_t h) const; + + void + insert(noff_t offset, nsize_t size, nhash_t h); + + // Erase an element by index + // + void + erase(nkey_t i); + + // Read a full bucket from the + // file at the specified offset. + // + template + void + read(File& f, noff_t, error_code& ec); + + // Read a compact bucket + // + template + void + read(bulk_reader& r, error_code& ec); + + // Write a compact bucket to the stream. + // This only writes entries that are not empty. + // + void + write(ostream& os) const; + + // Write a bucket to the file at the specified offset. + // The full block_size() bytes are written. + // + template + void + write(File& f,noff_t offset, error_code& ec) const; + +private: + // Update size and spill in the blob + void + update(); +}; + +//------------------------------------------------------------------------------ + +template +bucket_t<_>:: +bucket_t(nsize_t block_size, void* p) + : block_size_(block_size) + , p_(reinterpret_cast(p)) +{ + // Bucket Record + istream is(p_, block_size); + detail::read(is, size_); // Count + detail::read(is, spill_); // Spill +} + +template +bucket_t<_>:: +bucket_t(nsize_t block_size, void* p, empty_t) + : block_size_(block_size) + , size_(0) + , spill_(0) + , p_(reinterpret_cast(p)) +{ + clear(); +} + +template +void +bucket_t<_>:: +spill(noff_t offset) +{ + spill_ = offset; + update(); +} + +template +void +bucket_t<_>::clear() +{ + size_ = 0; + spill_ = 0; + std::memset(p_, 0, block_size_); +} + +template +auto +bucket_t<_>:: +at(nkey_t i) const -> + value_type const +{ + value_type result; + // Bucket Entry + auto const w = + field::size + // Offset + field::size + // Size + field::size; // Prefix + // Bucket Record + detail::istream is{p_ + + field::size + // Count + field::size + // Spill + i * w, w}; + // Bucket Entry + detail::read( + is, result.offset); // Offset + detail::read_size48( + is, result.size); // Size + detail::read( + is, result.hash); // Hash + return result; +} + +template +nkey_t +bucket_t<_>:: +lower_bound(nhash_t h) const +{ + // Bucket Entry + auto const w = + field::size + // Offset + field::size + // Size + field::size; // Hash + // Bucket Record + auto const p = p_ + + field::size + // Count + field::size + // Spill + // Bucket Entry + field::size + // Offset + field::size; // Size + nkey_t step; + nkey_t first = 0; + nkey_t count = size_; + while(count > 0) + { + step = count / 2; + nkey_t i = first + step; + nhash_t h1; + readp(p + i * w, h1); + if(h1 < h) + { + first = i + 1; + count -= step + 1; + } + else + { + count = step; + } + } + return first; +} + +template +void +bucket_t<_>:: +insert( + noff_t offset, nsize_t size, nhash_t h) +{ + auto const i = lower_bound(h); + // Bucket Record + auto 
const p = p_ + + field< + std::uint16_t>::size + // Count + field::size; // Spill + // Bucket Entry + auto const w = + field::size + // Offset + field::size + // Size + field::size; // Hash + std::memmove( + p +(i + 1) * w, + p + i * w, + (size_ - i) * w); + ++size_; + update(); + // Bucket Entry + ostream os{p + i * w, w}; + detail::write( + os, offset); // Offset + detail::write( + os, size); // Size + detail::write( + os, h); // Prefix +} + +template +void +bucket_t<_>:: +erase(nkey_t i) +{ + // Bucket Record + auto const p = p_ + + field::size + // Count + field::size; // Spill + auto const w = + field::size + // Offset + field::size + // Size + field::size; // Hash + --size_; + if(i < size_) + std::memmove( + p + i * w, + p +(i + 1) * w, + (size_ - i) * w); + std::memset(p + size_ * w, 0, w); + update(); +} + +template +template +void +bucket_t<_>:: +read(File& f, noff_t offset, error_code& ec) +{ + auto const cap = bucket_capacity(block_size_); + // Excludes padding to block size + f.read(offset, p_, bucket_size(cap), ec); + if(ec) + return; + istream is{p_, block_size_}; + detail::read(is, size_); // Count + detail::read(is, spill_); // Spill + if(size_ > cap) + { + ec = error::invalid_bucket_size; + return; + } +} + +template +template +void +bucket_t<_>:: +read(bulk_reader& r, error_code& ec) +{ + // Bucket Record(compact) + auto is = r.prepare( + detail::field::size + + detail::field::size, ec); + if(ec) + return; + detail::read(is, size_); // Count + detail::read(is, spill_); // Spill + update(); + // Excludes empty bucket entries + auto const w = size_ * ( + field::size + // Offset + field::size + // Size + field::size); // Hash + is = r.prepare(w, ec); + if(ec) + return; + std::memcpy(p_ + + field::size + // Count + field::size, // Spill + is.data(w), w); // Entries +} + +template +void +bucket_t<_>:: +write(ostream& os) const +{ + // Does not pad up to the block size. This + // is called to write to the data file. + auto const size = actual_size(); + // Bucket Record + std::memcpy(os.data(size), p_, size); +} + +template +template +void +bucket_t<_>:: +write(File& f, noff_t offset, error_code& ec) const +{ + // Includes zero pad up to the block + // size, to make the key file size always + // a multiple of the block size. + auto const size = actual_size(); + std::memset(p_ + size, 0, block_size_ - size); + // Bucket Record + f.write(offset, p_, block_size_, ec); + if(ec) + return; +} + +template +void +bucket_t<_>:: +update() +{ + // Bucket Record + ostream os{p_, block_size_}; + detail::write(os, size_); // Count + detail::write(os, spill_); // Spill +} + +using bucket = bucket_t<>; + +//------------------------------------------------------------------------------ + +// Spill bucket if full. +// The bucket is cleared after it spills. 
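+// A spill record is written to the data file as a zero
+// marker (which distinguishes it from an ordinary data
+// record), the serialized bucket size, and the bucket
+// contents; the in-memory bucket is then cleared and its
+// spill offset set to point at the spilled copy.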
+// +template +void +maybe_spill( + bucket& b, bulk_writer& w, error_code& ec) +{ + if(b.full()) + { + // Spill Record + auto const offset = w.offset(); + auto os = w.prepare( + field::size + // Zero + field::size + // Size + b.actual_size(), ec); + if(ec) + return; + write(os, 0ULL); // Zero + write( + os, b.actual_size()); // Size + auto const spill = + offset + os.size(); + b.write(os); // Bucket + // Update bucket + b.clear(); + b.spill(spill); + } +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/buffer.hpp b/include/nudb/detail/buffer.hpp new file mode 100644 index 0000000000..fc9f187ff7 --- /dev/null +++ b/include/nudb/detail/buffer.hpp @@ -0,0 +1,86 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_BUFFER_HPP +#define NUDB_DETAIL_BUFFER_HPP + +#include +#include +#include + +namespace nudb { +namespace detail { + +// Simple growable memory buffer +class buffer +{ +private: + std::size_t size_ = 0; + std::unique_ptr buf_; + +public: + ~buffer() = default; + buffer() = default; + buffer(buffer const&) = delete; + buffer& operator=(buffer const&) = delete; + + explicit + buffer(std::size_t n) + : size_(n) + , buf_(new std::uint8_t[n]) + { + } + + buffer(buffer&& other) + : size_(other.size_) + , buf_(std::move(other.buf_)) + { + other.size_ = 0; + } + + buffer& + operator=(buffer&& other) + { + size_ = other.size_; + buf_ = std::move(other.buf_); + other.size_ = 0; + return *this; + } + + std::size_t + size() const + { + return size_; + } + + std::uint8_t* + get() const + { + return buf_.get(); + } + + void + reserve(std::size_t n) + { + if(size_ < n) + buf_.reset(new std::uint8_t[n]); + size_ = n; + } + + // BufferFactory + void* + operator()(std::size_t n) + { + reserve(n); + return buf_.get(); + } +}; + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/bulkio.hpp b/include/nudb/detail/bulkio.hpp new file mode 100644 index 0000000000..22e72073a3 --- /dev/null +++ b/include/nudb/detail/bulkio.hpp @@ -0,0 +1,196 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_BULKIO_HPP +#define NUDB_DETAIL_BULKIO_HPP + +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Scans a file in sequential large reads +template +class bulk_reader +{ + File& f_; + buffer buf_; + noff_t last_; // size of file + noff_t offset_; // current position + std::size_t avail_; // bytes left to read in buf + std::size_t used_; // bytes consumed in buf + +public: + bulk_reader(File& f, noff_t offset, + noff_t last, std::size_t buffer_size); + + noff_t + offset() const + { + return offset_ - avail_; + } + + bool + eof() const + { + return offset() >= last_; + } + + istream + prepare(std::size_t needed, error_code& ec); +}; + +template +bulk_reader:: +bulk_reader(File& f, noff_t offset, + noff_t last, std::size_t buffer_size) + : f_(f) + , last_(last) + , offset_(offset) + , avail_(0) + , used_(0) +{ + buf_.reserve(buffer_size); +} + +template +istream +bulk_reader:: +prepare(std::size_t needed, error_code& ec) +{ + if(needed > avail_) + { + if(offset_ + needed - avail_ > last_) + { + ec = error::short_read; + return {}; + } + if(needed > buf_.size()) + { + buffer buf; + buf.reserve(needed); + std::memcpy(buf.get(), + buf_.get() + used_, avail_); + buf_ = std::move(buf); + } + else + { + std::memmove(buf_.get(), + buf_.get() + used_, avail_); + } + + auto const n = std::min(buf_.size() - avail_, + static_cast(last_ - offset_)); + f_.read(offset_, buf_.get() + avail_, n, ec); + if(ec) + return {}; + offset_ += n; + avail_ += n; + used_ = 0; + } + istream is{buf_.get() + used_, needed}; + used_ += needed; + avail_ -= needed; + return is; +} + +//------------------------------------------------------------------------------ + +// Buffers file writes +// Caller must call flush manually at the end +template +class bulk_writer +{ + File& f_; + buffer buf_; + noff_t offset_; // current position + std::size_t used_; // bytes written to buf + +public: + bulk_writer(File& f, noff_t offset, + std::size_t buffer_size); + + ostream + prepare(std::size_t needed, error_code& ec); + + // Returns the number of bytes buffered + std::size_t + size() + { + return used_; + } + + // Return current offset in file. This + // is advanced with each call to prepare. + noff_t + offset() const + { + return offset_ + used_; + } + + // Caller must invoke flush manually in + // order to handle any error conditions. 
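+    // If flush is never invoked, buffered bytes are
+    // discarded; flushing is not done in a destructor
+    // because a destructor has no way to report an error.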
+ void + flush(error_code& ec); +}; + +template +bulk_writer:: +bulk_writer(File& f, + noff_t offset, std::size_t buffer_size) + : f_(f) + , offset_(offset) + , used_(0) + +{ + buf_.reserve(buffer_size); +} + +template +ostream +bulk_writer:: +prepare(std::size_t needed, error_code& ec) +{ + if(used_ + needed > buf_.size()) + { + flush(ec); + if(ec) + return{}; + } + if(needed > buf_.size()) + buf_.reserve(needed); + ostream os(buf_.get() + used_, needed); + used_ += needed; + return os; +} + +template +void +bulk_writer:: +flush(error_code& ec) +{ + if(used_) + { + auto const offset = offset_; + auto const used = used_; + offset_ += used_; + used_ = 0; + f_.write(offset, buf_.get(), used, ec); + if(ec) + return; + } +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/cache.hpp b/include/nudb/detail/cache.hpp new file mode 100644 index 0000000000..0606ae4233 --- /dev/null +++ b/include/nudb/detail/cache.hpp @@ -0,0 +1,236 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_CACHE_HPP +#define NUDB_DETAIL_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Associative container storing +// bucket blobs keyed by bucket index. +// +template +class cache_t +{ +public: + using value_type = std::pair; + +private: + using map_type = + std::unordered_map; + + struct transform + { + using argument_type = + typename map_type::value_type; + using result_type = value_type; + + cache_t* cache_; + + transform() + : cache_(nullptr) + { + } + + explicit + transform(cache_t& cache) + : cache_(&cache) + { + } + + value_type + operator()(argument_type const& e) const + { + return std::make_pair(e.first, + bucket{cache_->block_size_, e.second}); + } + }; + + nsize_t key_size_ = 0; + nsize_t block_size_ = 0; + arena arena_; + map_type map_; + +public: + using iterator = boost::transform_iterator< + transform, typename map_type::iterator, + value_type, value_type>; + + cache_t(cache_t const&) = delete; + cache_t& operator=(cache_t&&) = delete; + cache_t& operator=(cache_t const&) = delete; + + // Constructs a cache that will never have inserts + cache_t() = default; + + cache_t(cache_t&& other); + + explicit + cache_t(nsize_t key_size, + nsize_t block_size, char const* label); + + std::size_t + size() const + { + return map_.size(); + } + + iterator + begin() + { + return iterator{map_.begin(), transform{*this}}; + } + + iterator + end() + { + return iterator{map_.end(), transform{*this}}; + } + + bool + empty() const + { + return map_.empty(); + } + + void + clear(); + + void + reserve(std::size_t n); + + void + periodic_activity(); + + iterator + find(nbuck_t n); + + // Create an empty bucket + // + bucket + create(nbuck_t n); + + // Insert a copy of a bucket. 
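+    // The bucket is serialized into storage allocated
+    // from the arena, so the caller's bucket may be
+    // modified or destroyed after insert returns.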
+ // + iterator + insert(nbuck_t n, bucket const& b); + + template + friend + void + swap(cache_t& lhs, cache_t& rhs); +}; + +template +cache_t<_>:: +cache_t(cache_t&& other) + : key_size_{other.key_size_} + , block_size_(other.block_size_) + , arena_(std::move(other.arena_)) + , map_(std::move(other.map_)) +{ +} + +template +cache_t<_>:: +cache_t(nsize_t key_size, + nsize_t block_size, char const* label) + : key_size_(key_size) + , block_size_(block_size) + , arena_(label) +{ +} + +template +void +cache_t<_>:: +reserve(std::size_t n) +{ + arena_.hint(n * block_size_); + map_.reserve(n); +} + +template +void +cache_t<_>:: +clear() +{ + arena_.clear(); + map_.clear(); +} + +template +void +cache_t<_>:: +periodic_activity() +{ + arena_.periodic_activity(); +} + +template +auto +cache_t<_>:: +find(nbuck_t n) -> + iterator +{ + auto const iter = map_.find(n); + if(iter == map_.end()) + return iterator{map_.end(), transform(*this)}; + return iterator{iter, transform(*this)}; +} + +template +bucket +cache_t<_>:: +create(nbuck_t n) +{ + auto const p = arena_.alloc(block_size_); + map_.emplace(n, p); + return bucket{block_size_, p, detail::empty}; +} + +template +auto +cache_t<_>:: +insert(nbuck_t n, bucket const& b) -> + iterator +{ + void* const p = arena_.alloc(b.block_size()); + ostream os{p, b.block_size()}; + b.write(os); + auto const result = map_.emplace(n, p); + return iterator{result.first, transform(*this)}; +} + +template +void +swap(cache_t& lhs, cache_t& rhs) +{ + using std::swap; + swap(lhs.key_size_, rhs.key_size_); + swap(lhs.block_size_, rhs.block_size_); + swap(lhs.arena_, rhs.arena_); + swap(lhs.map_, rhs.map_); +} + +using cache = cache_t<>; + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/endian.hpp b/include/nudb/detail/endian.hpp new file mode 100644 index 0000000000..37c0e06c9a --- /dev/null +++ b/include/nudb/detail/endian.hpp @@ -0,0 +1,93 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_ENDIAN_HPP +#define NUDB_DETAIL_ENDIAN_HPP + +#include +#include + +namespace nudb { +namespace detail { + +// This is a modified work, original implementation +// by Howard Hinnant +// +// "This should be standardized" - Howard + +// Endian provides answers to the following questions: +// 1. Is this system big or little endian? +// 2. Is the "desired endian" of some class or function the same as the +// native endian? +enum class endian +{ +#ifdef _MSC_VER + big = 1, + little = 0, + native = little +#else + native = __BYTE_ORDER__, + little = __ORDER_LITTLE_ENDIAN__, + big = __ORDER_BIG_ENDIAN__ +#endif +}; + +using is_little_endian = + std::integral_constant; + +static_assert( + endian::native == endian::little || endian::native == endian::big, + "endian::native shall be one of endian::little or endian::big"); + +static_assert( + endian::big != endian::little, + "endian::big and endian::little shall have different values"); + +// The pepper got baked into the file format as +// the hash of the little endian salt so now we +// need this function. 
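+// On little endian platforms to_little_endian is the
+// identity; on big endian platforms it byte-swaps. For
+// example, with endian::native == endian::big,
+// to_little_endian(0x0102030405060708ULL) yields
+// 0x0807060504030201ULL.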
+// +template +std::uint64_t +to_little_endian(std::uint64_t v, std::false_type) +{ + union U + { + std::uint64_t vi; + std::uint8_t va[8]; + }; + U u; + u.va[0] = v & 0xff; + u.va[1] = (v >> 8) & 0xff; + u.va[2] = (v >> 16) & 0xff; + u.va[3] = (v >> 24) & 0xff; + u.va[4] = (v >> 32) & 0xff; + u.va[5] = (v >> 40) & 0xff; + u.va[6] = (v >> 48) & 0xff; + u.va[7] = (v >> 56) & 0xff; + return u.vi; +} + +inline +std::uint64_t +to_little_endian(std::uint64_t v, std::true_type) +{ + return v; +} + +inline +std::uint64_t +to_little_endian(std::uint64_t v) +{ + return to_little_endian(v, is_little_endian{}); +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/field.hpp b/include/nudb/detail/field.hpp new file mode 100644 index 0000000000..e3101b128f --- /dev/null +++ b/include/nudb/detail/field.hpp @@ -0,0 +1,265 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_FIELD_HPP +#define NUDB_FIELD_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// A 24-bit integer +struct uint24_t; + +// A 48-bit integer +struct uint48_t; + +// These metafunctions describe the binary format of fields on disk + +template +struct field; + +template<> +struct field +{ + static std::size_t constexpr size = 1; + static std::uint64_t constexpr max = 0xff; +}; + +template<> +struct field +{ + static std::size_t constexpr size = 2; + static std::uint64_t constexpr max = 0xffff; +}; + +template<> +struct field +{ + static std::size_t constexpr size = 3; + static std::uint64_t constexpr max = 0xffffff; +}; + +template<> +struct field +{ + static std::size_t constexpr size = 4; + static std::uint64_t constexpr max = 0xffffffff; +}; + +template<> +struct field +{ + static std::size_t constexpr size = 6; + static std::uint64_t constexpr max = 0x0000ffffffffffff; +}; + +template<> +struct field +{ + static std::size_t constexpr size = 8; + static std::uint64_t constexpr max = 0xffffffffffffffff; +}; + +// read field from memory + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto p = reinterpret_cast(v); + u = *p; +} + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto p = reinterpret_cast(v); + T t; + t = T(*p++)<< 8; + t = T(*p ) | t; + u = t; +} + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto p = reinterpret_cast(v); + std::uint32_t t; + t = std::uint32_t(*p++)<<16; + t = (std::uint32_t(*p++)<< 8) | t; + t = std::uint32_t(*p ) | t; + u = t; +} + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto const* p = reinterpret_cast(v); + T t; + t = T(*p++)<<24; + t = (T(*p++)<<16) | t; + t = (T(*p++)<< 8) | t; + t = T(*p ) | t; + u = t; +} + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto p = reinterpret_cast(v); + std::uint64_t t; + t = (std::uint64_t(*p++)<<40); + t = (std::uint64_t(*p++)<<32) | t; + t = (std::uint64_t(*p++)<<24) | t; + t = (std::uint64_t(*p++)<<16) | t; + t = (std::uint64_t(*p++)<< 8) | t; + t = std::uint64_t(*p ) | t; + u = t; +} + +template::value>::type* = nullptr> +void +readp(void const* v, U& u) +{ + auto p = reinterpret_cast(v); + T t; + t = T(*p++)<<56; + t = (T(*p++)<<48) | t; + t = (T(*p++)<<40) | t; + t = (T(*p++)<<32) | t; + t = (T(*p++)<<24) | t; + t = 
(T(*p++)<<16) | t; + t = (T(*p++)<< 8) | t; + t = T(*p ) | t; + u = t; +} + +// read field from istream + +template +void +read(istream& is, U& u) +{ + readp(is.data(field::size), u); +} + +inline +void +read_size48(istream& is, std::size_t& u) +{ + std::uint64_t v; + read(is, v); + BOOST_ASSERT(v <= std::numeric_limits::max()); + u = static_cast(v); +} + +// write field to ostream + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + BOOST_ASSERT(u <= field::max); + std::uint8_t* p = os.data(field::size); + *p = static_cast(u); +} + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + BOOST_ASSERT(u <= field::max); + auto const t = static_cast(u); + std::uint8_t* p = os.data(field::size); + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + BOOST_ASSERT(u <= field::max); + auto const t = static_cast(u); + std::uint8_t* p = os.data(field::size); + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + BOOST_ASSERT(u <= field::max); + auto const t = static_cast(u); + std::uint8_t* p = os.data(field::size); + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + BOOST_ASSERT(u <= field::max); + auto const t = static_cast(u); + std::uint8_t* p = os.data(field::size); + *p++ = (t>>40)&0xff; + *p++ = (t>>32)&0xff; + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +template::value>::type* = nullptr> +void +write(ostream& os, U u) +{ + auto const t = static_cast(u); + std::uint8_t* p = os.data(field::size); + *p++ = (t>>56)&0xff; + *p++ = (t>>48)&0xff; + *p++ = (t>>40)&0xff; + *p++ = (t>>32)&0xff; + *p++ = (t>>24)&0xff; + *p++ = (t>>16)&0xff; + *p++ = (t>> 8)&0xff; + *p = t &0xff; +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/format.hpp b/include/nudb/detail/format.hpp new file mode 100644 index 0000000000..c6ea314e7a --- /dev/null +++ b/include/nudb/detail/format.hpp @@ -0,0 +1,629 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_FORMAT_HPP +#define NUDB_DETAIL_FORMAT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Format of the nudb files: + +/* + +Integer sizes + +block_size less than 32 bits (maybe restrict it to 16 bits) +buckets more than 32 bits +capacity (same as bucket index) +file offsets 63 bits +hash up to 64 bits (48 currently) +item index less than 32 bits (index of item in bucket) +modulus (same as buckets) +value size up to 32 bits (or 32-bit builds can't read it) + +*/ + +static std::size_t constexpr currentVersion = 2; + +struct dat_file_header +{ + static std::size_t constexpr size = + 8 + // Type + 2 + // Version + 8 + // UID + 8 + // Appnum + 2 + // KeySize + + 64; // (Reserved) + + char type[8]; + std::size_t version; + std::uint64_t uid; + std::uint64_t appnum; + nsize_t key_size; +}; + +struct key_file_header +{ + static std::size_t constexpr size = + 8 + // Type + 2 + // Version + 8 + // UID + 8 + // Appnum + 2 + // KeySize + + 8 + // Salt + 8 + // Pepper + 2 + // BlockSize + 2 + // LoadFactor + + 56; // (Reserved) + + char type[8]; + std::size_t version; + std::uint64_t uid; + std::uint64_t appnum; + nsize_t key_size; + + std::uint64_t salt; + std::uint64_t pepper; + nsize_t block_size; + std::size_t load_factor; + + // Computed values + nkey_t capacity; // Entries per bucket + nbuck_t buckets; // Number of buckets + nbuck_t modulus; // pow(2,ceil(log2(buckets))) +}; + +struct log_file_header +{ + static std::size_t constexpr size = + 8 + // Type + 2 + // Version + 8 + // UID + 8 + // Appnum + 2 + // KeySize + + 8 + // Salt + 8 + // Pepper + 2 + // BlockSize + + 8 + // KeyFileSize + 8; // DataFileSize + + char type[8]; + std::size_t version; + std::uint64_t uid; + std::uint64_t appnum; + nsize_t key_size; + std::uint64_t salt; + std::uint64_t pepper; + nsize_t block_size; + noff_t key_file_size; + noff_t dat_file_size; +}; + +// Type used to store hashes in buckets. +// This can be smaller than the output +// of the hash function. +// +using f_hash = uint48_t; + +static_assert(field::size <= + sizeof(nhash_t), ""); + +template +nhash_t +make_hash(nhash_t h); + +template<> +inline +nhash_t +make_hash(nhash_t h) +{ + return(h>>16)&0xffffffffffff; +} + +// Returns the hash of a key given the salt. +// Note: The hash is expressed in f_hash units +// +template +inline +nhash_t +hash(void const* key, nsize_t key_size, std::uint64_t salt) +{ + Hasher h{salt}; + return make_hash(h(key, key_size)); +} + +template +inline +nhash_t +hash(void const* key, nsize_t key_size, Hasher const& h) +{ + return make_hash(h(key, key_size)); +} + +// Computes pepper from salt +// +template +std::uint64_t +pepper(std::uint64_t salt) +{ + auto const v = to_little_endian(salt); + Hasher h{salt}; + return h(&v, sizeof(v)); +} + +// Returns the actual size of a bucket. +// This can be smaller than the block size. 
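+// Editorial note: a bucket record is a fixed header (the Count and
+// Spill fields) followed by up to `capacity` fixed-size entries,
+// each an Offset/Size/Hash triple whose widths come from the
+// field<> metafunctions in field.hpp. The capacity for a given
+// block is therefore
+//
+//   capacity = (block_size - header_size) / entry_size
+//
+// with any remainder at the end of the block left unused.
+// bucket_capacity below computes exactly this, returning zero when
+// the block cannot even hold the headers.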
+// +template +nsize_t +bucket_size(nkey_t capacity) +{ + // Bucket Record + return + field::size + // Count + field::size + // Spill + capacity * ( + field::size + // Offset + field::size + // Size + field::size); // Hash +} + +// Returns the number of entries that fit in a bucket +// +template +nkey_t +bucket_capacity(nsize_t block_size) +{ + // Bucket Record + auto const size = + field::size + // Count + field::size; // Spill + auto const entry_size = + field::size + // Offset + field::size + // Size + field::size; // Hash + if(block_size < key_file_header::size || + block_size < size) + return 0; + auto const n = + (block_size - size) / entry_size; + BOOST_ASSERT(n <= std::numeric_limits::max()); + return static_cast(std::min( + std::numeric_limits::max(), n)); +} + +// Returns the number of bytes occupied by a value record +// VFALCO TODO Fix this +inline +std::size_t +value_size(std::size_t size, + std::size_t key_size) +{ + // Data Record + return + field::size + // Size + key_size + // Key + size; // Data +} + +// Returns the closest power of 2 not less than x +template +T +ceil_pow2(T x) +{ + static const unsigned long long t[6] = { + 0xFFFFFFFF00000000ull, + 0x00000000FFFF0000ull, + 0x000000000000FF00ull, + 0x00000000000000F0ull, + 0x000000000000000Cull, + 0x0000000000000002ull + }; + + int y =(((x &(x - 1)) == 0) ? 0 : 1); + int j = 32; + int i; + + for(i = 0; i < 6; i++) { + int k =(((x & t[i]) == 0) ? 0 : j); + y += k; + x >>= k; + j >>= 1; + } + + return T{1}< +void +read(istream& is, dat_file_header& dh) +{ + read(is, dh.type, sizeof(dh.type)); + read(is, dh.version); + read(is, dh.uid); + read(is, dh.appnum); + read(is, dh.key_size); + std::array reserved; + read(is, reserved.data(), reserved.size()); +} + +// Read data file header from file +template +void +read(File& f, dat_file_header& dh, error_code& ec) +{ + std::array buf; + f.read(0, buf.data(), buf.size(), ec); + if(ec) + return; + istream is(buf); + read(is, dh); +} + +// Write data file header to stream +template +void +write(ostream& os, dat_file_header const& dh) +{ + write(os, "nudb.dat", 8); + write(os, dh.version); + write(os, dh.uid); + write(os, dh.appnum); + write(os, dh.key_size); + std::array reserved; + reserved.fill(0); + write(os, reserved.data(), reserved.size()); +} + +// Write data file header to file +template +void +write(File& f, dat_file_header const& dh, error_code& ec) +{ + std::array buf; + ostream os(buf); + write(os, dh); + f.write(0, buf.data(), buf.size(), ec); +} + +// Read key file header from stream +template +void +read(istream& is, noff_t file_size, key_file_header& kh) +{ + read(is, kh.type, sizeof(kh.type)); + read(is, kh.version); + read(is, kh.uid); + read(is, kh.appnum); + read(is, kh.key_size); + read(is, kh.salt); + read(is, kh.pepper); + read(is, kh.block_size); + read(is, kh.load_factor); + std::array reserved; + read(is, reserved.data(), reserved.size()); + + // VFALCO These need to be checked to handle + // when the file size is too small + kh.capacity = bucket_capacity(kh.block_size); + if(file_size > kh.block_size) + { + if(kh.block_size > 0) + kh.buckets = static_cast( + (file_size - kh.block_size) / kh.block_size); + else + // VFALCO Corruption or logic error + kh.buckets = 0; + } + else + { + kh.buckets = 0; + } + kh.modulus = ceil_pow2(kh.buckets); +} + +// Read key file header from file +template +void +read(File& f, key_file_header& kh, error_code& ec) +{ + std::array buf; + f.read(0, buf.data(), buf.size(), ec); + if(ec) + return; + istream is{buf}; + auto const 
size = f.size(ec); + if(ec) + return; + read(is, size, kh); +} + +// Write key file header to stream +template +void +write(ostream& os, key_file_header const& kh) +{ + write(os, "nudb.key", 8); + write(os, kh.version); + write(os, kh.uid); + write(os, kh.appnum); + write(os, kh.key_size); + write(os, kh.salt); + write(os, kh.pepper); + write(os, kh.block_size); + write(os, kh.load_factor); + std::array reserved; + reserved.fill(0); + write(os, reserved.data(), reserved.size()); +} + +// Write key file header to file +template +void +write(File& f, key_file_header const& kh, error_code& ec) +{ + buffer buf; + buf.reserve(kh.block_size); + if(kh.block_size < key_file_header::size) + { + ec = error::invalid_block_size; + return; + } + std::fill(buf.get(), buf.get() + buf.size(), 0); + ostream os{buf.get(), buf.size()}; + write(os, kh); + f.write(0, buf.get(), buf.size(), ec); +} + +// Read log file header from stream +template +void +read(istream& is, log_file_header& lh) +{ + read(is, lh.type, sizeof(lh.type)); + read(is, lh.version); + read(is, lh.uid); + read(is, lh.appnum); + read(is, lh.key_size); + read(is, lh.salt); + read(is, lh.pepper); + read(is, lh.block_size); + read(is, lh.key_file_size); + read(is, lh.dat_file_size); +} + +// Read log file header from file +template +void +read(File& f, log_file_header& lh, error_code& ec) +{ + std::array buf; + f.read(0, buf.data(), buf.size(), ec); + if(ec) + return; + istream is{buf}; + read(is, lh); +} + +// Write log file header to stream +template +void +write(ostream& os, log_file_header const& lh) +{ + write(os, "nudb.log", 8); + write(os, lh.version); + write(os, lh.uid); + write(os, lh.appnum); + write(os, lh.key_size); + write(os, lh.salt); + write(os, lh.pepper); + write(os, lh.block_size); + write(os, lh.key_file_size); + write(os, lh.dat_file_size); +} + +// Write log file header to file +template +void +write(File& f, log_file_header const& lh, error_code& ec) +{ + std::array buf; + ostream os{buf}; + write(os, lh); + f.write(0, buf.data(), buf.size(), ec); +} + +// Verify contents of data file header +template +void +verify(dat_file_header const& dh, error_code& ec) +{ + std::string const type{dh.type, 8}; + if(type != "nudb.dat") + { + ec = error::not_data_file; + return; + } + if(dh.version != currentVersion) + { + ec = error::different_version; + return; + } + if(dh.key_size < 1) + { + ec = error::invalid_key_size; + return; + } +} + +// Verify contents of key file header +template +void +verify(key_file_header const& kh, error_code& ec) +{ + std::string const type{kh.type, 8}; + if(type != "nudb.key") + { + ec = error::not_key_file; + return; + } + if(kh.version != currentVersion) + { + ec = error::different_version; + return; + } + if(kh.key_size < 1) + { + ec = error::invalid_key_size; + return; + } + if(kh.pepper != pepper(kh.salt)) + { + ec = error::hash_mismatch; + return; + } + if(kh.load_factor < 1) + { + ec = error::invalid_load_factor; + return; + } + if(kh.capacity < 1) + { + ec = error::invalid_capacity; + return; + } + if(kh.buckets < 1) + { + ec = error::invalid_bucket_count; + return; + } +} + +// Verify contents of log file header +template +void +verify(log_file_header const& lh, error_code& ec) +{ + std::string const type{lh.type, 8}; + if(type != "nudb.log") + { + ec = error::not_log_file; + return; + } + if(lh.version != currentVersion) + { + ec = error::different_version; + return; + } + if(lh.pepper != pepper(lh.salt)) + { + ec = error::hash_mismatch; + return; + } + if(lh.key_size < 1) + { + ec = 
error::invalid_key_size; + return; + } +} + +// Make sure key file and value file headers match +template +void +verify(dat_file_header const& dh, + key_file_header const& kh, error_code& ec) +{ + verify(kh, ec); + if(ec) + return; + if(kh.uid != dh.uid) + { + ec = error::uid_mismatch; + return; + } + if(kh.appnum != dh.appnum) + { + ec = error::appnum_mismatch; + return; + } + if(kh.key_size != dh.key_size) + { + ec = error::key_size_mismatch; + return; + } +} + +// Make sure key file and log file headers match +template +void +verify(key_file_header const& kh, + log_file_header const& lh, error_code& ec) +{ + verify(lh, ec); + if(ec) + return; + if(kh.uid != lh.uid) + { + ec = error::uid_mismatch; + return; + } + if(kh.appnum != lh.appnum) + { + ec = error::appnum_mismatch; + return; + } + if(kh.key_size != lh.key_size) + { + ec = error::key_size_mismatch; + return; + } + if(kh.salt != lh.salt) + { + ec = error::salt_mismatch; + return; + } + if(kh.pepper != lh.pepper) + { + ec = error::pepper_mismatch; + return; + } + if(kh.block_size != lh.block_size) + { + ec = error::block_size_mismatch; + return; + } +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/gentex.hpp b/include/nudb/detail/gentex.hpp new file mode 100644 index 0000000000..9adeeb6d96 --- /dev/null +++ b/include/nudb/detail/gentex.hpp @@ -0,0 +1,259 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_GENTEX_HPP +#define NUDB_DETAIL_GENTEX_HPP + +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Generation counting mutex +// +template +class gentex_t +{ +private: + std::mutex m_; + std::size_t gen_ = 0; + std::size_t cur_ = 0; + std::size_t prev_ = 0; + std::condition_variable cond_; + +public: + gentex_t() = default; + gentex_t(gentex_t const&) = delete; + gentex_t& operator=(gentex_t const&) = delete; + + void + start(); + + void + finish(); + + std::size_t + lock_gen(); + + void + unlock_gen(std::size_t gen); +}; + +template +void +gentex_t<_>:: +start() +{ + std::unique_lock l{m_}; + prev_ += cur_; + cur_ = 0; + ++gen_; +} + +template +void +gentex_t<_>:: +finish() +{ + std::unique_lock l{m_}; + while(prev_ > 0) + cond_.wait(l); +} + +template +std::size_t +gentex_t<_>:: +lock_gen() +{ + std::lock_guard< + std::mutex> l{m_}; + ++cur_; + return gen_; +} + +template +void +gentex_t<_>:: +unlock_gen(std::size_t gen) +{ + std::unique_lock l{m_}; + if(gen == gen_) + { + --cur_; + } + else + { + --prev_; + if(prev_ == 0) + cond_.notify_all(); + } +} + +using gentex = gentex_t<>; + +//------------------------------------------------------------------------------ + +template +class genlock +{ +private: + bool owned_ = false; + GenerationLockable* g_ = nullptr; + std::size_t gen_; + +public: + using mutex_type = GenerationLockable; + + genlock() = default; + genlock(genlock const&) = delete; + genlock& operator=(genlock const&) = delete; + + genlock(genlock&& other); + + genlock& operator=(genlock&& other); + + explicit + genlock(mutex_type& g); + + genlock(mutex_type& g, std::defer_lock_t); + + ~genlock(); + + mutex_type* + mutex() noexcept + { + return g_; + } + + bool + owns_lock() const noexcept + { + return g_ && owned_; + } + + explicit + operator bool() const noexcept + { + return owns_lock(); + } + + void + lock(); + + void + unlock(); + + 
mutex_type* + release() noexcept; + + template + friend + void + swap(genlock& lhs, genlock& rhs) noexcept; +}; + +template +genlock:: +genlock(genlock&& other) + : owned_(other.owned_) + , g_(other.g_) +{ + other.owned_ = false; + other.g_ = nullptr; +} + +template +genlock& +genlock:: +operator=(genlock&& other) +{ + if(owns_lock()) + unlock(); + owned_ = other.owned_; + g_ = other.g_; + other.owned_ = false; + other.g_ = nullptr; + return *this; +} + +template +genlock:: +genlock(mutex_type& g) + : g_(&g) +{ + lock(); +} + +template +genlock:: +genlock(mutex_type& g, std::defer_lock_t) + : g_(&g) +{ +} + +template +genlock:: +~genlock() +{ + if(owns_lock()) + unlock(); +} + +template +void +genlock:: +lock() +{ + // no associated gentex + BOOST_ASSERT(g_ != nullptr); + // gentex is already owned + BOOST_ASSERT(! owned_); + gen_ = g_->lock_gen(); + owned_ = true; +} + +template +void +genlock:: +unlock() +{ + // no associated gentex + BOOST_ASSERT(g_ != nullptr); + // gentex is not owned + BOOST_ASSERT(owned_); + g_->unlock_gen(gen_); + owned_ = false; +} + +template +auto +genlock:: +release() noexcept -> + mutex_type* +{ + mutex_type* const g = g_; + g_ = nullptr; + return g; +} + +template +void +swap(genlock& lhs, genlock& rhs) noexcept +{ + using namespace std; + swap(lhs.owned_, rhs.owned_); + swap(lhs.g_, rhs.g_); +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/mutex.hpp b/include/nudb/detail/mutex.hpp new file mode 100644 index 0000000000..779e39fdc6 --- /dev/null +++ b/include/nudb/detail/mutex.hpp @@ -0,0 +1,26 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_MUTEX_HPP +#define NUDB_DETAIL_MUTEX_HPP + +#include +#include + +namespace nudb { +namespace detail { + +using shared_lock_type = + boost::shared_lock; + +using unique_lock_type = + boost::unique_lock; + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/pool.hpp b/include/nudb/detail/pool.hpp new file mode 100644 index 0000000000..0b0a5daa96 --- /dev/null +++ b/include/nudb/detail/pool.hpp @@ -0,0 +1,243 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_POOL_HPP +#define NUDB_DETAIL_POOL_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Buffers key/value pairs in a map, associating +// them with a modifiable data file offset. 
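+// Editorial sketch of the pool's role in the commit pipeline, using
+// only the interface declared below (the offset value shown is
+// illustrative):
+//
+//   pool p{key_size, "p1"};
+//   p.insert(h, key, data, size);   // buffer one pending write
+//   ...
+//   for(auto& e : p)                // at commit time, each record
+//       e.second = offset;          // learns its data file offset
+//   p.clear();                      // release the arena memory
+//
+// The map key holds the hash, size, and key/data pointers; the
+// mapped noff_t starts at zero and is assigned during the commit.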
+template +class pool_t +{ +public: + struct value_type; + class compare; + +private: + using map_type = std::map< + value_type, noff_t, compare>; + + arena arena_; + nsize_t key_size_; + nsize_t data_size_ = 0; + map_type map_; + +public: + using iterator = + typename map_type::iterator; + + pool_t(pool_t const&) = delete; + pool_t& operator=(pool_t const&) = delete; + + pool_t(pool_t&& other); + + pool_t(nsize_t key_size, char const* label); + + iterator + begin() + { + return map_.begin(); + } + + iterator + end() + { + return map_.end(); + } + + bool + empty() const + { + return map_.size() == 0; + } + + // Returns the number of elements in the pool + std::size_t + size() const + { + return map_.size(); + } + + // Returns the sum of data sizes in the pool + std::size_t + data_size() const + { + return data_size_; + } + + void + clear(); + + void + periodic_activity(); + + iterator + find(void const* key); + + // Insert a value + // @param h The hash of the key + void + insert(nhash_t h, void const* key, + void const* buffer, nsize_t size); + + template + friend + void + swap(pool_t& lhs, pool_t& rhs); +}; + +template +struct pool_t<_>::value_type +{ + nhash_t hash; + nsize_t size; + void const* key; + void const* data; + + value_type(value_type const&) = default; + value_type& operator=(value_type const&) = default; + + value_type(nhash_t hash_, nsize_t size_, + void const* key_, void const* data_) + : hash(hash_) + , size(size_) + , key(key_) + , data(data_) + { + } +}; + +template +class pool_t<_>::compare +{ + std::size_t key_size_; + +public: + using result_type = bool; + using first_argument_type = value_type; + using second_argument_type = value_type; + + compare(compare const&) = default; + compare& operator=(compare const&) = default; + + explicit + compare(nsize_t key_size) + : key_size_(key_size) + { + } + + bool + operator()(value_type const& lhs, + value_type const& rhs) const + { + return std::memcmp( + lhs.key, rhs.key, key_size_) < 0; + } +}; + +//------------------------------------------------------------------------------ + +template +pool_t<_>:: +pool_t(pool_t&& other) + : arena_(std::move(other.arena_)) + , key_size_(other.key_size_) + , data_size_(other.data_size_) + , map_(std::move(other.map_)) +{ +} + +template +pool_t<_>:: +pool_t(nsize_t key_size, char const* label) + : arena_(label) + , key_size_(key_size) + , map_(compare{key_size}) +{ +} + +template +void +pool_t<_>:: +clear() +{ + arena_.clear(); + data_size_ = 0; + map_.clear(); +} + +template +void +pool_t<_>:: +periodic_activity() +{ + arena_.periodic_activity(); +} + +template +auto +pool_t<_>:: +find(void const* key) -> + iterator +{ + // VFALCO need is_transparent here + value_type tmp{0, 0, key, nullptr}; + auto const iter = map_.find(tmp); + return iter; +} + +template +void +pool_t<_>:: +insert(nhash_t h, + void const* key, void const* data, nsize_t size) +{ + auto const k = arena_.alloc(key_size_); + auto const d = arena_.alloc(size); + std::memcpy(k, key, key_size_); + std::memcpy(d, data, size); + auto const result = map_.emplace( + std::piecewise_construct, + std::make_tuple(h, size, k, d), + std::make_tuple(0)); + (void)result.second; + // Must not already exist! 
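+    // (value_type has no default constructor, so the map key is
+    // built in place via piecewise_construct; the mapped offset
+    // starts at zero and is assigned later, during commit.)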
+ BOOST_ASSERT(result.second); + data_size_ += size; +} + +template +void +swap(pool_t<_>& lhs, pool_t<_>& rhs) +{ + using std::swap; + swap(lhs.arena_, rhs.arena_); + swap(lhs.key_size_, rhs.key_size_); + swap(lhs.data_size_, rhs.data_size_); + swap(lhs.map_, rhs.map_); +} + +using pool = pool_t<>; + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/stream.hpp b/include/nudb/detail/stream.hpp new file mode 100644 index 0000000000..6c07bf11b9 --- /dev/null +++ b/include/nudb/detail/stream.hpp @@ -0,0 +1,149 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_DETAIL_STREAM_HPP +#define NUDB_DETAIL_STREAM_HPP + +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +// Input stream from bytes +template +class istream_t +{ + std::uint8_t const* buf_ = nullptr; + std::size_t size_ = 0; + +public: + istream_t() = default; + istream_t(istream_t const&) = default; + istream_t& operator=(istream_t const&) = default; + + istream_t(void const* data, std::size_t size) + : buf_(reinterpret_cast(data)) + , size_(size) + { + } + + template + istream_t(std::array const& a) + : buf_(a.data()) + , size_(a.size()) + { + } + + std::uint8_t const* + data(std::size_t bytes); + + std::uint8_t const* + operator()(std::size_t bytes) + { + return data(bytes); + } +}; + +// Precondition: bytes <= size_ +// +template +std::uint8_t const* +istream_t<_>::data(std::size_t bytes) +{ + BOOST_ASSERT(bytes <= size_); + if(size_ < bytes) + throw std::logic_error("short read from istream"); + auto const data = buf_; + buf_ = buf_ + bytes; + size_ -= bytes; + return data; +} + +using istream = istream_t<>; + +//------------------------------------------------------------------------------ + +// Output stream to bytes +// VFALCO Should this assert on overwriting the buffer? 
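+// Editorial example: the istream/ostream pair round-trips the
+// fixed-layout records used throughout, via the field.hpp
+// overloads (the values shown are arbitrary):
+//
+//   std::uint8_t buf[6];
+//   ostream os{buf, sizeof(buf)};
+//   write<std::uint16_t>(os, 2);      // Count, 2 bytes
+//   write<std::uint32_t>(os, 4096);   // Size, 4 bytes
+//
+//   istream is{buf, sizeof(buf)};
+//   std::uint16_t count; read<std::uint16_t>(is, count);
+//   std::uint32_t size;  read<std::uint32_t>(is, size);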
+template +class ostream_t +{ + std::uint8_t* buf_ = nullptr; + std::size_t size_ = 0; + +public: + ostream_t() = default; + ostream_t(ostream_t const&) = default; + ostream_t& operator=(ostream_t const&) = default; + + ostream_t(void* data, std::size_t) + : buf_(reinterpret_cast(data)) + { + } + + template + ostream_t(std::array& a) + : buf_(a.data()) + { + } + + // Returns the number of bytes written + std::size_t + size() const + { + return size_; + } + + std::uint8_t* + data(std::size_t bytes); + + std::uint8_t* + operator()(std::size_t bytes) + { + return data(bytes); + } +}; + +template +std::uint8_t* +ostream_t<_>::data(std::size_t bytes) +{ + auto const data = buf_; + buf_ = buf_ + bytes; + size_ += bytes; + return data; +} + +using ostream = ostream_t<>; + +//------------------------------------------------------------------------------ + +// read blob +inline +void +read(istream& is, void* buffer, std::size_t bytes) +{ + std::memcpy(buffer, is.data(bytes), bytes); +} + +// write blob +inline +void +write(ostream& os, void const* buffer, std::size_t bytes) +{ + std::memcpy(os.data(bytes), buffer, bytes); +} + +} // detail +} // nudb + +#endif diff --git a/include/nudb/detail/xxhash.hpp b/include/nudb/detail/xxhash.hpp new file mode 100644 index 0000000000..648b2cf849 --- /dev/null +++ b/include/nudb/detail/xxhash.hpp @@ -0,0 +1,328 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// +// This is a derivative work based on xxHash 0.6.2, copyright below: +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +#ifndef NUDB_DETAIL_XXHASH_HPP +#define NUDB_DETAIL_XXHASH_HPP + +#include +#include +#include +#include + +namespace nudb { +namespace detail { + +#define NUDB_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// minGW _rotl gives poor performance +#if defined(_MSC_VER) +# define NUDB_XXH_rotl64(x,r) _rotl64(x,r) +#else +# define NUDB_XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) +# define NUDB_XXH_swap32 _byteswap_ulong +#elif NUDB_GCC_VERSION >= 403 +# define NUDB_XXH_swap32 __builtin_bswap32 +#endif + +#if defined(_MSC_VER) +# define NUDB_XXH_swap64 _byteswap_uint64 +#elif NUDB_GCC_VERSION >= 403 +# define NUDB_XXH_swap64 __builtin_bswap64 +#endif + +#ifndef NUDB_XXH_swap32 +inline +std::uint32_t +NUDB_XXH_swap32(std::uint32_t x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + +#ifndef NUDB_XXH_swap64 +inline +std::uint64_t +NUDB_XXH_swap64(std::uint64_t x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +static std::uint64_t constexpr prime64_1 = 11400714785074694791ULL; +static std::uint64_t constexpr prime64_2 = 14029467366897019727ULL; +static std::uint64_t constexpr prime64_3 = 1609587929392839161ULL; +static std::uint64_t constexpr prime64_4 = 9650029242287828579ULL; +static std::uint64_t constexpr prime64_5 = 2870177450012600261ULL; + +// Portable and safe solution. Generally efficient. 
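+// (Editorial note: memcpy is the portable way to express a possibly
+// unaligned load; casting the pointer to std::uint32_t* and
+// dereferencing would be undefined behaviour on some targets, while
+// compilers lower the memcpy below to a single load instruction.)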
+// see : http://stackoverflow.com/a/32095106/646947 + +inline +std::uint32_t +XXH_read32(void const* p) +{ + std::uint32_t v; + memcpy(&v, p, sizeof(v)); + return v; +} + +inline +std::uint64_t +XXH_read64(void const* p) +{ + std::uint64_t v; + memcpy(&v, p, sizeof(v)); + return v; +} + +// little endian, aligned +inline +std::uint32_t +XXH_readLE32_align(void const* p, std::true_type, std::true_type) +{ + return *reinterpret_cast(p); +} + +// little endian, unaligned +inline +std::uint32_t +XXH_readLE32_align(void const* p, std::true_type, std::false_type) +{ + return XXH_read32(p); +} + +// big endian, aligned +inline +std::uint32_t +XXH_readLE32_align(void const* p, std::false_type, std::true_type) +{ + return NUDB_XXH_swap32( + *reinterpret_cast(p)); +} + +// big endian, unaligned +inline +std::uint32_t +XXH_readLE32_align(void const* p, std::false_type, std::false_type) +{ + return NUDB_XXH_swap32(XXH_read32(p)); +} + +// little endian, aligned +inline +std::uint64_t +XXH_readLE64_align(void const* p, std::true_type, std::true_type) +{ + return *reinterpret_cast(p); +} + +// little endian, unaligned +inline +std::uint64_t +XXH_readLE64_align(void const* p, std::true_type, std::false_type) +{ + return XXH_read64(p); +} + +// big endian, aligned +inline +std::uint64_t +XXH_readLE64_align(void const* p, std::false_type, std::true_type) +{ + return NUDB_XXH_swap64( + *reinterpret_cast(p)); +} + +// big endian, unaligned +inline +std::uint64_t +XXH_readLE64_align(void const* p, std::false_type, std::false_type) +{ + return NUDB_XXH_swap64(XXH_read64(p)); +} + +inline +std::uint64_t +XXH64_round(std::uint64_t acc, std::uint64_t input) +{ + acc += input * prime64_2; + acc = NUDB_XXH_rotl64(acc, 31); + acc *= prime64_1; + return acc; +} + +inline +std::uint64_t +XXH64_mergeRound(std::uint64_t acc, std::uint64_t val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * prime64_1 + prime64_4; + return acc; +} + +template +std::uint64_t +XXH64_endian_align( + void const* input, std::size_t len, std::uint64_t seed, + std::integral_constant endian, + std::integral_constant align) +{ + const std::uint8_t* p = (const std::uint8_t*)input; + const std::uint8_t* const bEnd = p + len; + std::uint64_t h64; + auto const XXH_get32bits = + [](void const* p) + { + return XXH_readLE32_align(p, + decltype(endian){}, decltype(align){}); + }; + auto const XXH_get64bits = + [](void const* p) + { + return XXH_readLE64_align(p, + decltype(endian){}, decltype(align){}); + }; + if(len>=32) + { + const std::uint8_t* const limit = bEnd - 32; + std::uint64_t v1 = seed + prime64_1 + prime64_2; + std::uint64_t v2 = seed + prime64_2; + std::uint64_t v3 = seed + 0; + std::uint64_t v4 = seed - prime64_1; + + do + { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } + while(p<=limit); + + h64 = NUDB_XXH_rotl64(v1, 1) + + NUDB_XXH_rotl64(v2, 7) + + NUDB_XXH_rotl64(v3, 12) + + NUDB_XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } + else + { + h64 = seed + prime64_5; + } + h64 += len; + while(p + 8 <= bEnd) + { + std::uint64_t const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = NUDB_XXH_rotl64(h64,27) * prime64_1 + prime64_4; + p+=8; + } + if(p+4<=bEnd) + { + h64 ^= (std::uint64_t)(XXH_get32bits(p)) * prime64_1; + h64 = NUDB_XXH_rotl64(h64, 23) * 
prime64_2 + prime64_3; + p+=4; + } + while(p> 33; + h64 *= prime64_2; + h64 ^= h64 >> 29; + h64 *= prime64_3; + h64 ^= h64 >> 32; + return h64; +} + +/* Calculate the 64-bit hash of a block of memory. + + @param data A pointer to the buffer to compute the hash on. + The buffer may be unaligned. + + @note This function runs faster on 64-bits systems, but slower + on 32-bits systems (see benchmark). + + @param bytes The size of the buffer in bytes. + + @param seed A value which may be used to permute the output. + Using a different seed with the same input will produce a + different value. + + @return The 64-bit hash of the input data. +*/ +template +std::uint64_t +XXH64(void const* data, size_t bytes, std::uint64_t seed) +{ + // Use faster algorithm if aligned + if((reinterpret_cast(data) & 7) == 0) + return XXH64_endian_align(data, bytes, seed, + is_little_endian{}, std::false_type{}); + return XXH64_endian_align(data, bytes, seed, + is_little_endian{}, std::true_type{}); +} + +} // detail +} // nudb + +#endif + diff --git a/include/nudb/error.hpp b/include/nudb/error.hpp new file mode 100644 index 0000000000..082ff41f50 --- /dev/null +++ b/include/nudb/error.hpp @@ -0,0 +1,263 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_ERROR_HPP +#define NUDB_ERROR_HPP + +#include +#include + +namespace nudb { + +/// The type of system-specific error code returned by the implementation +#if GENERATING_DOCS +class error_code{}; + +#else +using boost::system::error_code; + +#endif + +/// The type of cross-platform error code used by the implementation +#if GENERATING_DOCS +class error_condition{}; + +#else +using boost::system::error_condition; + +#endif + +/// The type of system-specific exception used when throwing +#if GENERATING_DOCS +class system_error{}; + +#else +using boost::system::system_error; + +#endif + +/// Returns the category used for system-specific error codes +#if GENERATING_DOCS +error_category const& +system_category(); + +#else +using boost::system::system_category; + +#endif + +/// Returns the category used for cross-platform error codes +#if GENERATING_DOCS +error_category const& +generic_category(); + +#else +using boost::system::generic_category; + +#endif + +/// The base class used for error categories +#if GENERATING_DOCS +class error_category{}; + +#else +using boost::system::error_category; + +#endif + +/// The set of constants used for cross-platform error codes +#if GENERATING_DOCS +enum errc{}; + +#else +namespace errc = boost::system::errc; + +#endif + +/// Database error codes. +enum class error +{ + /** No error. + + The operation completed successfully. + */ + success = 0, + + /** The specified key was not found. + + Returned when @ref basic_store::fetch does not + find the specified key. + */ + key_not_found, + + /** The specified key already exists. + + Returned when @ref basic_store::insert finds + the specified key already in the database. + */ + key_exists, + + /** A file read returned less data than expected. + + This can be caused by premature application + termination during a commit cycle. + */ + short_read, + + /** A log file is present. + + Indicates that the database needs to have the + associated log file applied to perform a recovery. + This error is returned by functions such as @ref rekey. 
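+ Recovery, which is performed automatically when the
+ database is opened, applies or rolls back the log file.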
+ */ + log_file_exists, + + /** No key file exists. + + This error is returned by the recover process when + there is no valid key file. It happens when a + @ref rekey operation prematurely terminates. A + database without a key file cannot be opened. To + fix this error, it is necessary for an invocation of + @ref rekey to complete successfully. + */ + no_key_file, + + /// Too many buckets in key file + too_many_buckets, + + /// Not a data file + not_data_file, + + /// Not a key file + not_key_file, + + /// Not a log file + not_log_file, + + /// Different version + different_version, + + /// Invalid key size + invalid_key_size, + + /// Invalid block size + invalid_block_size, + + /// Short key file + short_key_file, + + /// Short bucket + short_bucket, + + /// Short spill + short_spill, + + /// Short record + short_data_record, + + /// Short value + short_value, + + /// Hash mismatch + hash_mismatch, + + /// Invalid load factor + invalid_load_factor, + + /// Invalid capacity + invalid_capacity, + + /// Invalid bucket count + invalid_bucket_count, + + /// Invalid bucket size + invalid_bucket_size, + + /// The data file header was incomplete + incomplete_data_file_header, + + /// The key file header was incomplete + incomplete_key_file_header, + + /// Invalid log record + invalid_log_record, + + /// Invalid spill in log record + invalid_log_spill, + + /// Invalid offset in log record + invalid_log_offset, + + /// Invalid index in log record + invalid_log_index, + + /// Invalid size in spill + invalid_spill_size, + + /// UID mismatch + uid_mismatch, + + /// appnum mismatch + appnum_mismatch, + + /// key size mismatch + key_size_mismatch, + + /// salt mismatch + salt_mismatch, + + /// pepper mismatch + pepper_mismatch, + + /// block size mismatch + block_size_mismatch, + + /// orphaned value + orphaned_value, + + /// missing value + missing_value, + + /// size mismatch + size_mismatch, + + /// duplicate value + duplicate_value +}; + +/// Returns the error category used for database error codes. +error_category const& +nudb_category(); + +/** Returns a database error code. + + This function is used by the implementation to convert + @ref error values into @ref error_code objects. +*/ +inline +error_code +make_error_code(error ev) +{ + return error_code{static_cast(ev), nudb_category()}; +} + +} // nudb + +namespace boost { +namespace system { +template<> +struct is_error_code_enum +{ + static bool const value = true; +}; +} // system +} // boost + +#include + +#endif diff --git a/include/nudb/file.hpp b/include/nudb/file.hpp new file mode 100644 index 0000000000..409aa1981f --- /dev/null +++ b/include/nudb/file.hpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_FILE_HPP +#define NUDB_FILE_HPP + +#include +#include + +namespace nudb { + +/// The type used to hold paths to files +using path_type = std::string; + +/** Returns the best guess at the volume's block size. + + @param path A path to a file on the device. The file does + not need to exist. +*/ +inline +std::size_t +block_size(path_type const& path) +{ + // A reasonable default for many SSD devices + return 4096; +} + +/** File create and open modes. + + These are used by @ref native_file. 
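+ A File implementation maps each mode to the corresponding
+ open flags, and may additionally use it to tune OS-level
+ caching and read-ahead behaviour.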
+*/ +enum class file_mode +{ + /// Open the file for sequential reads + scan, + + /// Open the file for random reads + read, + + /// Open the file for random reads and appending writes + append, + + /// Open the file for random reads and writes + write +}; + +} // nudb + +#endif diff --git a/include/nudb/impl/basic_store.ipp b/include/nudb/impl/basic_store.ipp new file mode 100644 index 0000000000..404a4e3098 --- /dev/null +++ b/include/nudb/impl/basic_store.ipp @@ -0,0 +1,793 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_BASIC_STORE_IPP +#define NUDB_IMPL_BASIC_STORE_IPP + +#include +#include +#include +#include +#include + +#ifndef NUDB_DEBUG_LOG +#define NUDB_DEBUG_LOG 0 +#endif +#if NUDB_DEBUG_LOG +#include +#include +#endif + +namespace nudb { + +template +basic_store::state:: +state(File&& df_, File&& kf_, File&& lf_, + path_type const& dp_, path_type const& kp_, + path_type const& lp_, + detail::key_file_header const& kh_) + : df(std::move(df_)) + , kf(std::move(kf_)) + , lf(std::move(lf_)) + , dp(dp_) + , kp(kp_) + , lp(lp_) + , hasher(kh_.salt) + , p0(kh_.key_size, "p0") + , p1(kh_.key_size, "p1") + , c1(kh_.key_size, kh_.block_size, "c1") + , kh(kh_) +{ + static_assert(is_File::value, + "File requirements not met"); +} + +//------------------------------------------------------------------------------ + +template +basic_store:: +~basic_store() +{ + error_code ec; + // We call close here to make sure data is intact + // if an exception destroys the basic_store, but callers + // should always call close manually to receive the + // error code. + close(ec); +} + +template +path_type const& +basic_store:: +dat_path() const +{ + BOOST_ASSERT(is_open()); + return s_->dp; +} + +template +path_type const& +basic_store:: +key_path() const +{ + BOOST_ASSERT(is_open()); + return s_->kp; +} + +template +path_type const& +basic_store:: +log_path() const +{ + BOOST_ASSERT(is_open()); + return s_->lp; +} + +template +std::uint64_t +basic_store:: +appnum() const +{ + BOOST_ASSERT(is_open()); + return s_->kh.appnum; +} + +template +std::size_t +basic_store:: +key_size() const +{ + BOOST_ASSERT(is_open()); + return s_->kh.key_size; +} + +template +std::size_t +basic_store:: +block_size() const +{ + BOOST_ASSERT(is_open()); + return s_->kh.block_size; +} + +template +template +void +basic_store:: +open( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + error_code& ec, + Args&&... args) +{ + static_assert(is_Hasher::value, + "Hasher requirements not met"); + using namespace detail; + BOOST_ASSERT(! is_open()); + ec_ = {}; + ecb_.store(false); + recover( + dat_path, key_path, log_path, ec, args...); + if(ec) + return; + File df(args...); + File kf(args...); + File lf(args...); + df.open(file_mode::append, dat_path, ec); + if(ec) + return; + kf.open(file_mode::write, key_path, ec); + if(ec) + return; + lf.create(file_mode::append, log_path, ec); + if(ec) + return; + // VFALCO TODO Erase empty log file if this + // function subsequently fails. 
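+    // What follows: read both file headers, verify each one and
+    // their pairwise consistency, then derive the tuning state:
+    // frac_ gains 65536 units per inserted key and a bucket is
+    // split whenever it crosses thresh_, while buckets_ and
+    // modulus_ (the next power of two) drive the linear-hashing
+    // bucket index computation.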
+ dat_file_header dh; + read(df, dh, ec); + if(ec) + return; + verify(dh, ec); + if(ec) + return; + key_file_header kh; + read(kf, kh, ec); + if(ec) + return; + verify(kh, ec); + if(ec) + return; + verify(dh, kh, ec); + if(ec) + return; + boost::optional s; + s.emplace(std::move(df), std::move(kf), std::move(lf), + dat_path, key_path, log_path, kh); + thresh_ = std::max(65536UL, + kh.load_factor * kh.capacity); + frac_ = thresh_ / 2; + buckets_ = kh.buckets; + modulus_ = ceil_pow2(kh.buckets); + // VFALCO TODO This could be better + if(buckets_ < 1) + { + ec = error::short_key_file; + return; + } + dataWriteSize_ = 32 * nudb::block_size(dat_path); + logWriteSize_ = 32 * nudb::block_size(log_path); + s_.emplace(std::move(*s)); + open_ = true; + t_ = std::thread(&basic_store::run, this); +} + +template +void +basic_store:: +close(error_code& ec) +{ + if(open_) + { + open_ = false; + cv_.notify_all(); + t_.join(); + if(ecb_) + { + ec = ec_; + return; + } + s_->lf.close(); + state s{std::move(*s_)}; + File::erase(s.lp, ec_); + if(ec_) + ec = ec_; + } +} + +template +template +void +basic_store:: +fetch( + void const* key, + Callback && callback, + error_code& ec) +{ + using namespace detail; + BOOST_ASSERT(is_open()); + if(ecb_) + { + ec = ec_; + return; + } + auto const h = + hash(key, s_->kh.key_size, s_->hasher); + shared_lock_type m{m_}; + { + auto iter = s_->p1.find(key); + if(iter == s_->p1.end()) + { + iter = s_->p0.find(key); + if(iter == s_->p0.end()) + goto cont; + } + callback(iter->first.data, iter->first.size); + return; + } +cont: + auto const n = bucket_index(h, buckets_, modulus_); + auto const iter = s_->c1.find(n); + if(iter != s_->c1.end()) + return fetch(h, key, iter->second, callback, ec); + genlock g{g_}; + m.unlock(); + buffer buf{s_->kh.block_size}; + // b constructs from uninitialized buf + bucket b{s_->kh.block_size, buf.get()}; + b.read(s_->kf, (n + 1) * b.block_size(), ec); + if(ec) + return; + fetch(h, key, b, callback, ec); +} + +template +void +basic_store:: +insert( + void const* key, + void const* data, + nsize_t size, + error_code& ec) +{ + using namespace detail; + using namespace std::chrono; + BOOST_ASSERT(is_open()); + if(ecb_) + { + ec = ec_; + return; + } + // Data Record + BOOST_ASSERT(size > 0); // zero disallowed + BOOST_ASSERT(size <= field::max); // too large + auto const h = + hash(key, s_->kh.key_size, s_->hasher); + std::lock_guard u{u_}; + { + shared_lock_type m{m_}; + if(s_->p1.find(key) != s_->p1.end() || + s_->p0.find(key) != s_->p0.end()) + { + ec = error::key_exists; + return; + } + auto const n = bucket_index(h, buckets_, modulus_); + auto const iter = s_->c1.find(n); + if(iter != s_->c1.end()) + { + auto const found = exists( + h, key, &m, iter->second, ec); + if(ec) + return; + if(found) + { + ec = error::key_exists; + return; + } + // m is now unlocked + } + else + { + // VFALCO Audit for concurrency + genlock g{g_}; + m.unlock(); + buffer buf; + buf.reserve(s_->kh.block_size); + bucket b{s_->kh.block_size, buf.get()}; + b.read(s_->kf, + static_cast(n + 1) * s_->kh.block_size, ec); + if(ec) + return; + auto const found = exists(h, key, nullptr, b, ec); + if(ec) + return; + if(found) + { + ec = error::key_exists; + return; + } + } + } + // Perform insert + unique_lock_type m{m_}; + s_->p1.insert(h, key, data, size); + auto const now = clock_type::now(); + auto const elapsed = duration_cast>( + now > s_->when ? 
now - s_->when : clock_type::duration{1}); + auto const work = s_->p1.data_size() + + 3 * s_->p1.size() * s_->kh.block_size; + auto const rate = static_cast( + std::ceil(work / elapsed.count())); + auto const sleep = + s_->rate && rate > s_->rate; + m.unlock(); + if(sleep) + std::this_thread::sleep_for(milliseconds{25}); +} + +// Fetch key in loaded bucket b or its spills. +// +template +template +void +basic_store:: +fetch( + detail::nhash_t h, + void const* key, + detail::bucket b, + Callback&& callback, + error_code& ec) +{ + using namespace detail; + buffer buf0; + buffer buf1; + for(;;) + { + for(auto i = b.lower_bound(h); i < b.size(); ++i) + { + auto const item = b[i]; + if(item.hash != h) + break; + // Data Record + auto const len = + s_->kh.key_size + // Key + item.size; // Value + buf0.reserve(len); + s_->df.read(item.offset + + field::size, // Size + buf0.get(), len, ec); + if(ec) + return; + if(std::memcmp(buf0.get(), key, + s_->kh.key_size) == 0) + { + callback( + buf0.get() + s_->kh.key_size, item.size); + return; + } + } + auto const spill = b.spill(); + if(! spill) + break; + buf1.reserve(s_->kh.block_size); + b = bucket(s_->kh.block_size, + buf1.get()); + b.read(s_->df, spill, ec); + if(ec) + return; + } + ec = error::key_not_found; +} + +// Returns `true` if the key exists +// lock is unlocked after the first bucket processed +// +template +bool +basic_store:: +exists( + detail::nhash_t h, + void const* key, + detail::shared_lock_type* lock, + detail::bucket b, + error_code& ec) +{ + using namespace detail; + buffer buf{s_->kh.key_size + s_->kh.block_size}; + void* pk = buf.get(); + void* pb = buf.get() + s_->kh.key_size; + for(;;) + { + for(auto i = b.lower_bound(h); i < b.size(); ++i) + { + auto const item = b[i]; + if(item.hash != h) + break; + // Data Record + s_->df.read(item.offset + + field::size, // Size + pk, s_->kh.key_size, ec); // Key + if(ec) + return false; + if(std::memcmp(pk, key, s_->kh.key_size) == 0) + return true; + } + auto spill = b.spill(); + if(lock && lock->owns_lock()) + lock->unlock(); + if(! 
spill) + break; + b = bucket(s_->kh.block_size, pb); + b.read(s_->df, spill, ec); + if(ec) + return false; + } + return false; +} + +// Split the bucket in b1 to b2 +// b1 must be loaded +// tmp is used as a temporary buffer +// splits are written but not the new buckets +// +template +void +basic_store:: +split( + detail::bucket& b1, + detail::bucket& b2, + detail::bucket& tmp, + nbuck_t n1, + nbuck_t n2, + nbuck_t buckets, + nbuck_t modulus, + detail::bulk_writer& w, + error_code& ec) +{ + using namespace detail; + // Trivial case: split empty bucket + if(b1.empty()) + return; + // Split + for(std::size_t i = 0; i < b1.size();) + { + auto const e = b1[i]; + auto const n = bucket_index(e.hash, buckets, modulus); + (void)n1; + (void)n2; + BOOST_ASSERT(n==n1 || n==n2); + if(n == n2) + { + b2.insert(e.offset, e.size, e.hash); + b1.erase(i); + } + else + { + ++i; + } + } + noff_t spill = b1.spill(); + if(spill) + { + b1.spill(0); + do + { + // If any part of the spill record is + // in the write buffer then flush first + if(spill + bucket_size(s_->kh.capacity) > + w.offset() - w.size()) + { + w.flush(ec); + if(ec) + return; + } + tmp.read(s_->df, spill, ec); + if(ec) + return; + for(std::size_t i = 0; i < tmp.size(); ++i) + { + auto const e = tmp[i]; + auto const n = bucket_index( + e.hash, buckets, modulus); + BOOST_ASSERT(n==n1 || n==n2); + if(n == n2) + { + maybe_spill(b2, w, ec); + if(ec) + return; + b2.insert(e.offset, e.size, e.hash); + } + else + { + maybe_spill(b1, w, ec); + if(ec) + return; + b1.insert(e.offset, e.size, e.hash); + } + } + spill = tmp.spill(); + } + while(spill); + } +} + +template +detail::bucket +basic_store:: +load( + nbuck_t n, + detail::cache& c1, + detail::cache& c0, + void* buf, + error_code& ec) +{ + using namespace detail; + auto iter = c1.find(n); + if(iter != c1.end()) + return iter->second; + iter = c0.find(n); + if(iter != c0.end()) + return c1.insert(n, iter->second)->second; + bucket tmp{s_->kh.block_size, buf}; + tmp.read(s_->kf, + static_cast(n + 1) * s_->kh.block_size, ec); + if(ec) + return {}; + c0.insert(n, tmp); + return c1.insert(n, tmp)->second; +} + +template +void +basic_store:: +commit(detail::unique_lock_type& m, + std::size_t& work, error_code& ec) +{ + using namespace detail; + BOOST_ASSERT(m.owns_lock()); + BOOST_ASSERT(! 
s_->p1.empty()); + swap(s_->p0, s_->p1); + m.unlock(); + work = s_->p0.data_size(); + cache c0(s_->kh.key_size, s_->kh.block_size, "c0"); + cache c1(s_->kh.key_size, s_->kh.block_size, "c1"); + // 0.63212 ~= 1 - 1/e + { + auto const size = static_cast( + std::ceil(0.63212 * s_->p0.size())); + c0.reserve(size); + c1.reserve(size); + } + buffer buf1{s_->kh.block_size}; + buffer buf2{s_->kh.block_size}; + bucket tmp{s_->kh.block_size, buf1.get()}; + // Prepare rollback information + log_file_header lh; + lh.version = currentVersion; // Version + lh.uid = s_->kh.uid; // UID + lh.appnum = s_->kh.appnum; // Appnum + lh.key_size = s_->kh.key_size; // Key Size + lh.salt = s_->kh.salt; // Salt + lh.pepper = pepper(lh.salt); // Pepper + lh.block_size = s_->kh.block_size; // Block Size + lh.key_file_size = s_->kf.size(ec); // Key File Size + if(ec) + return; + lh.dat_file_size = s_->df.size(ec); // Data File Size + if(ec) + return; + write(s_->lf, lh, ec); + if(ec) + return; + // Checkpoint + s_->lf.sync(ec); + if(ec) + return; + // Append data and spills to data file + auto modulus = modulus_; + auto buckets = buckets_; + { + // Bulk write to avoid write amplification + auto const size = s_->df.size(ec); + if(ec) + return; + bulk_writer w{s_->df, size, dataWriteSize_}; + // Write inserted data to the data file + for(auto& e : s_->p0) + { + // VFALCO This could be UB since other + // threads are reading other data members + // of this object in memory + e.second = w.offset(); + auto os = w.prepare(value_size( + e.first.size, s_->kh.key_size), ec); + if(ec) + return; + // Data Record + write(os, e.first.size); // Size + write(os, e.first.key, s_->kh.key_size); // Key + write(os, e.first.data, e.first.size); // Data + } + // Do inserts, splits, and build view + // of original and modified buckets + for(auto const e : s_->p0) + { + // VFALCO Should this be >= or > ? + if((frac_ += 65536) >= thresh_) + { + // split + frac_ -= thresh_; + if(buckets == modulus) + modulus *= 2; + auto const n1 = buckets - (modulus / 2); + auto const n2 = buckets++; + auto b1 = load(n1, c1, c0, buf2.get(), ec); + if(ec) + return; + auto b2 = c1.create(n2); + // If split spills, the writer is + // flushed which can amplify writes. + split(b1, b2, tmp, n1, n2, + buckets, modulus, w, ec); + if(ec) + return; + } + // Insert + auto const n = bucket_index( + e.first.hash, buckets, modulus); + auto b = load(n, c1, c0, buf2.get(), ec); + if(ec) + return; + // This can amplify writes if it spills. + maybe_spill(b, w, ec); + if(ec) + return; + b.insert(e.second, e.first.size, e.first.hash); + } + w.flush(ec); + if(ec) + return; + } + work += s_->kh.block_size * (2 * c0.size() + c1.size()); + // Give readers a view of the new buckets. + // This might be slightly better than the old + // view since there could be fewer spills. 
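+    // The remaining sequence: swap the new cache in for readers,
+    // then append the pre-image ("clean") buckets to the log file
+    // and sync it, establishing a rollback point; only after that
+    // are the modified buckets written to the key file. Once the
+    // data and key files are synced, the log is truncated,
+    // retiring the rollback point.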
+ m.lock(); + swap(c1, s_->c1); + s_->p0.clear(); + buckets_ = buckets; + modulus_ = modulus; + g_.start(); + m.unlock(); + // Write clean buckets to log file + { + auto const size = s_->lf.size(ec); + if(ec) + return; + bulk_writer w{s_->lf, size, logWriteSize_}; + for(auto const e : c0) + { + // Log Record + auto os = w.prepare( + field::size + // Index + e.second.actual_size(), ec); // Bucket + if(ec) + return; + // Log Record + write(os, e.first); // Index + e.second.write(os); // Bucket + } + c0.clear(); + w.flush(ec); + if(ec) + return; + s_->lf.sync(ec); + if(ec) + return; + } + g_.finish(); + // Write new buckets to key file + for(auto const e : s_->c1) + { + e.second.write(s_->kf, + (e.first + 1) * s_->kh.block_size, ec); + if(ec) + return; + } + // Finalize the commit + s_->df.sync(ec); + if(ec) + return; + s_->kf.sync(ec); + if(ec) + return; + s_->lf.trunc(0, ec); + if(ec) + return; + s_->lf.sync(ec); + if(ec) + return; + // Cache is no longer needed, all fetches will go straight + // to disk again. Do this after the sync, otherwise readers + // might get blocked longer due to the extra I/O. + m.lock(); + s_->c1.clear(); +} + +template +void +basic_store:: +run() +{ + using namespace std::chrono; + using namespace detail; + +#if NUDB_DEBUG_LOG + beast::unit_test::dstream dout{std::cout}; +#endif + for(;;) + { + unique_lock_type m{m_}; + if(! s_->p1.empty()) + { + std::size_t work; + commit(m, work, ec_); + if(ec_) + { + ecb_.store(true); + return; + } + BOOST_ASSERT(m.owns_lock()); + auto const now = clock_type::now(); + auto const elapsed = duration_cast>( + now > s_->when ? now - s_->when : clock_type::duration{1}); + s_->rate = static_cast( + std::ceil(work / elapsed.count())); + #if NUDB_DEBUG_LOG + dout << + "work=" << work << + ", time=" << elapsed.count() << + ", rate=" << s_->rate << + "\n"; + #endif + } + s_->p1.periodic_activity(); + + cv_.wait_until(m, s_->when + seconds{1}, + [this]{ return ! open_; }); + if(! open_) + break; + s_->when = clock_type::now(); + } + { + unique_lock_type m{m_}; + std::size_t work; + if(! s_->p1.empty()) + commit(m, work, ec_); + } + if(ec_) + { + ecb_.store(true); + return; + } +} + +} // nudb + +#endif diff --git a/include/nudb/impl/create.ipp b/include/nudb/impl/create.ipp new file mode 100644 index 0000000000..b0b8511fff --- /dev/null +++ b/include/nudb/impl/create.ipp @@ -0,0 +1,163 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_CREATE_IPP +#define NUDB_IMPL_CREATE_IPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +namespace detail { + +template +std::uint64_t +make_uid() +{ + std::random_device rng; + std::mt19937_64 gen {rng()}; + std::uniform_int_distribution dist; + return dist(gen); +} + +} // detail + +template +std::uint64_t +make_salt() +{ + std::random_device rng; + std::mt19937_64 gen {rng()}; + std::uniform_int_distribution dist; + return dist(gen); +} + +template< + class Hasher, + class File, + class... Args +> +void +create( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::uint64_t appnum, + std::uint64_t salt, + nsize_t key_size, + nsize_t blockSize, + float load_factor, + error_code& ec, + Args&&... 
args)
+{
+    static_assert(is_File<File>::value,
+        "File requirements not met");
+
+    using namespace detail;
+    if(key_size < 1)
+    {
+        ec = error::invalid_key_size;
+        return;
+    }
+    if(blockSize > field<std::uint16_t>::max)
+    {
+        ec = error::invalid_block_size;
+        return;
+    }
+    if(load_factor <= 0.f || load_factor >= 1.f)
+    {
+        ec = error::invalid_load_factor;
+        return;
+    }
+    auto const capacity =
+        bucket_capacity(blockSize);
+    if(capacity < 1)
+    {
+        ec = error::invalid_block_size;
+        return;
+    }
+    bool edf = false;
+    bool ekf = false;
+    bool elf = false;
+    {
+        File df(args...);
+        File kf(args...);
+        File lf(args...);
+        df.create(file_mode::append, dat_path, ec);
+        if(ec)
+            goto fail;
+        edf = true;
+        kf.create(file_mode::append, key_path, ec);
+        if(ec)
+            goto fail;
+        ekf = true;
+        lf.create(file_mode::append, log_path, ec);
+        if(ec)
+            goto fail;
+        elf = true;
+        dat_file_header dh;
+        dh.version = currentVersion;
+        dh.uid = make_uid();
+        dh.appnum = appnum;
+        dh.key_size = key_size;
+
+        key_file_header kh;
+        kh.version = currentVersion;
+        kh.uid = dh.uid;
+        kh.appnum = appnum;
+        kh.key_size = key_size;
+        kh.salt = salt;
+        kh.pepper = pepper(salt);
+        kh.block_size = blockSize;
+        kh.load_factor = std::min<std::size_t>(
+            static_cast<std::size_t>(
+                65536.0 * load_factor), 65535);
+        write(df, dh, ec);
+        if(ec)
+            goto fail;
+        write(kf, kh, ec);
+        if(ec)
+            goto fail;
+        buffer buf{blockSize};
+        std::memset(buf.get(), 0, blockSize);
+        bucket b(blockSize, buf.get(), empty);
+        b.write(kf, blockSize, ec);
+        if(ec)
+            goto fail;
+        // VFALCO Leave log file empty?
+        df.sync(ec);
+        if(ec)
+            goto fail;
+        kf.sync(ec);
+        if(ec)
+            goto fail;
+        lf.sync(ec);
+        if(ec)
+            goto fail;
+        // Success
+        return;
+    }
+fail:
+    if(edf)
+        erase_file<File>(dat_path);
+    if(ekf)
+        erase_file<File>(key_path);
+    if(elf)
+        erase_file<File>(log_path);
+}
+
+} // nudb
+
+#endif
diff --git a/include/nudb/impl/error.ipp b/include/nudb/impl/error.ipp
new file mode 100644
index 0000000000..9409a779e9
--- /dev/null
+++ b/include/nudb/impl/error.ipp
@@ -0,0 +1,180 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_IMPL_ERROR_IPP
+#define NUDB_IMPL_ERROR_IPP
+
+namespace nudb {
+
+inline
+error_category const&
+nudb_category()
+{
+    struct cat_t : public error_category
+    {
+        char const*
+        name() const noexcept override
+        {
+            return "nudb";
+        }
+
+        std::string
+        message(int ev) const override
+        {
+            switch(static_cast<error>(ev))
+            {
+            case error::success:
+                return "the operation completed successfully";
+
+            case error::key_not_found:
+                return "key not found";
+
+            case error::key_exists:
+                return "key already exists";
+
+            case error::short_read:
+                return "short read";
+
+            case error::log_file_exists:
+                return "a log file exists";
+
+            case error::no_key_file:
+                return "no key file";
+
+            case error::too_many_buckets:
+                return "too many buckets";
+
+            case error::not_data_file:
+                return "not a data file";
+
+            case error::not_key_file:
+                return "not a key file";
+
+            case error::not_log_file:
+                return "not a log file";
+
+            case error::different_version:
+                return "different version";
+
+            case error::invalid_key_size:
+                return "invalid key size";
+
+            case error::invalid_block_size:
+                return "invalid block size";
+
+            case error::short_key_file:
+                return "short key file";
+
+            case error::short_bucket:
+                return "short bucket";
+
+            case error::short_spill:
+                return "short spill";
+
+            case error::short_data_record:
+                return "short data record";
+
+            case error::short_value:
+                return "short value";
+
+            case error::hash_mismatch:
+                return "hash mismatch";
+
+            case error::invalid_load_factor:
+                return "invalid load factor";
+
+            case error::invalid_capacity:
+                return "invalid capacity";
+
+            case error::invalid_bucket_count:
+                return "invalid bucket count";
+
+            case error::invalid_bucket_size:
+                return "invalid bucket size";
+
+            case error::incomplete_data_file_header:
+                return "incomplete data file header";
+
+            case error::incomplete_key_file_header:
+                return "incomplete key file header";
+
+            case error::invalid_log_record:
+                return "invalid log record";
+
+            case error::invalid_log_spill:
+                return "invalid spill in log";
+
+            case error::invalid_log_offset:
+                return "invalid offset in log";
+
+            case error::invalid_log_index:
+                return "invalid index in log";
+
+            case error::invalid_spill_size:
+                return "invalid size in spill";
+
+            case error::uid_mismatch:
+                return "uid mismatch";
+
+            case error::appnum_mismatch:
+                return "appnum mismatch";
+
+            case error::key_size_mismatch:
+                return "key size mismatch";
+
+            case error::salt_mismatch:
+                return "salt mismatch";
+
+            case error::pepper_mismatch:
+                return "pepper mismatch";
+
+            case error::block_size_mismatch:
+                return "block size mismatch";
+
+            case error::orphaned_value:
+                return "orphaned value";
+
+            case error::missing_value:
+                return "missing value";
+
+            case error::size_mismatch:
+                return "size mismatch";
+
+            case error::duplicate_value:
+                return "duplicate value";
+
+            default:
+                return "nudb error";
+            }
+        }
+
+        error_condition
+        default_error_condition(int ev) const noexcept override
+        {
+            return error_condition{ev, *this};
+        }
+
+        bool
+        equivalent(int ev,
+            error_condition const& ec) const noexcept override
+        {
+            return ec.value() == ev && &ec.category() == this;
+        }
+
+        bool
+        equivalent(error_code const& ec, int ev) const noexcept override
+        {
+            return ec.value() == ev && &ec.category() == this;
+        }
+    };
+    static cat_t const cat{};
+    return cat;
+}
+
+} // nudb
+
+#endif
diff --git a/include/nudb/impl/posix_file.ipp b/include/nudb/impl/posix_file.ipp
new file
mode 100644 index 0000000000..a03262a304 --- /dev/null +++ b/include/nudb/impl/posix_file.ipp @@ -0,0 +1,259 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_POSIX_FILE_IPP +#define NUDB_IMPL_POSIX_FILE_IPP + +#include +#include + +namespace nudb { + +inline +posix_file:: +~posix_file() +{ + close(); +} + +inline +posix_file:: +posix_file(posix_file &&other) + : fd_(other.fd_) +{ + other.fd_ = -1; +} + +inline +posix_file& +posix_file:: +operator=(posix_file&& other) +{ + if(&other == this) + return *this; + close(); + fd_ = other.fd_; + other.fd_ = -1; + return *this; +} + +inline +void +posix_file:: +close() +{ + if(fd_ != -1) + { + ::close(fd_); + fd_ = -1; + } +} + +inline +void +posix_file:: +create(file_mode mode, path_type const& path, error_code& ec) +{ + auto const result = flags(mode); + BOOST_ASSERT(! is_open()); + fd_ = ::open(path.c_str(), result.first); + if(fd_ != -1) + { + ::close(fd_); + fd_ = -1; + ec = error_code{errc::file_exists, generic_category()}; + return ; + } + int errnum = errno; + if(errnum != ENOENT) + return err(errnum, ec); + fd_ = ::open(path.c_str(), result.first | O_CREAT, 0644); + if(fd_ == -1) + return last_err(ec); +#ifndef __APPLE__ + if(::posix_fadvise(fd_, 0, 0, result.second) != 0) + return last_err(ec); +#endif +} + +inline +void +posix_file:: +open(file_mode mode, path_type const& path, error_code& ec) +{ + BOOST_ASSERT(! is_open()); + auto const result = flags(mode); + fd_ = ::open(path.c_str(), result.first); + if(fd_ == -1) + return last_err(ec); +#ifndef __APPLE__ + if(::posix_fadvise(fd_, 0, 0, result.second) != 0) + return last_err(ec); +#endif +} + +inline +void +posix_file:: +erase(path_type const& path, error_code& ec) +{ + if(::unlink(path.c_str()) != 0) + { + int const ev = errno; + return err(ev, ec); + } +} + +inline +std::uint64_t +posix_file:: +size(error_code& ec) const +{ + static_assert(sizeof(stat::st_size) == sizeof(std::uint64_t), ""); + struct stat st; + if(::fstat(fd_, &st) != 0) + { + last_err(ec); + return 0; + } + return st.st_size; +} +inline +void +posix_file:: +read(std::uint64_t offset, + void* buffer, std::size_t bytes, error_code& ec) +{ + static_assert(sizeof(off_t) >= sizeof(offset), ""); + while(bytes > 0) + { + auto const amount = static_cast( + std::min(bytes, static_cast(SSIZE_MAX))); + auto const n = ::pread(fd_, buffer, amount, offset); + if(n == -1) + { + auto const ev = errno; + if(ev == EINTR) + continue; + return err(ev, ec); + } + if(n == 0) + { + ec = error::short_read; + return; + } + offset += n; + bytes -= n; + buffer = reinterpret_cast(buffer) + n; + } +} + +inline +void +posix_file:: +write(std::uint64_t offset, + void const* buffer, std::size_t bytes, error_code& ec) +{ + static_assert(sizeof(off_t) >= sizeof(offset), ""); + while(bytes > 0) + { + auto const amount = static_cast( + std::min(bytes, static_cast(SSIZE_MAX))); + auto const n = ::pwrite(fd_, buffer, amount, offset); + if(n == -1) + { + auto const ev = errno; + if(ev == EINTR) + continue; + return err(ev, ec); + } + offset += n; + bytes -= n; + buffer = reinterpret_cast(buffer) + n; + } +} + +inline +void +posix_file:: +sync(error_code& ec) +{ + for(;;) + { + if(::fsync(fd_) == 0) + break; + auto const ev = errno; + if(ev == EINTR) + continue; + return err(ev, ec); + } +} + +inline +void +posix_file:: 
+trunc(std::uint64_t length, error_code& ec) +{ + for(;;) + { + if(::ftruncate(fd_, length) == 0) + break; + auto const ev = errno; + if(ev == EINTR) + continue; + return err(ev, ec); + } +} + +inline +std::pair +posix_file:: +flags(file_mode mode) +{ + std::pair result; + switch(mode) + { + case file_mode::scan: + result.first = + O_RDONLY; + #ifndef __APPLE__ + result.second = + POSIX_FADV_SEQUENTIAL; + #endif + break; + case file_mode::read: + result.first = + O_RDONLY; + #ifndef __APPLE__ + result.second = + POSIX_FADV_RANDOM; + #endif + break; + case file_mode::append: + result.first = + O_RDWR | + O_APPEND; + #ifndef __APPLE__ + result.second = + POSIX_FADV_RANDOM; + #endif + break; + case file_mode::write: + result.first = + O_RDWR; + #ifndef __APPLE__ + result.second = + POSIX_FADV_NORMAL; + #endif + break; + } + return result; +} + +} // nudb + +#endif diff --git a/include/nudb/impl/recover.ipp b/include/nudb/impl/recover.ipp new file mode 100644 index 0000000000..e449c36f28 --- /dev/null +++ b/include/nudb/impl/recover.ipp @@ -0,0 +1,209 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_RECOVER_IPP +#define NUDB_IMPL_RECOVER_IPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +template< + class Hasher, + class File, + class... Args> +void +recover( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + error_code& ec, + Args&&... args) +{ + static_assert(is_File::value, + "File requirements not met"); + static_assert(is_Hasher::value, + "Hasher requirements not met"); + using namespace detail; + + // Open data file + File df{args...}; + df.open(file_mode::write, dat_path, ec); + if(ec) + return; + auto const dataFileSize = df.size(ec); + if(ec) + return; + dat_file_header dh; + read(df, dh, ec); + if(ec) + return; + verify(dh, ec); + if(ec) + return; + + // Open key file + File kf{args...}; + kf.open(file_mode::write, key_path, ec); + if(ec) + return; + auto const keyFileSize = kf.size(ec); + if(ec) + return; + if(keyFileSize <= key_file_header::size) + { + kf.close(); + erase_file(log_path, ec); + if(ec) + return; + File::erase(key_path, ec); + if(ec) + return; + ec = error::no_key_file; + return; + } + + // Open log file + File lf{args...}; + lf.open(file_mode::append, log_path, ec); + if(ec == errc::no_such_file_or_directory) + { + ec = {}; + return; + } + if(ec) + return; + auto const logFileSize = lf.size(ec); + if(ec) + return; + // Read log file header + log_file_header lh; + read(lf, lh, ec); + if(ec == error::short_read) + { + BOOST_ASSERT(keyFileSize > key_file_header::size); + ec = {}; + goto clear_log; + } + if(ec) + return; + verify(lh, ec); + if(ec) + return; + if(lh.key_file_size == 0) + goto trunc_files; + { + // Read key file header + key_file_header kh; + read(kf, kh, ec); + if(ec) + return; + verify(kh, ec); + if(ec) + return; + verify(dh, kh, ec); + if(ec) + return; + verify(kh, lh, ec); + if(ec) + return; + + auto const readSize = 1024 * kh.block_size; + auto const bucketSize = bucket_size(kh.capacity); + buffer buf{kh.block_size}; + bucket b{kh.block_size, buf.get()}; + bulk_reader r{lf, + log_file_header::size, logFileSize, readSize}; + while(! 
r.eof()) + { + // Log Record + auto is = r.prepare(field::size, ec); + // Log file is incomplete, so roll back. + if(ec == error::short_read) + { + ec = {}; + break; + } + if(ec) + return; + nsize_t n; + { + std::uint64_t v; + // VFALCO This should have been a uint32_t + read(is, v); // Index + BOOST_ASSERT(v <= std::numeric_limits::max()); + n = static_cast(v); + } + b.read(r, ec); // Bucket + if(ec == error::short_read) + { + ec = {}; + break; + } + if(b.spill() && b.spill() + bucketSize > dataFileSize) + { + ec = error::invalid_log_spill; + return; + } + if(n > kh.buckets) + { + ec = error::invalid_log_index; + return; + } + b.write(kf, static_cast(n + 1) * kh.block_size, ec); + if(ec) + return; + } + } +trunc_files: + df.trunc(lh.dat_file_size, ec); + if(ec) + return; + df.sync(ec); + if(ec) + return; + if(lh.key_file_size != 0) + { + kf.trunc(lh.key_file_size, ec); + if(ec) + return; + kf.sync(ec); + if(ec) + return; + } + else + { + kf.close(); + File::erase(key_path, ec); + if(ec) + return; + } +clear_log: + lf.trunc(0, ec); + if(ec) + return; + lf.sync(ec); + if(ec) + return; + lf.close(); + File::erase(log_path, ec); + if(ec) + return; +} + +} // nudb + +#endif diff --git a/include/nudb/impl/rekey.ipp b/include/nudb/impl/rekey.ipp new file mode 100644 index 0000000000..ab247c77db --- /dev/null +++ b/include/nudb/impl/rekey.ipp @@ -0,0 +1,248 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_REKEY_IPP +#define NUDB_IMPL_REKEY_IPP + +#include +#include +#include +#include +#include +#include + +namespace nudb { + +// VFALCO Should this delete the key file on an error? +template< + class Hasher, + class File, + class Progress, + class... Args +> +void +rekey( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + std::size_t blockSize, + float loadFactor, + std::uint64_t itemCount, + std::size_t bufferSize, + error_code& ec, + Progress&& progress, + Args&&... args) +{ + static_assert(is_File::value, + "File requirements not met"); + static_assert(is_Hasher::value, + "Hasher requirements not met"); + static_assert(is_Progress::value, + "Progress requirements not met"); + using namespace detail; + auto const readSize = 1024 * block_size(dat_path); + auto const writeSize = 16 * block_size(key_path); + + // Open data file for reading and appending + File df{args...}; + df.open(file_mode::append, dat_path, ec); + if(ec) + return; + dat_file_header dh; + read(df, dh, ec); + if(ec) + return; + verify(dh, ec); + if(ec) + return; + auto const dataFileSize = df.size(ec); + if(ec) + return; + + // Make sure log file doesn't exist + File lf{args...}; + lf.open(file_mode::read, log_path, ec); + if(! 
ec) + ec = error::log_file_exists; + if(ec != errc::no_such_file_or_directory) + return; + ec = {}; + + // Set up key file header + key_file_header kh; + kh.version = currentVersion; + kh.uid = dh.uid; + kh.appnum = dh.appnum; + kh.key_size = dh.key_size; + kh.salt = make_salt(); + kh.pepper = pepper(kh.salt); + kh.block_size = blockSize; + kh.load_factor = std::min( + static_cast(65536.0 * loadFactor), 65535); + kh.buckets = static_cast( + std::ceil(itemCount /( + bucket_capacity(kh.block_size) * loadFactor))); + kh.modulus = ceil_pow2(kh.buckets); + // Create key file + File kf{args...}; + kf.create(file_mode::write, key_path, ec); + if(ec) + return; + // Write key file header + // Note, file size is less than any valid block_size here + { + std::array buf; + ostream os{buf.data(), buf.size()}; + write(os, kh); + kf.write(0, buf.data(), buf.size(), ec); + if(ec) + return; + kf.sync(ec); + if(ec) + return; + } + + // Create log file + lf.create(file_mode::append, log_path, ec); + if(ec) + return; + // Write log file header + { + log_file_header lh; + lh.version = currentVersion; // Version + lh.uid = kh.uid; // UID + lh.appnum = kh.appnum; // Appnum + lh.key_size = kh.key_size; // Key Size + lh.salt = kh.salt; // Salt + lh.pepper = pepper(kh.salt); // Pepper + lh.block_size = kh.block_size; // Block Size + lh.key_file_size = 0; // Key File Size + lh.dat_file_size = dataFileSize; // Data File Size + write(lf, lh, ec); + if(ec) + return; + lf.sync(ec); + if(ec) + return; + } + + // Create full key file + buffer buf{kh.block_size}; + { + // Write key file header + std::memset(buf.get(), 0, kh.block_size); + ostream os{buf.get(), kh.block_size}; + write(os, kh); + kf.write(0, buf.get(), buf.size(), ec); + if(ec) + return; + kf.sync(ec); + if(ec) + return; + // Pre-allocate space for the entire key file + std::uint8_t zero = 0; + kf.write( + static_cast(kh.buckets + 1) * kh.block_size - 1, + &zero, 1, ec); + if(ec) + return; + kf.sync(ec); + if(ec) + return; + } + + // Build contiguous sequential sections of the + // key file using multiple passes over the data. + // + auto const chunkSize = std::max(1, + bufferSize / kh.block_size); + // Calculate work required + auto const passes = + (kh.buckets + chunkSize - 1) / chunkSize; + auto const nwork = passes * dataFileSize; + progress(0, nwork); + + buf.reserve(chunkSize * kh.block_size); + bulk_writer dw{df, dataFileSize, writeSize}; + for(nbuck_t b0 = 0; b0 < kh.buckets; b0 += chunkSize) + { + auto const b1 = std::min(b0 + chunkSize, kh.buckets); + // Buffered range is [b0, b1) + auto const bn = b1 - b0; + // Create empty buckets + for(std::size_t i = 0; i < bn; ++i) + bucket b{kh.block_size, + buf.get() + i * kh.block_size, empty}; + // Insert all keys into buckets + // Iterate Data File + bulk_reader r{df, + dat_file_header::size, dataFileSize, readSize}; + while(! 
r.eof()) + { + auto const offset = r.offset(); + // Data Record or Spill Record + nsize_t size; + auto is = r.prepare( + field::size, ec); // Size + if(ec) + return; + progress((b0 / chunkSize) * dataFileSize + r.offset(), nwork); + read_size48(is, size); + if(size > 0) + { + // Data Record + is = r.prepare( + dh.key_size + // Key + size, ec); // Data + if(ec) + return; + std::uint8_t const* const key = + is.data(dh.key_size); + auto const h = hash( + key, dh.key_size, kh.salt); + auto const n = bucket_index( + h, kh.buckets, kh.modulus); + if(n < b0 || n >= b1) + continue; + bucket b{kh.block_size, buf.get() + + (n - b0) * kh.block_size}; + maybe_spill(b, dw, ec); + if(ec) + return; + b.insert(offset, size, h); + } + else + { + // VFALCO Should never get here + // Spill Record + is = r.prepare( + field::size, ec); + if(ec) + return; + read(is, size); // Size + r.prepare(size, ec); // skip + if(ec) + return; + } + } + kf.write((b0 + 1) * kh.block_size, buf.get(), + static_cast(bn * kh.block_size), ec); + if(ec) + return; + } + dw.flush(ec); + if(ec) + return; + lf.close(); + File::erase(log_path, ec); + if(ec) + return; +} + +} // nudb + +#endif diff --git a/include/nudb/impl/verify.ipp b/include/nudb/impl/verify.ipp new file mode 100644 index 0000000000..c1d221eebb --- /dev/null +++ b/include/nudb/impl/verify.ipp @@ -0,0 +1,630 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_VERIFY_IPP +#define NUDB_IMPL_VERIFY_IPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +namespace detail { + +// Normal verify that does not require a buffer +// +template< + class Hasher, + class File, + class Progress> +void +verify_normal( + verify_info& info, + File& df, + File& kf, + dat_file_header& dh, + key_file_header& kh, + Progress&& progress, + error_code& ec) +{ + static_assert(is_File::value, + "File requirements not met"); + static_assert(is_Hasher::value, + "Hasher requirements not met"); + static_assert(is_Progress::value, + "Progress requirements not met"); + info.algorithm = 0; + auto const readSize = 1024 * kh.block_size; + + // This ratio balances the 2 work phases. + // The number is determined empirically. + auto const adjust = 1.75; + + // Calculate the work required + auto const keys = static_cast( + double(kh.load_factor) / 65536.0 * kh.buckets * kh.capacity); + std::uint64_t const nwork = static_cast( + info.dat_file_size + keys * kh.block_size + + adjust * (info.key_file_size + keys * kh.block_size)); + std::uint64_t work = 0; + progress(0, nwork); + + // Iterate Data File + // Data Record + auto const dh_len = + field::size + // Size + kh.key_size; // Key + std::uint64_t fetches = 0; + buffer buf{kh.block_size + dh_len}; + bucket b{kh.block_size, buf.get()}; + std::uint8_t* pd = buf.get() + kh.block_size; + { + bulk_reader r{df, dat_file_header::size, + info.dat_file_size, readSize}; + while(! 
r.eof()) + { + auto const offset = r.offset(); + // Data Record or Spill Record + auto is = r.prepare( + field::size, ec); // Size + if(ec) + return; + nsize_t size; + read_size48(is, size); + if(size > 0) + { + // Data Record + is = r.prepare( + kh.key_size + // Key + size, ec); // Data + if(ec) + return; + std::uint8_t const* const key = + is.data(kh.key_size); + std::uint8_t const* const data = + is.data(size); + (void)data; + auto const h = hash( + key, kh.key_size, kh.salt); + // Check bucket and spills + auto const n = bucket_index( + h, kh.buckets, kh.modulus); + b.read(kf, + static_cast(n + 1) * kh.block_size, ec); + if(ec) + return; + work += kh.block_size; + ++fetches; + for(;;) + { + for(auto i = b.lower_bound(h); + i < b.size(); ++i) + { + auto const item = b[i]; + if(item.hash != h) + break; + if(item.offset == offset) + goto found; + ++fetches; + } + auto const spill = b.spill(); + if(! spill) + { + ec = error::orphaned_value; + return; + } + b.read(df, spill, ec); + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + ++fetches; + } + found: + // Update + ++info.value_count; + info.value_bytes += size; + } + else + { + // Spill Record + is = r.prepare( + field::size, ec); + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + read(is, size); // Size + if(size != info.bucket_size) + { + ec = error::invalid_spill_size; + return; + } + if(ec) + return; + b.read(r, ec); // Bucket + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + ++info.spill_count_tot; + info.spill_bytes_tot += + field::size + // Zero + field::size + // Size + b.actual_size(); // Bucket + } + progress(work + offset, nwork); + } + work += info.dat_file_size; + } + + // Iterate Key File + { + for(std::size_t n = 0; n < kh.buckets; ++n) + { + std::size_t nspill = 0; + b.read(kf, static_cast( + n + 1) * kh.block_size, ec); + if(ec) + return; + work += static_cast( + adjust * kh.block_size); + bool spill = false; + for(;;) + { + info.key_count += b.size(); + for(nkey_t i = 0; i < b.size(); ++i) + { + auto const e = b[i]; + df.read(e.offset, pd, dh_len, ec); + if(ec == error::short_read) + { + ec = error::missing_value; + return; + } + if(ec) + return; + if(! spill) + work += static_cast( + adjust * kh.block_size); + // Data Record + istream is{pd, dh_len}; + std::uint64_t size; + // VFALCO This should really be a 32-bit field + read(is, size); // Size + void const* key = + is.data(kh.key_size); // Key + if(size != e.size) + { + ec = error::size_mismatch; + return; + } + auto const h = hash(key, + kh.key_size, kh.salt); + if(h != e.hash) + { + ec = error::hash_mismatch; + return; + } + } + if(! 
b.spill()) + break; + b.read(df, b.spill(), ec); + if(ec) + return; + spill = true; + ++nspill; + ++info.spill_count; + info.spill_bytes += + field::size + // Zero + field::size + // Size + b.actual_size(); // SpillBucket + } + if(nspill >= info.hist.size()) + nspill = info.hist.size() - 1; + ++info.hist[nspill]; + progress(work, nwork); + } + } + float sum = 0; + for(size_t i = 0; i < info.hist.size(); ++i) + sum += info.hist[i] * (i + 1); + if(info.value_count) + info.avg_fetch = + float(fetches) / info.value_count; + else + info.avg_fetch = 0; + info.waste = (info.spill_bytes_tot - info.spill_bytes) / + float(info.dat_file_size); + if(info.value_count) + info.overhead = + float(info.key_file_size + info.dat_file_size) / + ( + info.value_bytes + + info.key_count * + (info.key_size + + // Data Record + field::size) // Size + ) - 1; + else + info.overhead = 0; + info.actual_load = info.key_count / float( + info.capacity * info.buckets); +} + +// Fast version of verify that uses a buffer +// +template +void +verify_fast( + verify_info& info, + File& df, + File& kf, + dat_file_header& dh, + key_file_header& kh, + std::size_t bufferSize, + Progress&& progress, + error_code& ec) +{ + info.algorithm = 1; + auto const readSize = 1024 * kh.block_size; + + // Counts unverified keys per bucket + if(kh.buckets > std::numeric_limits::max()) + { + ec = error::too_many_buckets; + return; + } + std::unique_ptr nkeys( + new nkey_t[kh.buckets]); + + // Verify contiguous sequential sections of the + // key file using multiple passes over the data. + // + if(bufferSize < 2 * kh.block_size + sizeof(nkey_t)) + throw std::logic_error("invalid buffer size"); + auto chunkSize = std::min(kh.buckets, + (bufferSize - kh.block_size) / + (kh.block_size + sizeof(nkey_t))); + auto const passes = + (kh.buckets + chunkSize - 1) / chunkSize; + + // Calculate the work required + std::uint64_t work = 0; + std::uint64_t const nwork = + passes * info.dat_file_size + info.key_file_size; + progress(0, nwork); + + std::uint64_t fetches = 0; + buffer buf{(chunkSize + 1) * kh.block_size}; + bucket tmp{kh.block_size, + buf.get() + chunkSize * kh.block_size}; + for(nsize_t b0 = 0; b0 < kh.buckets; b0 += chunkSize) + { + // Load key file chunk to buffer + auto const b1 = std::min(b0 + chunkSize, kh.buckets); + // Buffered range is [b0, b1) + auto const bn = b1 - b0; + kf.read( + static_cast(b0 + 1) * kh.block_size, + buf.get(), + static_cast(bn * kh.block_size), + ec); + if(ec) + return; + work += bn * kh.block_size; + progress(work, nwork); + // Count keys in buckets, including spills + for(nbuck_t i = 0 ; i < bn; ++i) + { + bucket b{kh.block_size, + buf.get() + i * kh.block_size}; + nkeys[i] = b.size(); + std::size_t nspill = 0; + auto spill = b.spill(); + while(spill != 0) + { + tmp.read(df, spill, ec); + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + nkeys[i] += tmp.size(); + spill = tmp.spill(); + ++nspill; + ++info.spill_count; + info.spill_bytes += + field::size + // Zero + field::size + // Size + tmp.actual_size(); // SpillBucket + } + if(nspill >= info.hist.size()) + nspill = info.hist.size() - 1; + ++info.hist[nspill]; + info.key_count += nkeys[i]; + } + // Iterate Data File + bulk_reader r(df, dat_file_header::size, + info.dat_file_size, readSize); + while(! 
r.eof()) + { + auto const offset = r.offset(); + // Data Record or Spill Record + auto is = r.prepare( + field::size, ec); // Size + if(ec == error::short_read) + { + ec = error::short_data_record; + return; + } + if(ec) + return; + nsize_t size; + detail::read_size48(is, size); + if(size > 0) + { + // Data Record + is = r.prepare( + kh.key_size + // Key + size, ec); // Data + if(ec == error::short_read) + { + ec = error::short_value; + return; + } + if(ec) + return; + std::uint8_t const* const key = + is.data(kh.key_size); + std::uint8_t const* const data = + is.data(size); + (void)data; + auto const h = hash( + key, kh.key_size, kh.salt); + auto const n = bucket_index( + h, kh.buckets, kh.modulus); + if(n < b0 || n >= b1) + continue; + // Check bucket and spills + bucket b{kh.block_size, buf.get() + + (n - b0) * kh.block_size}; + ++fetches; + for(;;) + { + for(auto i = b.lower_bound(h); + i < b.size(); ++i) + { + auto const item = b[i]; + if(item.hash != h) + break; + if(item.offset == offset) + goto found; + ++fetches; + } + auto const spill = b.spill(); + if(! spill) + { + ec = error::orphaned_value; + return; + } + b = tmp; + b.read(df, spill, ec); + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + ++fetches; + } + found: + // Update + ++info.value_count; + info.value_bytes += size; + if(nkeys[n - b0]-- == 0) + { + ec = error::orphaned_value; + return; + } + } + else + { + // Spill Record + is = r.prepare( + field::size, ec); + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + read(is, size); // Size + if(bucket_size( + bucket_capacity(size)) != size) + { + ec = error::invalid_spill_size; + return; + } + r.prepare(size, ec); // Bucket + if(ec == error::short_read) + { + ec = error::short_spill; + return; + } + if(ec) + return; + if(b0 == 0) + { + ++info.spill_count_tot; + info.spill_bytes_tot += + field::size + // Zero + field::size + // Size + tmp.actual_size(); // Bucket + } + } + progress(work + offset, nwork); + } + // Make sure every key in every bucket was visited + for(std::size_t i = 0; i < bn; ++i) + { + if(nkeys[i] != 0) + { + ec = error::missing_value; + return; + } + } + work += info.dat_file_size; + } + + float sum = 0; + for(std::size_t i = 0; i < info.hist.size(); ++i) + sum += info.hist[i] * (i + 1); + if(info.value_count) + info.avg_fetch = + float(fetches) / info.value_count; + else + info.avg_fetch = 0; + info.waste = (info.spill_bytes_tot - info.spill_bytes) / + float(info.dat_file_size); + if(info.value_count) + info.overhead = + float(info.key_file_size + info.dat_file_size) / + ( + info.value_bytes + + info.key_count * + (info.key_size + + // Data Record + field::size) // Size + ) - 1; + else + info.overhead = 0; + info.actual_load = info.key_count / float( + info.capacity * info.buckets); +} + +} // detail + +template +void +verify( + verify_info& info, + path_type const& dat_path, + path_type const& key_path, + std::size_t bufferSize, + Progress&& progress, + error_code& ec) +{ + static_assert(is_Hasher::value, + "Hasher requirements not met"); + static_assert(is_Progress::value, + "Progress requirements not met"); + info = {}; + using namespace detail; + using File = native_file; + File df; + df.open(file_mode::scan, dat_path, ec); + if(ec) + return; + File kf; + kf.open (file_mode::read, key_path, ec); + if(ec) + return; + dat_file_header dh; + read(df, dh, ec); + if(ec) + return; + verify(dh, ec); + if(ec) + return; + key_file_header kh; + read(kf, kh, ec); + if(ec) + 
return; + verify(kh, ec); + if(ec) + return; + verify(dh, kh, ec); + if(ec) + return; + info.dat_path = dat_path; + info.key_path = key_path; + info.version = dh.version; + info.uid = dh.uid; + info.appnum = dh.appnum; + info.key_size = dh.key_size; + info.salt = kh.salt; + info.pepper = kh.pepper; + info.block_size = kh.block_size; + info.load_factor = kh.load_factor / 65536.f; + info.capacity = kh.capacity; + info.buckets = kh.buckets; + info.bucket_size = bucket_size(kh.capacity); + info.key_file_size = kf.size(ec); + if(ec) + return; + info.dat_file_size = df.size(ec); + if(ec) + return; + + // Determine which algorithm requires the least amount + // of file I/O given the available buffer size + std::size_t chunkSize; + if(bufferSize >= 2 * kh.block_size + sizeof(nkey_t)) + chunkSize = std::min(kh.buckets, + (bufferSize - kh.block_size) / + (kh.block_size + sizeof(nkey_t))); + else + chunkSize = 0; + std::size_t passes; + if(chunkSize > 0) + passes = (kh.buckets + chunkSize - 1) / chunkSize; + else + passes = 0; + if(! chunkSize || + (( + info.dat_file_size + + (kh.buckets * kh.load_factor * kh.capacity * kh.block_size) + + info.key_file_size + ) < ( + passes * info.dat_file_size + info.key_file_size + ))) + { + detail::verify_normal(info, + df, kf, dh, kh, progress, ec); + } + else + { + detail::verify_fast(info, + df, kf, dh, kh, bufferSize, progress, ec); + } +} + +} // nudb + +#endif diff --git a/include/nudb/impl/visit.ipp b/include/nudb/impl/visit.ipp new file mode 100644 index 0000000000..4d57b9595a --- /dev/null +++ b/include/nudb/impl/visit.ipp @@ -0,0 +1,96 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_VISIT_IPP +#define NUDB_IMPL_VISIT_IPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +template< + class Callback, + class Progress> +void +visit( + path_type const& path, + Callback&& callback, + Progress&& progress, + error_code& ec) +{ + // VFALCO Need concept check for Callback + static_assert(is_Progress::value, + "Progress requirements not met"); + using namespace detail; + using File = native_file; + auto const readSize = 1024 * block_size(path); + File df; + df.open(file_mode::scan, path, ec); + if(ec) + return; + dat_file_header dh; + read(df, dh, ec); + if(ec) + return; + verify(dh, ec); + if(ec) + return; + auto const fileSize = df.size(ec); + if(ec) + return; + bulk_reader r(df, + dat_file_header::size, fileSize, readSize); + progress(0, fileSize); + while(! 
r.eof()) + { + // Data Record or Spill Record + nsize_t size; + auto is = r.prepare( + field::size, ec); // Size + if(ec) + return; + detail::read_size48(is, size); + if(size > 0) + { + // Data Record + is = r.prepare( + dh.key_size + // Key + size, ec); // Data + std::uint8_t const* const key = + is.data(dh.key_size); + callback(key, dh.key_size, + is.data(size), size, ec); + if(ec) + return; + } + else + { + // Spill Record + is = r.prepare( + field::size, ec); + if(ec) + return; + read(is, size); // Size + r.prepare(size, ec); // skip bucket + if(ec) + return; + } + progress(r.offset(), fileSize); + } +} + +} // nudb + +#endif diff --git a/include/nudb/impl/win32_file.ipp b/include/nudb/impl/win32_file.ipp new file mode 100644 index 0000000000..c0e5cdae65 --- /dev/null +++ b/include/nudb/impl/win32_file.ipp @@ -0,0 +1,264 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_IMPL_WIN32_FILE_IPP +#define NUDB_IMPL_WIN32_FILE_IPP + +#include + +namespace nudb { + +inline +win32_file:: +~win32_file() +{ + close(); +} + +inline +win32_file:: +win32_file(win32_file&& other) + : hf_(other.hf_) +{ + other.hf_ = INVALID_HANDLE_VALUE; +} + +inline +win32_file& +win32_file:: +operator=(win32_file&& other) +{ + if(&other == this) + return *this; + close(); + hf_ = other.hf_; + other.hf_ = INVALID_HANDLE_VALUE; + return *this; +} + +inline +void +win32_file:: +close() +{ + if(hf_ != INVALID_HANDLE_VALUE) + { + ::CloseHandle(hf_); + hf_ = INVALID_HANDLE_VALUE; + } +} + +inline +void +win32_file:: +create(file_mode mode, path_type const& path, error_code& ec) +{ + BOOST_ASSERT(! is_open()); + auto const f = flags(mode); + hf_ = ::CreateFileA(path.c_str(), + f.first, + 0, + NULL, + CREATE_NEW, + f.second, + NULL); + if(hf_ == INVALID_HANDLE_VALUE) + return last_err(ec); +} + +inline +void +win32_file:: +open(file_mode mode, path_type const& path, error_code& ec) +{ + BOOST_ASSERT(! is_open()); + auto const f = flags(mode); + hf_ = ::CreateFileA(path.c_str(), + f.first, + 0, + NULL, + OPEN_EXISTING, + f.second, + NULL); + if(hf_ == INVALID_HANDLE_VALUE) + return last_err(ec); +} + +inline +void +win32_file:: +erase(path_type const& path, error_code& ec) +{ + BOOL const bSuccess = + ::DeleteFileA(path.c_str()); + if(! bSuccess) + return last_err(ec); +} + +inline +std::uint64_t +win32_file:: +size(error_code& ec) const +{ + BOOST_ASSERT(is_open()); + LARGE_INTEGER fileSize; + if(! ::GetFileSizeEx(hf_, &fileSize)) + { + last_err(ec); + return 0; + } + return fileSize.QuadPart; +} + +inline +void +win32_file:: +read(std::uint64_t offset, void* buffer, std::size_t bytes, error_code& ec) +{ + while(bytes > 0) + { + DWORD bytesRead; + LARGE_INTEGER li; + li.QuadPart = static_cast(offset); + OVERLAPPED ov; + ov.Offset = li.LowPart; + ov.OffsetHigh = li.HighPart; + ov.hEvent = NULL; + DWORD amount; + if(bytes > std::numeric_limits::max()) + amount = std::numeric_limits::max(); + else + amount = static_cast(bytes); + BOOL const bSuccess = ::ReadFile( + hf_, buffer, amount, &bytesRead, &ov); + if(! 
bSuccess) + { + DWORD const dwError = ::GetLastError(); + if(dwError != ERROR_HANDLE_EOF) + return err(dwError, ec); + ec = make_error_code(error::short_read); + return; + } + if(bytesRead == 0) + { + ec = make_error_code(error::short_read); + return; + } + offset += bytesRead; + bytes -= bytesRead; + buffer = reinterpret_cast( + buffer) + bytesRead; + } +} + +inline +void +win32_file:: +write(std::uint64_t offset, + void const* buffer, std::size_t bytes, error_code& ec) +{ + while(bytes > 0) + { + LARGE_INTEGER li; + li.QuadPart = static_cast(offset); + OVERLAPPED ov; + ov.Offset = li.LowPart; + ov.OffsetHigh = li.HighPart; + ov.hEvent = NULL; + DWORD amount; + if(bytes > std::numeric_limits::max()) + amount = std::numeric_limits::max(); + else + amount = static_cast(bytes); + DWORD bytesWritten; + BOOL const bSuccess = ::WriteFile( + hf_, buffer, amount, &bytesWritten, &ov); + if(! bSuccess) + return last_err(ec); + if(bytesWritten == 0) + { + ec = error_code{errc::no_space_on_device, + generic_category()};; + return; + } + offset += bytesWritten; + bytes -= bytesWritten; + buffer = reinterpret_cast( + buffer) + bytesWritten; + } +} + +inline +void +win32_file:: +sync(error_code& ec) +{ + if(! ::FlushFileBuffers(hf_)) + return last_err(ec); +} + +inline +void +win32_file:: +trunc(std::uint64_t length, error_code& ec) +{ + LARGE_INTEGER li; + li.QuadPart = length; + BOOL bSuccess; + bSuccess = ::SetFilePointerEx( + hf_, li, NULL, FILE_BEGIN); + if(bSuccess) + bSuccess = ::SetEndOfFile(hf_); + if(! bSuccess) + return last_err(ec); +} + +inline +std::pair +win32_file:: +flags(file_mode mode) +{ + std::pair result{0, 0}; + switch(mode) + { + case file_mode::scan: + result.first = + GENERIC_READ; + result.second = + FILE_FLAG_SEQUENTIAL_SCAN; + break; + + case file_mode::read: + result.first = + GENERIC_READ; + result.second = + FILE_FLAG_RANDOM_ACCESS; + break; + + case file_mode::append: + result.first = + GENERIC_READ | GENERIC_WRITE; + result.second = + FILE_FLAG_RANDOM_ACCESS + //| FILE_FLAG_NO_BUFFERING + //| FILE_FLAG_WRITE_THROUGH + ; + break; + + case file_mode::write: + result.first = + GENERIC_READ | GENERIC_WRITE; + result.second = + FILE_FLAG_RANDOM_ACCESS; + break; + } + return result; +} + +} // nudb + +#endif diff --git a/include/nudb/native_file.hpp b/include/nudb/native_file.hpp new file mode 100644 index 0000000000..241119ab6d --- /dev/null +++ b/include/nudb/native_file.hpp @@ -0,0 +1,76 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_NATIVE_FILE_HPP +#define NUDB_NATIVE_FILE_HPP + +#include +#include +#include +#include +#include + +namespace nudb { + +/** A native file handle. + + This type is set to the appropriate platform-specific + implementation to meet the file wrapper requirements. +*/ +using native_file = +#ifdef _MSC_VER + win32_file; +#else + posix_file; +#endif + +/** Erase a file if it exists. + + This function attempts to erase the specified file. + No error is generated if the file does not already + exist. + + @param path The path to the file to erase. + + @param ec Set to the error, if any occurred. + + @tparam File A type meeting the requirements of @b File. + If this type is unspecified, @ref native_file is used. 
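+
+    @par Example
+    An illustrative sketch; the path shown is hypothetical:
+    @code
+    error_code ec;
+    erase_file("nudb.log", ec);
+    // ec is not set if the file did not exist
+    @endcode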
+*/
+template<class File = native_file>
+inline
+void
+erase_file(path_type const& path, error_code& ec)
+{
+    File::erase(path, ec);
+    if(ec == errc::no_such_file_or_directory)
+        ec = {};
+}
+
+/** Erase a file without returning an error.
+
+    This function attempts to erase the specified file.
+    Any errors are ignored, including if the file does
+    not exist.
+
+    @param path The path to the file to erase.
+
+    @tparam File A type meeting the requirements of @b File.
+    If this type is unspecified, @ref native_file is used.
+*/
+template<class File = native_file>
+inline
+void
+erase_file(path_type const& path)
+{
+    error_code ec;
+    File::erase(path, ec);
+}
+
+} // nudb
+
+#endif
diff --git a/include/nudb/nudb.hpp b/include/nudb/nudb.hpp
new file mode 100644
index 0000000000..d62801d2de
--- /dev/null
+++ b/include/nudb/nudb.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_HPP
+#define NUDB_HPP
+
+#include <nudb/create.hpp>
+#include <nudb/error.hpp>
+#include <nudb/file.hpp>
+#include <nudb/native_file.hpp>
+#include <nudb/posix_file.hpp>
+#include <nudb/progress.hpp>
+#include <nudb/recover.hpp>
+#include <nudb/rekey.hpp>
+#include <nudb/store.hpp>
+#include <nudb/type_traits.hpp>
+#include <nudb/verify.hpp>
+#include <nudb/version.hpp>
+#include <nudb/visit.hpp>
+#include <nudb/win32_file.hpp>
+#include <nudb/xxhasher.hpp>
+
+#endif
diff --git a/include/nudb/posix_file.hpp b/include/nudb/posix_file.hpp
new file mode 100644
index 0000000000..8d3b8e5b75
--- /dev/null
+++ b/include/nudb/posix_file.hpp
@@ -0,0 +1,228 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_DETAIL_POSIX_FILE_HPP
+#define NUDB_DETAIL_POSIX_FILE_HPP
+
+#include <nudb/error.hpp>
+#include <nudb/file.hpp>
+#include <boost/assert.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+#ifndef NUDB_POSIX_FILE
+# ifdef _MSC_VER
+#  define NUDB_POSIX_FILE 0
+# else
+#  define NUDB_POSIX_FILE 1
+# endif
+#endif
+
+#if NUDB_POSIX_FILE
+# include <fcntl.h>
+# include <sys/stat.h>
+# include <sys/types.h>
+# include <sys/uio.h>
+# include <unistd.h>
+#endif
+
+#if NUDB_POSIX_FILE
+
+namespace nudb {
+
+class posix_file
+{
+    int fd_ = -1;
+
+public:
+    /// Constructor
+    posix_file() = default;
+
+    /// Copy constructor (disallowed)
+    posix_file(posix_file const&) = delete;
+
+    // Copy assignment (disallowed)
+    posix_file& operator=(posix_file const&) = delete;
+
+    /** Destructor.
+
+        If open, the file is closed.
+    */
+    ~posix_file();
+
+    /** Move constructor.
+
+        @note The state of the moved-from object is as if default constructed.
+    */
+    posix_file(posix_file&&);
+
+    /** Move assignment.
+
+        @note The state of the moved-from object is as if default constructed.
+    */
+    posix_file&
+    operator=(posix_file&& other);
+
+    /// Returns `true` if the file is open.
+    bool
+    is_open() const
+    {
+        return fd_ != -1;
+    }
+
+    /// Close the file if it is open.
+    void
+    close();
+
+    /** Create a new file.
+
+        After the file is created, it is opened as if by `open(mode, path, ec)`.
+
+        @par Requirements
+
+        The file must not already exist, or else `errc::file_exists`
+        is returned.
+
+        @param mode The open mode, which must be a valid @ref file_mode.
+
+        @param path The path of the file to create.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    create(file_mode mode, path_type const& path, error_code& ec);
+
+    /** Open a file.
+
+        @par Requirements
+
+        The file must not already be open.
+
+        @param mode The open mode, which must be a valid @ref file_mode.
+
+        @param path The path of the file to open.
+
+        @param ec Set to the error, if any occurred.
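+
+        @par Example
+        An illustrative sketch; the path shown is hypothetical:
+        @code
+        posix_file f;
+        error_code ec;
+        f.open(file_mode::read, "nudb.dat", ec);
+        if(! ec)
+            f.close();
+        @endcode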
+    */
+    void
+    open(file_mode mode, path_type const& path, error_code& ec);
+
+    /** Remove a file from the file system.
+
+        It is not an error to attempt to erase a file that does not exist.
+
+        @param path The path of the file to remove.
+
+        @param ec Set to the error, if any occurred.
+    */
+    static
+    void
+    erase(path_type const& path, error_code& ec);
+
+    /** Return the size of the file.
+
+        @par Requirements
+
+        The file must be open.
+
+        @param ec Set to the error, if any occurred.
+
+        @return The size of the file, in bytes.
+    */
+    std::uint64_t
+    size(error_code& ec) const;
+
+    /** Read data from a location in the file.
+
+        @par Requirements
+
+        The file must be open.
+
+        @param offset The position in the file to read from,
+        expressed as a byte offset from the beginning.
+
+        @param buffer The location to store the data.
+
+        @param bytes The number of bytes to read.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    read(std::uint64_t offset,
+        void* buffer, std::size_t bytes, error_code& ec);
+
+    /** Write data to a location in the file.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param offset The position in the file to write to,
+        expressed as a byte offset from the beginning.
+
+        @param buffer The data to write.
+
+        @param bytes The number of bytes to write.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    write(std::uint64_t offset,
+        void const* buffer, std::size_t bytes, error_code& ec);
+
+    /** Perform a low level file synchronization.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    sync(error_code& ec);
+
+    /** Truncate the file at a specific size.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param length The new file size.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    trunc(std::uint64_t length, error_code& ec);
+
+private:
+    static
+    void
+    err(int ev, error_code& ec)
+    {
+        ec = error_code{ev, system_category()};
+    }
+
+    static
+    void
+    last_err(error_code& ec)
+    {
+        err(errno, ec);
+    }
+
+    static
+    std::pair<int, int>
+    flags(file_mode mode);
+};
+
+} // nudb
+
+#include <nudb/impl/posix_file.ipp>
+
+#endif
+
+#endif
diff --git a/include/nudb/progress.hpp b/include/nudb/progress.hpp
new file mode 100644
index 0000000000..df417a0b2b
--- /dev/null
+++ b/include/nudb/progress.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_PROGRESS_HPP
+#define NUDB_PROGRESS_HPP
+
+#include <cstdint>
+
+namespace nudb {
+
+/** Progress function that does nothing.
+
+    This type meets the requirements of @b Progress,
+    and does nothing when invoked.
+*/
+struct
+no_progress
+{
+    no_progress() = default;
+
+    /// Called to indicate progress
+    void
+    operator()(std::uint64_t, std::uint64_t) const noexcept
+    {
+    }
+};
+
+} // nudb
+
+#endif
diff --git a/include/nudb/recover.hpp b/include/nudb/recover.hpp
new file mode 100644
index 0000000000..b6b69c1df5
--- /dev/null
+++ b/include/nudb/recover.hpp
@@ -0,0 +1,73 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_RECOVER_HPP +#define NUDB_RECOVER_HPP + +#include +#include + +namespace nudb { + +/** Perform recovery on a database. + + This implements the recovery algorithm by rolling back + any partially committed data. If no log file is present, + the function does nothing. + + During the commit phase of a NuDB database, a log file + is generated with information that may be used to roll + back the results of a partial commit. This function + checks for the presence of a log file. If present, the + log file is replayed on the key and data files belonging + to the database, restoring the database to its state + before the partial commit. When @ref recover is + successful, it erases the log file. + + It is normally not necessary to call this function + directly, it is called automatically when a database is + opened in a call to @ref basic_store::open. Callers may + use this function to implement auxiliary tools for + manipulating the database. + + @par Template Parameters + + @tparam Hasher The hash function to use. This type must + meet the requirements of @b Hasher. The hash function + must be the same as that used to create the database, or + else an error is returned. + + @tparam File The type of file to use. Use the default of + @ref native_file unless customizing the file behavior. + + @param dat_path The path to the data file. + + @param key_path The path to the key file. + + @param log_path The path to the log file. + + @param args Optional parameters passed to File constructors. + + @param ec Set to the error, if any occurred. +*/ +template< + class Hasher, + class File = native_file, + class... Args> +void +recover( + path_type const& dat_path, + path_type const& key_path, + path_type const& log_path, + error_code& ec, + Args&&... args); + +} // nudb + +#include + +#endif diff --git a/include/nudb/rekey.hpp b/include/nudb/rekey.hpp new file mode 100644 index 0000000000..429cd1ac85 --- /dev/null +++ b/include/nudb/rekey.hpp @@ -0,0 +1,110 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef NUDB_REKEY_HPP +#define NUDB_REKEY_HPP + +#include +#include +#include +#include + +namespace nudb { + +/** Create a new key file from a data file. + + This algorithm rebuilds a key file for the given data file. + It works efficiently by iterating the data file multiple times. + During the iteration, a contiguous block of the key file is + rendered in memory, then flushed to disk when the iteration is + complete. The size of this memory buffer is controlled by the + `bufferSize` parameter, larger is better. The algorithm works + the fastest when `bufferSize` is large enough to hold the entire + key file in memory; only a single iteration of the data file + is needed in this case. + + During the rekey, spill records may be appended to the data + file. If the rekey operation is abnormally terminated, this + would normally result in a corrupted data file. To prevent this, + the function creates a log file using the specified path so + that the database can be fixed in a subsequent call to + @ref recover. + + @note If a log file is already present, this function will + fail with @ref error::log_file_exists. + + @par Template Parameters + + @tparam Hasher The hash function to use. 
This type must
+    meet the requirements of @b Hasher. The hash function
+    must be the same as that used to create the database, or
+    else an error is returned.
+
+    @tparam File The type of file to use. This type must meet
+    the requirements of @b File.
+
+    @param dat_path The path to the data file.
+
+    @param key_path The path to the key file.
+
+    @param log_path The path to the log file.
+
+    @param blockSize The size of a key file block. Larger
+    blocks hold more keys but require more I/O cycles per
+    operation. The ideal block size is the largest size that
+    can be read in a single I/O cycle, and is device dependent.
+    The return value of @ref block_size returns a suitable
+    value for the volume of a given path.
+
+    @param loadFactor A number between zero and one
+    representing the average bucket occupancy (number of
+    items). A value of 0.5 is perfect. Lower numbers
+    waste space, and higher numbers produce negligible
+    savings at the cost of increased I/O cycles.
+
+    @param itemCount The number of items in the data file.
+
+    @param bufferSize The number of bytes to allocate for the buffer.
+
+    @param ec Set to the error, if any occurred.
+
+    @param progress A function which will be called periodically
+    as the algorithm proceeds. The equivalent signature of the
+    progress function must be:
+    @code
+    void progress(
+        std::uint64_t amount,   // Amount of work done so far
+        std::uint64_t total     // Total amount of work to do
+    );
+    @endcode
+
+    @param args Optional arguments passed to @b File constructors.
+*/
+template<
+    class Hasher,
+    class File,
+    class Progress,
+    class... Args
+>
+void
+rekey(
+    path_type const& dat_path,
+    path_type const& key_path,
+    path_type const& log_path,
+    std::size_t blockSize,
+    float loadFactor,
+    std::uint64_t itemCount,
+    std::size_t bufferSize,
+    error_code& ec,
+    Progress&& progress,
+    Args&&... args);
+
+} // nudb
+
+#include <nudb/impl/rekey.ipp>
+
+#endif
diff --git a/include/nudb/store.hpp b/include/nudb/store.hpp
new file mode 100644
index 0000000000..5853c7bead
--- /dev/null
+++ b/include/nudb/store.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_STORE_HPP
+#define NUDB_STORE_HPP
+
+#include <nudb/basic_store.hpp>
+#include <nudb/native_file.hpp>
+#include <nudb/xxhasher.hpp>
+
+namespace nudb {
+
+/** A key/value database.
+
+    The @b Hasher used is @ref xxhasher, which works very
+    well for almost all cases. The @b File is @ref native_file,
+    which works on Windows and POSIX platforms.
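+
+    @par Example
+    An illustrative sketch; the file names are hypothetical
+    and error handling is elided:
+    @code
+    error_code ec;
+    store db;
+    db.open("nudb.dat", "nudb.key", "nudb.log", ec);
+    // ... insert and fetch items ...
+    db.close(ec);
+    @endcode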
+*/
+using store = basic_store<xxhasher, native_file>;
+
+} // nudb
+
+#endif
diff --git a/include/nudb/type_traits.hpp b/include/nudb/type_traits.hpp
new file mode 100644
index 0000000000..f20c4d4c9e
--- /dev/null
+++ b/include/nudb/type_traits.hpp
@@ -0,0 +1,63 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_TYPE_TRAITS_HPP
+#define NUDB_TYPE_TRAITS_HPP
+
+#include <cstddef>
+#include <cstdint>
+
+namespace nudb {
+
+#if ! GENERATING_DOCS
+
+namespace detail {
+
+// Holds a full digest
+using nhash_t = std::uint64_t;
+
+} // detail
+
+/** Holds a bucket index or bucket count.
+
+    The maximum number of buckets in a key file is 2^32-1.
+*/
+//using nbuck_t = std::uint32_t;
+using nbuck_t = std::size_t;
+
+/** Holds a key index or count in a bucket.
+
+    A bucket is limited to 2^16-1 items. The practical
+    limit is lower, since a bucket cannot be larger than
+    the block size.
+*/
+//using nkey_t = std::uint16_t;
+using nkey_t = std::size_t;
+
+/** Holds a file size or offset.
+
+    Operating system support for large files is required.
+    Practically, data files cannot exceed 2^48 since offsets
+    are stored as 48 bit unsigned values.
+*/
+using noff_t = std::uint64_t;
+
+/** Holds a block, key, or value size.
+
+    Block size is limited to 2^16.
+
+    Key file blocks are limited to the block size.
+
+    Value sizes are limited to 2^31-1.
+*/
+using nsize_t = std::size_t;
+
+#endif
+
+} // nudb
+
+#endif
diff --git a/include/nudb/verify.hpp b/include/nudb/verify.hpp
new file mode 100644
index 0000000000..23b12604d5
--- /dev/null
+++ b/include/nudb/verify.hpp
@@ -0,0 +1,200 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_VERIFY_HPP
+#define NUDB_VERIFY_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nudb {
+
+/// Describes database statistics calculated by @ref verify.
+struct verify_info
+{
+    /** Indicates the verify algorithm used.
+
+        @li @b 0 Normal algorithm
+        @li @b 1 Fast algorithm
+    */
+    int algorithm; // 0 = normal, 1 = fast
+
+    /// The path to the data file
+    path_type dat_path;
+
+    /// The path to the key file
+    path_type key_path;
+
+    /// The API version used to create the database
+    std::size_t version = 0;
+
+    /// The unique identifier
+    std::uint64_t uid = 0;
+
+    /// The application-defined constant
+    std::uint64_t appnum = 0;
+
+    /// The size of each key, in bytes
+    nsize_t key_size = 0;
+
+    /// The salt used in the key file
+    std::uint64_t salt = 0;
+
+    /// The salt fingerprint
+    std::uint64_t pepper = 0;
+
+    /// The block size used in the key file
+    nsize_t block_size = 0;
+
+    /// The target load factor used in the key file
+    float load_factor = 0;
+
+    /// The maximum number of keys each bucket can hold
+    nkey_t capacity = 0;
+
+    /// The number of buckets in the key file
+    nbuck_t buckets = 0;
+
+    /// The size of a bucket in bytes
+    nsize_t bucket_size = 0;
+
+    /// The size of the key file
+    noff_t key_file_size = 0;
+
+    /// The size of the data file
+    noff_t dat_file_size = 0;
+
+    /// The number of keys found
+    std::uint64_t key_count = 0;
+
+    /// The number of values found
+    std::uint64_t value_count = 0;
+
+    /// The total number of bytes occupied by values
+    std::uint64_t value_bytes = 0;
+
+    /// The number of spill records in use
+    std::uint64_t spill_count = 0;
+
+    /// The total number of spill records
+    std::uint64_t spill_count_tot = 0;
+
+    /// The number of bytes occupied by spill records in use
+    std::uint64_t spill_bytes = 0;
+
+    /// The number of bytes occupied by all spill records
+    std::uint64_t spill_bytes_tot = 0;
+
+    /// Average number of key file reads per fetch
+    float avg_fetch = 0;
+
+    /// The fraction of the data file that is wasted
+    float waste = 0;
+
+    /// The data amplification ratio
+    float overhead = 0;
+
+    /// The measured bucket load fraction
+    float actual_load = 0;
+
+    /// A histogram of the number of buckets having N spill records
+    std::array<std::uint64_t, 10> hist;
+
+    /// Default constructor
+    verify_info()
+    {
+        hist.fill(0);
+    }
+};
+
+/** Verify consistency of the key and data files.
+*/
+template<class Hasher, class Progress>
+void
+verify(
+    verify_info& info,
+    path_type const& dat_path,
+    path_type const& key_path,
+    std::size_t bufferSize,
+    Progress&& progress,
+    error_code& ec);
+
+} // nudb
+
+#include <nudb/impl/verify.ipp>
+
+#endif
diff --git a/include/nudb/version.hpp b/include/nudb/version.hpp
new file mode 100644
index 0000000000..9ccab10634
--- /dev/null
+++ b/include/nudb/version.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_VERSION_HPP
+#define NUDB_VERSION_HPP
+
+// follows http://semver.org
+
+// NUDB_VERSION % 100 is the patch level
+// NUDB_VERSION / 100 % 1000 is the minor version
+// NUDB_VERSION / 100000 is the major version
+//
+#define NUDB_VERSION 100000
+
+#define NUDB_VERSION_STRING "1.0.0-b6"
+
+#endif
diff --git a/include/nudb/visit.hpp b/include/nudb/visit.hpp
new file mode 100644
index 0000000000..abb0c91a97
--- /dev/null
+++ b/include/nudb/visit.hpp
@@ -0,0 +1,63 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_VISIT_HPP
+#define NUDB_VISIT_HPP
+
+#include
+#include
+
+namespace nudb {
+
+/** Visit each key/data pair in a data file.
+
+    This function will open and iterate the contents of a
+    data file, invoking the callback for each key/value
+    pair found. Only a data file is necessary; the key
+    file may be omitted.
+
+    @param path The path to the data file.
+
+    @param callback A function which will be called with
+    each item found in the data file. The equivalent signature
+    of the callback must be:
+    @code
+    void callback(
+        void const* key,        // A pointer to the item key
+        std::size_t key_size,   // The size of the key (always the same)
+        void const* data,       // A pointer to the item data
+        std::size_t data_size,  // The size of the item data
+        error_code& ec          // Indicates an error (out parameter)
+    );
+    @endcode
+    If the callback sets ec to an error, the visit is terminated.
+
+    @param progress A function which will be called periodically
+    as the algorithm proceeds. The equivalent signature of the
+    progress function must be:
+    @code
+    void progress(
+        std::uint64_t amount,   // Amount of work done so far
+        std::uint64_t total     // Total amount of work to do
+    );
+    @endcode
+
+    @param ec Set to the error, if any occurred.
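+
+    @par Example
+    A minimal sketch. The path is illustrative, and both
+    callbacks here do nothing:
+    @code
+    error_code ec;
+    visit("db.dat",
+        [](void const* key, std::size_t key_size,
+            void const* data, std::size_t data_size,
+            error_code& ec)
+        {
+            // process one key/value pair; set ec to stop the visit
+        },
+        [](std::uint64_t amount, std::uint64_t total)
+        {
+        }, ec);
+    @endcode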
+*/
+template<class Callback, class Progress>
+void
+visit(
+    path_type const& path,
+    Callback&& callback,
+    Progress&& progress,
+    error_code& ec);
+
+} // nudb
+
+#include <nudb/impl/visit.ipp>
+
+#endif
diff --git a/include/nudb/win32_file.hpp b/include/nudb/win32_file.hpp
new file mode 100644
index 0000000000..d225ae9421
--- /dev/null
+++ b/include/nudb/win32_file.hpp
@@ -0,0 +1,246 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_DETAIL_WIN32_FILE_HPP
+#define NUDB_DETAIL_WIN32_FILE_HPP
+
+#include
+#include
+#include
+#include
+#include
+
+#ifndef NUDB_WIN32_FILE
+# ifdef _MSC_VER
+#  define NUDB_WIN32_FILE 1
+# else
+#  define NUDB_WIN32_FILE 0
+# endif
+#endif
+
+#if NUDB_WIN32_FILE
+#pragma push_macro("NOMINMAX")
+#pragma push_macro("UNICODE")
+#pragma push_macro("STRICT")
+# ifndef NOMINMAX
+#  define NOMINMAX
+# endif
+# ifndef UNICODE
+#  define UNICODE
+# endif
+# ifndef STRICT
+#  define STRICT
+# endif
+# ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+# endif
+# include <windows.h>
+#pragma pop_macro("STRICT")
+#pragma pop_macro("UNICODE")
+#pragma pop_macro("NOMINMAX")
+#endif
+
+#if NUDB_WIN32_FILE
+
+namespace nudb {
+
+/** A descriptor to a Win32 file.
+
+    This class provides a Win32 implementation of the @b File
+    concept.
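+
+    @par Example
+    A brief sketch of the File interface (the file name is
+    illustrative):
+    @code
+    win32_file f;
+    error_code ec;
+    f.open(file_mode::read, "db.dat", ec);
+    if(! ec)
+    {
+        auto const size = f.size(ec);
+        char buf[8];
+        if(! ec && size >= sizeof(buf))
+            f.read(0, buf, sizeof(buf), ec);
+        f.close();
+    }
+    @endcode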
+*/
+class win32_file
+{
+    HANDLE hf_ = INVALID_HANDLE_VALUE;
+
+public:
+    /// Constructor
+    win32_file() = default;
+
+    /// Copy constructor (disallowed)
+    win32_file(win32_file const&) = delete;
+
+    /// Copy assignment (disallowed)
+    win32_file& operator=(win32_file const&) = delete;
+
+    /** Destructor.
+
+        If open, the file is closed.
+    */
+    ~win32_file();
+
+    /** Move constructor.
+
+        @note The state of the moved-from object is as if default constructed.
+    */
+    win32_file(win32_file&&);
+
+    /** Move assignment.
+
+        @note The state of the moved-from object is as if default constructed.
+    */
+    win32_file&
+    operator=(win32_file&& other);
+
+    /// Returns `true` if the file is open.
+    bool
+    is_open() const
+    {
+        return hf_ != INVALID_HANDLE_VALUE;
+    }
+
+    /// Close the file if it is open.
+    void
+    close();
+
+    /** Create a new file.
+
+        After the file is created, it is opened as if by `open(mode, path, ec)`.
+
+        @par Requirements
+
+        The file must not already exist, or else `errc::file_exists`
+        is returned.
+
+        @param mode The open mode, which must be a valid @ref file_mode.
+
+        @param path The path of the file to create.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    create(file_mode mode, path_type const& path, error_code& ec);
+
+    /** Open a file.
+
+        @par Requirements
+
+        The file must not already be open.
+
+        @param mode The open mode, which must be a valid @ref file_mode.
+
+        @param path The path of the file to open.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    open(file_mode mode, path_type const& path, error_code& ec);
+
+    /** Remove a file from the file system.
+
+        It is not an error to attempt to erase a file that does not exist.
+
+        @param path The path of the file to remove.
+
+        @param ec Set to the error, if any occurred.
+    */
+    static
+    void
+    erase(path_type const& path, error_code& ec);
+
+    /** Return the size of the file.
+
+        @par Requirements
+
+        The file must be open.
+
+        @param ec Set to the error, if any occurred.
+
+        @return The size of the file, in bytes.
+    */
+    std::uint64_t
+    size(error_code& ec) const;
+
+    /** Read data from a location in the file.
+
+        @par Requirements
+
+        The file must be open.
+
+        @param offset The position in the file to read from,
+        expressed as a byte offset from the beginning.
+
+        @param buffer The location to store the data.
+
+        @param bytes The number of bytes to read.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    read(std::uint64_t offset,
+        void* buffer, std::size_t bytes, error_code& ec);
+
+    /** Write data to a location in the file.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param offset The position in the file to write to,
+        expressed as a byte offset from the beginning.
+
+        @param buffer The data to write.
+
+        @param bytes The number of bytes to write.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    write(std::uint64_t offset,
+        void const* buffer, std::size_t bytes, error_code& ec);
+
+    /** Perform a low level file synchronization.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    sync(error_code& ec);
+
+    /** Truncate the file to a specific size.
+
+        @par Requirements
+
+        The file must be open with a mode allowing writes.
+
+        @param length The new file size.
+
+        @param ec Set to the error, if any occurred.
+    */
+    void
+    trunc(std::uint64_t length, error_code& ec);
+
+private:
+    static
+    void
+    err(DWORD dwError, error_code& ec)
+    {
+        ec = error_code{static_cast<int>(dwError), system_category()};
+    }
+
+    static
+    void
+    last_err(error_code& ec)
+    {
+        err(::GetLastError(), ec);
+    }
+
+    static
+    std::pair<DWORD, DWORD>
+    flags(file_mode mode);
+};
+
+} // nudb
+
+#include <nudb/impl/win32_file.ipp>
+
+#endif
+
+#endif
diff --git a/include/nudb/xxhasher.hpp b/include/nudb/xxhasher.hpp
new file mode 100644
index 0000000000..5fe1e93c27
--- /dev/null
+++ b/include/nudb/xxhasher.hpp
@@ -0,0 +1,45 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef NUDB_XXHASHER_HPP
+#define NUDB_XXHASHER_HPP
+
+#include
+#include
+#include
+#include
+
+namespace nudb {
+
+/** A Hasher that uses xxHash.
+
+    This object meets the requirements of @b Hasher. It is
+    the default hash function unless otherwise specified.
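+
+    @par Example
+    A brief sketch (the seed shown is arbitrary; a database
+    normally seeds the hasher with the key file's salt):
+    @code
+    xxhasher h{1337};
+    auto const digest = h("key", 3);
+    @endcode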
+*/
+class xxhasher
+{
+    std::uint64_t seed_;
+
+public:
+    using result_type = std::uint64_t;
+
+    explicit
+    xxhasher(std::uint64_t seed)
+        : seed_(seed)
+    {
+    }
+
+    result_type
+    operator()(void const* data, std::size_t bytes) const noexcept
+    {
+        return detail::XXH64(data, bytes, seed_);
+    }
+};
+
+} // nudb
+
+#endif
diff --git a/scripts/blacklist.supp b/scripts/blacklist.supp
new file mode 100644
index 0000000000..e161cb10c8
--- /dev/null
+++ b/scripts/blacklist.supp
@@ -0,0 +1,38 @@
+# Remember that this blacklist file is GLOBAL to all sanitizers.
+# Therefore be extremely careful when considering whether to add a
+# sanitizer filter here instead of using a runtime suppression.
+#
+# Remember also that filters here quite literally completely
+# remove instrumentation altogether, so filtering here means
+# that sanitizers such as tsan will false positive on problems
+# introduced by code filtered here.
+#
+# The main use for this file is ubsan, as it's the only sanitizer
+# without a runtime suppression facility.
+#
+# Be ESPECIALLY careful when filtering out entire source files!
+# Try, if at all possible, to filter only functions using fun:regex.
+# Remember you must use mangled symbol names with fun:regex.
+
+#### Compile time filters for ubsan ####
+
+## The well known ubsan failure in libstdc++ extant for years :)
+# Line 96:24: runtime error: load of value 4294967221, which is not a valid value for type 'std::_Ios_Fmtflags'
+fun:*_Ios_Fmtflags*
+
+# boost/any.hpp:259:16: runtime error: downcast of address 0x000004392e70 which does not point to an object of type 'any::holder'
+fun:*any_cast*
+
+# boost/lexical_cast.hpp:1625:43: runtime error: downcast of address 0x7fbb4fffbce8 which does not point to an object of type 'buffer_t' (aka 'parser_buf >, char>')
+fun:*shl_input_streamable*
+
+
+
+
+#### Compile time filters for asan ####
+
+
+#### Compile time filters for msan ####
+
+
+#### Compile time filters for tsan ####
diff --git a/scripts/build-and-test.sh b/scripts/build-and-test.sh
new file mode 100755
index 0000000000..c7aa08a716
--- /dev/null
+++ b/scripts/build-and-test.sh
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+
+set -euxo pipefail
+# The above bash options do the following:
+
+# -e When this option is on, if a simple command fails for any of the reasons
+#    listed in Consequences of Shell Errors or returns an exit status value >0,
+#    and is not part of the compound list following a while, until, or if
+#    keyword, and is not a part of an AND or OR list, and is not a pipeline
+#    preceded by the ! reserved word, then the shell shall immediately exit.
+# -u The shell shall write a message to standard error when it tries to expand a
+#    variable that is not set and immediately exit. An interactive shell shall
+#    not exit.
+# -x The shell shall write to standard error a trace for each command after it
+#    expands the command and before it executes it. It is unspecified
+#    whether the command that turns tracing off is traced.
+# -o pipefail
+#    Pipelines fail on the first command which fails instead of dying later on
+#    down the pipeline.
+ +shopt -s globstar + +################################## ENVIRONMENT ################################# + +# If not CI, then set some defaults +if [[ -z ${CI:-} ]]; then + : ${TRAVIS_BRANCH:=feature} + : ${CC:=gcc} + : ${ADDRESS_MODEL:=64} + : ${VARIANT:=debug} + # If running locally we assume we have lcov/valgrind on PATH +else + export PATH=${VALGRIND_ROOT}/bin:${LCOV_ROOT}/usr/bin:${PATH} +fi + +MAIN_BRANCH=0 +# For builds not triggered by a pull request TRAVIS_BRANCH is the name of the +# branch currently being built; whereas for builds triggered by a pull request +# it is the name of the branch targeted by the pull request (in many cases this +# will be master). +if [[ ${TRAVIS_BRANCH} == master || ${TRAVIS_BRANCH} == develop ]]; then + MAIN_BRANCH=1 +fi + +num_jobs=1 +if [[ $(uname) == Darwin ]]; then + num_jobs=$(sysctl -n hw.physicalcpu) +elif [[ $(uname -s) == Linux ]]; then + # CircleCI returns 32 phys procs, but 2 virt proc + num_proc_units=$(nproc) + # Physical cores + num_jobs=$(lscpu -p | grep -v '^#' | sort -u -t, -k 2,4 | wc -l) + if ((${num_proc_units} < ${num_jobs})); then + num_jobs=${num_proc_units} + fi +fi + +echo "using toolset: ${CC}" +echo "using variant: ${VARIANT}" +echo "using address-model: ${ADDRESS_MODEL}" +echo "using PATH: ${PATH}" +echo "using MAIN_BRANCH: ${MAIN_BRANCH}" +echo "using BOOST_ROOT: ${BOOST_ROOT}" + +#################################### HELPERS ################################### + +function run_tests_with_debugger { + for x in bin/**/${VARIANT}/**/test-all; do + scripts/run-with-debugger.sh "${x}" + done +} + +function run_tests { + for x in bin/**/${VARIANT}/**/test-all; do + ${x} + done +} + +function run_benchmark { + for x in bin/**/${VARIANT}/**/bench; do + ${x} --inserts=10000 + done +} + +function run_tests_with_valgrind { + for x in bin/**/${VARIANT}/**/test-all; do + # TODO --max-stackframe=8388608 + # see: https://travis-ci.org/vinniefalco/Beast/jobs/132486245 + valgrind --error-exitcode=1 "${x}" + done +} + +function build_bjam { + ${BOOST_ROOT}/bjam toolset=${CC} \ + variant=${VARIANT} \ + address-model=${ADDRESS_MODEL} \ + -j${num_jobs} +} + +function build_cmake { + mkdir -p build + pushd build > /dev/null + cmake -DVARIANT=${VARIANT} .. + make -j${num_jobs} + mkdir -p ../bin/${VARIANT} + find . -executable -type f -exec cp {} ../bin/${VARIANT}/. \; + popd > /dev/null +} + +##################################### BUILD #################################### + +if [[ ${BUILD_SYSTEM:-} == cmake ]]; then + build_cmake +else + build_bjam +fi + +##################################### TESTS #################################### + +if [[ ${VARIANT} == coverage ]]; then + find . -name "*.gcda" | xargs rm -f + rm *.info -f + # Create baseline coverage data file + lcov --no-external -c -i -d . -o baseline.info > /dev/null + + # Perform test + if [[ ${MAIN_BRANCH} == 1 ]]; then + run_tests_with_valgrind + else + run_tests + fi + + # Create test coverage data file + lcov --no-external -c -d . 
-o testrun.info > /dev/null
+
+    # Combine baseline and test coverage data
+    lcov -a baseline.info -a testrun.info -o lcov-all.info > /dev/null
+
+    # Extract only include/*, and don't report on examples or tests
+    lcov -e "lcov-all.info" "${PWD}/include/nudb/*" -o lcov.info > /dev/null
+
+    ~/.local/bin/codecov -X gcov
+    cat lcov.info | node_modules/.bin/coveralls
+
+    # Clean up these stragglers so BOOST_ROOT cache is clean
+    find ${BOOST_ROOT}/bin.v2 -name "*.gcda" | xargs rm -f
+else
+    run_tests_with_debugger
+    run_benchmark
+fi
diff --git a/scripts/install-boost.sh b/scripts/install-boost.sh
new file mode 100755
index 0000000000..8365c2d544
--- /dev/null
+++ b/scripts/install-boost.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Assumptions:
+# 1) BOOST_ROOT and BOOST_URL are already defined,
+#    and contain valid values.
+# 2) The last namepart of BOOST_ROOT matches the
+#    folder name internal to boost's .tar.gz
+# When testing you can force a boost build by clearing travis caches:
+# https://travis-ci.org/ripple/rippled/caches
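+#
+# Example (local run; the values shown are illustrative):
+#   BOOST_ROOT=$HOME/boost_1_60_0 \
+#   BOOST_URL='http://downloads.sourceforge.net/project/boost/...' \
+#   ADDRESS_MODEL=64 \
+#   scripts/install-boost.sh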
+set -eu
+if [ ! -d "$BOOST_ROOT/lib" ]
+then
+    wget $BOOST_URL -O /tmp/boost.tar.gz
+    cd `dirname $BOOST_ROOT`
+    rm -fr ${BOOST_ROOT}
+    tar xzf /tmp/boost.tar.gz
+
+    params="define=_GLIBCXX_USE_CXX11_ABI=0 \
+        address-model=$ADDRESS_MODEL --with-program_options \
+        --with-system --with-coroutine --with-filesystem"
+    cd $BOOST_ROOT && \
+        ./bootstrap.sh --prefix=$BOOST_ROOT && \
+        ./b2 -d1 $params && \
+        ./b2 -d0 $params install
+else
+    echo "Using cached boost at $BOOST_ROOT"
+fi
+
diff --git a/scripts/install-dependencies.sh b/scripts/install-dependencies.sh
new file mode 100755
index 0000000000..5c26b26a3d
--- /dev/null
+++ b/scripts/install-dependencies.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+set -euxo pipefail
+# The above bash options do the following:
+
+# -e When this option is on, if a simple command fails for any of the reasons
+#    listed in Consequences of Shell Errors or returns an exit status value >0,
+#    and is not part of the compound list following a while, until, or if
+#    keyword, and is not a part of an AND or OR list, and is not a pipeline
+#    preceded by the ! reserved word, then the shell shall immediately exit.
+# -u The shell shall write a message to standard error when it tries to expand a
+#    variable that is not set and immediately exit. An interactive shell shall
+#    not exit.
+# -x The shell shall write to standard error a trace for each command after it
+#    expands the command and before it executes it. It is unspecified
+#    whether the command that turns tracing off is traced.
+# -o pipefail
+#    Pipelines fail on the first command which fails instead of dying later on
+#    down the pipeline.
+
+HERE=${PWD}
+
+# Override gcc version to $GCC_VER.
+# Put an appropriate symlink at the front of the path.
+mkdir -v ${HOME}/bin
+for g in gcc g++ gcov gcc-ar gcc-nm gcc-ranlib
+do
+    test -x $( type -p ${g}-${GCC_VER} )
+    ln -sv $(type -p ${g}-${GCC_VER}) $HOME/bin/${g}
+done
+
+if [[ -n ${CLANG_VER:-} ]]; then
+    # There are cases where the directory exists, but the exe is not available.
+    # Use this workaround for now.
+    if [[ ! -x llvm-${LLVM_VERSION}/bin/llvm-config ]] && [[ -d llvm-${LLVM_VERSION} ]]; then
+        rm -fr llvm-${LLVM_VERSION}
+    fi
+    if [[ ! -d llvm-${LLVM_VERSION} ]]; then
+        mkdir llvm-${LLVM_VERSION}
+        LLVM_URL="http://llvm.org/releases/${LLVM_VERSION}/clang+llvm-${LLVM_VERSION}-x86_64-linux-gnu-ubuntu-14.04.tar.xz"
+        wget -O - ${LLVM_URL} | tar -Jxvf - --strip 1 -C llvm-${LLVM_VERSION}
+    fi
+    llvm-${LLVM_VERSION}/bin/llvm-config --version;
+    export LLVM_CONFIG="llvm-${LLVM_VERSION}/bin/llvm-config";
+fi
+
+# There are cases where the directory exists, but the exe is not available.
+# Use this workaround for now.
+if [[ ! -x cmake/bin/cmake && -d cmake ]]; then
+    rm -fr cmake
+fi
+if [[ ! -d cmake && ${BUILD_SYSTEM:-} == cmake ]]; then
+    CMAKE_URL="http://www.cmake.org/files/v3.5/cmake-3.5.2-Linux-x86_64.tar.gz"
+    mkdir cmake && wget --no-check-certificate -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake
+fi
+
+# NOTE, changed from PWD -> HOME
+export PATH=${HOME}/bin:${PATH}
+
+# What versions are we ACTUALLY running?
+if [ -x $HOME/bin/g++ ]; then
+    ${HOME}/bin/g++ -v
+fi
+if [ -x ${HOME}/bin/clang ]; then
+    ${HOME}/bin/clang -v
+fi
+# Avoid `spurious errors` caused by ~/.npm permission issues
+# Does it already exist? Who owns? What permissions?
+ls -lah ~/.npm || mkdir ~/.npm
+# Make sure we own it
+chown -Rc ${USER} ~/.npm
+# We use this so we can filter the subtrees from our coverage report
+pip install --user https://github.com/codecov/codecov-python/archive/master.zip
+
+bash scripts/install-boost.sh
+bash scripts/install-valgrind.sh
+
+# Install lcov
+# Download the archive
+wget http://downloads.sourceforge.net/ltp/lcov-1.12.tar.gz
+# Extract to ~/lcov-1.12
+tar xfvz lcov-1.12.tar.gz -C ${HOME}
+# Set install path
+mkdir -p ${LCOV_ROOT}
+cd ${HOME}/lcov-1.12 && make install PREFIX=${LCOV_ROOT}
+
+# Install coveralls reporter
+cd ${HERE}
+mkdir -p node_modules
+npm install coveralls
diff --git a/scripts/install-valgrind.sh b/scripts/install-valgrind.sh
new file mode 100755
index 0000000000..943eb86730
--- /dev/null
+++ b/scripts/install-valgrind.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Assumptions:
+# 1) VALGRIND_ROOT is already defined, and contains a valid value
+set -eu
+if [ ! -d "$VALGRIND_ROOT/bin" ]
+then
+    # These are specified in the addons/apt section of .travis.yml
+    # sudo apt-get install subversion automake autotools-dev libc6-dbg
+    export PATH=$PATH:$VALGRIND_ROOT/bin
+    svn co svn://svn.valgrind.org/valgrind/trunk valgrind-co
+    cd valgrind-co
+    ./autogen.sh
+    ./configure --prefix=$VALGRIND_ROOT
+    make
+    make install
+    # test it
+    valgrind ls -l
+else
+    echo "Using cached valgrind at $VALGRIND_ROOT"
+fi
diff --git a/scripts/run-with-debugger.sh b/scripts/run-with-debugger.sh
new file mode 100755
index 0000000000..6a8b55d85d
--- /dev/null
+++ b/scripts/run-with-debugger.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+set -eu
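+
+# Usage: run-with-debugger.sh <executable> [args...]
+# Runs the program under lldb (Darwin) or gdb (elsewhere) and prints a
+# backtrace if it crashes. Example (the path shown is illustrative):
+#   scripts/run-with-debugger.sh bin/debug/test-all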
+
+if [[ $(uname) == "Darwin" ]]; then
+    # -o runs after loading the binary
+    # -k runs after any crash
+    # We use a rough approximation of --return-child-result, exiting with
+    # 1 on a crash
+    lldb --batch \
+        -o 'run' \
+        -k 'thread backtrace all' \
+        -k 'script import os; os._exit(1)' \
+        $@
+else
+    gdb --silent \
+        --batch \
+        --return-child-result \
+        -ex="set print thread-events off" \
+        -ex=run \
+        -ex="thread apply all bt full" \
+        --args $@
+fi
diff --git a/scripts/run-with-gdb.sh b/scripts/run-with-gdb.sh
new file mode 100755
index 0000000000..f7ff0bc726
--- /dev/null
+++ b/scripts/run-with-gdb.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -u
+set -e
+gdb --silent \
+    --batch \
+    --return-child-result \
+    -ex="set print thread-events off" \
+    -ex=run \
+    -ex="thread apply all bt full" \
+    --args $@
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000000..628c618f28
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Part of nudb
+
+GroupSources(test "/")
+GroupSources(include/nudb nudb)
+GroupSources(extras/nudb extras)
+GroupSources(extras/beast/include/beast beast)
+GroupSources(extras/beast/extras/beast beast)
+
+add_executable(test-all
+    ${EXTRAS_INCLUDES}
+    ${NUDB_INCLUDES}
+    ${BEAST_INCLUDES}
+    ../extras/beast/extras/beast/unit_test/main.cpp
+    basic_store.cpp
+    buffer.cpp
+    callgrind_test.cpp
+    concepts.cpp
+    create.cpp
+    error.cpp
+    file.cpp
+    native_file.cpp
+    posix_file.cpp
+    recover.cpp
+    rekey.cpp
+    store.cpp
+    type_traits.cpp
+    verify.cpp
+    version.cpp
+    visit.cpp
+    win32_file.cpp
+    xxhasher.cpp
+)
+
+if (WIN32)
+    target_link_libraries(test-all ${Boost_LIBRARIES})
+else ()
+    target_link_libraries(test-all ${Boost_LIBRARIES} rt Threads::Threads)
+endif ()
diff --git a/test/Jamfile b/test/Jamfile
new file mode 100644
index 0000000000..41bfa97365
--- /dev/null
+++ b/test/Jamfile
@@ -0,0 +1,30 @@
+#
+# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+#
+# Distributed under the Boost Software License, Version 1.0. (See accompanying
+# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#
+
+import os ;
+
+unit-test test-all :
+    ../extras/beast/extras/beast/unit_test/main.cpp
+    basic_store.cpp
+    buffer.cpp
+    callgrind_test.cpp
+    concepts.cpp
+    create.cpp
+    error.cpp
+    file.cpp
+    native_file.cpp
+    posix_file.cpp
+    recover.cpp
+    rekey.cpp
+    store.cpp
+    type_traits.cpp
+    verify.cpp
+    version.cpp
+    visit.cpp
+    win32_file.cpp
+    xxhasher.cpp
+    ;
diff --git a/test/basic_store.cpp b/test/basic_store.cpp
new file mode 100644
index 0000000000..38892ee373
--- /dev/null
+++ b/test/basic_store.cpp
@@ -0,0 +1,250 @@
+//
+// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
+//
+// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +namespace detail { + +static_assert(!std::is_copy_constructible {}, ""); +static_assert(!std::is_copy_assignable {}, ""); +static_assert( std::is_move_constructible {}, ""); +static_assert(!std::is_move_assignable {}, ""); + +static_assert(!std::is_copy_constructible {}, ""); +static_assert(!std::is_copy_assignable {}, ""); +static_assert( std::is_move_constructible {}, ""); +static_assert(!std::is_move_assignable {}, ""); + +static_assert(!std::is_copy_constructible {}, ""); +static_assert(!std::is_copy_assignable {}, ""); +static_assert( std::is_move_constructible {}, ""); +static_assert(!std::is_move_assignable {}, ""); + +} // detail + +namespace test { + +class basic_store_test : public beast::unit_test::suite +{ +public: + void + test_members() + { + std::size_t const keySize = 4; + std::size_t const blockSize = 4096; + float loadFactor = 0.5f; + + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + + // Files not found + ts.open(ec); + if(! BEAST_EXPECTS(ec == + errc::no_such_file_or_directory, ec.message())) + return; + ec = {}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + BEAST_EXPECT(ts.db.dat_path() == ts.dp); + BEAST_EXPECT(ts.db.key_path() == ts.kp); + BEAST_EXPECT(ts.db.log_path() == ts.lp); + BEAST_EXPECT(ts.db.appnum() == ts.appnum); + BEAST_EXPECT(ts.db.key_size() == ts.keySize); + BEAST_EXPECT(ts.db.block_size() == ts.blockSize); + } + + // Inserts a bunch of values then fetches them + void + do_insert_fetch( + std::size_t N, + std::size_t keySize, + std::size_t blockSize, + float loadFactor, + bool sleep) + { + testcase << + "N=" << N << ", " + "keySize=" << keySize << ", " + "blockSize=" << blockSize; + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Insert + for(std::size_t n = 0; n < N; ++n) + { + auto const item = ts[n]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + // Fetch + for(std::size_t n = 0; n < N; ++n) + { + auto const item = ts[n]; + ts.db.fetch(item.key, + [&](void const* data, std::size_t size) + { + if(! BEAST_EXPECT(size == item.size)) + return; + BEAST_EXPECT( + std::memcmp(data, item.data, size) == 0); + }, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + // Insert Duplicate + for(std::size_t n = 0; n < N; ++n) + { + auto const item = ts[n]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS( + ec == error::key_exists, ec.message())) + return; + ec = {}; + } + // Insert and Fetch + if(keySize > 1) + { + for(std::size_t n = 0; n < N; ++n) + { + auto item = ts[n]; + ts.db.fetch(item.key, + [&](void const* data, std::size_t size) + { + if(! BEAST_EXPECT(size == item.size)) + return; + BEAST_EXPECT( + std::memcmp(data, item.data, size) == 0); + }, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + item = ts[N + n]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.db.fetch(item.key, + [&](void const* data, std::size_t size) + { + if(! 
BEAST_EXPECT(size == item.size)) + return; + BEAST_EXPECT( + std::memcmp(data, item.data, size) == 0); + }, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + } + if(sleep) + { + // Make sure we run periodic activity + std::this_thread::sleep_for( + std::chrono::milliseconds{3000}); + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + + // Perform insert/fetch test across a range of parameters + void + test_insert_fetch() + { + for(auto const keySize : { + 1, 2, 3, 31, 32, 33, 63, 64, 65, 95, 96, 97 }) + { + std::size_t N; + std::size_t constexpr blockSize = 4096; + float loadFactor = 0.95f; + switch(keySize) + { + case 1: N = 10; break; + case 2: N = 100; break; + case 3: N = 250; break; + default: + N = 5000; + break; + }; + do_insert_fetch(N, keySize, blockSize, loadFactor, + keySize == 97); + } + } + + void + test_bulk_insert(std::size_t N, std::size_t keySize, + std::size_t blockSize, float loadFactor) + { + testcase << + "bulk_insert N=" << N << ", " + "keySize=" << keySize << ", " + "blockSize=" << blockSize; + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Insert + for(std::size_t n = 0; n < N; ++n) + { + auto const item = ts[n]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + verify_info info; + verify(info, ts.dp, ts.kp, + 64 * 1024 * 1024 , no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + log << info; + } + + void + run() override + { +#if 1 + test_members(); + test_insert_fetch(); +#else + // bulk-insert performance test + test_bulk_insert(10000000, 8, 4096, 0.5f); +#endif + } +}; + +BEAST_DEFINE_TESTSUITE(basic_store, test, nudb); + +} // test +} // nudb + diff --git a/test/buffer.cpp b/test/buffer.cpp new file mode 100644 index 0000000000..e86a5688e8 --- /dev/null +++ b/test/buffer.cpp @@ -0,0 +1,77 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include + +namespace nudb { +namespace test { + +class buffer_test : public beast::unit_test::suite +{ +public: + void + run() + { + using buffer = nudb::detail::buffer; + static_assert(std::is_default_constructible::value, ""); +#if 0 + static_assert(std::is_copy_constructible::value, ""); + static_assert(std::is_copy_assignable::value, ""); +#else + static_assert(! std::is_copy_constructible::value, ""); + static_assert(! 
std::is_copy_assignable::value, ""); +#endif + static_assert(std::is_move_constructible::value, ""); + static_assert(std::is_move_assignable::value, ""); + + { + buffer b; + } + { + buffer b1(1024); + BEAST_EXPECT(b1.size() == 1024); + buffer b2(std::move(b1)); + BEAST_EXPECT(b1.size() == 0); + BEAST_EXPECT(b2.size() == 1024); + } + { + buffer b1(1024); + BEAST_EXPECT(b1.size() == 1024); + buffer b2; + b2 = std::move(b1); + BEAST_EXPECT(b1.size() == 0); + BEAST_EXPECT(b2.size() == 1024); + } + +#if 0 + { + buffer b1(1024); + BEAST_EXPECT(b1.size() == 1024); + buffer b2(b1); + BEAST_EXPECT(b1.size() == 1024); + BEAST_EXPECT(b2.size() == 1024); + } + { + buffer b1(1024); + BEAST_EXPECT(b1.size() == 1024); + buffer b2; + b2 = b1; + BEAST_EXPECT(b1.size() == 1024); + BEAST_EXPECT(b2.size() == 1024); + } +#endif + } +}; + +BEAST_DEFINE_TESTSUITE(buffer, test, nudb); + +} // test +} // nudb diff --git a/test/callgrind_test.cpp b/test/callgrind_test.cpp new file mode 100644 index 0000000000..c10644ea98 --- /dev/null +++ b/test/callgrind_test.cpp @@ -0,0 +1,92 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace test { + +// This test is designed for callgrind runs to find hotspots + +class callgrind_test : public beast::unit_test::suite +{ +public: + // Creates and opens a database, performs a bunch + // of inserts, then alternates fetching all the keys + // with keys not present. + // + void + testCallgrind(std::size_t N) + { + using key_type = std::uint64_t; + std::size_t const blockSize = 4096; + float const loadFactor = 0.5; + + error_code ec; + test_store ts{sizeof(key_type), blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + for(std::size_t i = 0; i < N; ++i) + { + auto const item = ts[i]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + Buffer b; + for(std::size_t i = 0; i < N * 2; ++i) + { + if(! (i%2)) + { + auto const item = ts[i/2]; + ts.db.fetch(item.key, b, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! BEAST_EXPECT(b.size() == item.size)) + return; + if(! BEAST_EXPECT(std::memcmp(b.data(), + item.data, item.size) == 0)) + return; + } + else + { + auto const item = ts[N + i/2]; + ts.db.fetch(item.key, b, ec); + if(! BEAST_EXPECTS(ec == + error::key_not_found, ec.message())) + return; + ec = {}; + } + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + + void run() + { + // higher numbers, more pain + std::size_t constexpr N = 100000; + + testCallgrind(N); + } +}; + +BEAST_DEFINE_TESTSUITE(callgrind, test, nudb); + +} // test +} // nudb diff --git a/test/concepts.cpp b/test/concepts.cpp new file mode 100644 index 0000000000..34dbd74ab2 --- /dev/null +++ b/test/concepts.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/create.cpp b/test/create.cpp new file mode 100644 index 0000000000..2762908c9b --- /dev/null +++ b/test/create.cpp @@ -0,0 +1,49 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include + +namespace nudb { +namespace test { + +class create_test : public beast::unit_test::suite +{ +public: + void + test_create() + { + std::size_t const keySize = 8; + std::size_t const blockSize = 256; + float const loadFactor = 0.5f; + + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.create(ec); + if(! BEAST_EXPECTS( + ec == errc::file_exists, ec.message())) + return; + } + + void + run() override + { + test_create(); + } +}; + +BEAST_DEFINE_TESTSUITE(create, test, nudb); + +} // test +} // nudb diff --git a/test/error.cpp b/test/error.cpp new file mode 100644 index 0000000000..ba73bc96fa --- /dev/null +++ b/test/error.cpp @@ -0,0 +1,83 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include + +namespace nudb { +namespace test { + +class error_test : public beast::unit_test::suite +{ +public: + void check(char const* name, error ev) + { + auto const ec = make_error_code(ev); + BEAST_EXPECT(std::string{ec.category().name()} == name); + BEAST_EXPECT(! 
ec.message().empty()); + BEAST_EXPECT(std::addressof(ec.category()) == + std::addressof(nudb_category())); + BEAST_EXPECT(nudb_category().equivalent(static_cast(ev), + ec.category().default_error_condition(static_cast(ev)))); + BEAST_EXPECT(nudb_category().equivalent( + ec, static_cast(ev))); + } + + void run() override + { + nudb_category().message(0); + nudb_category().message(99999); + check("nudb", error::success); + check("nudb", error::key_not_found); + check("nudb", error::key_exists); + check("nudb", error::short_read); + check("nudb", error::log_file_exists); + check("nudb", error::no_key_file); + check("nudb", error::too_many_buckets); + check("nudb", error::not_data_file); + check("nudb", error::not_key_file); + check("nudb", error::not_log_file); + check("nudb", error::different_version); + check("nudb", error::invalid_key_size); + check("nudb", error::invalid_block_size); + check("nudb", error::short_key_file); + check("nudb", error::short_bucket); + check("nudb", error::short_spill); + check("nudb", error::short_data_record); + check("nudb", error::short_value); + check("nudb", error::hash_mismatch); + check("nudb", error::invalid_load_factor); + check("nudb", error::invalid_capacity); + check("nudb", error::invalid_bucket_count); + check("nudb", error::invalid_bucket_size); + check("nudb", error::incomplete_data_file_header); + check("nudb", error::incomplete_key_file_header); + check("nudb", error::invalid_log_record); + check("nudb", error::invalid_log_spill); + check("nudb", error::invalid_log_offset); + check("nudb", error::invalid_log_index); + check("nudb", error::invalid_spill_size); + check("nudb", error::uid_mismatch); + check("nudb", error::appnum_mismatch); + check("nudb", error::key_size_mismatch); + check("nudb", error::salt_mismatch); + check("nudb", error::pepper_mismatch); + check("nudb", error::block_size_mismatch); + check("nudb", error::orphaned_value); + check("nudb", error::missing_value); + check("nudb", error::size_mismatch); + check("nudb", error::duplicate_value); + } +}; + +BEAST_DEFINE_TESTSUITE(error, test, nudb); + +} // test +} // nudb + diff --git a/test/file.cpp b/test/file.cpp new file mode 100644 index 0000000000..2a5f075771 --- /dev/null +++ b/test/file.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/native_file.cpp b/test/native_file.cpp new file mode 100644 index 0000000000..726d2bbbe6 --- /dev/null +++ b/test/native_file.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/posix_file.cpp b/test/posix_file.cpp new file mode 100644 index 0000000000..eff0d831a5 --- /dev/null +++ b/test/posix_file.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/recover.cpp b/test/recover.cpp new file mode 100644 index 0000000000..fbd481959b --- /dev/null +++ b/test/recover.cpp @@ -0,0 +1,191 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { +namespace test { + +class basic_recover_test : public beast::unit_test::suite +{ +public: + using key_type = std::uint32_t; + + void + test_ok() + { + std::size_t const keySize = 8; + std::size_t const blockSize = 256; + float const loadFactor = 0.5f; + + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + recover(ts.dp, ts.kp, ts.lp, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + + // Creates and opens a database, performs a bunch + // of inserts, then fetches all of them to make sure + // they are there. Uses a fail_file that causes the n-th + // I/O to fail, causing an exception. + void + do_work( + test_store& ts, + std::size_t N, + fail_counter& c, + error_code& ec) + { + ts.create(ec); + if(ec) + return; + basic_store> db; + db.open(ts.dp, ts.kp, ts.lp, ec, c); + if(ec) + return; + if(! BEAST_EXPECT(db.appnum() == ts.appnum)) + return; + // Insert + for(std::size_t i = 0; i < N; ++i) + { + auto const item = ts[i]; + db.insert(item.key, item.data, item.size, ec); + if(ec == test_error::failure) + return; + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + // Fetch + Buffer b; + for(std::size_t i = 0; i < N; ++i) + { + auto const item = ts[i]; + db.fetch(item.key, b, ec); + if(ec == test_error::failure) + return; + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! BEAST_EXPECT(b.size() == item.size)) + return; + if(! BEAST_EXPECT(std::memcmp(b.data(), + item.data, item.size) == 0)) + return; + } + db.close(ec); + if(ec == test_error::failure) + return; + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Verify + verify_info info; + verify(info, ts.dp, ts.kp, + 0, no_progress{}, ec); + if(ec) + { + log << info; + return; + } + } + + void + do_recover( + test_store& ts, fail_counter& c, error_code& ec) + { + recover>( + ts.dp, ts.kp, ts.lp, ec, c); + if(ec) + return; + // Verify + verify_info info; + verify(info, ts.dp, ts.kp, + 0, no_progress{}, ec); + if(ec) + return; + ts.erase(); + } + + void + test_recover(std::size_t blockSize, + float loadFactor, std::size_t N) + { + testcase(std::to_string(N) + " inserts", + beast::unit_test::abort_on_fail); + test_store ts{sizeof(key_type), blockSize, loadFactor}; + for(std::size_t n = 1;; ++n) + { + { + error_code ec; + fail_counter c{n}; + do_work(ts, N, c, ec); + if(! ec) + { + ts.close(ec); + ts.erase(); + break; + } + if(! BEAST_EXPECTS(ec == + test::test_error::failure, ec.message())) + return; + } + for(std::size_t m = 1;; ++m) + { + error_code ec; + fail_counter c{m}; + do_recover(ts, c, ec); + if(! ec) + break; + if(! 
BEAST_EXPECTS(ec == + test::test_error::failure, ec.message())) + return; + } + } + } +}; + +class recover_test : public basic_recover_test +{ +public: + void + run() override + { + test_ok(); + test_recover(128, 0.55f, 0); + test_recover(128, 0.55f, 10); + test_recover(128, 0.55f, 100); + } +}; + +class recover_big_test : public basic_recover_test +{ +public: + void + run() override + { + test_recover(256, 0.55f, 1000); + test_recover(256, 0.90f, 10000); + } +}; + +BEAST_DEFINE_TESTSUITE(recover, test, nudb); +//BEAST_DEFINE_TESTSUITE_MANUAL(recover_big, test, nudb); + +} // test +} // nudb diff --git a/test/rekey.cpp b/test/rekey.cpp new file mode 100644 index 0000000000..8b333ec2a5 --- /dev/null +++ b/test/rekey.cpp @@ -0,0 +1,136 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include +#include +#include + +namespace nudb { +namespace test { + +// Simple test to check that rekey works, and +// also to exercise all its failure paths. +// +class rekey_test : public beast::unit_test::suite +{ +public: + void + do_recover( + std::size_t N, nsize_t blockSize, float loadFactor) + { + using key_type = std::uint32_t; + + auto const keys = static_cast( + loadFactor * detail::bucket_capacity(blockSize)); + std::size_t const bufferSize = + (blockSize * (1 + ((N + keys - 1) / keys))) + / 2; + error_code ec; + test_store ts{sizeof(key_type), blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Insert + for(std::size_t i = 0; i < N; ++i) + { + auto const item = ts[i]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Verify + verify_info info; + verify( + info, ts.dp, ts.kp, bufferSize, no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! BEAST_EXPECT(info.value_count == N)) + return; + if(! BEAST_EXPECT(info.spill_count > 0)) + return; + // Rekey + auto const kp2 = ts.kp + "2"; + for(std::size_t n = 1;; ++n) + { + fail_counter fc{n}; + rekey>( + ts.dp, kp2, ts.lp, blockSize, loadFactor, + N, bufferSize, ec, no_progress{}, fc); + if(! ec) + break; + if(! BEAST_EXPECTS(ec == + test::test_error::failure, ec.message())) + return; + ec = {}; + recover( + ts.dp, kp2, ts.lp, ec); + if(ec == error::no_key_file || + ec == errc::no_such_file_or_directory) + { + ec = {}; + continue; + } + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + native_file::erase(kp2, ec); + if(ec == errc::no_such_file_or_directory) + ec = {}; + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Verify + verify(info, ts.dp, ts.kp, + bufferSize, no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! BEAST_EXPECT(info.value_count == N)) + return; + } + // Verify + verify(info, ts.dp, ts.kp, + bufferSize, no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! BEAST_EXPECT(info.value_count == N)) + return; + verify(info, ts.dp, kp2, + bufferSize, no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + if(! 
BEAST_EXPECT(info.value_count == N)) + return; + } + + void + run() override + { + enum + { + N = 50000, + blockSize = 256 + }; + + float const loadFactor = 0.95f; + + do_recover(N, blockSize, loadFactor); + } +}; + +BEAST_DEFINE_TESTSUITE(rekey, test, nudb); + +} // test +} // nudb diff --git a/test/store.cpp b/test/store.cpp new file mode 100644 index 0000000000..e0016e0c2b --- /dev/null +++ b/test/store.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/type_traits.cpp b/test/type_traits.cpp new file mode 100644 index 0000000000..050ee72e1b --- /dev/null +++ b/test/type_traits.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/verify.cpp b/test/verify.cpp new file mode 100644 index 0000000000..e3a6f1c8cf --- /dev/null +++ b/test/verify.cpp @@ -0,0 +1,94 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include +#include + +namespace nudb { +namespace test { + +class verify_test : public beast::unit_test::suite +{ +public: + // File doesn't exist + void + test_missing() + { + error_code ec; + test_store ts{4, 4096, 0.5f}; + verify_info info; + verify(info, + ts.dp, ts.kp, 0, no_progress{}, ec); + BEAST_EXPECTS(ec == + errc::no_such_file_or_directory, ec.message()); + } + + void + test_verify( + std::size_t N, + std::size_t keySize, + std::size_t blockSize, + float loadFactor) + { + testcase << + "N=" << N << ", " + "keySize=" << keySize << ", " + "blockSize=" << blockSize; + error_code ec; + test_store ts{keySize, blockSize, loadFactor}; + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Insert + for(std::size_t n = 0; n < N; ++n) + { + auto const item = ts[n]; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + + // Verify + verify_info info; + verify(info, ts.dp, ts.kp, + 0, no_progress{}, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + BEAST_EXPECT(info.hist[1] > 0); + + // Verify fast + verify(info, ts.dp, ts.kp, + 10 * 1024 * 1024, no_progress{}, ec); + if(! BEAST_EXPECTS(! 
ec, ec.message())) + return; + BEAST_EXPECT(info.hist[1] > 0); + } + + void + run() override + { + float const loadFactor = 0.95f; + test_missing(); + test_verify(5000, 4, 256, loadFactor); + } +}; + +BEAST_DEFINE_TESTSUITE(verify, test, nudb); + +} // test +} // nudb diff --git a/test/version.cpp b/test/version.cpp new file mode 100644 index 0000000000..7604c89a2b --- /dev/null +++ b/test/version.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/visit.cpp b/test/visit.cpp new file mode 100644 index 0000000000..405710382b --- /dev/null +++ b/test/visit.cpp @@ -0,0 +1,114 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include + +#include +#include +#include +#include + +namespace nudb { +namespace test { + +class visit_test : public beast::unit_test::suite +{ +public: + void + do_visit( + std::size_t N, + std::size_t blockSize, + float loadFactor) + { + using key_type = std::uint32_t; + + error_code ec; + test_store ts{sizeof(key_type), blockSize, loadFactor}; + + // File not present + visit(ts.dp, + [&](void const* key, std::size_t keySize, + void const* data, std::size_t dataSize, + error_code& ec) + { + }, no_progress{}, ec); + if(! BEAST_EXPECTS(ec == + errc::no_such_file_or_directory, ec.message())) + return; + ec = {}; + + ts.create(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + ts.open(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + std::unordered_map map; + // Insert + for(std::size_t i = 0; i < N; ++i) + { + auto const item = ts[i]; + key_type const k = item.key[0] + + (static_cast(item.key[1]) << 8) + + (static_cast(item.key[2]) << 16) + + (static_cast(item.key[3]) << 24); + map[k] = i; + ts.db.insert(item.key, item.data, item.size, ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + } + ts.close(ec); + if(! BEAST_EXPECTS(! ec, ec.message())) + return; + // Visit + visit(ts.dp, + [&](void const* key, std::size_t keySize, + void const* data, std::size_t dataSize, + error_code& ec) + { + auto const fail = + [&ec] + { + ec = error_code{ + errc::invalid_argument, generic_category()}; + }; + if(! BEAST_EXPECT(keySize == sizeof(key_type))) + return fail(); + auto const p = + reinterpret_cast(key); + key_type const k = p[0] + + (static_cast(p[1]) << 8) + + (static_cast(p[2]) << 16) + + (static_cast(p[3]) << 24); + auto const it = map.find(k); + if(it == map.end()) + return fail(); + auto const item = ts[it->second]; + if(! BEAST_EXPECT(dataSize == item.size)) + return fail(); + auto const result = + std::memcmp(data, item.data, item.size); + if(result != 0) + return fail(); + }, no_progress{}, ec); + if(! BEAST_EXPECTS(! 
ec, ec.message())) + return; + } + + void + run() override + { + float const loadFactor = 0.95f; + do_visit(5000, 4096, loadFactor); + } +}; + +BEAST_DEFINE_TESTSUITE(visit, test, nudb); + +} // test +} // nudb diff --git a/test/win32_file.cpp b/test/win32_file.cpp new file mode 100644 index 0000000000..ca6d9337e4 --- /dev/null +++ b/test/win32_file.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/test/xxhasher.cpp b/test/xxhasher.cpp new file mode 100644 index 0000000000..6c4c9b9608 --- /dev/null +++ b/test/xxhasher.cpp @@ -0,0 +1,9 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Test that header file is self-contained +#include diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000000..8a82003ff6 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,17 @@ +# Part of nudb + +GroupSources (include/nudb nudb) +GroupSources (extras/nudb extras) +GroupSources (tools "/") + +add_executable (nudb + ${NUDB_INCLUDES} + ${EXTRAS_INCLUDES} + nudb.cpp +) + +if (WIN32) + target_link_libraries (nudb ${Boost_LIBRARIES}) +else () + target_link_libraries (nudb ${Boost_LIBRARIES} rt Threads::Threads) +endif () diff --git a/tools/Jamfile b/tools/Jamfile new file mode 100644 index 0000000000..384c33f34b --- /dev/null +++ b/tools/Jamfile @@ -0,0 +1,12 @@ +# +# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +# +# Distributed under the Boost Software License, Version 1.0. (See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +# + +import os ; + +exe nudb : + nudb.cpp + ; diff --git a/tools/nudb.cpp b/tools/nudb.cpp new file mode 100644 index 0000000000..5a06a03a3c --- /dev/null +++ b/tools/nudb.cpp @@ -0,0 +1,514 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include +#include +#include +#include +#include +#include + +namespace nudb { + +namespace detail { + +std::ostream& +operator<<(std::ostream& os, dat_file_header const h) +{ + os << + "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n" + "version: " << h.version << "\n" + "uid: " << fhex(h.uid) << "\n" + "appnum: " << fhex(h.appnum) << "\n" + "key_size: " << h.key_size << "\n" + ; + return os; +} + +std::ostream& +operator<<(std::ostream& os, key_file_header const h) +{ + os << + "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n" + "version: " << h.version << "\n" + "uid: " << fhex(h.uid) << "\n" + "appnum: " << fhex(h.appnum) << "\n" + "key_size: " << h.key_size << "\n" + "salt: " << fhex(h.salt) << "\n" + "pepper: " << fhex(h.pepper) << "\n" + "block_size: " << fdec(h.block_size) << "\n" + ; + return os; +} + +std::ostream& +operator<<(std::ostream& os, log_file_header const h) +{ + os << std::setfill('0') << std::internal << std::showbase << + "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n" + "version: " << h.version << "\n" + "uid: " << fhex(h.uid) << "\n" + "appnum: " << fhex(h.appnum) << "\n" + "key_size: " << h.key_size << "\n" + "salt: " << fhex(h.salt) << "\n" + "pepper: " << fhex(h.pepper) << "\n" + "block_size: " << fdec(h.block_size) << "\n" + "key_file_size: " << fdec(h.key_file_size) << "\n" + "dat_file_size: " << fdec(h.dat_file_size) << "\n" + ; + return os; +} + +} // detail + +std::ostream& +operator<<(std::ostream& os, verify_info const& info) +{ + os << + "dat_path " << info.dat_path << "\n" + "key_path " << info.key_path << "\n" + "algorithm " <<(info.algorithm ? "fast" : "normal") << "\n" + "avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" << + "waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" << + "overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" << + "actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" << + "version: " << fdec(info.version) << "\n" << + "uid: " << fhex(info.uid) << "\n" << + "appnum: " << fhex(info.appnum) << "\n" << + "key_size: " << fdec(info.key_size) << "\n" << + "salt: " << fhex(info.salt) << "\n" << + "pepper: " << fhex(info.pepper) << "\n" << + "block_size: " << fdec(info.block_size) << "\n" << + "bucket_size: " << fdec(info.bucket_size) << "\n" << + "load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" << + "capacity: " << fdec(info.capacity) << "\n" << + "buckets: " << fdec(info.buckets) << "\n" << + "key_count: " << fdec(info.key_count) << "\n" << + "value_count: " << fdec(info.value_count) << "\n" << + "value_bytes: " << fdec(info.value_bytes) << "\n" << + "spill_count: " << fdec(info.spill_count) << "\n" << + "spill_count_tot: " << fdec(info.spill_count_tot) << "\n" << + "spill_bytes: " << fdec(info.spill_bytes) << "\n" << + "spill_bytes_tot: " << fdec(info.spill_bytes_tot) << "\n" << + "key_file_size: " << fdec(info.key_file_size) << "\n" << + "dat_file_size: " << fdec(info.dat_file_size) << "\n" << + "hist: " << fhist(info.hist) << "\n" + ; + return os; +} + +template +class admin_tool +{ + int ac_ = 0; + char const* const* av_ = nullptr; + boost::program_options::options_description desc_; + +public: + admin_tool() + : desc_("Options") + { + namespace po = boost::program_options; + 
+        desc_.add_options()
+            ("buffer,b", po::value<std::size_t>(),
+                "Set the buffer size in bytes (larger is faster).")
+            ("dat,d", po::value<std::string>(),
+                "Path to data file.")
+            ("key,k", po::value<std::string>(),
+                "Path to key file.")
+            ("log,l", po::value<std::string>(),
+                "Path to log file.")
+            ("count,n", po::value<std::uint64_t>(),
+                "The number of items in the data file.")
+            ("command", "Command to run.")
+            ;
+    }
+
+    std::string
+    progname() const
+    {
+        using namespace boost::filesystem;
+        return path{av_[0]}.stem().string();
+    }
+
+    std::string
+    filename(std::string const& s)
+    {
+        using namespace boost::filesystem;
+        return path{s}.filename().string();
+    }
+
+    void
+    help()
+    {
+        std::cout <<
+            "usage: " << progname() << " <command> [file...] <options>\n";
+        std::cout <<
+            "\n"
+            "Commands:\n"
+            "\n"
+            " help\n"
+            "\n"
+            "    Print this help information.\n"
+            "\n"
+            " info <dat-path> [<key-path> [<log-path>]]\n"
+            "\n"
+            "    Show metadata and header information for database files.\n"
+            "\n"
+            " recover <dat-path> <key-path> <log-path>\n"
+            "\n"
+            "    Perform a database recovery. A recovery is necessary if a log\n"
+            "    file is present. Running commands on an unrecovered database\n"
+            "    may result in lost or corrupted data.\n"
+            "\n"
+            " rekey <dat-path> <key-path> <log-path> --count=<items> --buffer=<bytes>\n"
+            "\n"
+            "    Generate the key file for a data file. The buffer option is\n"
+            "    required; larger buffers process faster. A buffer equal to\n"
+            "    the size of the key file processes the fastest. This command\n"
+            "    must be passed the count of items in the data file, which\n"
+            "    can be calculated with the 'visit' command.\n"
+            "\n"
+            "    If the rekey is aborted before completion, the database must\n"
+            "    be subsequently restored by running the 'recover' command.\n"
+            "\n"
+            " verify <dat-path> <key-path> [--buffer=<bytes>]\n"
+            "\n"
+            "    Verify the integrity of a database. The buffer option is\n"
+            "    optional; if omitted, a slow algorithm is used. When a buffer\n"
+            "    size is provided, a fast algorithm is used, with larger\n"
+            "    buffers giving bigger speedups. A buffer equal to the size\n"
+            "    of the key file is fastest.\n"
+            "\n"
+            " visit <dat-path>\n"
+            "\n"
+            "    Iterate a data file and show information, including the count\n"
+            "    of items in the file and a histogram of their log base 2 size.\n"
+            "\n"
+            "Notes:\n"
+            "\n"
+            "    Paths may be full or relative, and should include the extension.\n"
+            "    The 'recover' command should be invoked before running any\n"
+            "    operation which can modify the database.\n"
+            "\n"
+            ;
+        desc_.print(std::cout);
+    }
+
+    int
+    error(std::string const& why)
+    {
+        std::cerr <<
+            progname() << ": " << why << ".\n"
+            "Use '" << progname() << " help' for usage.\n";
+        return EXIT_FAILURE;
+    }
+
+    int
+    operator()(int ac, char const* const* av)
+    {
+        namespace po = boost::program_options;
+
+        ac_ = ac;
+        av_ = av;
+
+        try
+        {
+            po::positional_options_description pod;
+            pod.add("command", 1);
+            pod.add("dat", 1);
+            pod.add("key", 1);
+            pod.add("log", 1);
+
+            po::variables_map vm;
+            po::store(po::command_line_parser(ac, av)
+                .options(desc_)
+                .positional(pod)
+                .run(), vm);
+            po::notify(vm);
+
+            std::string cmd;
+
+            if(vm.count("command"))
+                cmd = vm["command"].as<std::string>();
+
+            if(cmd == "help")
+            {
+                help();
+                return EXIT_SUCCESS;
+            }
+
+            if(cmd == "info")
+                return do_info(vm);
+
+            if(cmd == "recover")
+                return do_recover(vm);
+
+            if(cmd == "rekey")
+                return do_rekey(vm);
+
+            if(cmd == "verify")
+                return do_verify(vm);
+
+            if(cmd == "visit")
+                return do_visit(vm);
+
+            return error("Unknown command '" + cmd + "'");
+        }
+        catch(std::exception const& e)
+        {
+            return error(e.what());
+        }
+    }
+
+private:
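+    // Every database file begins with an 8 byte type field ("nudb.dat",
+    // "nudb.key", or "nudb.log"). do_info reads those bytes first and
+    // uses them to choose which header to decode and print.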
vm.count("dat") && ! vm.count("key") && ! vm.count("log")) + return error("No files specified"); + if(vm.count("dat")) + do_info(vm["dat"].as()); + if(vm.count("key")) + do_info(vm["key"].as()); + if(vm.count("log")) + do_info(vm["log"].as()); + return EXIT_SUCCESS; + } + + void + do_info(path_type const& path) + { + error_code ec; + auto const err = + [&] + { + std::cout << path << ": " << ec.message() << "\n"; + }; + native_file f; + f.open(file_mode::read, path, ec); + if(ec) + return err(); + auto const size = f.size(ec); + if(ec) + return err(); + if(size < 8) + { + std::cout << "File " << path << " is too small to be a database file.\n"; + return; + } + std::array ta; + f.read(0, ta.data(), ta.size(), ec); + if(ec) + return err(); + std::string ts{ta.data(), ta.size()}; + + if(ts == "nudb.dat") + { + detail::dat_file_header h; + detail::read(f, h, ec); + if(ec) + return err(); + f.close(); + std::cout << + "data file: " << path << "\n" + "file size: " << fdec(size) << "\n" << + h << "\n"; + return; + } + + if(ts == "nudb.key") + { + detail::key_file_header h; + detail::read(f, h, ec); + if(ec) + return err(); + f.close(); + std::cout << + "key file: " << path << "\n" + "file size: " << fdec(size) << "\n" << + h << "\n"; + return; + } + + if(ts == "nudb.log") + { + detail::log_file_header h; + detail::read(f, h, ec); + if(ec) + return err(); + f.close(); + std::cout << + "log file: " << path << "\n" + "file size: " << fdec(size) << "\n" << + h << "\n"; + return; + } + + std::cout << "File " << path << " has unknown type '" << ts << "'.\n"; + } + + int + do_recover(boost::program_options::variables_map const& vm) + { + if(! vm.count("dat") || ! vm.count("key") || ! vm.count("log")) + return error("Missing file specifications"); + error_code ec; + recover( + vm["dat"].as(), + vm["key"].as(), + vm["log"].as(), + ec); + if(ec) + { + std::cerr << "recover: " << ec.message() << "\n"; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; + } + + int + do_rekey(boost::program_options::variables_map const& vm) + { + if(! vm.count("dat")) + return error("Missing data file path"); + if(! vm.count("key")) + return error("Missing key file path"); + if(! vm.count("log")) + return error("Missing log file path"); + if(! vm.count("count")) + return error("Missing item count"); + if(! vm.count("buffer")) + return error("Missing buffer size"); + auto const dp = vm["dat"].as(); + auto const kp = vm["key"].as(); + auto const lp = vm["log"].as(); + auto const itemCount = vm["count"].as(); + auto const bufferSize = vm["buffer"].as(); + error_code ec; + progress p{std::cout}; + rekey(dp, kp, lp, + block_size(kp), 0.5f, itemCount, + bufferSize, ec, p); + if(ec) + { + std::cerr << "rekey: " << ec.message() << "\n"; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; + } + + int + do_verify(boost::program_options::variables_map const& vm) + { + if(! vm.count("dat")) + return error("Missing data file path"); + if(! vm.count("key")) + return error("Missing key file path"); + + auto const bufferSize = vm.count("buffer") ? + vm["buffer"].as() : 0; + auto const dp = vm["dat"].as(); + auto const kp = vm.count("key") ? + vm["key"].as() : std::string{}; + + if(! vm.count("key")) + { + // todo + std::cerr << "unimplemented: dat-only verify\n"; + return EXIT_FAILURE; + } + + error_code ec; + progress p(std::cout); + { + verify_info info; + verify(info, dp, kp, bufferSize, p, ec); + if(! 
+    int
+    do_verify(boost::program_options::variables_map const& vm)
+    {
+        if(! vm.count("dat"))
+            return error("Missing data file path");
+        if(! vm.count("key"))
+            return error("Missing key file path");
+
+        auto const bufferSize = vm.count("buffer") ?
+            vm["buffer"].as<std::size_t>() : 0;
+        auto const dp = vm["dat"].as<std::string>();
+        auto const kp = vm.count("key") ?
+            vm["key"].as<std::string>() : std::string{};
+
+        // Unreachable while the key path is required above; kept as a
+        // placeholder for dat-only verification.
+        if(! vm.count("key"))
+        {
+            // todo
+            std::cerr << "unimplemented: dat-only verify\n";
+            return EXIT_FAILURE;
+        }
+
+        error_code ec;
+        progress p(std::cout);
+        {
+            verify_info info;
+            verify<xxhasher>(info, dp, kp, bufferSize, p, ec);
+            if(! ec)
+                std::cout << info;
+        }
+        if(ec)
+        {
+            std::cerr << "verify: " << ec.message() << "\n";
+            return EXIT_FAILURE;
+        }
+        return EXIT_SUCCESS;
+    }
+
+    int
+    do_visit(boost::program_options::variables_map const& vm)
+    {
+        if(! vm.count("dat"))
+            return error("Missing dat path");
+        auto const path = vm["dat"].as<std::string>();
+        error_code ec;
+        auto const err =
+            [&]
+            {
+                std::cout << path << ": " << ec.message() << "\n";
+                return EXIT_FAILURE;
+            };
+        {
+            native_file f;
+            f.open(file_mode::read, path, ec);
+            if(ec)
+                return err();
+            auto const fileSize = f.size(ec);
+            if(ec)
+                return err();
+            detail::dat_file_header h;
+            detail::read(f, h, ec);
+            if(ec)
+                return err();
+            f.close();
+            std::cout <<
+                "data file: " << path << "\n"
+                "file size: " << fdec(fileSize) << "\n" <<
+                h;
+            std::cout.flush();
+        }
+
+        std::uint64_t n = 0;
+        // One histogram bin per power of two of value size
+        std::array<std::uint64_t, 64> hist;
+        hist.fill(0);
+        progress p{std::cout};
+        visit(path,
+            [&](void const*, std::size_t,
+                void const*, std::size_t data_size,
+                error_code& ec)
+            {
+                ++n;
+                ++hist[log2(data_size)];
+            }, p, ec);
+        if(! ec)
+            std::cout <<
+                "value_count: " << fdec(n) << "\n" <<
+                "sizes: " << fhist(hist) << "\n";
+        if(ec)
+        {
+            std::cerr << "visit: " << ec.message() << "\n";
+            return EXIT_FAILURE;
+        }
+        return EXIT_SUCCESS;
+    }
+};
+
+} // nudb
+
+int
+main(int ac, char const* const* av)
+{
+    using namespace nudb;
+    admin_tool<> t;
+    auto const rv = t(ac, av);
+    std::cout.flush();
+    basic_seconds_clock_main_hook();
+    return rv;
+}
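+
+// Example session (paths and sizes are illustrative):
+//
+//   nudb visit db.dat
+//   nudb rekey db.dat db.key db.log --count=1000000 --buffer=67108864
+//   nudb verify db.dat db.key --buffer=67108864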