Merge commit '79159ffd87bf86e92ab5af6fffd5cc93c205a630' as 'src/nudb'

This commit is contained in:
Vinnie Falco
2016-09-29 19:24:13 -04:00
113 changed files with 15806 additions and 0 deletions

2
src/nudb/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
bin/
bin64/

9
src/nudb/.gitmodules vendored Normal file
View File

@@ -0,0 +1,9 @@
[submodule "extras/beast"]
path = extras/beast
url = https://github.com/vinniefalco/Beast.git
[submodule "extras/rocksdb"]
path = extras/rocksdb
url = https://github.com/facebook/rocksdb.git
[submodule "doc/docca"]
path = doc/docca
url = https://github.com/vinniefalco/docca.git

89
src/nudb/.travis.yml Normal file
View File

@@ -0,0 +1,89 @@
language: cpp
env:
global:
- LLVM_VERSION=3.8.0
# Maintenance note: to move to a new version
# of boost, update both BOOST_ROOT and BOOST_URL.
# Note that for simplicity, BOOST_ROOT's final
# namepart must match the folder name internal
# to boost's .tar.gz.
- LCOV_ROOT=$HOME/lcov
- VALGRIND_ROOT=$HOME/valgrind-install
- BOOST_ROOT=$HOME/boost_1_60_0
- BOOST_URL='http://downloads.sourceforge.net/project/boost/boost/1.60.0/boost_1_60_0.tar.gz?r=https%3A%2F%2Fsourceforge.net%2Fprojects%2Fboost%2Ffiles%2Fboost%2F1.60.0%2Fboost_1_60_0.tar.gz&ts=1460417589&use_mirror=netix'
packages: &gcc5_pkgs
- gcc-5
- g++-5
- python-software-properties
- libssl-dev
- libffi-dev
- libstdc++6
- binutils-gold
# Provides a backtrace if the unittests crash
- gdb
# Needed for installing valgrind
- subversion
- automake
- autotools-dev
- libc6-dbg
# Needed to build rocksdb
- libsnappy-dev
matrix:
include:
# GCC/Coverage/Autobahn
- compiler: gcc
env:
- GCC_VER=5
- VARIANT=coverage
- ADDRESS_MODEL=64
- BUILD_SYSTEM=cmake
- PATH=$PWD/cmake/bin:$PATH
addons: &ao_gcc5
apt:
sources: ['ubuntu-toolchain-r-test']
packages: *gcc5_pkgs
# Clang/UndefinedBehaviourSanitizer
- compiler: clang
env:
- GCC_VER=5
- VARIANT=usan
- CLANG_VER=3.8
- ADDRESS_MODEL=64
- UBSAN_OPTIONS='print_stacktrace=1'
- BUILD_SYSTEM=cmake
- PATH=$PWD/cmake/bin:$PATH
- PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH
addons: *ao_gcc5
# Clang/AddressSanitizer
- compiler: clang
env:
- GCC_VER=5
- VARIANT=asan
- CLANG_VER=3.8
- ADDRESS_MODEL=64
- PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH
addons: *ao_gcc5
cache:
directories:
- $BOOST_ROOT
- $VALGRIND_ROOT
- llvm-$LLVM_VERSION
- cmake
before_install:
- scripts/install-dependencies.sh
script:
- scripts/build-and-test.sh
after_script:
- cat nohup.out || echo "nohup.out already deleted"
notifications:
email:
false

58
src/nudb/CHANGELOG.md Normal file
View File

@@ -0,0 +1,58 @@
1.0.0-b6
* Fix incorrect file deletion in create()
---
1.0.0-b5
* fail_file also fails on reads
* Fix bug in rekey where an error code wasn't checked
* Increase coverage
* Add buffer unit test
* Add is_File concept and checks
* Update documentation
* Add example program
* Demote exceptions to asserts in gentex
* Improved commit process
* Dynamic block size in custom allocator
---
1.0.0-b4
* Improved test coverage
* Use master branch for codecov badge
* Throw on API calls when no database open
* Benchmarks vs. RocksDB
### API Changes:
* `insert` sets `error::key_exists` instead of returning `false`
* `fetch` sets `error::key_not_found` instead of returning `false`
---
1.0.0-b3
* Tune buffer sizes for performance
* Fix large POSIX and Win32 writes
* Adjust progress indicator for nudb tool
* Document link requirements
* Add visit test
* Improved coverage
---
1.0.0-b2
* Minor documentation and tidying
* Add CHANGELOG
---
1.0.0-b1
* Initial source tree

87
src/nudb/CMakeLists.txt Normal file
View File

@@ -0,0 +1,87 @@
# Top-level build configuration for NuDB (header-only library; this
# file only drives the tests, benchmarks, examples and tools).
cmake_minimum_required (VERSION 3.2)
project (nudb)
# Enable folder organization of targets in IDEs (Visual Studio / Xcode).
set_property (GLOBAL PROPERTY USE_FOLDERS ON)
if (WIN32)
# MSVC: parallel compilation, warning level 4 (minus C4100 unused-parameter),
# target Windows Vista (_WIN32_WINNT=0x0600), silence CRT/SCL warnings.
# NOTE(review): appending to CMAKE_CXX_FLAGS is directory-scoped legacy
# style; modern CMake would use target_compile_options per target.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP /W4 /wd4100 /D _WIN32_WINNT=0x0600 /D_SCL_SECURE_NO_WARNINGS=1 /D_CRT_SECURE_NO_WARNINGS=1")
# /SAFESEH:NO is required when linking objects not built with SafeSEH tables.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO")
else ()
# Non-Windows: link Boost statically, require the multithreaded variants.
set (Boost_USE_STATIC_LIBS ON)
set (Boost_USE_MULTITHREADED ON)
find_package (Boost REQUIRED COMPONENTS filesystem program_options system thread)
# SYSTEM suppresses warnings coming from Boost headers.
include_directories (SYSTEM ${Boost_INCLUDE_DIRS})
link_directories (${Boost_LIBRARY_DIR})
# Prefer -pthread over -lpthread where the compiler supports it.
set (THREADS_PREFER_PTHREAD_FLAG ON)
find_package (Threads)
set (CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wpedantic")
endif ()
# VARIANT is an externally supplied cache/environment selector used by the
# CI scripts (see .travis.yml): coverage, asan, usan, debug, or release.
if ("${VARIANT}" STREQUAL "coverage")
# gcov instrumentation; -lgcov is needed at link time for the counters.
set (CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
set (CMAKE_BUILD_TYPE RELWITHDEBINFO)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov")
elseif ("${VARIANT}" STREQUAL "asan")
# AddressSanitizer; frame pointers kept for usable stack traces.
set (CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
set (CMAKE_BUILD_TYPE RELWITHDEBINFO)
elseif ("${VARIANT}" STREQUAL "usan")
# UndefinedBehaviorSanitizer.
set (CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
set (CMAKE_BUILD_TYPE RELWITHDEBINFO)
elseif ("${VARIANT}" STREQUAL "debug")
set (CMAKE_BUILD_TYPE DEBUG)
elseif ("${VARIANT}" STREQUAL "release")
set (CMAKE_BUILD_TYPE RELEASE)
endif ()
# Recursively assigns IDE source groups (Visual Studio filters) to every
# file under ${PROJECT_SOURCE_DIR}/${curdir}.
#
#   curdir  - directory currently being walked, relative to the project root
#   rootdir - prefix stripped from curdir when forming the group name
#   folder  - IDE folder name substituted for rootdir
#
# CMakeLists.txt files are placed in the root (unnamed) group. All other
# files go into a group derived from their directory, with '/' converted
# to the '\' separator that source_group expects.
#
# Fix: quote the string()/source_group() arguments — unquoted, an empty
# or space/semicolon-containing value splits into multiple arguments and
# breaks the call; quoting the STREQUAL operand also prevents accidental
# double variable dereference.
function (DoGroupSources curdir rootdir folder)
    file (GLOB children RELATIVE ${PROJECT_SOURCE_DIR}/${curdir} ${PROJECT_SOURCE_DIR}/${curdir}/*)
    foreach (child ${children})
        if (IS_DIRECTORY ${PROJECT_SOURCE_DIR}/${curdir}/${child})
            # Descend into subdirectory.
            DoGroupSources (${curdir}/${child} ${rootdir} ${folder})
        elseif ("${child}" STREQUAL "CMakeLists.txt")
            # Build scripts live in the top-level (unnamed) group.
            source_group ("" FILES ${PROJECT_SOURCE_DIR}/${curdir}/${child})
        else ()
            # Map the on-disk path to an IDE folder path.
            string (REGEX REPLACE "^${rootdir}" "${folder}" groupname "${curdir}")
            string (REPLACE "/" "\\" groupname "${groupname}")
            source_group ("${groupname}" FILES ${PROJECT_SOURCE_DIR}/${curdir}/${child})
        endif ()
    endforeach ()
endfunction ()
# Convenience wrapper around DoGroupSources: walk `curdir`, stripping the
# `curdir` prefix itself and presenting the tree under IDE folder `folder`.
function (GroupSources curdir folder)
DoGroupSources (${curdir} ${curdir} ${folder})
endfunction ()
# Directory-scoped include paths inherited by all subdirectories below.
# NOTE(review): modern CMake would use target_include_directories instead.
include_directories (
include
extras
extras/beast/extras
)
# Header lists gathered for IDE display (NuDB is header-only, so these are
# not compiled directly). NOTE(review): file(GLOB_RECURSE) will not notice
# newly added headers until the next re-configure.
file (GLOB_RECURSE BEAST_INCLUDES
${PROJECT_SOURCE_DIR}/extras/beast/extras/beast/unit_test/*.hpp
${PROJECT_SOURCE_DIR}/extras/beast/extras/beast/unit_test/*.ipp
)
file (GLOB_RECURSE EXTRAS_INCLUDES
${PROJECT_SOURCE_DIR}/extras/nudb/*
)
file (GLOB_RECURSE NUDB_INCLUDES
${PROJECT_SOURCE_DIR}/include/nudb/*
)
# Buildable components: benchmarks, example programs, unit tests, CLI tools.
add_subdirectory (bench)
add_subdirectory (examples)
add_subdirectory (test)
add_subdirectory (tools)

93
src/nudb/Jamroot Normal file
View File

@@ -0,0 +1,93 @@
#
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
import os ;
import feature ;
import boost ;
boost.use-project ;
variant coverage
:
debug
:
<cxxflags>"-fprofile-arcs -ftest-coverage"
<linkflags>"-lgcov"
;
variant asan
:
release
:
<cxxflags>"-fsanitize=address -fno-omit-frame-pointer"
<linkflags>"-fsanitize=address"
;
variant msan
:
debug
:
<cxxflags>"-fsanitize=memory -fno-omit-frame-pointer -fsanitize-memory-track-origins=2 -fsanitize-memory-use-after-dtor"
<linkflags>"-fsanitize=memory"
;
variant usan
:
debug
:
<cxxflags>"-fsanitize=undefined -fno-omit-frame-pointer"
<linkflags>"-fsanitize=undefined"
;
project nudb
: requirements
<include>./extras
<include>./extras/beast/extras
<include>./include
#<use>/boost//headers
<library>/boost/system//boost_system
<library>/boost/thread//boost_thread
<library>/boost/filesystem//boost_filesystem
<library>/boost/program_options//boost_program_options
<define>BOOST_ALL_NO_LIB=1
<define>BOOST_SYSTEM_NO_DEPRECATED=1
<threading>multi
<link>static
<runtime-link>shared
<debug-symbols>on
<toolset>gcc:<cxxflags>-std=c++11
<toolset>gcc:<cxxflags>-Wno-unused-variable
<toolset>clang:<cxxflags>-std=c++11
<toolset>msvc:<define>_SCL_SECURE_NO_WARNINGS=1
<toolset>msvc:<define>_CRT_SECURE_NO_WARNINGS=1
<toolset>msvc:<cxxflags>-bigobj
<os>LINUX:<define>_XOPEN_SOURCE=600
<os>LINUX:<define>_GNU_SOURCE=1
<os>SOLARIS:<define>_XOPEN_SOURCE=500
<os>SOLARIS:<define>__EXTENSIONS__
<os>SOLARIS:<library>socket
<os>SOLARIS:<library>nsl
<os>NT:<define>_WIN32_WINNT=0x0601
<os>NT,<toolset>cw:<library>ws2_32
<os>NT,<toolset>cw:<library>mswsock
<os>NT,<toolset>gcc:<library>ws2_32
<os>NT,<toolset>gcc:<library>mswsock
<os>NT,<toolset>gcc-cygwin:<define>__USE_W32_SOCKETS
<os>HPUX,<toolset>gcc:<define>_XOPEN_SOURCE_EXTENDED
<os>HPUX:<library>ipv6
<os>QNXNTO:<library>socket
<os>HAIKU:<library>network
: usage-requirements
<include>.
:
build-dir bin
;
build-project bench ;
build-project examples ;
build-project test ;
build-project tools ;

23
src/nudb/LICENSE_1_0.txt Normal file
View File

@@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

466
src/nudb/README.md Normal file
View File

@@ -0,0 +1,466 @@
<img width="880" height = "80" alt = "NuDB"
src="https://raw.githubusercontent.com/vinniefalco/NuDB/master/doc/images/readme2.png">
[![Join the chat at https://gitter.im/vinniefalco/NuDB](https://badges.gitter.im/vinniefalco/NuDB.svg)](https://gitter.im/vinniefalco/NuDB?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status]
(https://travis-ci.org/vinniefalco/NuDB.svg?branch=master)](https://travis-ci.org/vinniefalco/NuDB) [![codecov]
(https://codecov.io/gh/vinniefalco/NuDB/branch/master/graph/badge.svg)](https://codecov.io/gh/vinniefalco/NuDB) [![coveralls]
(https://coveralls.io/repos/github/vinniefalco/NuDB/badge.svg?branch=master)](https://coveralls.io/github/vinniefalco/NuDB?branch=master) [![Documentation]
(https://img.shields.io/badge/documentation-master-brightgreen.svg)](http://vinniefalco.github.io/nudb/) [![License]
(https://img.shields.io/badge/license-boost-brightgreen.svg)](LICENSE_1_0.txt)
# A Key/Value Store For SSDs
---
## Contents
- [Introduction](#introduction)
- [Description](#description)
- [Requirements](#requirements)
- [Example](#example)
- [Building](#building)
- [Algorithm](#algorithm)
- [License](#license)
- [Contact](#contact)
---
## Introduction
NuDB is an append-only, key/value store specifically optimized for random
read performance on modern SSDs or equivalent high-IOPS devices. The most
common application for NuDB is content-addressable storage where a
cryptographic digest of the data is used as the key. The read performance
and memory usage are independent of the size of the database. These are
some other features:
* Low memory footprint
* Database size up to 281TB
* All keys are the same size
* Append-only, no update or delete
* Value sizes from 1 to 2^32 bytes (4GB)
* Performance independent of growth
* Optimized for concurrent fetch
* Key file can be rebuilt if needed
* Inserts are atomic and consistent
* Data file may be efficiently iterated
* Key and data files may be on different devices
* Hardened against algorithmic complexity attacks
* Header-only, no separate library to build
## Description
This software is close to final. Interfaces are stable.
For recent changes see the [CHANGELOG](CHANGELOG.md).
NuDB has been in use for over a year on production servers
running [rippled](https://github.com/ripple/rippled), with
database sizes over 3 terabytes.
* [Repository](https://github.com/vinniefalco/NuDB)
* [Documentation](http://vinniefalco.github.io/nudb/)
## Requirements
* Boost 1.58 or higher
* C++11 or greater
* SSD drive, or equivalent device with high IOPS
## Example
This complete program creates a database, opens the database,
inserts several key/value pairs, fetches the key/value pairs,
closes the database, then erases the database files. Source
code for this program is located in the examples directory.
```C++
#include <nudb/nudb.hpp>
#include <cstddef>
#include <cstdint>
int main()
{
using namespace nudb;
std::size_t constexpr N = 1000;
using key_type = std::uint32_t;
error_code ec;
auto const dat_path = "db.dat";
auto const key_path = "db.key";
auto const log_path = "db.log";
create<xxhasher>(
dat_path, key_path, log_path,
1,
make_salt(),
sizeof(key_type),
block_size("."),
0.5f,
ec);
store db;
db.open(dat_path, key_path, log_path, ec);
char data = 0;
// Insert
for(key_type i = 0; i < N; ++i)
db.insert(&i, &data, sizeof(data), ec);
// Fetch
for(key_type i = 0; i < N; ++i)
db.fetch(&i,
[&](void const* buffer, std::size_t size)
{
// do something with buffer, size
}, ec);
db.close(ec);
erase_file(dat_path);
erase_file(key_path);
erase_file(log_path);
}
```
## Building
NuDB is header-only so there are no libraries to build. To use it in your
project, simply copy the NuDB sources to your project's source tree
(alternatively, bring NuDB into your Git repository using the
`git subtree` or `git submodule` commands). Then, edit your build scripts
to add the `include/` directory to the list of paths checked by the C++
compiler when searching for includes. NuDB `#include` lines will look
like this:
```
#include <nudb/nudb.hpp>
```
To link your program successfully, you'll need to add the Boost.Thread and
Boost.System libraries to link with. Please visit the Boost documentation
for instructions on how to do this for your particular build system.
NuDB tests require Beast, and the benchmarks require RocksDB. These projects
are linked to the repository using git submodules. Before building the tests
or benchmarks, these commands should be issued at the root of the repository:
```
git submodule init
git submodule update
```
For the examples and tests, NuDB provides build scripts for Boost.Build (b2)
and CMake. To generate build scripts using CMake, execute these commands at
the root of the repository (project and solution files will be generated
for Visual Studio users):
```
cd bin
cmake .. # for 32-bit Windows build
cd ../bin64
cmake .. # for Linux/Mac builds, OR
cmake -G"Visual Studio 14 2015 Win64" .. # for 64-bit Windows builds
```
To build with Boost.Build, it is necessary to have the b2 executable
in your path. And b2 needs to know how to find the Boost sources. The
easiest way to do this is make sure that the version of b2 in your path
is the one at the root of the Boost source tree, which is built when
running `bootstrap.sh` (or `bootstrap.bat` on Windows).
Once b2 is in your path, simply run b2 in the root of the NuDB
repository to automatically build the required Boost libraries if they
are not already built, build the examples, then build and run the unit
tests.
On OSX it may be necessary to pass "toolset=clang" on the b2 command line.
Alternatively, this may be set in site-config.jam or user-config.jam.
The files in the repository are laid out thusly:
```
./
bench/ Holds the benchmark sources and scripts
bin/ Holds executables and project files
bin64/ Holds 64-bit Windows executables and project files
examples/ Holds example program source code
extras/ Additional APIs, may change
include/ Add this to your compiler includes
nudb/
test/ Unit tests and benchmarks
tools/ Holds the command line tool sources
```
## Algorithm
Three files are used.
* The data file holds keys and values stored sequentially and size-prefixed.
* The key file holds a series of fixed-size bucket records forming an on-disk
hash table.
* The log file stores bookkeeping information used to restore consistency when
an external failure occurs.
In typical cases a fetch costs one I/O cycle to consult the key file, and if the
key is present, one I/O cycle to read the value.
### Usage
Callers must define these parameters when _creating_ a database:
* `KeySize`: The size of a key in bytes.
* `BlockSize`: The physical size of a key file record.
The ideal block size matches the sector size or block size of the
underlying physical media that holds the key file. Functions are
provided to return a best estimate of this value for a particular
device, but a default of 4096 should work for typical installations.
The implementation tries to fit as many entries as possible in a key
file record, to maximize the amount of useful work performed per I/O.
* `LoadFactor`: The desired fraction of bucket occupancy
`LoadFactor` is chosen to make bucket overflows unlikely without
sacrificing bucket occupancy. A value of 0.50 seems to work well with
a good hash function.
Callers must also provide these parameters when a database is _opened:_
* `Appnum`: An application-defined integer constant which can be retrieved
later from the database [TODO].
* `AllocSize`: A significant multiple of the average data size.
Memory is recycled to improve performance, so NuDB needs `AllocSize` as a
hint about the average size of the data being inserted. For an average data size
of 1KB (one kilobyte), `AllocSize` of sixteen megabytes (16MB) is sufficient. If
the `AllocSize` is too low, the memory recycler will not make efficient use of
allocated blocks.
Two operations are defined: `fetch`, and `insert`.
#### `fetch`
The `fetch` operation retrieves a variable length value given the
key. The caller supplies a factory used to provide a buffer for storing
the value. This interface allows custom memory allocation strategies.
#### `insert`
`insert` adds a key/value pair to the store. Value data must contain at least
one byte. Duplicate keys are disallowed. Insertions are serialized, which means
[TODO].
### Implementation
All insertions are buffered in memory, with inserted values becoming
immediately discoverable in subsequent or concurrent calls to fetch.
Periodically, buffered data is safely committed to disk files using
a separate dedicated thread associated with the database. This commit
process takes place at least once per second, or more often during
a detected surge in insertion activity. In the commit process the
key/value pairs receive the following treatment:
An insertion is performed by appending a value record to the data file.
The value record has some header information including the size of the
data and a copy of the key; the data file is iteratable without the key
file. The value data follows the header. The data file is append-only
and immutable: once written, bytes are never changed.
Initially the hash table in the key file consists of a single bucket.
After the load factor is exceeded from insertions, the hash table grows
in size by one bucket by doing a "split". The split operation is the
[linear hashing algorithm](http://en.wikipedia.org/wiki/Linear_hashing)
as described by Litwin and Larson.
When a bucket is split, each key is rehashed, and either remains in the
original bucket or gets moved to a new bucket appended to the end of
the key file.
An insertion on a full bucket first triggers the "spill" algorithm.
First, a spill record is appended to the data file, containing header
information followed by the entire bucket record. Then the bucket's size is set
to zero and the offset of the spill record is stored in the bucket. At this
point the insertion may proceed normally, since the bucket is empty. Spilled
buckets in the data file are always full.
Because every bucket holds the offset of the next spill record in the
data file, the buckets form a linked list. In practice, careful
selection of capacity and load factor will keep the percentage of
buckets with one spill record to a minimum, with no bucket requiring
two spill records.
The implementation of fetch is straightforward: first the bucket in the
key file is checked, then each spill record in the linked list of
spill records is checked, until the key is found or there are no more
records. As almost all buckets have no spill records, the average
fetch requires one I/O (not including reading the value).
One complication in the scheme is when a split occurs on a bucket that
has one or more spill records. In this case, both the bucket being split
and the new bucket may overflow. This is handled by performing the
spill algorithm for each overflow that occurs. The new buckets may have
one or more spill records each, depending on the number of keys that
were originally present.
Because the data file is immutable, a bucket's original spill records
are no longer referenced after the bucket is split. These blocks of data
in the data file are unrecoverable wasted space. Correctly configured
databases can have a typical waste factor of 1%, which is acceptable.
These unused bytes can be removed by visiting each value in the value
file using an off-line process and inserting it into a new database,
then delete the old database and use the new one instead.
### Recovery
To provide atomicity and consistency, a log file associated with the
database stores information used to roll back partial commits.
### Iteration
Each record in the data file is prefixed with a header identifying
whether it is a value record or a spill record, along with the size of
the record in bytes and a copy of the key if it's a value record, so values can
be iterated by incrementing a byte counter. A key file can be regenerated from
just the data file by iterating the values and performing the key
insertion algorithm.
### Concurrency
Locks are never held during disk reads and writes. Fetches are fully
concurrent, while inserts are serialized. Inserts fail on duplicate
keys, and are atomic: they either succeed immediately or fail.
After an insert, the key is immediately visible to subsequent fetches.
### Formats
All integer values are stored as big endian. The uint48_t format
consists of 6 bytes.
#### Key File
The Key File contains the Header followed by one or more
fixed-length Bucket Records.
#### Header (104 bytes)
char[8] Type The characters "nudb.key"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint16 KeySize Key size in bytes
uint64 Salt A random seed
uint64 Pepper The salt hashed
uint16 BlockSize Size of a file block in bytes
uint16 LoadFactor Target fraction in 65536ths
uint8[56] Reserved Zeroes
uint8[] Reserved Zero-pad to block size
`Type` identifies the file as belonging to nudb. `UID` is
generated randomly when the database is created, and this value
is stored in the data and log files as well - it's used
to determine if files belong to the same database. `Salt` is
generated when the database is created and helps prevent
complexity attacks; it is prepended to the key material
when computing a hash, or used to initialize the state of
the hash function. `Appnum` is an application defined constant
set when the database is created. It can be used for anything,
for example to distinguish between different data formats.
`Pepper` is computed by hashing `Salt` using a hash function
seeded with the salt. This is used to fingerprint the hash
function used. If a database is opened and the fingerprint
does not match the hash calculation performed using the template
argument provided when constructing the store, an exception
is thrown.
The header for the key file contains the File Header followed by
the information above. The Capacity is the number of keys per
bucket, and defines the size of a bucket record. The load factor
is the target fraction of bucket occupancy.
None of the information in the key file header or the data file
header may be changed after the database is created, including
the Appnum.
#### Bucket Record (fixed-length)
uint16 Count Number of keys in this bucket
uint48 Spill Offset of the next spill record or 0
BucketEntry[] Entries The bucket entries
#### Bucket Entry
uint48 Offset Offset in data file of the data
uint48 Size The size of the value in bytes
uint48 Hash The hash of the key
### Data File
The Data File contains the Header followed by zero or more
variable-length Value Records and Spill Records.
#### Header (92 bytes)
char[8] Type The characters "nudb.dat"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint16 KeySize Key size in bytes
uint8[64] (reserved) Zeroes
UID contains the same value as the salt in the corresponding key
file. This is placed in the data file so that key and value files
belonging to the same database can be identified.
#### Data Record (variable-length)
uint48 Size Size of the value in bytes
uint8[KeySize] Key The key.
uint8[Size] Data The value data.
#### Spill Record (fixed-length)
uint48 Zero All zero, identifies a spill record
uint16 Size Bytes in spill bucket (for skipping)
Bucket SpillBucket Bucket Record
#### Log File
The Log file contains the Header followed by zero or more fixed size
log records. Each log record contains a snapshot of a bucket. When a
database is not closed cleanly, the recovery process applies the log
records to the key file, overwriting data that may be only partially
updated with known good information. After the log records are applied,
the data and key files are truncated to the last known good size.
#### Header (62 bytes)
char[8] Type The characters "nudb.log"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint16 KeySize Key size in bytes
uint64 Salt A random seed.
uint64 Pepper The salt hashed
uint16 BlockSize Size of a file block in bytes
uint64 KeyFileSize Size of key file.
uint64 DataFileSize Size of data file.
#### Log Record
uint64_t Index Bucket index (0-based)
Bucket Bucket Compact Bucket record
Compact buckets include only Size entries. These are primarily
used to minimize the volume of writes to the log file.
## License
Distributed under the Boost Software License, Version 1.0.
(See accompanying file [LICENSE_1_0.txt](LICENSE_1_0.txt) or copy at
http://www.boost.org/LICENSE_1_0.txt)
## Contact
Please report issues or questions here:
https://github.com/vinniefalco/NuDB/issues

2
src/nudb/TODO.txt Normal file
View File

@@ -0,0 +1,2 @@
* Support 32/64-bit
-- xxhasher specialization for 4/8 byte size-t

View File

@@ -0,0 +1,363 @@
cmake_minimum_required (VERSION 3.2)
GroupSources(bench /)
GroupSources(include/nudb nudb)
GroupSources(extras/nudb extras)
GroupSources(extras/beast/include/beast beast)
GroupSources(extras/beast/extras/beast beast)
GroupSources(extras/rocksdb rocksdb)
if (WIN32)
set(CMAKE_CONFIGURATION_TYPES Release)
endif ()
project (bench)
############################################################
# Appends each extra argument, space-separated, onto the string variable
# named by `name`. Deliberately a macro (not a function) so the set()
# takes effect in the caller's scope, e.g.:
#   append_flags(CMAKE_CXX_FLAGS -std=c++11)
macro(append_flags name)
foreach (arg ${ARGN})
set(${name} "${${name}} ${arg}")
endforeach()
endmacro()
############################################################
set (DEPS "${PROJECT_SOURCE_DIR}/../extras")
set (DEFAULT_WITH_ROCKSDB true)
set (WITH_ROCKSDB ${DEFAULT_WITH_ROCKSDB} CACHE BOOL "Runs benchmarks against rocksdb")
if (WITH_ROCKSDB)
set(ROCKSDB ${DEPS}/rocksdb)
set(ROCKSDB_SRC
${ROCKSDB}/db/auto_roll_logger.cc
${ROCKSDB}/db/builder.cc
${ROCKSDB}/db/c.cc
${ROCKSDB}/db/column_family.cc
${ROCKSDB}/db/compacted_db_impl.cc
${ROCKSDB}/db/compaction.cc
${ROCKSDB}/db/compaction_iterator.cc
${ROCKSDB}/db/compaction_job.cc
${ROCKSDB}/db/compaction_picker.cc
${ROCKSDB}/db/convenience.cc
${ROCKSDB}/db/db_filesnapshot.cc
${ROCKSDB}/db/dbformat.cc
${ROCKSDB}/db/db_impl.cc
${ROCKSDB}/db/db_impl_debug.cc
${ROCKSDB}/db/db_impl_readonly.cc
${ROCKSDB}/db/db_impl_experimental.cc
${ROCKSDB}/db/db_impl_add_file.cc
${ROCKSDB}/db/db_info_dumper.cc
${ROCKSDB}/db/db_iter.cc
${ROCKSDB}/db/experimental.cc
${ROCKSDB}/db/event_helpers.cc
${ROCKSDB}/db/file_indexer.cc
${ROCKSDB}/db/filename.cc
${ROCKSDB}/db/flush_job.cc
${ROCKSDB}/db/flush_scheduler.cc
${ROCKSDB}/db/forward_iterator.cc
${ROCKSDB}/db/internal_stats.cc
${ROCKSDB}/db/log_reader.cc
${ROCKSDB}/db/log_writer.cc
${ROCKSDB}/db/managed_iterator.cc
${ROCKSDB}/db/memtable_allocator.cc
${ROCKSDB}/db/memtable.cc
${ROCKSDB}/db/memtable_list.cc
${ROCKSDB}/db/merge_helper.cc
${ROCKSDB}/db/merge_operator.cc
${ROCKSDB}/db/repair.cc
${ROCKSDB}/db/snapshot_impl.cc
${ROCKSDB}/db/table_cache.cc
${ROCKSDB}/db/table_properties_collector.cc
${ROCKSDB}/db/transaction_log_impl.cc
${ROCKSDB}/db/version_builder.cc
${ROCKSDB}/db/version_edit.cc
${ROCKSDB}/db/version_set.cc
${ROCKSDB}/db/wal_manager.cc
${ROCKSDB}/db/write_batch.cc
${ROCKSDB}/db/write_batch_base.cc
${ROCKSDB}/db/write_controller.cc
${ROCKSDB}/db/write_thread.cc
${ROCKSDB}/db/xfunc_test_points.cc
${ROCKSDB}/memtable/hash_cuckoo_rep.cc
${ROCKSDB}/memtable/hash_linklist_rep.cc
${ROCKSDB}/memtable/hash_skiplist_rep.cc
${ROCKSDB}/memtable/skiplistrep.cc
${ROCKSDB}/memtable/vectorrep.cc
${ROCKSDB}/port/stack_trace.cc
${ROCKSDB}/table/adaptive_table_factory.cc
${ROCKSDB}/table/block_based_filter_block.cc
${ROCKSDB}/table/block_based_table_builder.cc
${ROCKSDB}/table/block_based_table_factory.cc
${ROCKSDB}/table/block_based_table_reader.cc
${ROCKSDB}/table/block_builder.cc
${ROCKSDB}/table/block.cc
${ROCKSDB}/table/block_prefix_index.cc
${ROCKSDB}/table/bloom_block.cc
${ROCKSDB}/table/cuckoo_table_builder.cc
${ROCKSDB}/table/cuckoo_table_factory.cc
${ROCKSDB}/table/cuckoo_table_reader.cc
${ROCKSDB}/table/flush_block_policy.cc
${ROCKSDB}/table/format.cc
${ROCKSDB}/table/full_filter_block.cc
${ROCKSDB}/table/get_context.cc
${ROCKSDB}/table/iterator.cc
${ROCKSDB}/table/merger.cc
${ROCKSDB}/table/meta_blocks.cc
${ROCKSDB}/table/sst_file_writer.cc
${ROCKSDB}/table/plain_table_builder.cc
${ROCKSDB}/table/plain_table_factory.cc
${ROCKSDB}/table/plain_table_index.cc
${ROCKSDB}/table/plain_table_key_coding.cc
${ROCKSDB}/table/plain_table_reader.cc
${ROCKSDB}/table/persistent_cache_helper.cc
${ROCKSDB}/table/table_properties.cc
${ROCKSDB}/table/two_level_iterator.cc
${ROCKSDB}/tools/dump/db_dump_tool.cc
${ROCKSDB}/util/arena.cc
${ROCKSDB}/util/bloom.cc
# ${ROCKSDB}/util/build_version.cc
${ROCKSDB}/util/coding.cc
${ROCKSDB}/util/comparator.cc
${ROCKSDB}/util/compaction_job_stats_impl.cc
${ROCKSDB}/util/concurrent_arena.cc
${ROCKSDB}/util/crc32c.cc
${ROCKSDB}/util/delete_scheduler.cc
${ROCKSDB}/util/dynamic_bloom.cc
${ROCKSDB}/util/env.cc
${ROCKSDB}/util/env_chroot.cc
${ROCKSDB}/util/env_hdfs.cc
${ROCKSDB}/util/file_util.cc
${ROCKSDB}/util/file_reader_writer.cc
${ROCKSDB}/util/filter_policy.cc
${ROCKSDB}/util/hash.cc
${ROCKSDB}/util/histogram.cc
${ROCKSDB}/util/histogram_windowing.cc
${ROCKSDB}/util/instrumented_mutex.cc
${ROCKSDB}/util/iostats_context.cc
${ROCKSDB}/util/lru_cache.cc
${ROCKSDB}/util/threadpool.cc
${ROCKSDB}/util/transaction_test_util.cc
${ROCKSDB}/util/sharded_cache.cc
${ROCKSDB}/util/sst_file_manager_impl.cc
${ROCKSDB}/utilities/backupable/backupable_db.cc
${ROCKSDB}/utilities/blob_db/blob_db.cc
${ROCKSDB}/utilities/convenience/info_log_finder.cc
${ROCKSDB}/utilities/checkpoint/checkpoint.cc
${ROCKSDB}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
${ROCKSDB}/utilities/document/document_db.cc
${ROCKSDB}/utilities/document/json_document_builder.cc
${ROCKSDB}/utilities/document/json_document.cc
${ROCKSDB}/utilities/env_mirror.cc
${ROCKSDB}/utilities/env_registry.cc
${ROCKSDB}/utilities/flashcache/flashcache.cc
${ROCKSDB}/utilities/geodb/geodb_impl.cc
${ROCKSDB}/utilities/leveldb_options/leveldb_options.cc
${ROCKSDB}/utilities/memory/memory_util.cc
${ROCKSDB}/utilities/merge_operators/put.cc
${ROCKSDB}/utilities/merge_operators/max.cc
${ROCKSDB}/utilities/merge_operators/string_append/stringappend2.cc
${ROCKSDB}/utilities/merge_operators/string_append/stringappend.cc
${ROCKSDB}/utilities/merge_operators/uint64add.cc
${ROCKSDB}/utilities/option_change_migration/option_change_migration.cc
${ROCKSDB}/utilities/options/options_util.cc
${ROCKSDB}/utilities/persistent_cache/persistent_cache_tier.cc
${ROCKSDB}/utilities/persistent_cache/volatile_tier_impl.cc
${ROCKSDB}/utilities/persistent_cache/block_cache_tier_file.cc
${ROCKSDB}/utilities/persistent_cache/block_cache_tier_metadata.cc
${ROCKSDB}/utilities/persistent_cache/block_cache_tier.cc
${ROCKSDB}/utilities/redis/redis_lists.cc
${ROCKSDB}/utilities/simulator_cache/sim_cache.cc
${ROCKSDB}/utilities/spatialdb/spatial_db.cc
${ROCKSDB}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB}/utilities/transactions/optimistic_transaction_impl.cc
${ROCKSDB}/utilities/transactions/optimistic_transaction_db_impl.cc
${ROCKSDB}/utilities/transactions/transaction_base.cc
${ROCKSDB}/utilities/transactions/transaction_db_impl.cc
${ROCKSDB}/utilities/transactions/transaction_db_mutex_impl.cc
${ROCKSDB}/utilities/transactions/transaction_lock_mgr.cc
${ROCKSDB}/utilities/transactions/transaction_impl.cc
${ROCKSDB}/utilities/transactions/transaction_util.cc
${ROCKSDB}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB}/utilities/date_tiered/date_tiered_db_impl.cc
${ROCKSDB}/utilities/write_batch_with_index/write_batch_with_index.cc
${ROCKSDB}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
${ROCKSDB}/util/event_logger.cc
${ROCKSDB}/util/log_buffer.cc
${ROCKSDB}/util/logging.cc
${ROCKSDB}/util/memenv.cc
${ROCKSDB}/util/murmurhash.cc
${ROCKSDB}/util/mutable_cf_options.cc
${ROCKSDB}/util/options.cc
${ROCKSDB}/util/options_helper.cc
${ROCKSDB}/util/options_parser.cc
${ROCKSDB}/util/options_sanity_check.cc
${ROCKSDB}/util/perf_context.cc
${ROCKSDB}/util/perf_level.cc
${ROCKSDB}/util/random.cc
${ROCKSDB}/util/rate_limiter.cc
${ROCKSDB}/util/slice.cc
${ROCKSDB}/util/statistics.cc
${ROCKSDB}/util/status.cc
${ROCKSDB}/util/status_message.cc
${ROCKSDB}/util/string_util.cc
${ROCKSDB}/util/sync_point.cc
${ROCKSDB}/util/thread_local.cc
${ROCKSDB}/util/thread_status_impl.cc
${ROCKSDB}/util/thread_status_updater.cc
${ROCKSDB}/util/thread_status_updater_debug.cc
${ROCKSDB}/util/thread_status_util.cc
${ROCKSDB}/util/thread_status_util_debug.cc
${ROCKSDB}/util/xfunc.cc
${ROCKSDB}/util/xxhash.cc
)
# Platform-specific RocksDB port: Windows builds the win/ port sources,
# every other platform builds the POSIX port.
if (WIN32)
    add_definitions(
        -DOS_WIN
    )
    set(ROCKSDB_SRC ${ROCKSDB_SRC}
        ${ROCKSDB}/port/win/io_win.cc
        ${ROCKSDB}/port/win/env_default.cc
        ${ROCKSDB}/port/win/env_win.cc
        ${ROCKSDB}/port/win/port_win.cc
        ${ROCKSDB}/port/win/win_logger.cc
        ${ROCKSDB}/port/win/xpress_win.cc
    )
else ()
    #if (${CMAKE_SYSTEM_NAME} MATCHES Linux)
    # NOTE(review): -DOS_LINUX is defined for every non-Windows platform
    # here, not just Linux — confirm intent for e.g. macOS builds.
    add_definitions(
        -DOS_LINUX
        -DROCKSDB_PLATFORM_POSIX
        -DROCKSDB_LIB_IO_POSIX
    )
    set(ROCKSDB_SRC ${ROCKSDB_SRC}
        ${ROCKSDB}/util/io_posix.cc
        ${ROCKSDB}/util/env_posix.cc
        ${ROCKSDB}/port/port_posix.cc
    )
endif ()
# RocksDB headers are third-party; SYSTEM suppresses warnings from them.
include_directories(
    SYSTEM
    ${ROCKSDB}
    ${ROCKSDB}/include
    ${ROCKSDB}/third-party/gtest-1.7.0/fused-src
)
# Enables the rocksdb code paths in bench.cpp.
add_definitions(
    -DWITH_ROCKSDB
)
endif(WITH_ROCKSDB)
if (NOT WIN32)
    # Request C++11 via the portable standard knobs rather than appending
    # a raw -std flag to CMAKE_CXX_FLAGS. CMAKE_CXX_STANDARD needs
    # CMake >= 3.1; older CMake keeps the previous flag-append behavior.
    if (NOT CMAKE_VERSION VERSION_LESS 3.1)
        set(CMAKE_CXX_STANDARD 11)
        set(CMAKE_CXX_STANDARD_REQUIRED ON)
    else ()
        append_flags(CMAKE_CXX_FLAGS -std=c++11)
    endif ()
endif ()
if(WIN32)
    # MSVC compile options applied to every target in this directory.
    add_compile_options(
        /bigobj           # Increase object file max size
        /EHa              # ExceptionHandling all
        /fp:precise       # Floating point behavior
        /Gd               # __cdecl calling convention
        /Gm-              # Minimal rebuild: disabled
        /GR               # Enable RTTI
        /Gy-              # Function level linking: disabled
        /FS               # Serialize writes to the PDB (needed with /MP)
        /MP               # Multiprocessor compilation
        /openmp-          # pragma omp: disabled
        /Zc:forScope      # Language extension: for scope
        /Zi               # Generate complete debug info
        /errorReport:none # No error reporting to Internet
        /nologo           # Suppress startup banner
        /W3               # Warning level 3
        /WX-              # Disable warnings as errors
        /wd"4018"         # C4018: signed/unsigned mismatch in comparison
        /wd"4244"         # C4244: narrowing conversion, possible loss of data
        /wd"4267"         # C4267: conversion from size_t, possible loss of data
        /wd"4800"         # Disable C4800(int to bool performance)
        /wd"4503"         # Decorated name length exceeded, name was truncated
    )
    add_definitions(
        # Target Windows Vista and later. BUG FIX: this was 0x6000, which
        # is not a valid _WIN32_WINNT value; Vista is 0x0600.
        -D_WIN32_WINNT=0x0600
        -D_ITERATOR_DEBUG_LEVEL=0
        -D_SCL_SECURE_NO_WARNINGS
        -D_CRT_SECURE_NO_WARNINGS
        -DWIN32_CONSOLE
        -DNOMINMAX)
    append_flags(CMAKE_EXE_LINKER_FLAGS
        /DEBUG
        /DYNAMICBASE
        /ERRORREPORT:NONE
        /MACHINE:X64
        /MANIFEST
        /nologo
        /NXCOMPAT
        /SUBSYSTEM:CONSOLE
        /TLBID:1)
    # There seems to be an issue using generator expressions with multiple
    # values, so split the expression.
    # /GS Buffers security check: enable
    add_compile_options($<$<CONFIG:Debug>:/GS>)
    # /MTd Language: Multi-threaded Debug CRT
    add_compile_options($<$<CONFIG:Debug>:/MTd>)
    # /Od Optimization: Disabled
    add_compile_options($<$<CONFIG:Debug>:/Od>)
    # /RTC1 Run-time error checks:
    add_compile_options($<$<CONFIG:Debug>:/RTC1>)
    # Generator expressions are not supported in add_definitions,
    # use set_property instead.
    set_property(
        DIRECTORY
        APPEND
        PROPERTY COMPILE_DEFINITIONS
        $<$<CONFIG:Debug>:_CRTDBG_MAP_ALLOC>)
    # /MT Language: Multi-threaded CRT
    add_compile_options($<$<CONFIG:Release>:/MT>)
    # /Ox Optimization: Full
    add_compile_options($<$<CONFIG:Release>:/Ox>)
endif ()
# Project-local headers plus the dependency include list assembled above.
include_directories(
    ../include
    ../test
    .
    ${DEPS}
)
# The benchmark driver. Header lists are included so they show up in IDEs.
add_executable(bench
    ${BEAST_INCLUDES}
    ${EXTRAS_INCLUDES}
    ${NUDB_INCLUDES}
    ${ROCKSDB_SRC}
    bench.cpp
)
# Use an explicit PRIVATE scope: bench is a final executable and nothing
# consumes its link interface. (The keyword-less signature has legacy
# semantics and must not be mixed with the keyword form.)
target_link_libraries(bench PRIVATE
    ${Boost_LIBRARIES}
)
if (WITH_ROCKSDB)
    if (WIN32)
        # UUID routines referenced by RocksDB's Windows port.
        target_link_libraries(bench PRIVATE
            Rpcrt4
        )
    else ()
        # clock_gettime (rt), threads, and RocksDB's compressors.
        target_link_libraries(bench PRIVATE
            rt
            Threads::Threads
            z
            snappy
        )
    endif ()
endif ()

226
src/nudb/bench/Jamfile Normal file
View File

@@ -0,0 +1,226 @@
#
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
# Properties:
#
# with-rocksdb=no|yes Select building with rocksdb support (not supported on windows)
import feature : feature ;
path-constant ROCKSDB : ../extras/rocksdb ;
feature with-rocksdb : no yes : propagated optional ;
path-constant ROCKSDB_SRC :
$(ROCKSDB)/db/auto_roll_logger.cc
$(ROCKSDB)/db/builder.cc
$(ROCKSDB)/db/c.cc
$(ROCKSDB)/db/column_family.cc
$(ROCKSDB)/db/compacted_db_impl.cc
$(ROCKSDB)/db/compaction.cc
$(ROCKSDB)/db/compaction_iterator.cc
$(ROCKSDB)/db/compaction_job.cc
$(ROCKSDB)/db/compaction_picker.cc
$(ROCKSDB)/db/convenience.cc
$(ROCKSDB)/db/db_filesnapshot.cc
$(ROCKSDB)/db/dbformat.cc
$(ROCKSDB)/db/db_impl.cc
$(ROCKSDB)/db/db_impl_debug.cc
$(ROCKSDB)/db/db_impl_readonly.cc
$(ROCKSDB)/db/db_impl_experimental.cc
$(ROCKSDB)/db/db_impl_add_file.cc
$(ROCKSDB)/db/db_info_dumper.cc
$(ROCKSDB)/db/db_iter.cc
$(ROCKSDB)/db/experimental.cc
$(ROCKSDB)/db/event_helpers.cc
$(ROCKSDB)/db/file_indexer.cc
$(ROCKSDB)/db/filename.cc
$(ROCKSDB)/db/flush_job.cc
$(ROCKSDB)/db/flush_scheduler.cc
$(ROCKSDB)/db/forward_iterator.cc
$(ROCKSDB)/db/internal_stats.cc
$(ROCKSDB)/db/log_reader.cc
$(ROCKSDB)/db/log_writer.cc
$(ROCKSDB)/db/managed_iterator.cc
$(ROCKSDB)/db/memtable_allocator.cc
$(ROCKSDB)/db/memtable.cc
$(ROCKSDB)/db/memtable_list.cc
$(ROCKSDB)/db/merge_helper.cc
$(ROCKSDB)/db/merge_operator.cc
$(ROCKSDB)/db/repair.cc
$(ROCKSDB)/db/snapshot_impl.cc
$(ROCKSDB)/db/table_cache.cc
$(ROCKSDB)/db/table_properties_collector.cc
$(ROCKSDB)/db/transaction_log_impl.cc
$(ROCKSDB)/db/version_builder.cc
$(ROCKSDB)/db/version_edit.cc
$(ROCKSDB)/db/version_set.cc
$(ROCKSDB)/db/wal_manager.cc
$(ROCKSDB)/db/write_batch.cc
$(ROCKSDB)/db/write_batch_base.cc
$(ROCKSDB)/db/write_controller.cc
$(ROCKSDB)/db/write_thread.cc
$(ROCKSDB)/db/xfunc_test_points.cc
$(ROCKSDB)/memtable/hash_cuckoo_rep.cc
$(ROCKSDB)/memtable/hash_linklist_rep.cc
$(ROCKSDB)/memtable/hash_skiplist_rep.cc
$(ROCKSDB)/memtable/skiplistrep.cc
$(ROCKSDB)/memtable/vectorrep.cc
$(ROCKSDB)/port/stack_trace.cc
$(ROCKSDB)/table/adaptive_table_factory.cc
$(ROCKSDB)/table/block_based_filter_block.cc
$(ROCKSDB)/table/block_based_table_builder.cc
$(ROCKSDB)/table/block_based_table_factory.cc
$(ROCKSDB)/table/block_based_table_reader.cc
$(ROCKSDB)/table/block_builder.cc
$(ROCKSDB)/table/block.cc
$(ROCKSDB)/table/block_prefix_index.cc
$(ROCKSDB)/table/bloom_block.cc
$(ROCKSDB)/table/cuckoo_table_builder.cc
$(ROCKSDB)/table/cuckoo_table_factory.cc
$(ROCKSDB)/table/cuckoo_table_reader.cc
$(ROCKSDB)/table/flush_block_policy.cc
$(ROCKSDB)/table/format.cc
$(ROCKSDB)/table/full_filter_block.cc
$(ROCKSDB)/table/get_context.cc
$(ROCKSDB)/table/iterator.cc
$(ROCKSDB)/table/merger.cc
$(ROCKSDB)/table/meta_blocks.cc
$(ROCKSDB)/table/sst_file_writer.cc
$(ROCKSDB)/table/plain_table_builder.cc
$(ROCKSDB)/table/plain_table_factory.cc
$(ROCKSDB)/table/plain_table_index.cc
$(ROCKSDB)/table/plain_table_key_coding.cc
$(ROCKSDB)/table/plain_table_reader.cc
$(ROCKSDB)/table/persistent_cache_helper.cc
$(ROCKSDB)/table/table_properties.cc
$(ROCKSDB)/table/two_level_iterator.cc
$(ROCKSDB)/tools/dump/db_dump_tool.cc
$(ROCKSDB)/util/arena.cc
$(ROCKSDB)/util/bloom.cc
# $(ROCKSDB)/util/build_version.cc
$(ROCKSDB)/util/coding.cc
$(ROCKSDB)/util/comparator.cc
$(ROCKSDB)/util/compaction_job_stats_impl.cc
$(ROCKSDB)/util/concurrent_arena.cc
$(ROCKSDB)/util/crc32c.cc
$(ROCKSDB)/util/delete_scheduler.cc
$(ROCKSDB)/util/dynamic_bloom.cc
$(ROCKSDB)/util/env.cc
$(ROCKSDB)/util/env_chroot.cc
$(ROCKSDB)/util/env_hdfs.cc
$(ROCKSDB)/util/file_util.cc
$(ROCKSDB)/util/file_reader_writer.cc
$(ROCKSDB)/util/filter_policy.cc
$(ROCKSDB)/util/hash.cc
$(ROCKSDB)/util/histogram.cc
$(ROCKSDB)/util/histogram_windowing.cc
$(ROCKSDB)/util/instrumented_mutex.cc
$(ROCKSDB)/util/iostats_context.cc
$(ROCKSDB)/util/lru_cache.cc
$(ROCKSDB)/util/threadpool.cc
$(ROCKSDB)/util/transaction_test_util.cc
$(ROCKSDB)/util/sharded_cache.cc
$(ROCKSDB)/util/sst_file_manager_impl.cc
$(ROCKSDB)/utilities/backupable/backupable_db.cc
$(ROCKSDB)/utilities/blob_db/blob_db.cc
$(ROCKSDB)/utilities/convenience/info_log_finder.cc
$(ROCKSDB)/utilities/checkpoint/checkpoint.cc
$(ROCKSDB)/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
$(ROCKSDB)/utilities/document/document_db.cc
$(ROCKSDB)/utilities/document/json_document_builder.cc
$(ROCKSDB)/utilities/document/json_document.cc
$(ROCKSDB)/utilities/env_mirror.cc
$(ROCKSDB)/utilities/env_registry.cc
$(ROCKSDB)/utilities/flashcache/flashcache.cc
$(ROCKSDB)/utilities/geodb/geodb_impl.cc
$(ROCKSDB)/utilities/leveldb_options/leveldb_options.cc
$(ROCKSDB)/utilities/memory/memory_util.cc
$(ROCKSDB)/utilities/merge_operators/put.cc
$(ROCKSDB)/utilities/merge_operators/max.cc
$(ROCKSDB)/utilities/merge_operators/string_append/stringappend2.cc
$(ROCKSDB)/utilities/merge_operators/string_append/stringappend.cc
$(ROCKSDB)/utilities/merge_operators/uint64add.cc
$(ROCKSDB)/utilities/option_change_migration/option_change_migration.cc
$(ROCKSDB)/utilities/options/options_util.cc
$(ROCKSDB)/utilities/persistent_cache/persistent_cache_tier.cc
$(ROCKSDB)/utilities/persistent_cache/volatile_tier_impl.cc
$(ROCKSDB)/utilities/persistent_cache/block_cache_tier_file.cc
$(ROCKSDB)/utilities/persistent_cache/block_cache_tier_metadata.cc
$(ROCKSDB)/utilities/persistent_cache/block_cache_tier.cc
$(ROCKSDB)/utilities/redis/redis_lists.cc
$(ROCKSDB)/utilities/simulator_cache/sim_cache.cc
$(ROCKSDB)/utilities/spatialdb/spatial_db.cc
$(ROCKSDB)/utilities/table_properties_collectors/compact_on_deletion_collector.cc
$(ROCKSDB)/utilities/transactions/optimistic_transaction_impl.cc
$(ROCKSDB)/utilities/transactions/optimistic_transaction_db_impl.cc
$(ROCKSDB)/utilities/transactions/transaction_base.cc
$(ROCKSDB)/utilities/transactions/transaction_db_impl.cc
$(ROCKSDB)/utilities/transactions/transaction_db_mutex_impl.cc
$(ROCKSDB)/utilities/transactions/transaction_lock_mgr.cc
$(ROCKSDB)/utilities/transactions/transaction_impl.cc
$(ROCKSDB)/utilities/transactions/transaction_util.cc
$(ROCKSDB)/utilities/ttl/db_ttl_impl.cc
$(ROCKSDB)/utilities/date_tiered/date_tiered_db_impl.cc
$(ROCKSDB)/utilities/write_batch_with_index/write_batch_with_index.cc
$(ROCKSDB)/utilities/write_batch_with_index/write_batch_with_index_internal.cc
$(ROCKSDB)/util/event_logger.cc
$(ROCKSDB)/util/log_buffer.cc
$(ROCKSDB)/util/logging.cc
$(ROCKSDB)/util/memenv.cc
$(ROCKSDB)/util/murmurhash.cc
$(ROCKSDB)/util/mutable_cf_options.cc
$(ROCKSDB)/util/options.cc
$(ROCKSDB)/util/options_helper.cc
$(ROCKSDB)/util/options_parser.cc
$(ROCKSDB)/util/options_sanity_check.cc
$(ROCKSDB)/util/perf_context.cc
$(ROCKSDB)/util/perf_level.cc
$(ROCKSDB)/util/random.cc
$(ROCKSDB)/util/rate_limiter.cc
$(ROCKSDB)/util/slice.cc
$(ROCKSDB)/util/statistics.cc
$(ROCKSDB)/util/status.cc
$(ROCKSDB)/util/status_message.cc
$(ROCKSDB)/util/string_util.cc
$(ROCKSDB)/util/sync_point.cc
$(ROCKSDB)/util/thread_local.cc
$(ROCKSDB)/util/thread_status_impl.cc
$(ROCKSDB)/util/thread_status_updater.cc
$(ROCKSDB)/util/thread_status_updater_debug.cc
$(ROCKSDB)/util/thread_status_util.cc
$(ROCKSDB)/util/thread_status_util_debug.cc
$(ROCKSDB)/util/xfunc.cc
$(ROCKSDB)/util/xxhash.cc
;
# RocksDB port sources that only compile on POSIX systems; appended to
# the build via the <os>LINUX conditionals below.
path-constant ROCKSDB_POSIX_SRC :
    # Posix only
    $(ROCKSDB)/util/io_posix.cc
    $(ROCKSDB)/util/env_posix.cc
    $(ROCKSDB)/port/port_posix.cc
    ;
# Project requirements: the RocksDB sources, defines, and includes are
# pulled in only when built with with-rocksdb=yes.
project bench
    : requirements
    <with-rocksdb>yes:<source>$(ROCKSDB_SRC)
    <with-rocksdb>yes:<define>WITH_ROCKSDB
    <with-rocksdb>yes:<include>$(ROCKSDB)
    <with-rocksdb>yes:<include>$(ROCKSDB)/include
    <with-rocksdb>yes:<include>$(ROCKSDB)/third-party/gtest-1.7.0/fused-src
    # Posix only
    <os>LINUX,<with-rocksdb>yes:<define>ROCKSDB_PLATFORM_POSIX
    <os>LINUX,<with-rocksdb>yes:<define>ROCKSDB_LIB_IO_POSIX
    # LINUX Only
    <os>LINUX,<with-rocksdb>yes:<define>OS_LINUX
    <os>LINUX,<with-rocksdb>yes:<library>/boost/thread//boost_thread
    <os>LINUX,<with-rocksdb>yes:<source>$(ROCKSDB_POSIX_SRC)
    ;
exe bench :
    bench.cpp
    ;

102
src/nudb/bench/README.md Normal file
View File

@@ -0,0 +1,102 @@
# Benchmarks for NuDB
These benchmarks time two operations:
1. The time to insert N values into a database. The inserted keys and values are
pseudo-randomly generated. The random number generator is always seeded with
the same value for each run, so the same values are always inserted.
2. The time to fetch M existing values from a database with N values. The order
that the keys are fetched are pseudo-randomly generated. The random number
generator is always seeded with the same value on each run, so the keys are
always looked up in the same order.
At the end of a run, the program outputs a table of operations per second. The
tables have a row for each database size, and a column for each database (in
cases where NuDB is compared against other databases). A cell in the table is
the number of operations per second for that trial. For example, in the table
below NuDB had 340397 Ops/Sec when fetching from an existing database with
10,000,000 values. This is a summary report, and only reports samples at order
of magnitudes of ten.
A sample output:
```
insert (per second)
num_db_keys nudb rocksdb
100000 406598 231937
1000000 374330 258519
10000000 NA NA
fetch (per second)
num_db_keys nudb rocksdb
100000 325228 697158
1000000 333443 34557
10000000 337300 20835
```
In addition to the summary report, the benchmark can collect detailed samples.
The `--raw_out` command line options is used to specify a file to output the raw
samples. The python 3 script `plot_bench.py` may be used to plot the result. For
example, if bench was run as `bench --raw_out=samples.txt`, then the python
script can be run as `python plot_bench.py -i samples.txt`. The python script
requires the `pandas` and `seaborn` packages (anaconda python is a good way to
install and manage python if these packages are not already
installed: [anaconda download](https://www.continuum.io/downloads)).
# Building
## Building with CMake
Note: Building with RocksDB is currently not supported on Windows.
1. The benchmark requires boost. If building with rocksdb, it also requires zlib
and snappy. These are popular libraries and should be available through the
package manager.
1. The benchmark and test programs require some submodules that are not
installed by default. Get these submodules by running:
`git submodule update --init`
2. From the main nudb directory, create a directory for the build and change to
that directory: `mkdir bench_build;cd bench_build`
3. Generate a project file or makefile.
* If building on Linux, generate a makefile. If building with rocksdb
support, use: `cmake -DCMAKE_BUILD_TYPE=Release ../bench` If building
without rocksdb support, use: `cmake -DCMAKE_BUILD_TYPE=Release ../bench
-DWITH_ROCKSDB=false` Replace `../bench` with the path to the `bench`
directory if the build directory is not in the suggested location.
* If building on windows, generate a project file. The CMake gui program is
useful for this. Use the `bench` directory as the `source` directory and
the `bench_build` directory as the `binaries` directory. Press the `Add
Entry` button and add a `BOOST_ROOT` variable that points to the `boost`
directory. Hit `configure`. A dialog box will pop up. Select the generator
for Win64. Select `generate` to generate the visual studio project.
4. Compile the program.
* If building on Linux, run: `make`
* If building on Windows, open the project file generated above in Visual
Studio.
## Test the build
Try running the benchmark with a small database: `./bench --num_batches=10`. A
report similar to sample should appear after a few seconds.
# Command Line Options
* `batch_size arg` : Number of elements to insert or fetch per batch. If not
specified, it defaults to 20000.
* `num_batches arg` : Number of batches to run. If not specified, it defaults to
500.
* `db_dir arg` : Directory to place the databases. If not specified, it defaults to
boost::filesystem::temp_directory_path (likely `/tmp` on Linux)
* `raw_out arg` : File to record the raw measurements. This is useful for plotting. If
not specified the raw measurements will not be output.
* `--dbs arg` : Databases to run the benchmark on. Currently, only `nudb` and
`rocksdb` are supported. Building with `rocksdb` is optional on Linux, and
only `nudb` is supported on windows. The argument may be a list. If `dbs` is
not specified, it defaults to all the database the build supports (either
`nudb` or `nudb rocksdb`).
* `--key_size arg` : nudb key size. If not specified the default is 64.
* `--block_size arg` : nudb block size. This is an advanced argument. If not
specified the default is 4096.
* `--load_factor arg` : nudb load factor. This is an advanced argument. If not
specified the default is 0.5.

535
src/nudb/bench/bench.cpp Normal file
View File

@@ -0,0 +1,535 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <nudb/test/test_store.hpp>
#include <nudb/util.hpp>
#include <beast/unit_test/dstream.hpp>
#if WITH_ROCKSDB
#include "rocksdb/db.h"
char const* rocksdb_build_git_sha="Benchmark Dummy Sha";
char const* rocksdb_build_compile_date="Benchmark Dummy Compile Date";
#endif
#include <boost/container/flat_map.hpp>
#include <boost/program_options.hpp>
#include <boost/system/system_error.hpp>
#include <array>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <map>
#include <memory>
#include <numeric>
#include <random>
#include <set>
#include <thread>
#include <utility>
namespace nudb {
namespace test {
beast::unit_test::dstream dout{std::cout};
beast::unit_test::dstream derr{std::cerr};
// Measures elapsed wall-clock time from construction, using a
// monotonic (steady) clock so the result is unaffected by system
// clock adjustments.
struct stop_watch
{
    using clock = std::chrono::steady_clock;
    using time_point = clock::time_point;
    time_point start_;

    // Timing begins immediately upon construction.
    stop_watch()
        : start_(clock::now())
    {
    }

    // Time since construction as a double-valued duration in seconds.
    std::chrono::duration<double>
    elapsed() const
    {
        auto const delta = clock::now() - start_;
        return std::chrono::duration_cast<
            std::chrono::duration<double>>(delta);
    }
};
// Adapts the project `progress` reporter to batch-wise updates against
// a fixed grand total of operations known up front.
class bench_progress
{
    progress p_;                   // underlying progress printer (project type)
    std::uint64_t const total_=0;  // total operations expected for the run
    std::uint64_t batch_start_=0;  // operations already completed in prior batches
public:
    // Emits an initial 0/total report immediately.
    bench_progress(std::ostream& os, std::uint64_t total)
        : p_(os), total_(total)
    {
        p_(0, total);
    }
    // Report that `batch_amount` more operations have completed.
    void
    update(std::uint64_t batch_amount)
    {
        p_(batch_start_ + batch_amount, total_);
        batch_start_ += batch_amount;
    }
};
// Generator functor yielding sequential test items from a test_store,
// starting at index `cur` and advancing by one on each call. Used to
// produce the deterministic insert stream.
class gen_key_value
{
    test_store& ts_;     // source of deterministic key/value items
    std::uint64_t cur_;  // next index to hand out
public:
    gen_key_value(test_store& ts, std::uint64_t cur)
        : ts_(ts),
        cur_(cur)
    {
    }
    // Return the item at the current index, then advance.
    item_type
    operator()()
    {
        return ts_[cur_++];
    }
};
// Generator functor yielding pseudo-random items with indices in
// [0, max_index]. Seeded with a fixed default so fetch order is
// reproducible across runs.
class rand_existing_key
{
    xor_shift_engine rng_;  // project PRNG (deterministic for a given seed)
    std::uniform_int_distribution<std::uint64_t> dist_;
    test_store& ts_;        // store the random indices are resolved against
public:
    rand_existing_key(test_store& ts,
        std::uint64_t max_index,
        std::uint64_t seed = 1337)
        : dist_(0, max_index),
        ts_(ts)
    {
        rng_.seed(seed);
    }
    // Return the item at a uniformly random existing index.
    item_type
    operator()()
    {
        return ts_[dist_(rng_)];
    }
};
template <class Generator, class F>
std::chrono::duration<double>
time_block(std::uint64_t n, Generator&& g, F&& f)
{
stop_watch timer;
for (std::uint64_t i = 0; i < n; ++i)
{
f(g());
}
return timer.elapsed();
}
// Run `num_batches` rounds; each round inserts `batch_size` new items
// and then fetches `batch_size` random items from everything inserted
// so far. One throughput sample (ops/sec) per phase is reported via
// `add_sample(op_name, db_size, ops_per_sec)`.
template <class Inserter, class Fetcher, class AddSample, class PreFetchHook>
void
time_fetch_insert_interleaved(
    std::uint64_t batch_size,
    std::uint64_t num_batches,
    test_store& ts,
    Inserter&& inserter,
    Fetcher&& fetcher,
    AddSample&& add_sample,
    PreFetchHook&& pre_fetch_hook,
    bench_progress& progress)
{
    std::uint64_t next_insert_index = 0;
    for (auto b = 0ull; b < num_batches; ++b)
    {
        // Time inserting the next batch of sequentially generated items.
        auto const insert_time = time_block(
            batch_size, gen_key_value{ts, next_insert_index}, inserter);
        add_sample(
            "insert", next_insert_index, batch_size / insert_time.count());
        next_insert_index += batch_size;
        progress.update(batch_size);
        // Hook lets callers prepare for the fetch phase (e.g. reopen db).
        pre_fetch_hook();
        // Time fetching random keys among everything inserted so far.
        auto const fetch_time = time_block(
            batch_size, rand_existing_key{ts, next_insert_index - 1}, fetcher);
        add_sample("fetch", next_insert_index, batch_size / fetch_time.count());
        progress.update(batch_size);
    }
}
#if WITH_ROCKSDB
// Run the interleaved insert/fetch benchmark against a RocksDB
// database created in a fresh temporary directory under `db_dir`.
// Errors opening the database or during the run are reported to derr.
template<class AddSample>
void
do_timings_rocks(
    std::string const& db_dir,
    std::uint64_t batch_size,
    std::uint64_t num_batches,
    std::uint32_t key_size,
    AddSample&& add_sample,
    bench_progress& progress)
{
    // Database files are removed when `td` goes out of scope.
    temp_dir td{db_dir};
    std::unique_ptr<rocksdb::DB> pdb = [&td] {
        rocksdb::DB* db = nullptr;
        rocksdb::Options options;
        options.create_if_missing = true;
        auto const status = rocksdb::DB::Open(options, td.path(), &db);
        if (!status.ok())
            db = nullptr;
        return std::unique_ptr<rocksdb::DB>{db};
    }();
    if (!pdb)
    {
        derr << "Failed to open rocks db.\n";
        return;
    }
    auto inserter = [key_size, &pdb](item_type const& v) {
        auto const s = pdb->Put(rocksdb::WriteOptions(),
            rocksdb::Slice(reinterpret_cast<char const*>(v.key), key_size),
            rocksdb::Slice(reinterpret_cast<char const*>(v.data), v.size));
        if (!s.ok())
            throw std::runtime_error("Rocks Insert: " + s.ToString());
    };
    auto fetcher = [key_size, &pdb](item_type const& v) {
        std::string value;
        auto const s = pdb->Get(rocksdb::ReadOptions(),
            rocksdb::Slice(reinterpret_cast<char const*>(v.key), key_size),
            &value);
        if (!s.ok())
            throw std::runtime_error("Rocks Fetch: " + s.ToString());
    };
    // test_store only generates deterministic keys/values here; the
    // items themselves live in RocksDB.
    test_store ts{key_size, 0, 0};
    try
    {
        time_fetch_insert_interleaved(batch_size, num_batches, ts,
            std::move(inserter), std::move(fetcher),
            std::forward<AddSample>(add_sample), [] {}, progress);
    }
    catch (std::exception const& e)
    {
        derr << "Error: " << e.what() << '\n';
    }
}
#endif
// Run the interleaved insert/fetch benchmark against a NuDB test_store
// created under `db_dir`. Errors are reported to derr; the function
// never throws. Restructured to drop the original `goto fail` jumping
// out of the try block — behavior is unchanged.
template <class AddSample>
void
do_timings(std::string const& db_dir,
    std::uint64_t batch_size,
    std::uint64_t num_batches,
    std::uint32_t key_size,
    std::size_t block_size,
    float load_factor,
    AddSample&& add_sample,
    bench_progress& progress)
{
    boost::system::error_code ec;
    try
    {
        test_store ts{db_dir, key_size, block_size, load_factor};
        ts.create(ec);
        if (!ec)
            ts.open(ec);
        if (!ec)
        {
            auto inserter = [&ts, &ec](item_type const& v) {
                ts.db.insert(v.key, v.data, v.size, ec);
                if (ec)
                    throw boost::system::system_error(ec);
            };
            auto fetcher = [&ts, &ec](item_type const& v) {
                ts.db.fetch(v.key, [&](void const* data, std::size_t size) {}, ec);
                if (ec)
                    throw boost::system::system_error(ec);
            };
            auto pre_fetch_hook = [&ts, &ec]() {
                // Close then open the db otherwise the
                // commit thread confounds the timings
                ts.close(ec);
                if (ec)
                    throw boost::system::system_error(ec);
                ts.open(ec);
                if (ec)
                    throw boost::system::system_error(ec);
            };
            time_fetch_insert_interleaved(batch_size, num_batches, ts,
                std::move(inserter), std::move(fetcher),
                std::forward<AddSample>(add_sample), std::move(pre_fetch_hook),
                progress);
        }
    }
    catch (boost::system::system_error const& e)
    {
        ec = e.code();
    }
    catch (std::exception const& e)
    {
        derr << "Error: " << e.what() << '\n';
    }
    if (ec)
        derr << "Error: " << ec.message() << '\n';
}
namespace po = boost::program_options;
// Print the program name followed by the formatted option summary.
void
print_help(std::string const& prog_name, const po::options_description& desc)
{
    derr << prog_name << ' ' << desc;
}
// Populate `desc` with the supported options and parse the command
// line. Throws (derived from std::exception) on malformed input.
// Removed `default_dbs`/`default_ops` locals, which were never used
// (main builds its own defaults); also fixed the unbalanced ')' in the
// batch_size/num_batches help strings.
po::variables_map
parse_args(int argc, char** argv, po::options_description& desc)
{
    desc.add_options()
        ("help,h", "Display this message.")
        ("batch_size",
            po::value<std::uint64_t>(),
            "Batch Size (Default: 20000)")
        ("num_batches",
            po::value<std::uint64_t>(),
            "Num Batches (Default: 500)")
        ("dbs",
            po::value<std::vector<std::string>>()->multitoken(),
            "databases (Default: nudb rocksdb)")
        ("block_size", po::value<size_t>(),
            "nudb block size (default: 4096)")
        ("key_size", po::value<size_t>(),
            "key size (default: 64)")
        ("load_factor", po::value<float>(),
            "nudb load factor (default: 0.5)")
        ("db_dir", po::value<std::string>(),
            "Directory to place the databases"
            " (default: boost::filesystem::temp_directory_path)")
        ("raw_out", po::value<std::string>(),
            "File to record the raw measurements (useful for plotting)"
            " (default: no output)")
        ;
    po::variables_map vm;
    po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
    po::notify(vm);
    return vm;
}
// Return the parsed value for `key`, or `default_value` when the
// option was not supplied on the command line.
template<class T>
T
get_opt(po::variables_map const& vm, std::string const& key, T const& default_value)
{
    if (vm.count(key) == 0)
        return default_value;
    return vm[key].as<T>();
}
} // test
} // nudb
// Entry point: parse options, run the benchmark against each selected
// database, then print a summary table sampled at powers of ten.
// BUG FIX below: std::accumulate was seeded with the int literal 0,
// so every partial sum of the double samples was truncated to an
// integer; the initial value must be 0.0.
int
main(int argc, char** argv)
{
    using namespace nudb::test;
    po::variables_map vm;
    {
        po::options_description desc{"Benchmark Options"};
        bool parse_error = false;
        try
        {
            vm = parse_args(argc, argv, desc);
        }
        catch (std::exception const& e)
        {
            derr << "Incorrect command line syntax.\n";
            derr << "Exception: " << e.what() << '\n';
            parse_error = true;
        }
        if (vm.count("help") || parse_error)
        {
            auto prog_name = boost::filesystem::path(argv[0]).stem().string();
            print_help(prog_name, desc);
            return 0;
        }
    }
    auto const batch_size = get_opt<size_t>(vm, "batch_size", 20000);
    auto const num_batches = get_opt<size_t>(vm, "num_batches", 500);
    auto const block_size = get_opt<size_t>(vm, "block_size", 4096);
    auto const load_factor = get_opt<float>(vm, "load_factor", 0.5f);
    auto const key_size = get_opt<size_t>(vm, "key_size", 64);
    // Normalize db_dir so it always ends with a path separator.
    auto const db_dir = [&vm]() -> std::string {
        auto r = get_opt<std::string>(vm, "db_dir", "");
        if (!r.empty() && r.back() != '/' && r.back() != '\\')
        {
            r += '/';
        }
        return r;
    }();
    auto const raw_out = get_opt<std::string>(vm, "raw_out", "");
#if WITH_ROCKSDB
    std::vector<std::string> const default_dbs({"nudb", "rocksdb"});
#else
    std::vector<std::string> const default_dbs({"nudb"});
#endif
    auto to_set = [](std::vector<std::string> const& v) {
        return std::set<std::string>(v.begin(), v.end());
    };
    auto const dbs = to_set(get_opt<std::vector<std::string>>(vm, "dbs", default_dbs));
    // Validate the requested database names up front.
    for (auto const& db : dbs)
    {
        if (db == "rocksdb")
        {
#if !WITH_ROCKSDB
            derr << "Benchmark was not built with rocksdb support\n";
            exit(1);
#endif
            continue;
        }
        if (db != "nudb" && db != "rocksdb")
        {
            derr << "Unsupported database: " << db << '\n';
            exit(1);
        }
    }
    bool const with_rocksdb = dbs.count("rocksdb") != 0;
    (void) with_rocksdb;
    bool const with_nudb = dbs.count("nudb") != 0;
    std::uint64_t const num_db = int(with_nudb) + int(with_rocksdb);
    // Each batch contributes one insert pass and one fetch pass.
    std::uint64_t const total_ops = num_db * batch_size * num_batches * 2;
    bench_progress progress(derr, total_ops);
    enum
    {
        db_nudb,
        db_rocks,
        db_last
    };
    enum
    {
        op_insert,
        op_fetch,
        op_last
    };
    std::array<std::string, db_last> db_names{{"nudb", "rocksdb"}};
    std::array<std::string, db_last> op_names{{"insert", "fetch"}};
    // ops_per_sec[db][op] maps database size -> throughput samples.
    using result_dict = boost::container::flat_multimap<std::uint64_t, double>;
    result_dict ops_per_sec[db_last][op_last];
    // Reserve up front so databases that run later don't have less memory
    for (int i = 0; i < db_last; ++i)
        for (int j = 0; j < op_last; ++j)
            ops_per_sec[i][j].reserve(num_batches);
    std::ofstream raw_out_stream;
    bool const record_raw_out = !raw_out.empty();
    if (record_raw_out)
    {
        raw_out_stream.open(raw_out, std::ios::trunc);
        raw_out_stream << "num_db_items,db,op,ops/sec\n";
    }
    for (int i = 0; i < db_last; ++i)
    {
        // Record one sample in memory and (optionally) in the raw file.
        auto result = [&]
            (std::string const& op_name, std::uint64_t num_items,
                double sample) {
            auto op_idx = op_name == "insert" ? op_insert : op_fetch;
            ops_per_sec[i][op_idx].emplace(num_items, sample);
            if (record_raw_out)
                raw_out_stream << num_items << ',' << db_names[i] << ','
                               << op_name << ',' << std::fixed << sample
                               << std::endl; // flush
        };
        if (with_nudb && i == db_nudb)
            do_timings(db_dir, batch_size, num_batches, key_size, block_size,
                load_factor, result, progress);
#if WITH_ROCKSDB
        if (with_rocksdb && i == db_rocks)
            do_timings_rocks(
                db_dir, batch_size, num_batches, key_size, result, progress);
#endif
    }
    // Write summary by sampling raw data at powers of 10
    auto const col_w = 14;
    auto const iter_w = 15;
    for (int op_idx = 0; op_idx < op_last; ++op_idx)
    {
        auto const& t = op_names[op_idx];
        dout << '\n' << t << " (per second)\n";
        dout << std::setw(iter_w) << "num_db_keys";
        if (with_nudb)
            dout << std::setw(col_w) << "nudb";
#if WITH_ROCKSDB
        if (with_rocksdb)
            dout << std::setw(col_w) << "rocksdb";
#endif
        dout << '\n';
        auto const max_sample = [&ops_per_sec] {
            std::uint64_t r = 0;
            for (auto i = 0; i < db_last; ++i)
                for (auto j = 0; j < op_last; ++j)
                    if (!ops_per_sec[i][j].empty())
                        r = std::max(r, ops_per_sec[i][j].rbegin()->first); // no `back()`
            return r;
        }();
        auto const min_sample = batch_size;
        auto write_val = [&](
            result_dict const& dict, std::uint64_t key) {
            dout << std::setw(col_w) << std::fixed << std::setprecision(2);
            // Take the average of all the values, or "NA" if none collected
            auto l = dict.lower_bound(key);
            auto u = dict.upper_bound(key);
            if (l == u)
                dout << "NA";
            else
            {
                // 0.0 (not 0): keep the accumulation in double.
                auto const total = std::accumulate(l, u, 0.0,
                    [](double a, std::pair<std::uint64_t, double> const& b) {
                        return a + b.second;
                    });
                dout << total / std::distance(l, u);
            }
        };
        for (std::uint64_t n = 100; n <= max_sample; n *= 10)
        {
            if (n<min_sample)
                continue;
            dout << std::setw(iter_w) << n;
            if (with_nudb)
                write_val(ops_per_sec[db_nudb][op_idx], n);
#if WITH_ROCKSDB
            if (with_rocksdb)
                write_val(ops_per_sec[db_rocks][op_idx], n);
#endif
            dout << '\n';
        }
    }
}

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env python
# Script to read the result of the benchmark program and plot the results.
# Options:
# `-i arg` : input file (benchmark result)
# Notes: After the script runs the plot will automatically be shown in matplotlib.
# Tested with python 3 only.
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def run_main(result_filename):
    """Load raw benchmark samples from `result_filename` and plot
    ops/sec against database size: one regression facet per op
    (insert/fetch), colored by database. Returns the loaded DataFrame
    (for testing)."""
    d = pd.read_csv(result_filename)
    sns.lmplot(x='num_db_items', y='ops/sec',
               data=d[d['num_db_items'] >= 500000], hue='db', col='op')
    # plt.show() takes no plot argument; the old plt.show(p) passed the
    # FacetGrid as the `block` parameter by accident.
    plt.show()
    return d  # for testing
def parse_args():
    """Parse command-line arguments; returns a namespace with `input`."""
    cli = argparse.ArgumentParser(
        description='Plot the benchmark results')
    cli.add_argument('--input', '-i', help='input')
    return cli.parse_args()
if __name__ == '__main__':
    # Read the -i/--input path and plot it; bail out politely if absent.
    args = parse_args()
    result_filename = args.input
    if not result_filename:
        print('No result file specified. Exiting')
    else:
        run_main(result_filename)

5
src/nudb/doc/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
bin
html
temp
reference.qbk
out.txt

77
src/nudb/doc/Jamfile.v2 Normal file
View File

@@ -0,0 +1,77 @@
#
# Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
import os ;
local broot = [ os.environ BOOST_ROOT ] ;
project nudb/doc ;
using boostbook ;
using quickbook ;
using doxygen ;
path-constant out : . ;
install stylesheets
:
$(broot)/doc/src/boostbook.css
:
<location>$(out)/html
;
explicit stylesheets ;
install images
:
[ glob $(broot)/doc/src/images/*.png ]
images/logo.png
:
<location>$(out)/html/images
;
explicit images ;
# Copy Boost's callout images next to the generated HTML.
install callouts
    :
    [ glob $(broot)/doc/src/images/callouts/*.png ]
    :
    <location>$(out)/html/images/callouts
    ;
# BUG FIX: was `explicit callout ;` (nonexistent target), which left the
# `callouts` install target implicit so it ran on every build.
explicit callouts ;
# Quickbook -> BoostBook XML.
xml doc
    :
    main.qbk
    :
    <location>temp
    <include>$(broot)/tools/boostbook/dtd
    ;
# BoostBook XML -> HTML. The duplicate chapter.autolabel=0 parameter
# has been removed (it appeared twice in the original list).
boostbook boostdoc
    :
    doc
    :
    <xsl:param>chapter.autolabel=0
    <xsl:param>boost.image.src=images/logo.png
    <xsl:param>boost.image.alt="NuDB Logo"
    <xsl:param>boost.image.w=1270
    <xsl:param>boost.image.h=80
    <xsl:param>boost.root=$(broot)
    <xsl:param>chunk.first.sections=1 # Chunk the first top-level section?
    <xsl:param>chunk.section.depth=8 # Depth to which sections should be chunked
    <xsl:param>generate.section.toc.level=1 # Control depth of TOC generation in sections
    <xsl:param>toc.max.depth=2 # How many levels should be created for each TOC?
    <xsl:param>toc.section.depth=2 # How deep should recursive sections appear in the TOC?
    <xsl:param>generate.toc="chapter nop section nop"
    :
    <location>temp
    <dependency>stylesheets
    <dependency>images
    ;

439
src/nudb/doc/boostbook.dtd Normal file
View File

@@ -0,0 +1,439 @@
<!--
BoostBook DTD - development version
For further information, see: http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?Boost_Documentation_Format
Copyright (c) 2002 by Peter Simons <simons@cryp.to>
Copyright (c) 2003-2004 by Douglas Gregor <doug.gregor -at- gmail.com>
Copyright (c) 2007 by Frank Mori Hess <fmhess@users.sourceforge.net>
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
The latest stable DTD module is identified by the PUBLIC and SYSTEM identifiers:
PUBLIC "-//Boost//DTD BoostBook XML V1.1//EN"
SYSTEM "http://www.boost.org/tools/boostbook/dtd/1.1/boostbook.dtd"
$Revision$
$Date$
-->
<!--========== Define XInclude features. ==========-->
<!-- This is not really integrated into the DTD yet. Needs more
research. -->
<!--
<!ELEMENT xi:include (xi:fallback)?>
<!ATTLIST xi:include
xmlns:xi CDATA #FIXED "http://www.w3.org/2001/XInclude"
href CDATA #REQUIRED
parse (xml|text) "xml"
encoding CDATA #IMPLIED>
<!ELEMENT xi:fallback ANY>
<!ATTLIST xi:fallback
xmlns:xi CDATA #FIXED "http://www.w3.org/2001/XInclude">
-->
<!ENTITY % local.common.attrib "last-revision CDATA #IMPLIED">
<!--========== Define the BoostBook extensions ==========-->
<!ENTITY % boost.common.attrib "%local.common.attrib;
id CDATA #IMPLIED">
<!ENTITY % boost.namespace.mix
"class|class-specialization|struct|struct-specialization|
union|union-specialization|typedef|enum|
free-function-group|function|overloaded-function|
namespace">
<!ENTITY % boost.template.mix
"template-type-parameter|template-nontype-parameter|template-varargs">
<!ENTITY % boost.class.members
"static-constant|typedef|enum|
copy-assignment|constructor|destructor|method-group|
method|overloaded-method|data-member|class|class-specialization|struct|
struct-specialization|union|union-specialization">
<!ENTITY % boost.class.mix
"%boost.class.members;|free-function-group|function|overloaded-function">
<!ENTITY % boost.class.content
"template?, inherit*, purpose?, description?,
(%boost.class.mix;|access)*">
<!ENTITY % boost.class-specialization.content
"template?, specialization?, inherit?, purpose?, description?,
(%boost.class.mix;|access)*">
<!ENTITY % boost.function.semantics
"purpose?, description?, requires?, effects?, postconditions?,
returns?, throws?, complexity?, notes?, rationale?">
<!ENTITY % library.content
"libraryinfo, (title, ((section|library-reference|testsuite))+)?">
<!ELEMENT library (%library.content;)>
<!ATTLIST library
name CDATA #REQUIRED
dirname CDATA #REQUIRED
html-only CDATA #IMPLIED
url CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT boostbook (title, (chapter|library)*)>
<!ATTLIST boostbook %boost.common.attrib;>
<!ELEMENT libraryinfo (author+, copyright*, legalnotice*, librarypurpose, librarycategory*)>
<!ATTLIST libraryinfo %boost.common.attrib;>
<!ELEMENT librarypurpose (#PCDATA|code|ulink|functionname|methodname|classname|macroname|headername|enumname|globalname)*>
<!ATTLIST librarypurpose %boost.common.attrib;>
<!ELEMENT librarycategory (#PCDATA)>
<!ATTLIST librarycategory
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT libraryname (#PCDATA)>
<!ATTLIST libraryname %boost.common.attrib;>
<!ELEMENT library-reference ANY>
<!ATTLIST library-reference
%boost.common.attrib;>
<!ELEMENT librarylist EMPTY>
<!ATTLIST librarylist %boost.common.attrib;>
<!ELEMENT librarycategorylist (librarycategorydef)*>
<!ATTLIST librarycategorylist %boost.common.attrib;>
<!ELEMENT librarycategorydef (#PCDATA)>
<!ATTLIST librarycategorydef
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT header ANY>
<!ATTLIST header
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT namespace (%boost.namespace.mix;)*>
<!ATTLIST namespace
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT class (%boost.class.content;)>
<!ATTLIST class
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT struct (%boost.class.content;)>
<!ATTLIST struct
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT union (%boost.class.content;)>
<!ATTLIST union
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT class-specialization (%boost.class-specialization.content;)>
<!ATTLIST class-specialization
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT struct-specialization (%boost.class-specialization.content;)>
<!ATTLIST struct-specialization
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT union-specialization (%boost.class-specialization.content;)>
<!ATTLIST union-specialization
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT access (%boost.class.members;)+>
<!ATTLIST access
name CDATA #REQUIRED
%boost.common.attrib;>
<!--========= C++ Templates =========-->
<!ELEMENT template (%boost.template.mix;)*>
<!ATTLIST template %boost.common.attrib;>
<!ELEMENT template-type-parameter (default?, purpose?)>
<!ATTLIST template-type-parameter
name CDATA #REQUIRED
pack CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT template-nontype-parameter (type, default?, purpose?)>
<!ATTLIST template-nontype-parameter
name CDATA #REQUIRED
pack CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT template-varargs EMPTY>
<!ATTLIST template-varargs %boost.common.attrib;>
<!ELEMENT specialization (template-arg)*>
<!ATTLIST specialization %boost.common.attrib;>
<!ELEMENT template-arg ANY>
<!ATTLIST template-arg
pack CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT default ANY>
<!ATTLIST default %boost.common.attrib;>
<!ELEMENT inherit (type, purpose?)>
<!ATTLIST inherit
access CDATA #IMPLIED
pack CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT purpose ANY>
<!ATTLIST purpose %boost.common.attrib;>
<!ELEMENT description ANY>
<!ATTLIST description %boost.common.attrib;>
<!ELEMENT type ANY>
<!ATTLIST type %boost.common.attrib;>
<!ELEMENT typedef (type, purpose?, description?)>
<!ATTLIST typedef
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT enum (enumvalue*, purpose?, description?)>
<!ATTLIST enum
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT enumvalue (default?, purpose?, description?)>
<!ATTLIST enumvalue
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT static-constant (type, default, purpose?, description?)>
<!ATTLIST static-constant
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT data-member (type, purpose?, description?)>
<!ATTLIST data-member
name CDATA #REQUIRED
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT paramtype ANY>
<!ATTLIST paramtype %boost.common.attrib;>
<!ELEMENT effects ANY>
<!ATTLIST effects %boost.common.attrib;>
<!ELEMENT postconditions ANY>
<!ATTLIST postconditions %boost.common.attrib;>
<!ELEMENT method-group (method|overloaded-method)*>
<!ATTLIST method-group
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT constructor (template?, parameter*, %boost.function.semantics;)>
<!ATTLIST constructor
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT destructor (%boost.function.semantics;)>
<!ATTLIST destructor
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT method (template?, type, parameter*, %boost.function.semantics;)>
<!ATTLIST method
name CDATA #REQUIRED
cv CDATA #IMPLIED
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT function (template?, type, parameter*, %boost.function.semantics;)>
<!ATTLIST function
name CDATA #REQUIRED
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT overloaded-method (signature*, %boost.function.semantics;)>
<!ATTLIST overloaded-method
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT overloaded-function (signature*, %boost.function.semantics;)>
<!ATTLIST overloaded-function
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT signature (template?, type, parameter*)>
<!ATTLIST signature
cv CDATA #IMPLIED
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT requires ANY>
<!ATTLIST requires %boost.common.attrib;>
<!ELEMENT returns ANY>
<!ATTLIST returns %boost.common.attrib;>
<!ELEMENT throws ANY>
<!ATTLIST throws %boost.common.attrib;>
<!ELEMENT complexity ANY>
<!ATTLIST complexity %boost.common.attrib;>
<!ELEMENT notes ANY>
<!ATTLIST notes %boost.common.attrib;>
<!ELEMENT rationale ANY>
<!ATTLIST rationale %boost.common.attrib;>
<!ELEMENT functionname (#PCDATA)>
<!ATTLIST functionname
alt CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT enumname (#PCDATA)>
<!ATTLIST enumname
alt CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT macroname (#PCDATA)>
<!ATTLIST macroname
alt CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT headername (#PCDATA)>
<!ATTLIST headername
alt CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT globalname (#PCDATA)>
<!ATTLIST globalname
alt CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT copy-assignment
(template?, type?, parameter*, %boost.function.semantics;)>
<!ATTLIST copy-assignment
cv CDATA #IMPLIED
specifiers CDATA #IMPLIED
%boost.common.attrib;>
<!ELEMENT free-function-group (function|overloaded-function)*>
<!ATTLIST free-function-group
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT precondition ANY>
<!ATTLIST precondition %boost.common.attrib;>
<!ELEMENT code ANY>
<!ATTLIST code %boost.common.attrib;>
<!ELEMENT using-namespace EMPTY>
<!ATTLIST using-namespace
name CDATA #REQUIRED
%boost.common.attrib;>
<!ELEMENT using-class EMPTY>
<!ATTLIST using-class
name CDATA #REQUIRED
%boost.common.attrib;>
<!--========== Boost Testsuite Extensions ==========-->
<!ENTITY % boost.testsuite.tests
"compile-test|link-test|run-test|
compile-fail-test|link-fail-test|run-fail-test">
<!ENTITY % boost.testsuite.test.content
"source*, lib*, requirement*, purpose, if-fails?">
<!ELEMENT testsuite ((%boost.testsuite.tests;)+)>
<!ATTLIST testsuite %boost.common.attrib;>
<!ELEMENT compile-test (%boost.testsuite.test.content;)>
<!ATTLIST compile-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT link-test (%boost.testsuite.test.content;)>
<!ATTLIST link-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT run-test (%boost.testsuite.test.content;)>
<!ATTLIST run-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT compile-fail-test (%boost.testsuite.test.content;)>
<!ATTLIST compile-fail-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT link-fail-test (%boost.testsuite.test.content;)>
<!ATTLIST link-fail-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT run-fail-test (%boost.testsuite.test.content;)>
<!ATTLIST run-fail-test
filename CDATA #REQUIRED
name CDATA #IMPLIED>
<!ELEMENT source (#PCDATA|snippet)*>
<!ELEMENT snippet EMPTY>
<!ATTLIST snippet
name CDATA #REQUIRED>
<!ELEMENT lib (#PCDATA)>
<!ELEMENT requirement (#PCDATA)>
<!ATTLIST requirement
name CDATA #REQUIRED>
<!ELEMENT if-fails ANY>
<!ELEMENT parameter (paramtype, default?, description?)>
<!ATTLIST parameter
name CDATA #IMPLIED
pack CDATA #IMPLIED>
<!ELEMENT programlisting ANY>
<!ATTLIST programlisting
name CDATA #IMPLIED>
<!--========== Customize the DocBook DTD ==========-->
<!ENTITY % local.tech.char.class "|functionname|libraryname|enumname|headername|macroname|code">
<!ENTITY % local.para.class
"|using-namespace|using-class|librarylist|librarycategorylist">
<!ENTITY % local.descobj.class "|libraryinfo">
<!ENTITY % local.classname.attrib "alt CDATA #IMPLIED">
<!ENTITY % local.methodname.attrib "alt CDATA #IMPLIED">
<!ENTITY % local.refentry.class "|library-reference|testsuite">
<!ENTITY % local.title.char.mix "">
<!ENTITY % programlisting.module "IGNORE">
<!ENTITY % parameter.module "IGNORE">
<!ENTITY % function.module "IGNORE">
<!ENTITY % type.module "IGNORE">
<!--========== Import DocBook DTD ==========-->
<!ENTITY % DocBook PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
%DocBook;

1
src/nudb/doc/docca Submodule

Submodule src/nudb/doc/docca added at 335dbf9c36

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

14
src/nudb/doc/index.xml Normal file
View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN" "boostbook.dtd">
<!--
Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-->
<section id="nudb.index">
<title>Index</title>
<index/>
</section>

342
src/nudb/doc/main.qbk Normal file
View File

@@ -0,0 +1,342 @@
[/
Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
]
[library NuDB
[quickbook 1.6]
[copyright 2015 - 2016 Vinnie Falco]
[purpose C++ Library]
[license
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
[@http://www.boost.org/LICENSE_1_0.txt])
]
[authors [Falco, Vinnie]]
[category template]
[category generic]
]
[template mdash[] '''&mdash; ''']
[template indexterm1[term1] '''<indexterm><primary>'''[term1]'''</primary></indexterm>''']
[template indexterm2[term1 term2] '''<indexterm><primary>'''[term1]'''</primary><secondary>'''[term2]'''</secondary></indexterm>''']
[variablelist
[[
[link nudb.overview Overview]
][
An overview of features, requirements, and credits, plus
rationale and design information.
]]
[[
[link nudb.example Example]
][
An example that illustrates the use of NuDB.
]]
[[
[link nudb.usage Usage]
][
An explanation of operations on the database.
]]
[[
[link nudb.ref Reference]
][
Detailed class and function reference.
]]
[[
[link nudb.index Index]
][
Book-style text index of the documentation.
]]
]
[section:overview Overview]
NuDB is an append only, key/value store specifically optimized for random
read performance on modern SSDs or equivalent high-IOPS devices. The most
common application for NuDB is content addressable storage where a
cryptographic digest of the data is used as the key. The read performance
and memory usage are independent of the size of the database. These are
some other features:
[heading History]
The first versions of rippled, the application behind the Ripple consensus
network, used SQLite as their back end for unstructured data. The
performance quickly became a limiting factor.
Rippled then went through a series of back ends including LMDB, LevelDB, and
RocksDB. Each of these databases performed well at first, but as the data
size increased, memory usage increased and performance dropped off drastically.
The problem is caching. Each of these databases relies on some O(n) data
structure, such as a Bloom filter, to improve their performance. These work
well until the structures no longer fit in memory. In addition, many virtual
machines are memory constrained.
To address this issue, the developers performed a thought experiment -- if
you assume the data size is so large that no O(n) caching is effective, what
is the best read performance you could expect? They reached the following
conclusions:
1) Writes should not block reads.
2) Reads should be limited only by the SSD's IOPS limit.
3) A read for a non-present key should require one IOP.
4) A read for a present key whose data can be read in a single IOP should
only require two IOPs, one to figure out where it is and one to read it in.
NuDB is designed to come as close to this ideal as possible.
[heading Design]
NuDB uses three files to hold the data and indexes. The data file is append
only and contains sufficient information to rebuild the index. The index
file is random access and contains hash buckets. When an update is in
progress, a temporary journal file is used to roll the update back if
needed.
NuDB uses linear hashing to dynamically increase the number of buckets in
the index file as the data size grows. Bucket overflows are handled by
adding "overflow" records to the data file. Bucket overflows can be
minimized by increasing the number of buckets, leading to a size/speed
tradeoff. Typical databases keep the average bucket half full (or half
empty, depending on your point of view) resulting in spill records
accounting for less than 1% of reads.
Inserts are buffered in memory and appended to the data file immediately.
Updates to the index file are performed as an atomic operation. Fetch
operations retrieve records in the process of being modified from memory
during the update operation so that writes do not block fetches.
Before the index file is modified, a journal file is created to recover
consistency in the event of a crash during the update. The recovery process
will index all records written to the data file, so the aggregation of index
updates does not increase the time which a crash would result in loss of
data.
Iteration can be performed on the data file directly. Since it is append
only, there is no risk of other operations corrupting an iteration in
progress.
[heading Performance]
Writes do not block reads. Read rates are typically around 90% of the SSD's
IOPS limit. An average fetch for a non-present key typically requires fewer
than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01
IOPs plus however many IOPs it takes to read the data.
[heading Applications]
Content addressable storage associates data with its cryptographic digest.
This type of storage is commonly used in decentralized blockchain applications.
Often these applications require following hash chains -- where one object
contains the hash of another object that ultimately leads to the object
desired. NuDB's low latency and high speed are particularly advantageous
in these kinds of applications.
NuDB is append only and does not support a delete operation. To support
retaining limited historical information, NuDB is often used in a dual
database configuration. One database is older and is read only, the other
is newer and is read/write. Periodically, the older database is discarded and
the newer database becomes the new read only database and a new read/write
database is created.
[endsect]
[section:example Example]
This complete program creates a database, opens the database, inserts several
key/value pairs, fetches the key/value pairs, closes the database, then erases
the database files. Source code for this program is located in the examples
directory.
```
#include <nudb/nudb.hpp>
#include <cstddef>
#include <cstdint>
int main()
{
using namespace nudb;
std::size_t constexpr N = 1000;
using key_type = std::uint32_t;
error_code ec;
auto const dat_path = "db.dat";
auto const key_path = "db.key";
auto const log_path = "db.log";
create<xxhasher>(
dat_path, key_path, log_path,
1,
make_salt(),
sizeof(key_type),
block_size("."),
0.5f,
ec);
store db;
db.open(dat_path, key_path, log_path,
16 * 1024 * 1024, ec);
char data = 0;
// Insert
for(key_type i = 0; i < N; ++i)
db.insert(&i, &data, sizeof(data), ec);
// Fetch
for(key_type i = 0; i < N; ++i)
db.fetch(&i,
[&](void const* buffer, std::size_t size)
{
// do something with buffer, size
}, ec);
db.close(ec);
erase_file(dat_path);
erase_file(key_path);
erase_file(log_path);
}
```
[endsect]
[section:usage Usage]
[heading Files]
A database is represented by three files: the data file, the key file,
and the log file. Each file has a distinct header in a well known format.
The data file holds all of the key/value pairs and is serially iterable. The
key file holds a hash table indexing all of the contents in the data file.
The log file holds information used to roll the database back in the event
of a failure.
[heading Create/Open]
The [link nudb.ref.nudb__create create] function creates a new data file and key
file for a database with the specified parameters. The caller specifies
the hash function to use as a template argument, the file paths,
and the database constants:
[note
Sample code and identifiers mentioned in this section are written
as if the following declarations are in effect:
```
#include <nudb/nudb.hpp>
using namespace nudb;
error_code ec;
```
]
```
create<xxhasher>(
    "nudb.dat",      // Path to data file
    "nudb.key",      // Path to key file
    "nudb.log",      // Path to log file
    1,               // Application-defined constant
    make_salt(),     // A random integer
    4,               // The size of keys
    block_size("."), // Block size in key file
    0.5f,            // The load factor
    ec);
```
The application-defined constant is a 64-bit unsigned integer which the
caller may set to any value. This value can be retrieved from an open
database, where it will be equal to the value used at creation time. This
constant can be used for any purpose. For example, to inform the application
of what application-specific version was used to create the database.
The salt is a 64-bit unsigned integer used to prevent algorithmic complexity
attacks. Hash functions used during database operations are constructed with
the salt, providing an opportunity to permute the hash function. This feature
is useful when inserted database keys come from untrusted sources, such as the
network.
The key size is specified when the database is created, and cannot be changed.
All key files indexing the same data file will use the key size of the data
file.
The block size indicates the size of buckets in the key file. The best choice
for the block size is the natural sector size of the device. For most SSDs
in production today this is 4096, or less often 8192 or 16384. The function
[link nudb.ref.nudb__block_size block_size] returns the best guess of the block
size used by the device mounted at the specified path.
The load factor determines the target bucket occupancy fraction. There is
almost never a need to specify anything other than the recommended value of
0.5, which strikes the perfect balance of space-efficiency and fast lookup.
An open database is represented by objects of type
[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type
alias [link nudb.ref.nudb__store store] represents a database using
[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open
a database, declare a database object and then call the
[link nudb.ref.nudb__basic_store.open open] member function:
```
store db;
db.open("nudb.dat", "nudb.key", "nudb.log", ec);
```
When opening a database that was previously opened by a program that was
terminated abnormally, the implementation automatically invokes the
recovery process. This process restores the integrity of the database by
replaying the log file if it is present.
[heading Insert/Fetch]
Once a database is open, it becomes possible to insert new key/value pairs
and look them up. Insertions are straightforward:
```
db.insert(key, data, bytes, ec);
```
If the key already exists, the error is set to
[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB
database must be unique. Multiple threads can call insert at the same time.
Internally however, insertions are serialized to present a consistent view
of the database to callers.
Retrieving a key/value pair if it exists is similarly straightforward:
```
db.fetch(key,
[&](void const* buffer, std::size_t size)
{
...
}, ec);
```
To give callers control over memory allocation strategies, the fetch
function takes a callback object as a parameter. The callback is invoked
with a pointer to the data and size, if the item exists in the database.
The callback can decide how to store this information, if at all.
[endsect]
[section Command Line Tool]
To allow administration, NuDB comes with the "nudb" command line tool,
which may be built using b2 or CMake. Files for the tool are located in
the "tools" directory. Once the tool is built, and located in your path,
execute this command for additional instructions:
```
nudb help
```
[endsect]
[section:ref Reference]
[xinclude quickref.xml]
[include types/File.qbk]
[include types/Hasher.qbk]
[include types/Progress.qbk]
[include reference.qbk]
[endsect]
[xinclude index.xml]

12
src/nudb/doc/makeqbk.sh Normal file
View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
# Regenerates reference.qbk from the C++ sources:
#   1. Run doxygen (source.dox) to emit XML into temp/
#   2. Combine the per-compound XML files into a single document
#   3. Transform the combined XML into quickbook via reference.xsl

# Abort on the first failing command so a doxygen or xsltproc error
# does not silently produce a stale or empty reference.qbk.
# (Previously the script used the non-portable /usr/bin/bash shebang
# and kept running after failures.)
set -e

mkdir -p temp
doxygen source.dox
cd temp
xsltproc combine.xslt index.xml > all.xml
xsltproc ../reference.xsl all.xml > ../reference.qbk

82
src/nudb/doc/quickref.xml Normal file
View File

@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN" "boostbook.dtd">
<!--
Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-->
<informaltable frame="all">
<tgroup cols="3">
<colspec colname="a"/>
<colspec colname="b"/>
<colspec colname="c"/>
<thead>
<row>
<entry valign="center" namest="a" nameend="c">
<bridgehead renderas="sect2">NuDB</bridgehead>
</entry>
</row>
</thead>
<tbody>
<row>
<entry valign="top">
<bridgehead renderas="sect3">Classes</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.nudb__basic_store">basic_store</link></member>
<member><link linkend="nudb.ref.nudb__native_file">native_file</link></member>
<member><link linkend="nudb.ref.nudb__no_progress">no_progress</link></member>
<member><link linkend="nudb.ref.nudb__posix_file">posix_file</link></member>
<member><link linkend="nudb.ref.nudb__store">store</link></member>
<member><link linkend="nudb.ref.nudb__win32_file">win32_file</link></member>
<member><link linkend="nudb.ref.nudb__xxhasher">xxhasher</link></member>
</simplelist>
<bridgehead renderas="sect3">Constants</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.nudb__errc">errc</link></member>
<member><link linkend="nudb.ref.nudb__error">error</link></member>
<member><link linkend="nudb.ref.nudb__file_mode">file_mode</link></member>
</simplelist>
</entry>
<entry valign="top">
<bridgehead renderas="sect3">Functions</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.nudb__block_size">block_size</link></member>
<member><link linkend="nudb.ref.nudb__create">create</link></member>
<member><link linkend="nudb.ref.nudb__erase_file">erase_file</link></member>
<member><link linkend="nudb.ref.nudb__make_error_code">make_error_code</link></member>
<member><link linkend="nudb.ref.nudb__recover">recover</link></member>
<member><link linkend="nudb.ref.nudb__rekey">rekey</link></member>
<member><link linkend="nudb.ref.nudb__verify">verify</link></member>
<member><link linkend="nudb.ref.nudb__visit">visit</link></member>
</simplelist>
<bridgehead renderas="sect3">Type Traits</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.nudb__is_File">is_File</link></member>
<member><link linkend="nudb.ref.nudb__is_Hasher">is_Hasher</link></member>
<member><link linkend="nudb.ref.nudb__is_Progress">is_Progress</link></member>
</simplelist>
</entry>
<entry valign="top">
<bridgehead renderas="sect3">Types</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.nudb__error_category">error_category</link></member>
<member><link linkend="nudb.ref.nudb__error_code">error_code</link></member>
<member><link linkend="nudb.ref.nudb__error_condition">error_condition</link></member>
<member><link linkend="nudb.ref.nudb__path_type">path_type</link></member>
<member><link linkend="nudb.ref.nudb__system_error">system_error</link></member>
<member><link linkend="nudb.ref.nudb__verify_info">verify_info</link></member>
</simplelist>
<bridgehead renderas="sect3">Concepts</bridgehead>
<simplelist type="vert" columns="1">
<member><link linkend="nudb.ref.File">File</link></member>
<member><link linkend="nudb.ref.Hasher">Hasher</link></member>
<member><link linkend="nudb.ref.Progress">Progress</link></member>
</simplelist>
</entry>
</row>
</tbody>
</tgroup>
</informaltable>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

<!-- Project-specific settings for the shared docca Doxygen-to-quickbook
     transform included at the bottom; makeqbk.sh applies this stylesheet
     to the combined Doxygen XML to produce reference.qbk. -->

<!-- Variables (Edit for your project) -->
<!-- Prefix for generated link anchors, e.g. "nudb.ref.store". -->
<xsl:variable name="doc-ref" select="'nudb.ref.'"/>
<!-- C++ namespace stripped from entity names in the output. -->
<xsl:variable name="doc-ns" select="'nudb'"/>
<!-- Nonzero emits docca debug output during the transform. -->
<xsl:variable name="debug" select="0"/>
<!-- Nonzero includes private members in the reference. -->
<xsl:variable name="private" select="0"/>
<!-- End Variables -->

<xsl:include href="docca/include/docca/doxygen.xsl"/>

</xsl:stylesheet>

333
src/nudb/doc/source.dox Normal file
View File

@@ -0,0 +1,333 @@
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "NuDB"
PROJECT_NUMBER =
PROJECT_BRIEF = C++ Library
PROJECT_LOGO =
OUTPUT_DIRECTORY =
CREATE_SUBDIRS = NO
ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = YES
FULL_PATH_NAMES = NO
STRIP_FROM_PATH = ../include/
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = YES
QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS = YES
SEPARATE_MEMBER_PAGES = NO
TAB_SIZE = 4
ALIASES =
TCL_SUBST =
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
AUTOLINK_SUPPORT = YES
BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
SIP_SUPPORT = NO
IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
GROUP_NESTED_COMPOUNDS = NO
SUBGROUPING = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS = NO
TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = YES
EXTRACT_PRIVATE = YES
EXTRACT_PACKAGE = NO
EXTRACT_STATIC = YES
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = YES
EXTRACT_ANON_NSPACES = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
HIDE_COMPOUND_REFERENCE= NO
SHOW_INCLUDE_FILES = NO
SHOW_GROUPED_MEMB_INC = NO
FORCE_LOCAL_INCLUDES = NO
INLINE_INFO = NO
SORT_MEMBER_DOCS = NO
SORT_BRIEF_DOCS = NO
SORT_MEMBERS_CTORS_1ST = YES
SORT_GROUP_NAMES = NO
SORT_BY_SCOPE_NAME = NO
STRICT_PROTO_MATCHING = NO
GENERATE_TODOLIST = NO
GENERATE_TESTLIST = NO
GENERATE_BUGLIST = NO
GENERATE_DEPRECATEDLIST= NO
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = NO
SHOW_FILES = NO
SHOW_NAMESPACES = NO
FILE_VERSION_FILTER =
LAYOUT_FILE =
CITE_BIB_FILES =
#---------------------------------------------------------------------------
# Configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_NO_PARAMDOC = NO
WARN_AS_ERROR = NO
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# Configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = ../include/nudb/
INPUT_ENCODING = UTF-8
FILE_PATTERNS =
RECURSIVE = NO
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXCLUDE_SYMBOLS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_PATTERNS =
FILTER_SOURCE_FILES = NO
FILTER_SOURCE_PATTERNS =
USE_MDFILE_AS_MAINPAGE =
#---------------------------------------------------------------------------
# Configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = NO
REFERENCES_RELATION = NO
REFERENCES_LINK_SOURCE = YES
SOURCE_TOOLTIPS = YES
USE_HTAGS = NO
VERBATIM_HEADERS = YES
CLANG_ASSISTED_PARSING = NO
CLANG_OPTIONS =
#---------------------------------------------------------------------------
# Configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = NO
HTML_OUTPUT = dhtm
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_EXTRA_STYLESHEET =
HTML_EXTRA_FILES =
HTML_COLORSTYLE_HUE = 220
HTML_COLORSTYLE_SAT = 100
HTML_COLORSTYLE_GAMMA = 80
HTML_TIMESTAMP = NO
HTML_DYNAMIC_SECTIONS = NO
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "Doxygen generated docs"
DOCSET_BUNDLE_ID = org.doxygen.Project
DOCSET_PUBLISHER_ID = org.doxygen.Publisher
DOCSET_PUBLISHER_NAME = Publisher
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
CHM_INDEX_ENCODING =
BINARY_TOC = NO
TOC_EXPAND = NO
GENERATE_QHP = NO
QCH_FILE =
QHP_NAMESPACE = org.doxygen.Project
QHP_VIRTUAL_FOLDER = doc
QHP_CUST_FILTER_NAME =
QHP_CUST_FILTER_ATTRS =
QHP_SECT_FILTER_ATTRS =
QHG_LOCATION =
GENERATE_ECLIPSEHELP = NO
ECLIPSE_DOC_ID = org.doxygen.Project
DISABLE_INDEX = NO
GENERATE_TREEVIEW = NO
ENUM_VALUES_PER_LINE = 4
TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
USE_MATHJAX = NO
MATHJAX_FORMAT = HTML-CSS
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
MATHJAX_EXTENSIONS =
MATHJAX_CODEFILE =
SEARCHENGINE = YES
SERVER_BASED_SEARCH = NO
EXTERNAL_SEARCH = NO
SEARCHENGINE_URL =
SEARCHDATA_FILE = searchdata.xml
EXTERNAL_SEARCH_ID =
EXTRA_SEARCH_MAPPINGS =
#---------------------------------------------------------------------------
# Configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4
EXTRA_PACKAGES =
LATEX_HEADER =
LATEX_FOOTER =
LATEX_EXTRA_STYLESHEET =
LATEX_EXTRA_FILES =
PDF_HYPERLINKS = YES
USE_PDFLATEX = YES
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
LATEX_SOURCE_CODE = NO
LATEX_BIB_STYLE = plain
LATEX_TIMESTAMP = NO
#---------------------------------------------------------------------------
# Configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
RTF_SOURCE_CODE = NO
#---------------------------------------------------------------------------
# Configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_SUBDIR =
MAN_LINKS = NO
#---------------------------------------------------------------------------
# Configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = YES
XML_OUTPUT = temp/
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# Configuration options related to the DOCBOOK output
#---------------------------------------------------------------------------
GENERATE_DOCBOOK = NO
DOCBOOK_OUTPUT = docbook
DOCBOOK_PROGRAMLISTING = NO
#---------------------------------------------------------------------------
# Configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES
INCLUDE_PATH = ../
INCLUDE_FILE_PATTERNS =
PREDEFINED = DOXYGEN \
GENERATING_DOCS \
_MSC_VER \
NUDB_POSIX_FILE=1
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration options related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = NO
MSCGEN_PATH =
DIA_PATH =
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
DOT_NUM_THREADS = 0
DOT_FONTNAME = Helvetica
DOT_FONTSIZE = 10
DOT_FONTPATH =
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
GROUP_GRAPHS = YES
UML_LOOK = NO
UML_LIMIT_NUM_FIELDS = 10
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = png
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
DIAFILE_DIRS =
PLANTUML_JAR_PATH =
PLANTUML_INCLUDE_PATH =
DOT_GRAPH_MAX_NODES = 50
MAX_DOT_GRAPH_DEPTH = 0
DOT_TRANSPARENT = NO
DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
DOT_CLEANUP = YES

159
src/nudb/doc/types/File.qbk Normal file
View File

@@ -0,0 +1,159 @@
[/
Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
]
[section:File File]
The [*File] concept abstracts access to files in the underlying file system.
Two implementations are provided, one for the Win32 API and the other for
POSIX compliant systems. The [link nudb.ref.nudb__native_file native_file] type
alias is automatically set to either [link nudb.ref.nudb__win32_file win32_file]
or [link nudb.ref.nudb__posix_file posix_file] as appropriate.
To support interfaces other than Win32 or POSIX, callers may provide their
own [*File] type that meets these requirements. The unit test code also provides
its own [*File] type which causes simulated operating system file failures
to exercise all failure paths in the implementation.
In the table below:
* `X` denotes a [*File] type
* `a` and `b` denote values of type `X`
* `c` denotes a (possibly const) value of type `X`
* `m` denotes a value of type [link nudb.ref.nudb__file_mode file_mode]
* `f` denotes a value of type [link nudb.ref.nudb__path_type path_type]
* `q` denotes a value of type `void*`
* `p` denotes a value of type `void const*`
* `ec` denotes a value of type [link nudb.ref.nudb__error_code error_code]
* `o` denotes a value of type `std::uint64_t`
* `n` denotes a value of type `std::size_t`
[table File requirements
[[operation] [type] [semantics, pre/post-conditions]]
[
[`X a{std::move(b)}`]
[ ]
[
`X` is `MoveConstructible`
]
]
[
[`c.is_open()`]
[`bool`]
[
Returns `true` if `c` refers to an open file.
]
]
[
[`a.close()`]
[ ]
[
If `a` refers to an open file, closes the file. Does nothing if
`a` does not refer to an open file. After this call, `a.is_open()`
will return `false`.
]
]
[
[`a.create(m,f,ec)`]
[ ]
[
Attempts to create a file at the path specified by `f`, and
open it with the mode specified by `m`. If an error occurs,
`ec` is set to the system specific error code. If no error
occurs, a subsequent call to `a.is_open()` will return `true`.
Undefined behavior if `a` already refers to an open file.
]
]
[
[`a.open(m,f,ec)`]
[ ]
[
Attempts to open the file at the path specified by `f`. If
an error occurs, `ec` is set to the system specific error
code. If no error occurs, a subsequent call to `a.is_open()`
will return `true`. Undefined behavior if `a` already refers
to an open file.
]
]
[
[`X::erase(f,ec)`]
[ ]
[
Attempts to delete the file at the path specified by `f`.
If an error occurs, `ec` is set to the system specific error
code.
]
]
[
[`c.size(ec)`]
[ `std::uint64_t` ]
[
Returns the size of the file in bytes. This value is also equal to
the lowest byte offset for which a read will always return a
[link nudb.ref.nudb__error short_read] error. Undefined
behavior if `a` does not refer to an open file.
]
]
[
[`a.read(o,p,n,ec)`]
[ ]
[
Attempts to read `n` bytes from the open file referred to by `a`,
starting at offset `o`, and storing the results in the memory
pointed to by `p`, which must be at least of size `n` bytes.
If an error occurs, `ec` is set to the system specific error
code. Undefined behavior if `a` does not refer to an open file.
]
]
[
[`a.write(o,q,n,ec)`]
[ ]
[
Attempts to write `n` bytes to the open file referred to by `a`
and opened with a write mode, starting at offset `o`, reading
the data from the memory pointed to by `q`, which must be at
least of size `n` bytes. If an error occurs, `ec` is set to the
system specific error code. Undefined behavior if `a` does not
refer to an open file.
]
]
[
[`a.sync(ec)`]
[ ]
[
Attempts to synchronize the file on disk. This instructs the
operating system to ensure that any data which resides in caches
or buffers is fully written to the underlying storage device
before this call returns. If an error occurs, `ec` is set to the
system specific error code. Undefined behavior if `a` does not
refer to an open file.
NuDB's database integrity guarantees are only valid if the
implementation of `sync` assures that all data is fully written
to the underlying file before the call returns.
]
]
[
[`a.trunc(o,ec)`]
[ ]
[
Attempts to change the size of the open file referred to by `a`
and opened with a write mode, to the size in bytes specified
by `o`. If an error occurs, `ec` is set to the system specific
error code. Undefined behavior if `a` does not refer to an open
file. After a successful call, `a.size(ec)` will return `o`.
NuDB's database integrity guarantees are only valid if the
implementation of `trunc` assures that subsequent calls to
`size` will return `o`, even if the program is terminated or the
device is taken offline before calling `size`.
]
]
]
[endsect]

View File

@@ -0,0 +1,56 @@
[/
Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
]
[section:Hasher Hasher]
A [*Hasher] implements a hash algorithm. This is used to compute the small
digests NuDB needs to effectively implement a hash table. NuDB provides
the default implementation [link nudb.ref.nudb__xxhasher xxhasher], which is
suitable for most use cases. For advanced applications, a user supplied
hash function may be supplied which must meet these requirements.
In the table below:
* `X` denotes a hasher class
* `a` denotes a value of type `X const`
* `s` denotes a value of type `std::uint64_t`
* `p` denotes a value of type `void const*`
* `n` denotes a value of type `std::size_t`
[table Hasher requirements
[[operation] [type] [semantics, pre/post-conditions]]
[
[`X a{s}`]
[ ]
[
`a` is constructed with a seed value integer. To achieve resistance
from algorithmic complexity attacks, an implementation of [*Hasher]
should ensure that values returned from the hash function will be
distinctly different for different values of `s` given the same
inputs. If algorithmic complexity attack resistance is not a
requirement, the seed may be ignored upon construction.
]
]
[
[`a(p,n)`]
[ `std::uint64_t` ]
[
Returns the digest of the memory `n` bytes in size and pointed
to by `p`. `n` will never be zero. A good hash function will
return values with these qualities:
* Values are uniformly distributed in the full range
* Values for the same input are distinctly different for different seeds
* Small changes in the input produce unpredictable output values
]
]
]
[endsect]

View File

@@ -0,0 +1,40 @@
[/
Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
]
[section:Progress Progress]
A [*Progress] object provides feedback to callers on the progress of
long running operations such as calls to [link nudb.ref.nudb__verify verify] or
[link nudb.ref.nudb__rekey rekey] which can take days or weeks for databases that
measure in the terabytes. These objects are used by passing them as parameters
to the appropriate functions, where they will be called periodically with
numbers that indicate the amount of work completed, versus the total amount
of work required.
In the table below:
* `X` denotes a progress class
* `a` denotes a value of type `X`
* `p` and `q` denote values of type `std::uint64_t`
[table Progress requirements
[[operation] [type] [semantics, pre/post-conditions]]
[
[`a(p, q)`]
[ ]
[
Indicates to the progress object that work has been performed and
intermediate results calculated. `p` represents the amount of work
completed from the beginning of the operation. `q` represents the
total amount of work required. The fraction of completed work is
therefore `p/q`, with zero representing no work complete, and one
representing all work complete. `p` and `q` are unitless.
]
]
]
[endsect]

View File

@@ -0,0 +1,17 @@
# Part of nudb
GroupSources (include/nudb nudb)
GroupSources (extras/nudb extras)
GroupSources (examples/ "/")

# Build the example program. The header lists are added as sources only
# so IDE generators (Visual Studio, Xcode) display them in the project.
add_executable (example
    ${NUDB_INCLUDES}
    ${EXTRAS_INCLUDES}
    example.cpp
)

# Use an explicit visibility keyword with target_link_libraries; the
# keyword-less form has legacy semantics and must not be mixed with the
# keyword form on the same target. POSIX builds additionally need librt
# and a threading library.
if (WIN32)
    target_link_libraries (example PRIVATE ${Boost_LIBRARIES})
else ()
    target_link_libraries (example PRIVATE ${Boost_LIBRARIES} rt Threads::Threads)
endif ()

12
src/nudb/examples/Jamfile Normal file
View File

@@ -0,0 +1,12 @@
#
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#

# Boost.Build jamfile: builds the example program from example.cpp.
# Include paths and usage requirements presumably come from the parent
# project jamfile -- TODO confirm against the project root.
import os ;

exe example :
    example.cpp
    ;

View File

@@ -0,0 +1,46 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <nudb/nudb.hpp>
#include <cstddef>
#include <cstdint>
// Minimal usage example: create a database, insert N one-byte values
// keyed by a 32-bit integer, fetch them back, then close and remove
// the database files.
int main()
{
    using namespace nudb;
    std::size_t constexpr N = 1000;
    using key_type = std::uint32_t;

    // NOTE(review): `ec` is reused for every call and never inspected;
    // acceptable for a demo, but real code should check it after each step.
    error_code ec;
    auto const dat_path = "db.dat";
    auto const key_path = "db.key";
    auto const log_path = "db.log";

    // Create the three database files: appnum = 1, a random salt,
    // fixed 4-byte keys, the block size of the filesystem at ".",
    // and a 0.5 target load factor.
    create<xxhasher>(
        dat_path, key_path, log_path,
        1,
        make_salt(),
        sizeof(key_type),
        block_size("."),
        0.5f,
        ec);
    store db;
    db.open(dat_path, key_path, log_path, ec);

    char data = 0;
    // Insert: the key is the loop counter's bytes, the value one byte.
    for(key_type i = 0; i < N; ++i)
        db.insert(&i, &data, sizeof(data), ec);
    // Fetch: the callback receives the stored value for each key.
    for(key_type i = 0; i < N; ++i)
        db.fetch(&i,
            [&](void const* buffer, std::size_t size)
            {
                // do something with buffer, size
            }, ec);
    db.close(ec);

    // Remove the files. NOTE(review): no error_code is passed here, so
    // presumably an overload that throws or ignores errors is selected
    // -- TODO confirm against nudb's erase_file declarations.
    erase_file(dat_path);
    erase_file(key_path);
    erase_file(log_path);
}

View File

@@ -0,0 +1,5 @@
This directory contains:
* Additional interfaces not strictly part of NuDB's public APIs
* Git submodules of dependencies used to build the tests and benchmarks

1
src/nudb/extras/beast Submodule

Submodule src/nudb/extras/beast added at 2f9a8440c2

View File

@@ -0,0 +1,200 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef BASIC_SECONDS_CLOCK_HPP
#define BASIC_SECONDS_CLOCK_HPP
#include "chrono_util.hpp"
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>
namespace detail {
// Interface for objects polled periodically by seconds_clock_thread
// to refresh their cached time sample.
class seconds_clock_worker
{
public:
    // Virtual destructor: this class is used as a polymorphic base
    // (workers are held by base pointer), so destruction through the
    // base must reach the derived destructor.
    virtual ~seconds_clock_worker() = default;

    // Called roughly once per second to take a fresh sample.
    virtual void sample() = 0;
};
//------------------------------------------------------------------------------

// Updates the clocks
//
// Owns a background thread that wakes shortly before each second boundary
// and asks every registered seconds_clock_worker to re-sample its clock.
// The worker list and the stop flag are serialized by m_.
class seconds_clock_thread
{
public:
    using mutex = std::mutex;
    using cond_var = std::condition_variable;
    using lock_guard = std::lock_guard <mutex>;
    using unique_lock = std::unique_lock <mutex>;
    using clock_type = std::chrono::steady_clock;
    using seconds = std::chrono::seconds;
    using thread = std::thread;
    using workers = std::vector <seconds_clock_worker*>;

    bool stop_;         // written under m_, read by run()
    mutex m_;
    cond_var cond_;
    workers workers_;   // raw pointers, not owned; workers unregister themselves
    thread thread_;

    seconds_clock_thread()
        : stop_(false)
    {
        // The sampling thread runs for the lifetime of the singleton.
        thread_ = thread{
            &seconds_clock_thread::run, this};
    }

    ~seconds_clock_thread()
    {
        stop();
    }

    // Register a worker to be sampled once per second.
    void add(seconds_clock_worker& w)
    {
        lock_guard lock{m_};
        workers_.push_back(&w);
    }

    // Unregister a previously added worker.
    void remove(seconds_clock_worker& w)
    {
        lock_guard lock{m_};
        workers_.erase(std::find(
            workers_.begin(), workers_.end(), &w));
    }

    // Signal the thread to stop and join it. Safe to call more than
    // once: subsequent calls see a non-joinable thread and do nothing.
    void stop()
    {
        if(thread_.joinable())
        {
            {
                lock_guard lock{m_};
                stop_ = true;
            }
            cond_.notify_all();
            thread_.join();
        }
    }

    void run()
    {
        unique_lock lock{m_};
        for(;;)
        {
            // Sample while holding the lock so add()/remove() cannot
            // mutate the vector mid-iteration.
            for(auto iter : workers_)
                iter->sample();
            using namespace std::chrono;
            // Wake at 900ms past the current whole second, so the next
            // sample lands just before the following second boundary.
            clock_type::time_point const when(
                floor <seconds>(
                    clock_type::now().time_since_epoch()) +
                milliseconds(900));
            // wait_until releases the lock while sleeping; returns true
            // (and we exit) only when stop_ has been set.
            if(cond_.wait_until(lock, when, [this]{ return stop_; }))
                return;
        }
    }

    // Meyers singleton: constructed (and the thread started) on first use.
    static seconds_clock_thread& instance()
    {
        static seconds_clock_thread singleton;
        return singleton;
    }
};
} // detail
//------------------------------------------------------------------------------

/** Called before main exits to terminate the utility thread.
    This is a workaround for Visual Studio 2013:
    http://connect.microsoft.com/VisualStudio/feedback/details/786016/creating-a-global-c-object-that-used-thread-join-in-its-destructor-causes-a-lockup
    http://stackoverflow.com/questions/10915233/stdthreadjoin-hangs-if-called-after-main-exits-when-using-vs2012-rc
*/
inline
void
basic_seconds_clock_main_hook()
{
#ifdef _MSC_VER
    // Only needed on MSVC, where joining a thread from a static
    // destructor after main() returns can deadlock (see links above).
    detail::seconds_clock_thread::instance().stop();
#endif
}
/** A clock whose minimum resolution is one second.

    The purpose of this class is to optimize the performance of the now()
    member function call. It uses a dedicated thread that wakes up at least
    once per second to sample the requested trivial clock.

    @tparam Clock A type meeting these requirements:
        http://en.cppreference.com/w/cpp/concept/Clock
*/
template<class Clock>
class basic_seconds_clock
{
public:
    using rep = typename Clock::rep;
    using period = typename Clock::period;
    using duration = typename Clock::duration;
    using time_point = typename Clock::time_point;

    static bool const is_steady = Clock::is_steady;

    static time_point now()
    {
        // Make sure the thread is constructed before the
        // worker otherwise we will crash during destruction
        // of objects with static storage duration.
        struct initializer
        {
            initializer()
            {
                detail::seconds_clock_thread::instance();
            }
        };
        static initializer init;

        // One cached-sample worker per Clock instantiation. It registers
        // itself with the sampling thread on construction and unregisters
        // in its destructor; m_ guards the cached time_point.
        struct worker : detail::seconds_clock_worker
        {
            time_point m_now;
            std::mutex m_;

            worker()
                : m_now(Clock::now())
            {
                detail::seconds_clock_thread::instance().add(*this);
            }

            ~worker()
            {
                detail::seconds_clock_thread::instance().remove(*this);
            }

            // Returns the most recent sample (not the live clock value).
            time_point now()
            {
                std::lock_guard<std::mutex> lock{m_};
                return m_now;
            }

            // Called by the sampling thread about once per second.
            void sample()
            {
                std::lock_guard<std::mutex> lock{m_};
                m_now = Clock::now();
            }
        };
        static worker w;
        return w.now();
    }
};
#endif

View File

@@ -0,0 +1,58 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef CHRONO_UTIL_HPP
#define CHRONO_UTIL_HPP
#include <chrono>
// From Howard Hinnant
// http://home.roadrunner.com/~hinnant/duration_io/chrono_util.html
#if !defined(_MSC_FULL_VER) || (_MSC_FULL_VER <= 190023506)
// round down: largest value of To not exceeding d.
template <class To, class Rep, class Period>
To floor(std::chrono::duration <Rep, Period> const& d)
{
    To result = std::chrono::duration_cast<To>(d);
    // duration_cast truncates toward zero, so for a negative remainder
    // the truncated value lies above d and must be stepped down once.
    return result > d ? --result : result;
}
// round to nearest, to even on tie
//
// Returns the value of type To nearest to d; when d is exactly halfway
// between two representable values, the even count is chosen (matching
// IEEE round-half-to-even, and Hinnant's reference implementation).
template <class To, class Rep, class Period>
To round (std::chrono::duration <Rep, Period> const& d)
{
    // Start from floor(d), not duration_cast(d): duration_cast truncates
    // toward zero, so for negative d it can land *above* d, which made
    // the original candidate pair wrong (e.g. -1600ms rounded to -1s
    // instead of -2s).
    To t0 = std::chrono::duration_cast<To>(d);
    if (t0 > d)
        --t0;
    To t1 = t0;
    ++t1;
    auto diff0 = d - t0;
    auto diff1 = t1 - d;
    if (diff0 == diff1)
    {
        // exact tie: prefer the even count
        if (t0.count() & 1)
            return t1;
        return t0;
    }
    else if (diff0 < diff1)
        return t0;
    return t1;
}
// round up: smallest value of To not less than d.
template <class To, class Rep, class Period>
To ceil (std::chrono::duration <Rep, Period> const& d)
{
    To result = std::chrono::duration_cast<To>(d);
    // For a positive remainder, truncation landed below d: step up once.
    return result < d ? ++result : result;
}
#endif
#endif

View File

@@ -0,0 +1,343 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TEST_FAIL_FILE_HPP
#define NUDB_TEST_FAIL_FILE_HPP
#include <nudb/concepts.hpp>
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <atomic>
#include <cstddef>
#include <string>
#include <utility>
namespace nudb {
namespace test {
/// Test error codes.
///
/// Values of this enum convert to error_code via make_error_code
/// (see the is_error_code_enum specialization below in this file).
enum class test_error
{
    /// No error
    success = 0,

    /// Simulated failure
    failure
};
/// Returns the error category used for test error codes.
///
/// The category is a function-local static, so exactly one instance
/// exists for the process and pointer comparison identifies it.
inline
error_category const&
test_category()
{
    struct cat_t : public error_category
    {
        char const*
        name() const noexcept override
        {
            // NOTE(review): this shares the name "nudb" with the library's
            // own category -- confirm that is intentional for test output.
            return "nudb";
        }

        std::string
        message(int ev) const override
        {
            switch(static_cast<test_error>(ev))
            {
            case test_error::failure:
                return "test failure";
            default:
                return "test error";
            }
        }

        error_condition
        default_error_condition(int ev) const noexcept override
        {
            // Map each value to a condition of this same category.
            return error_condition{ev, *this};
        }

        bool
        equivalent(int ev,
            error_condition const& ec) const noexcept override
        {
            // Equivalent only to the same value in this exact category.
            return ec.value() == ev && &ec.category() == this;
        }

        bool
        equivalent(error_code const& ec, int ev) const noexcept override
        {
            return ec.value() == ev && &ec.category() == this;
        }
    };
    static cat_t const cat{};
    return cat;
}
/// Returns a test error code.
///
/// Found by argument-dependent lookup when a test_error value is
/// assigned or converted to an error_code.
inline
error_code
make_error_code(test_error ev)
{
    return error_code{static_cast<int>(ev), test_category()};
}
} // test
} // nudb
namespace boost {
namespace system {

// Tell Boost.System that test_error is an error-code enumeration so
// its values convert implicitly to error_code through make_error_code.
template<>
struct is_error_code_enum<nudb::test::test_error>
{
    static bool const value = true;
};

} // system
} // boost
namespace nudb {
namespace test {
/** Countdown to test failure mode.

    The counter is constructed with a target ordinal and decremented
    by callers. When the count reaches zero, a simulated test failure
    is generated.
*/
class fail_counter
{
    std::size_t threshold_;             // ordinal at which fail() fires; 0 disables
    std::atomic<std::size_t> calls_;    // number of armed fail() calls so far

public:
    fail_counter(fail_counter const&) = delete;
    fail_counter& operator=(fail_counter const&) = delete;

    /// Construct the counter with a target ordinal.
    explicit
    fail_counter(std::size_t target = 0)
    {
        reset(target);
    }

    /// Reset the counter to fail at the nth step, or 0 for no failure.
    void
    reset(std::size_t n = 0)
    {
        threshold_ = n;
        calls_.store(0);
    }

    /// Returns `true` if a simulated failure should be generated.
    bool
    fail()
    {
        // A zero threshold disables failures entirely; the call count
        // only advances while failures are armed (this preserves the
        // short-circuit behavior of the original expression).
        if(threshold_ == 0)
            return false;
        return ++calls_ >= threshold_;
    }
};
/** A file wrapper to simulate file system failures.

    This wraps an object meeting the requirements of File. On each call,
    the fail counter is decremented. When the counter reaches zero, a simulated
    failure is generated.
*/
template<class File>
class fail_file
{
    static_assert(is_File<File>::value,
        "File requirements not met");

    File f_;                    // the wrapped file
    fail_counter* c_ = nullptr; // optional shared failure counter (not owned)

public:
    fail_file() = default;
    fail_file(fail_file const&) = delete;
    fail_file& operator=(fail_file const&) = delete;
    ~fail_file() = default;

    fail_file(fail_file&&);

    fail_file&
    operator=(fail_file&& other);

    /// Construct wrapping a failure counter shared with the test driver.
    explicit
    fail_file(fail_counter& c);

    // The members below simply forward to the wrapped file and never
    // inject failures.
    bool
    is_open() const
    {
        return f_.is_open();
    }

    path_type const&
    path() const
    {
        return f_.path();
    }

    std::uint64_t
    size(error_code& ec) const
    {
        return f_.size(ec);
    }

    void
    close()
    {
        f_.close();
    }

    void
    create(file_mode mode, path_type const& path, error_code& ec)
    {
        return f_.create(mode, path, ec);
    }

    void
    open(file_mode mode, path_type const& path, error_code& ec)
    {
        return f_.open(mode, path, ec);
    }

    static
    void
    erase(path_type const& path, error_code& ec)
    {
        File::erase(path, ec);
    }

    // The operations below may inject simulated failures; see the
    // out-of-line definitions.
    void
    read(std::uint64_t offset,
        void* buffer, std::size_t bytes, error_code& ec);

    void
    write(std::uint64_t offset,
        void const* buffer, std::size_t bytes, error_code& ec);

    void
    sync(error_code& ec);

    void
    trunc(std::uint64_t length, error_code& ec);

private:
    // Returns `true` when a simulated failure should occur now.
    bool
    fail();

    // Set `ec` to the simulated failure code.
    void
    do_fail(error_code& ec)
    {
        ec = test_error::failure;
    }
};
// Move construction: take over the wrapped file and the counter pointer;
// the moved-from object keeps no failure injection.
template<class File>
fail_file<File>::
fail_file(fail_file&& other)
    : f_(std::move(other.f_))
    , c_(other.c_)
{
    other.c_ = nullptr;
}

// Move assignment.
//
// Guards against self-move: without the check, `other.c_ = nullptr`
// would clear this object's own counter after copying it, silently
// disabling failure injection.
template<class File>
fail_file<File>&
fail_file<File>::
operator=(fail_file&& other)
{
    if(this != &other)
    {
        f_ = std::move(other.f_);
        c_ = other.c_;
        other.c_ = nullptr;
    }
    return *this;
}
// Construct wrapping a shared failure counter.
template<class File>
fail_file<File>::
fail_file(fail_counter& c)
    : c_(&c)
{
}

// Read: inject a failure instead of performing the read when armed.
template<class File>
void
fail_file<File>::
read(std::uint64_t offset,
    void* buffer, std::size_t bytes, error_code& ec)
{
    if(fail())
    {
        do_fail(ec);
        return;
    }
    f_.read(offset, buffer, bytes, ec);
}

// Write: two injection points. The first fails before writing anything;
// the second writes roughly half the bytes and then fails, simulating a
// torn/partial write. Note each call therefore advances the counter
// twice, which is intentional so both modes can be triggered.
template<class File>
void
fail_file<File>::
write(std::uint64_t offset,
    void const* buffer, std::size_t bytes, error_code& ec)
{
    if(fail())
    {
        do_fail(ec);
        return;
    }
    if(fail())
    {
        // partial write
        f_.write(offset, buffer,(bytes + 1) / 2, ec);
        if(ec)
            return;
        do_fail(ec);
        return;
    }
    f_.write(offset, buffer, bytes, ec);
}

// Sync: may inject a failure; the real sync is deliberately skipped.
template<class File>
void
fail_file<File>::
sync(error_code& ec)
{
    if(fail())
        do_fail(ec);
    // We don't need a real sync for
    // testing, it just slows things down.
    //f_.sync();
}

// Truncate: inject a failure instead of truncating when armed.
template<class File>
void
fail_file<File>::
trunc(std::uint64_t length, error_code& ec)
{
    if(fail())
    {
        do_fail(ec);
        return;
    }
    f_.trunc(length, ec);
}

// With no counter attached, failures are never generated.
template<class File>
bool
fail_file<File>::
fail()
{
    if(c_)
        return c_->fail();
    return false;
}
} // test
} // nudb
#endif

View File

@@ -0,0 +1,73 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TEST_TEMP_DIR_HPP
#define NUDB_TEST_TEMP_DIR_HPP
#include <boost/filesystem.hpp>
#include <string>
namespace nudb {
namespace test {
/** RAII temporary directory path.

    The directory and all its contents are deleted when
    the instance of `temp_dir` is destroyed.
*/
class temp_dir
{
    boost::filesystem::path path_;

public:
    temp_dir(const temp_dir&) = delete;
    temp_dir& operator=(const temp_dir&) = delete;

    /// Construct a temporary directory.
    explicit
    temp_dir(boost::filesystem::path dir)
    {
        namespace fs = boost::filesystem;
        if (dir.empty())
            dir = fs::temp_directory_path();
        // Keep generating candidate names until one is unused.
        for(;;)
        {
            path_ = dir / fs::unique_path();
            if(! fs::exists(path_))
                break;
        }
        fs::create_directory(path_);
    }

    /// Destroy a temporary directory.
    ~temp_dir()
    {
        boost::filesystem::remove_all(path_);
    }

    /// Get the native path for the temporary directory
    std::string
    path() const
    {
        return path_.string();
    }

    /** Get the native path for a file in the directory.

        The file does not need to exist.
    */
    std::string
    file(std::string const& name) const
    {
        return (path_ / name).string();
    }
};
} // test
} // nudb
#endif

View File

@@ -0,0 +1,451 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TEST_TEST_STORE_HPP
#define NUDB_TEST_TEST_STORE_HPP
#include <nudb/util.hpp>
#include <nudb/test/temp_dir.hpp>
#include <nudb/test/xor_shift_engine.hpp>
#include <nudb/create.hpp>
#include <nudb/native_file.hpp>
#include <nudb/store.hpp>
#include <nudb/verify.hpp>
#include <nudb/xxhasher.hpp>
#include <iomanip>
#include <iostream>
namespace nudb {
namespace test {
// Simple growable byte buffer used by the test harness.
// Capacity grows on demand in resize() and never shrinks automatically.
// NOTE(review): uses std::memcpy / std::unique_ptr but the visible include
// list has neither <cstring> nor <memory> -- presumably pulled in
// transitively; confirm.
template<class = void>
class Buffer_t
{
    std::size_t size_ = 0;      // bytes currently in use
    std::size_t capacity_ = 0;  // bytes allocated in p_
    std::unique_ptr<std::uint8_t[]> p_;

public:
    Buffer_t() = default;
    Buffer_t(Buffer_t&& other);
    Buffer_t(Buffer_t const& other);
    Buffer_t& operator=(Buffer_t&& other);
    Buffer_t& operator=(Buffer_t const& other);

    // Returns `true` when the buffer holds no bytes.
    bool
    empty() const
    {
        return size_ == 0;
    }

    // Number of bytes currently in use.
    std::size_t
    size() const
    {
        return size_;
    }

    std::uint8_t*
    data()
    {
        return p_.get();
    }

    std::uint8_t const*
    data() const
    {
        return p_.get();
    }

    // Release the storage and reset to empty.
    void
    clear();

    // Reallocate so that capacity equals size (no-op when empty or tight).
    void
    shrink_to_fit();

    // Set the size, growing capacity if needed; returns data().
    // Growth does not preserve existing contents (see definition).
    std::uint8_t*
    resize(std::size_t size);

    // Copy [data, data+size) into the buffer; returns data().
    std::uint8_t*
    operator()(void const* data, std::size_t size);
};
// Move construction: steal the other buffer's storage and
// leave it empty.
template<class _>
Buffer_t<_>::
Buffer_t(Buffer_t&& other)
    : size_(other.size_)
    , capacity_(other.capacity_)
    , p_(std::move(other.p_))
{
    other.size_ = 0;
    other.capacity_ = 0;
}

// Copy construction: deep-copies the other buffer's contents.
template<class _>
Buffer_t<_>::
Buffer_t(Buffer_t const& other)
{
    if(! other.empty())
        std::memcpy(resize(other.size()),
            other.data(), other.size());
}

// Move assignment: steal the other buffer's storage, releasing
// any storage currently held.
template<class _>
auto
Buffer_t<_>::
operator=(Buffer_t&& other) ->
    Buffer_t&
{
    if(&other != this)
    {
        size_ = other.size_;
        capacity_ = other.capacity_;
        p_ = std::move(other.p_);
        other.size_ = 0;
        other.capacity_ = 0;
    }
    return *this;
}

// Copy assignment: deep-copies the other buffer's contents.
// Existing capacity is reused when large enough.
template<class _>
auto
Buffer_t<_>::
operator=(Buffer_t const& other) ->
    Buffer_t&
{
    if(&other != this)
    {
        if(other.empty())
            size_ = 0;
        else
            std::memcpy(resize(other.size()),
                other.data(), other.size());
    }
    return *this;
}
// Release the storage and return to the empty state.
template<class _>
void
Buffer_t<_>::
clear()
{
    p_.reset();
    size_ = 0;
    capacity_ = 0;
}
// Reallocate to exactly size_ bytes, preserving contents.
// Does nothing when empty or already tight.
template<class _>
void
Buffer_t<_>::
shrink_to_fit()
{
    if(empty() || size_ == capacity_)
        return;
    std::unique_ptr<std::uint8_t[]> tight{
        new std::uint8_t[size_]};
    std::memcpy(tight.get(), p_.get(), size_);
    capacity_ = size_;
    std::swap(tight, p_);
}
// Set the size, growing the allocation only when needed.
// Existing contents are NOT preserved when a reallocation
// occurs. Returns a pointer to the (new) storage.
template<class _>
std::uint8_t*
Buffer_t<_>::
resize(std::size_t size)
{
    if(size > capacity_)
    {
        p_.reset(new std::uint8_t[size]);
        capacity_ = size;
    }
    size_ = size;
    return p_.get();
}
// Replace the contents with a copy of the given memory.
// A null pointer or zero size produces an empty buffer.
// Returns a pointer to the stored copy.
template<class _>
std::uint8_t*
Buffer_t<_>::
operator()(void const* data, std::size_t size)
{
    if(size == 0 || data == nullptr)
        return resize(0);
    auto const dest = resize(size);
    std::memcpy(dest, data, size);
    return dest;
}

/// Buffer type used throughout the tests.
using Buffer = Buffer_t<>;
//------------------------------------------------------------------------------

/// Describes a test generated key/value pair
struct item_type
{
    std::uint8_t* key;      // points to the key bytes
    std::uint8_t* data;     // points to the value bytes
    std::size_t size;       // size of the value in bytes
};
/// Interface to facilitate tests
///
/// Bundles a basic_store with a temporary directory and a
/// deterministic pseudo-random key/value generator so tests can
/// create, open, populate, and erase a database with little code.
template<class File>
class basic_test_store
{
    using Hasher = xxhasher;

    temp_dir td_;                                       // owns the working directory
    std::uniform_int_distribution<std::size_t> sizef_;  // value-size distribution
    std::function<void(error_code&)> createf_;          // replays create with stored args
    std::function<void(error_code&)> openf_;            // replays open with stored args
    Buffer buf_;                                        // scratch storage for operator[]

public:
    path_type const dp;     // data file path
    path_type const kp;     // key file path
    path_type const lp;     // log file path
    std::size_t const keySize;
    std::size_t const blockSize;
    float const loadFactor;

    static std::uint64_t constexpr appnum = 1;
    static std::uint64_t constexpr salt = 42;

    basic_store<xxhasher, File> db;

    template<class... Args>
    basic_test_store(std::size_t keySize,
        std::size_t blockSize, float loadFactor,
            Args&&... args);

    template<class... Args>
    basic_test_store(
        boost::filesystem::path const& temp_dir,
        std::size_t keySize, std::size_t blockSize, float loadFactor,
        Args&&... args);

    ~basic_test_store();

    // Return the i-th deterministic key/value pair. The returned
    // pointers reference internal scratch storage which is
    // invalidated by the next call.
    item_type
    operator[](std::uint64_t i);

    // Create the database files on disk.
    void
    create(error_code& ec);

    // Open the database, verifying key and block sizes.
    void
    open(error_code& ec);

    // Close the database.
    void
    close(error_code& ec)
    {
        db.close(ec);
    }

    // Remove the data, key, and log files.
    void
    erase();

private:
    template<class Generator>
    static
    void
    rngfill(
        void* dest, std::size_t size, Generator& g);
};
// Construct using the given directory for temporary files.
// An empty path selects the system temporary directory.
//
// Note: `args...` are captured by copy into the create/open
// lambdas so the calls can be replayed later.
template <class File>
template <class... Args>
basic_test_store<File>::basic_test_store(
    boost::filesystem::path const& temp_dir,
    std::size_t keySize_, std::size_t blockSize_,
    float loadFactor_, Args&&... args)
    : td_(temp_dir)
    , sizef_(250, 750)  // values are 250 to 750 bytes
    , createf_(
        [this, args...](error_code& ec)
        {
            nudb::create<Hasher, File>(
                dp, kp, lp, appnum, salt,
                keySize, blockSize, loadFactor, ec,
                args...);
        })
    , openf_(
        [this, args...](error_code& ec)
        {
            db.open(dp, kp, lp, ec, args...);
        })
    , dp(td_.file("nudb.dat"))
    , kp(td_.file("nudb.key"))
    , lp(td_.file("nudb.log"))
    , keySize(keySize_)
    , blockSize(blockSize_)
    , loadFactor(loadFactor_)
{
}

// Construct using the system temporary directory.
template <class File>
template <class... Args>
basic_test_store<File>::basic_test_store(std::size_t keySize_,
    std::size_t blockSize_, float loadFactor_,
        Args&&... args)
    : basic_test_store(boost::filesystem::path{},
        keySize_,
        blockSize_,
        loadFactor_,
        std::forward<Args>(args)...)
{
}

// Destroy the test store, removing the database files.
template<class File>
basic_test_store<File>::
~basic_test_store()
{
    erase();
}
// Return the i-th key/value pair. Generation is deterministic:
// pair i is always produced from engine seed i + 1.
template<class File>
auto
basic_test_store<File>::
operator[](std::uint64_t i) ->
    item_type
{
    xor_shift_engine g{i + 1};
    item_type item;
    item.size = sizef_(g);
    auto const needed = keySize + item.size;
    rngfill(buf_.resize(needed), needed, g);
    // put key last so we can get some unaligned
    // keys, this increases coverage of xxhash.
    item.data = buf_.data();
    item.key = buf_.data() + item.size;
    return item;
}

// Create the database files using the stored parameters.
template<class File>
void
basic_test_store<File>::
create(error_code& ec)
{
    createf_(ec);
}

// Open the database, then verify that the key and block sizes
// match the values this test store was constructed with.
template<class File>
void
basic_test_store<File>::
open(error_code& ec)
{
    openf_(ec);
    if(ec)
        return;
    if(db.key_size() != keySize)
        ec = error::invalid_key_size;
    else if(db.block_size() != blockSize)
        ec = error::invalid_block_size;
}

// Remove the data, key, and log files.
template<class File>
void
basic_test_store<File>::
erase()
{
    erase_file(dp);
    erase_file(kp);
    erase_file(lp);
}

// Fill [dest, dest + size) with generator output, one
// result_type word at a time; a final partial word covers
// any remaining tail bytes.
template<class File>
template<class Generator>
void
basic_test_store<File>::
rngfill(
    void* dest, std::size_t size, Generator& g)
{
    using result_type =
        typename Generator::result_type;
    while(size >= sizeof(result_type))
    {
        auto const v = g();
        std::memcpy(dest, &v, sizeof(v));
        dest = reinterpret_cast<
            std::uint8_t*>(dest) + sizeof(v);
        size -= sizeof(v);
    }
    if(size > 0)
    {
        auto const v = g();
        std::memcpy(dest, &v, size);
    }
}

/// Test store using the native file implementation.
using test_store = basic_test_store<native_file>;

//------------------------------------------------------------------------------
// Format an integer in decimal with comma separators,
// e.g. num(1234567) == "1,234,567".
template<class T>
static
std::string
num (T t)
{
    // Walk the decimal rendering from the least significant
    // digit, prepending a comma before every group of three.
    auto const digits = std::to_string(t);
    std::string out;
    out.reserve(digits.size() + (digits.size() + 2) / 3);
    int group = 0;
    for (auto pos = digits.rbegin(); pos != digits.rend(); ++pos)
    {
        if (group == 3)
        {
            group = 0;
            out.insert (out.begin(), ',');
        }
        ++group;
        out.insert(out.begin(), *pos);
    }
    return out;
}
// Stream a human-readable, multi-line summary of a verify_info.
template<class = void>
std::ostream&
operator<<(std::ostream& os, verify_info const& info)
{
    os <<
        "avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" <<
        "waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" <<
        "overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" <<
        "actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" <<
        "version: " << num(info.version) << "\n" <<
        "uid: " << fhex(info.uid) << "\n" <<
        "appnum: " << info.appnum << "\n" <<
        "key_size: " << num(info.key_size) << "\n" <<
        "salt: " << fhex(info.salt) << "\n" <<
        "pepper: " << fhex(info.pepper) << "\n" <<
        "block_size: " << num(info.block_size) << "\n" <<
        "bucket_size: " << num(info.bucket_size) << "\n" <<
        "load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" <<
        "capacity: " << num(info.capacity) << "\n" <<
        "buckets: " << num(info.buckets) << "\n" <<
        "key_count: " << num(info.key_count) << "\n" <<
        "value_count: " << num(info.value_count) << "\n" <<
        "value_bytes: " << num(info.value_bytes) << "\n" <<
        "spill_count: " << num(info.spill_count) << "\n" <<
        "spill_count_tot: " << num(info.spill_count_tot) << "\n" <<
        "spill_bytes: " << num(info.spill_bytes) << "\n" <<
        "spill_bytes_tot: " << num(info.spill_bytes_tot) << "\n" <<
        "key_file_size: " << num(info.key_file_size) << "\n" <<
        "dat_file_size: " << num(info.dat_file_size) << std::endl;
    // Append the bucket histogram as a comma separated list.
    std::string s;
    for (size_t i = 0; i < info.hist.size(); ++i)
        s += (i==0) ?
            std::to_string(info.hist[i]) :
            (", " + std::to_string(info.hist[i]));
    os << "hist: " << s << std::endl;
    return os;
}
} // test
} // nudb
#endif

View File

@@ -0,0 +1,105 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TEST_XOR_SHIFT_ENGINE_HPP
#define NUDB_TEST_XOR_SHIFT_ENGINE_HPP
#include <cstdint>
#include <limits>
#include <stdexcept>
namespace nudb {
namespace test {
/** XOR-shift Generator.

    Meets the requirements of UniformRandomNumberGenerator.

    Simple and fast RNG based on:
    http://xorshift.di.unimi.it/xorshift128plus.c

    Does not accept seed == 0.
*/
class xor_shift_engine
{
public:
    using result_type = std::uint64_t;

    xor_shift_engine(xor_shift_engine const&) = default;
    xor_shift_engine& operator=(xor_shift_engine const&) = default;

    /// Construct with the given seed (must not be zero).
    explicit
    xor_shift_engine(result_type val = 1977u)
    {
        seed(val);
    }

    /// Re-seed the generator. Throws std::domain_error if seed is 0.
    void
    seed(result_type seed);

    /// Return the next value in the sequence.
    result_type
    operator()();

    static
    result_type constexpr
    min()
    {
        return std::numeric_limits<result_type>::min();
    }

    static
    result_type constexpr
    max()
    {
        return std::numeric_limits<result_type>::max();
    }

private:
    result_type s_[2];  // 128 bits of generator state

    static
    result_type
    murmurhash3(result_type x);
};
// Seed the generator. The raw seed is passed through the
// MurmurHash3 finalizer twice to form the two state words,
// so that similar seeds yield unrelated sequences.
inline
void
xor_shift_engine::seed(result_type seed)
{
    if(seed == 0)
        throw std::domain_error("invalid seed");
    s_[0] = murmurhash3(seed);
    s_[1] = murmurhash3(s_[0]);
}

// Produce the next value (one xorshift128+ step).
inline
auto
xor_shift_engine::operator()() ->
    result_type
{
    result_type s1 = s_[0];
    result_type const s0 = s_[1];
    s_[0] = s0;
    s1 ^= s1<< 23;
    return(s_[1] =(s1 ^ s0 ^(s1 >> 17) ^(s0 >> 26))) + s0;
}
// MurmurHash3 64-bit finalization mix: spreads the seed's
// entropy across all 64 bits of the result.
inline
auto
xor_shift_engine::murmurhash3(result_type x)
    -> result_type
{
    x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdULL;
    x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53ULL;
    return x ^ (x >> 33);
}
} // test
} // nudb
#endif

View File

@@ -0,0 +1,288 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef UTIL_HPP
#define UTIL_HPP
#include "basic_seconds_clock.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
namespace nudb {
// Return floor(log2(n)) for n > 0, or -1 for n == 0.
// Binary-searches the highest set bit in six halving steps.
template<class = void>
int
log2(std::uint64_t n)
{
    int result = -(n == 0);
    for(int k : {32, 16, 8, 4, 2, 1})
    {
        if(n >= (std::uint64_t{1} << k))
        {
            result += k;
            n >>= k;
        }
    }
    return result;
}
// Format a decimal integer with comma separators
template<class T>
std::string
fdec(T t)
{
    // Walk the decimal rendering right-to-left, prepending a
    // comma before every group of three digits.
    auto const digits = std::to_string(t);
    std::string out;
    out.reserve(digits.size() +(digits.size()+2)/3);
    int group = 0;
    for(auto i = digits.size(); i--; )
    {
        if(group == 3)
        {
            group = 0;
            out.insert(out.begin(), ',');
        }
        ++group;
        out.insert(out.begin(), digits[i]);
    }
    return out;
}
// format 64-bit unsigned as fixed width, 0 padded hex
template<class T>
std::string
fhex(T v)
{
    // Start from a fully zero-padded 16-nibble template and
    // overwrite nibbles from the right while bits remain.
    std::string s{"0x0000000000000000"};
    char const* const digits = "0123456789abcdef";
    auto pos = s.size();
    while(v)
    {
        s[--pos] = digits[v & 0xf];
        v >>= 4;
    }
    return s;
}
// Format an array of integers as a comma separated list,
// truncated after the last non-zero entry.
template<class T, std::size_t N>
static
std::string
fhist(std::array<T, N> const& hist)
{
    // Find the last non-zero entry; everything after it is omitted.
    std::size_t last = hist.size() - 1;
    while(last > 0 && ! hist[last])
        --last;
    std::string result = std::to_string(hist[0]);
    for(std::size_t i = 1; i <= last; ++i)
        result.append(", ").append(std::to_string(hist[i]));
    return result;
}
// RAII guard which captures a stream's formatting state on
// construction and restores it on destruction.
class save_stream_state
{
    std::ostream& os_;
    std::streamsize precision_;
    std::ios::fmtflags flags_;
    std::ios::char_type fill_;

public:
    ~save_stream_state()
    {
        os_.precision(precision_);
        os_.flags(flags_);
        os_.fill(fill_);
    }
    save_stream_state(save_stream_state const&) = delete;
    save_stream_state& operator=(save_stream_state const&) = delete;
    explicit save_stream_state(std::ostream& os)
        : os_(os)
        , precision_(os.precision())
        , flags_(os.flags())
        , fill_(os.fill())
    {
    }
};
// Stream a duration using a human-friendly unit chosen by its
// magnitude (ns, us, ms, s, or min). Values below 100 of the
// chosen unit get one decimal place, otherwise an integer is
// printed. The caller's stream formatting is preserved.
//
// NOTE(review): std::chrono::round requires C++17 — confirm the
// project's language level, or qualify an alternative.
template<class Rep, class Period>
std::ostream&
pretty_time(std::ostream& os, std::chrono::duration<Rep, Period> d)
{
    save_stream_state _(os);
    using namespace std::chrono;
    if(d < microseconds{1})
    {
        // use nanoseconds
        if(d < nanoseconds{100})
        {
            // use floating
            using ns = duration<float, std::nano>;
            os << std::fixed << std::setprecision(1) << ns(d).count();
        }
        else
        {
            // use integral
            os << round<nanoseconds>(d).count();
        }
        os << "ns";
    }
    else if(d < milliseconds{1})
    {
        // use microseconds
        if(d < microseconds{100})
        {
            // use floating
            using ms = duration<float, std::micro>;
            os << std::fixed << std::setprecision(1) << ms(d).count();
        }
        else
        {
            // use integral
            os << round<microseconds>(d).count();
        }
        os << "us";
    }
    else if(d < seconds{1})
    {
        // use milliseconds
        if(d < milliseconds{100})
        {
            // use floating
            using ms = duration<float, std::milli>;
            os << std::fixed << std::setprecision(1) << ms(d).count();
        }
        else
        {
            // use integral
            os << round<milliseconds>(d).count();
        }
        os << "ms";
    }
    else if(d < minutes{1})
    {
        // use seconds
        if(d < seconds{100})
        {
            // use floating
            using s = duration<float>;
            os << std::fixed << std::setprecision(1) << s(d).count();
        }
        else
        {
            // use integral
            os << round<seconds>(d).count();
        }
        os << "s";
    }
    else
    {
        // use minutes
        if(d < minutes{100})
        {
            // use floating
            using m = duration<float, std::ratio<60>>;
            os << std::fixed << std::setprecision(1) << m(d).count();
        }
        else
        {
            // use integral
            os << round<minutes>(d).count();
        }
        os << "min";
    }
    return os;
}
// Render a duration to a string via pretty_time.
template<class Period, class Rep>
std::string
fmtdur(std::chrono::duration<Period, Rep> const& d)
{
    std::ostringstream os;
    pretty_time(os, d);
    return os.str();
}
//------------------------------------------------------------------------------
class progress
{
using clock_type = basic_seconds_clock<std::chrono::steady_clock>;
std::ostream& os_;
clock_type::time_point start_;
clock_type::time_point now_;
clock_type::time_point report_;
std::uint64_t prev_;
bool estimate_;
public:
explicit
progress(std::ostream& os)
: os_(os)
{
}
void
operator()(std::uint64_t amount, std::uint64_t total)
{
using namespace std::chrono;
auto const now = clock_type::now();
if(amount == 0)
{
now_ = clock_type::now();
start_ = now_;
report_ = now_;
prev_ = 0;
estimate_ = false;
return;
}
if(now == now_)
return;
now_ = now;
auto const elapsed = now - start_;
if(! estimate_)
{
// Wait a bit before showing the first estimate
if(elapsed < seconds{30})
return;
estimate_ = true;
}
else if(now - report_ < seconds{60})
{
// Only show estimates periodically
return;
}
auto const rate = double(amount) / elapsed.count();
auto const remain = clock_type::duration{
static_cast<clock_type::duration::rep>(
(total - amount) / rate)};
os_ <<
"Remaining: " << fmtdur(remain) <<
" (" << fdec(amount) << " of " << fdec(total) <<
" in " << fmtdur(elapsed) <<
", " << fdec(amount - prev_) <<
" in " << fmtdur(now - report_) <<
")\n";
report_ = now;
prev_ = amount;
}
clock_type::duration
elapsed() const
{
using namespace std::chrono;
return now_ - start_;
}
};
} // nudb
#endif

Submodule src/nudb/extras/rocksdb added at a297643f2e

View File

@@ -0,0 +1,436 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_BASIC_STORE_HPP
#define NUDB_BASIC_STORE_HPP
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/cache.hpp>
#include <nudb/detail/gentex.hpp>
#include <nudb/detail/mutex.hpp>
#include <nudb/detail/pool.hpp>
#include <boost/optional.hpp>
#include <chrono>
#include <mutex>
#include <thread>
namespace nudb {
/** A high performance, insert-only key/value database for SSDs.
To create a database first call the @ref create
free function. Then construct a @ref basic_store and
call @ref open on it:
@code
error_code ec;
create<xxhasher>(
"db.dat", "db.key", "db.log",
1, make_salt(), 8, 4096, 0.5f, ec);
basic_store<xxhasher, native_file> db;
db.open("db.dat", "db.key", "db.log", ec);
@endcode
@tparam Hasher The hash function to use. This type
must meet the requirements of @b Hasher.
@tparam File The type of File object to use. This type
must meet the requirements of @b File.
*/
template<class Hasher, class File>
class basic_store
{
public:
    using hash_type = Hasher;
    using file_type = File;

private:
    using clock_type =
        std::chrono::steady_clock;

    using time_point =
        typename clock_type::time_point;

    // Aggregates everything belonging to an open database so it
    // can be created and destroyed as a unit.
    struct state
    {
        File df;                        // data file
        File kf;                        // key file
        File lf;                        // log file
        path_type dp;                   // data file path
        path_type kp;                   // key file path
        path_type lp;                   // log file path
        Hasher hasher;
        detail::pool p0;
        detail::pool p1;
        detail::cache c1;
        detail::key_file_header kh;

        std::size_t rate = 0;
        time_point when = clock_type::now();

        state(state const&) = delete;
        state& operator=(state const&) = delete;
        state(state&&) = default;
        state& operator=(state&&) = default;
        state(File&& df_, File&& kf_, File&& lf_,
            path_type const& dp_, path_type const& kp_,
                path_type const& lp_,
                    detail::key_file_header const& kh_);
    };

    bool open_ = false;

    // Use optional because some
    // members cannot be default-constructed.
    //
    boost::optional<state> s_;  // State of an open database

    std::size_t frac_;          // accumulates load
    std::size_t thresh_;        // split threshold
    nbuck_t buckets_;           // number of buckets
    nbuck_t modulus_;           // hash modulus

    std::mutex u_;              // serializes insert()
    detail::gentex g_;
    boost::shared_mutex m_;
    std::thread t_;             // background commit thread
    std::condition_variable_any cv_;

    error_code ec_;
    std::atomic<bool> ecb_;     // `true` when ec_ set

    std::size_t dataWriteSize_;
    std::size_t logWriteSize_;

public:
    /** Default constructor.

        A default constructed database is initially closed.
    */
    basic_store() = default;

    /// Copy constructor (disallowed)
    basic_store(basic_store const&) = delete;

    /// Copy assignment (disallowed)
    basic_store& operator=(basic_store const&) = delete;

    /** Destroy the database.

        Files are closed, memory is freed, and data that has not been
        committed is discarded. To ensure that all inserted data is
        written, it is necessary to call @ref close before destroying
        the @ref basic_store.

        This function ignores errors returned by @ref close; to receive
        those errors it is necessary to call @ref close before the
        @ref basic_store is destroyed.
    */
    ~basic_store();

    /** Returns `true` if the database is open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.
    */
    bool
    is_open() const
    {
        return open_;
    }

    /** Return the path to the data file.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The data file path.
    */
    path_type const&
    dat_path() const;

    /** Return the path to the key file.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The key file path.
    */
    path_type const&
    key_path() const;

    /** Return the path to the log file.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The log file path.
    */
    path_type const&
    log_path() const;

    /** Return the appnum associated with the database.

        This is an unsigned 64-bit integer associated with the
        database and defined by the application. It is set
        once when the database is created in a call to
        @ref create.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The appnum.
    */
    std::uint64_t
    appnum() const;

    /** Return the key size associated with the database.

        The key size is defined by the application when the
        database is created in a call to @ref create. The
        key size cannot be changed on an existing database.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The size of keys in the database.
    */
    std::size_t
    key_size() const;

    /** Return the block size associated with the database.

        The block size is defined by the application when the
        database is created in a call to @ref create or when a
        key file is regenerated in a call to @ref rekey. The
        block size cannot be changed on an existing key file.
        Instead, a new key file may be created with a different
        block size.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function
        except @ref open or @ref close.

        @return The size of blocks in the key file.
    */
    std::size_t
    block_size() const;

    /** Close the database.

        All data is committed before closing.

        If an error occurs, the database is still closed.

        @par Requirements

        The database must be open.

        @par Thread safety

        Not thread safe. The caller is responsible for
        ensuring that no other member functions are
        called concurrently.

        @param ec Set to the error, if any occurred.
    */
    void
    close(error_code& ec);

    /** Open a database.

        The database identified by the specified data, key, and
        log file paths is opened. If a log file is present, the
        recovery mechanism is invoked to restore database integrity
        before the function returns.

        @par Requirements

        The database must be not be open.

        @par Thread safety

        Not thread safe. The caller is responsible for
        ensuring that no other member functions are
        called concurrently.

        @param dat_path The path to the data file.

        @param key_path The path to the key file.

        @param log_path The path to the log file.

        @param ec Set to the error, if any occurred.

        @param args Optional arguments passed to @b File constructors.
    */
    template<class... Args>
    void
    open(
        path_type const& dat_path,
        path_type const& key_path,
        path_type const& log_path,
        error_code& ec,
        Args&&... args);

    /** Fetch a value.

        The function checks the database for the specified
        key, and invokes the callback if it is found. If
        the key is not found, `ec` is set to @ref error::key_not_found.
        If any other errors occur, `ec` is set to the
        corresponding error.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function except
        @ref close.

        @note If the implementation encounters an error while
        committing data to the database, this function will
        immediately return with `ec` set to the error which
        occurred. All subsequent calls to @ref fetch will
        return the same error until the database is closed.

        @param key A pointer to a memory buffer of at least
        @ref key_size() bytes, containing the key to be searched
        for.

        @param callback A function which will be called with the
        value data if the fetch is successful. The equivalent
        signature must be:
        @code
        void callback(
            void const* buffer, // A buffer holding the value
            std::size_t size    // The size of the value in bytes
        );
        @endcode
        The buffer provided to the callback remains valid
        until the callback returns, ownership is not transferred.

        @param ec Set to the error, if any occurred.
    */
    template<class Callback>
    void
    fetch(void const* key, Callback && callback, error_code& ec);

    /** Insert a value.

        This function attempts to insert the specified key/value
        pair into the database. If the key already exists,
        `ec` is set to @ref error::key_exists. If an error
        occurs, `ec` is set to the corresponding error.

        @par Requirements

        The database must be open.

        @par Thread safety

        Safe to call concurrently with any function except
        @ref close.

        @note If the implementation encounters an error while
        committing data to the database, this function will
        immediately return with `ec` set to the error which
        occurred. All subsequent calls to @ref insert will
        return the same error until the database is closed.

        @param key A buffer holding the key to be inserted. The
        size of the buffer should be at least the `key_size`
        associated with the open database.

        @param data A buffer holding the value to be inserted.

        @param bytes The size of the buffer holding the value
        data. This value must be greater than 0 and no more
        than 0xffffffff.

        @param ec Set to the error, if any occurred.
    */
    void
    insert(void const* key, void const* data,
        nsize_t bytes, error_code& ec);

private:
    // Implementation helpers; defined in impl/basic_store.ipp.

    template<class Callback>
    void
    fetch(detail::nhash_t h, void const* key,
        detail::bucket b, Callback && callback, error_code& ec);

    bool
    exists(detail::nhash_t h, void const* key,
        detail::shared_lock_type* lock, detail::bucket b, error_code& ec);

    void
    split(detail::bucket& b1, detail::bucket& b2,
        detail::bucket& tmp, nbuck_t n1, nbuck_t n2,
            nbuck_t buckets, nbuck_t modulus,
                detail::bulk_writer<File>& w, error_code& ec);

    detail::bucket
    load(nbuck_t n, detail::cache& c1,
        detail::cache& c0, void* buf, error_code& ec);

    void
    commit(detail::unique_lock_type& m,
        std::size_t& work, error_code& ec);

    // Background thread entry point.
    void
    run();
};
} // nudb
#include <nudb/impl/basic_store.ipp>
#endif

View File

@@ -0,0 +1,205 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_CONCEPTS_HPP
#define NUDB_CONCEPTS_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <cstddef>
#include <cstdint>
#include <type_traits>
namespace nudb {
namespace detail {
// Trait implementation which determines whether T satisfies the
// File concept. Each checkN pair probes one required member via
// SFINAE; the public `type` is the conjunction of all probes.
template<class T>
class check_is_File
{
    // bool is_open() const
    template<class U, class R =
        std::is_convertible<decltype(
            std::declval<U const>().is_open()),
        bool>>
    static R check1(int);
    template<class>
    static std::false_type check1(...);
    using type1 = decltype(check1<T>(0));

    // close()
    template<class U, class R = decltype(
        std::declval<U>().close(),
        std::true_type{})>
    static R check2(int);
    template<class>
    static std::false_type check2(...);
    using type2 = decltype(check2<T>(0));

    // create(file_mode, path_type, error_code&)
    template<class U, class R = decltype(
        std::declval<U>().create(
            std::declval<file_mode>(),
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check3(int);
    template<class>
    static std::false_type check3(...);
    using type3 = decltype(check3<T>(0));

    // open(file_mode, path_type, error_code&)
    template<class U, class R = decltype(
        std::declval<U>().open(
            std::declval<file_mode>(),
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check4(int);
    template<class>
    static std::false_type check4(...);
    using type4 = decltype(check4<T>(0));

    // static erase(path_type, error_code&)
    template<class U, class R = decltype(
        U::erase(
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check5(int);
    template<class>
    static std::false_type check5(...);
    using type5 = decltype(check5<T>(0));

    // std::uint64_t size(error_code&) const
    template<class U, class R =
        std::is_convertible<decltype(
            std::declval<U const>().size(
                std::declval<error_code&>())),
        std::uint64_t>>
    static R check6(int);
    template<class>
    static std::false_type check6(...);
    using type6 = decltype(check6<T>(0));

    // read(offset, void*, size, error_code&)
    template<class U, class R = decltype(
        std::declval<U>().read(
            std::declval<std::uint64_t>(),
            std::declval<void*>(),
            std::declval<std::size_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check7(int);
    template<class>
    static std::false_type check7(...);
    using type7 = decltype(check7<T>(0));

    // write(offset, void const*, size, error_code&)
    template<class U, class R = decltype(
        std::declval<U>().write(
            std::declval<std::uint64_t>(),
            std::declval<void const*>(),
            std::declval<std::size_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check8(int);
    template<class>
    static std::false_type check8(...);
    using type8 = decltype(check8<T>(0));

    // sync(error_code&)
    template<class U, class R = decltype(
        std::declval<U>().sync(
            std::declval<error_code&>()),
        std::true_type{})>
    static R check9(int);
    template<class>
    static std::false_type check9(...);
    using type9 = decltype(check9<T>(0));

    // trunc(size, error_code&)
    template<class U, class R = decltype(
        std::declval<U>().trunc(
            std::declval<std::uint64_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R check10(int);
    template<class>
    static std::false_type check10(...);
    using type10 = decltype(check10<T>(0));

public:
    // The concept also requires move constructibility.
    using type = std::integral_constant<bool,
        std::is_move_constructible<T>::value &&
        type1::value && type2::value && type3::value &&
        type4::value && type5::value && type6::value &&
        type7::value && type8::value && type9::value &&
        type10::value
    >;
};
// Trait implementation which determines whether T satisfies the
// Hasher concept: constructible from a 64-bit salt, and callable
// with (void const*, std::size_t) yielding a 64-bit value.
template<class T>
class check_is_Hasher
{
    // Hasher(std::uint64_t salt)
    template<class U, class R =
        std::is_constructible<U, std::uint64_t>>
    static R check1(int);
    template<class>
    static std::false_type check1(...);
    using type1 = decltype(check1<T>(0));

    // std::uint64_t operator()(void const*, std::size_t) const
    template<class U, class R =
        std::is_convertible<decltype(
            std::declval<U const>().operator()(
                std::declval<void const*>(),
                std::declval<std::size_t>())),
        std::uint64_t>>
    static R check2(int);
    template<class>
    static std::false_type check2(...);
    using type2 = decltype(check2<T>(0));

public:
    using type = std::integral_constant<bool,
        type1::value && type2::value>;
};
// Trait implementation which determines whether T satisfies the
// Progress concept: callable with (amount, total) as two
// 64-bit unsigned values.
template<class T>
class check_is_Progress
{
    template<class U, class R = decltype(
        std::declval<U>().operator()(
            std::declval<std::uint64_t>(),
            std::declval<std::uint64_t>()),
        std::true_type{})>
    static R check1(int);
    template<class>
    static std::false_type check1(...);
public:
    using type = decltype(check1<T>(0));
};

} // detail

// GENERATING_DOCS selects a simplified declaration for the
// documentation toolchain; the alias is the real definition.

/// Determine if `T` meets the requirements of @b `File`
template<class T>
#if GENERATING_DOCS
struct is_File : std::integral_constant<bool, ...>{};
#else
using is_File = typename detail::check_is_File<T>::type;
#endif

/// Determine if `T` meets the requirements of @b `Hasher`
template<class T>
#if GENERATING_DOCS
struct is_Hasher : std::integral_constant<bool, ...>{};
#else
using is_Hasher = typename detail::check_is_Hasher<T>::type;
#endif

/// Determine if `T` meets the requirements of @b `Progress`
template<class T>
#if GENERATING_DOCS
struct is_Progress : std::integral_constant<bool, ...>{};
#else
using is_Progress = typename detail::check_is_Progress<T>::type;
#endif
} // nudb
#endif

View File

@@ -0,0 +1,117 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_CREATE_HPP
#define NUDB_CREATE_HPP
#include <nudb/native_file.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstring>
#include <random>
#include <stdexcept>
#include <utility>
namespace nudb {
/** Return a random salt.

    This function will use the system provided random
    number device to generate a uniformly distributed
    64-bit unsigned value suitable for use as the salt
    value in a call to @ref create.
*/
template<class = void>
std::uint64_t
make_salt();
/** Create a new database.

    This function creates a set of new database files with
    the given parameters. The files must not already exist or
    else an error is returned.

    If an error occurs while the files are being created,
    the function attempts to remove the files before
    returning.

    @par Example
    @code
    error_code ec;
    create<xxhasher>(
        "db.dat", "db.key", "db.log",
        1, make_salt(), 8, 4096, 0.5f, ec);
    @endcode

    @par Template Parameters

    @tparam Hasher The hash function to use. This type must
    meet the requirements of @b Hasher. The same hash
    function must be used every time the database is opened,
    or else an error is returned. The provided @ref xxhasher
    is a suitable general purpose hash function.

    @tparam File The type of file to use. Use the default of
    @ref native_file unless customizing the file behavior.

    @param dat_path The path to the data file.

    @param key_path The path to the key file.

    @param log_path The path to the log file.

    @param appnum A caller-defined value stored in the file
    headers. When opening the database, the same value is
    preserved and returned to the caller.

    @param salt A random unsigned integer used to permute
    the hash function to make it unpredictable. The return
    value of @ref make_salt returns a suitable value.

    @param key_size The number of bytes in each key.

    @param blockSize The size of a key file block. Larger
    blocks hold more keys but require more I/O cycles per
    operation. The ideal block size is the largest size that
    may be read in a single I/O cycle, and is device dependent.
    The return value of @ref block_size returns a suitable
    value for the volume of a given path.

    @param load_factor A number between zero and one
    representing the average bucket occupancy (number of
    items). A value of 0.5 is perfect. Lower numbers
    waste space, and higher numbers produce negligible
    savings at the cost of increased I/O cycles.

    @param ec Set to the error, if any occurred.

    @param args Optional arguments passed to @b File constructors.
*/
template<
    class Hasher,
    class File = native_file,
    class... Args
>
void
create(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    std::uint64_t appnum,
    std::uint64_t salt,
    nsize_t key_size,
    nsize_t blockSize,
    float load_factor,
    error_code& ec,
    Args&&... args);
} // nudb
#include <nudb/impl/create.ipp>
#endif

View File

@@ -0,0 +1,296 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_ARENA_HPP
#define NUDB_DETAIL_ARENA_HPP
#include <boost/assert.hpp>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <memory>
#if NUDB_DEBUG_ARENA
#include <beast/unit_test/dstream.hpp>
#include <iostream>
#endif
namespace nudb {
namespace detail {
/* Custom memory manager that allocates in large blocks.
The implementation measures the rate of allocations in
bytes per second and tunes the large block size to fit
one second's worth of allocations.
*/
template<class = void>
class arena_t
{
    using clock_type =
        std::chrono::steady_clock;
    using time_point =
        typename clock_type::time_point;

    class element;

    char const* label_;         // diagnostic
    std::size_t alloc_ = 0;     // block size
    std::size_t used_ = 0;      // bytes allocated
    element* list_ = nullptr;   // list of blocks
    time_point when_ = clock_type::now();

public:
    arena_t(arena_t const&) = delete;
    arena_t& operator=(arena_t&&) = delete;
    arena_t& operator=(arena_t const&) = delete;

    ~arena_t();

    explicit
    arena_t(char const* label = "");

    arena_t(arena_t&& other);

    // Set the allocation size
    // (size used for the next block allocated)
    void
    hint(std::size_t alloc)
    {
        alloc_ = alloc;
    }

    // Free all memory
    void
    clear();

    // Re-tune the block size from the recent
    // allocation rate; call at regular intervals.
    void
    periodic_activity();

    // Returns n bytes of storage owned by the
    // arena, valid until clear() or destruction.
    std::uint8_t*
    alloc(std::size_t n);

    template<class U>
    friend
    void
    swap(arena_t<U>& lhs, arena_t<U>& rhs);
};
//------------------------------------------------------------------------------
// One contiguous chunk of arena storage.
// The payload bytes follow this header in memory.
template<class _>
class arena_t<_>::element
{
    std::size_t const capacity_;    // payload bytes available
    std::size_t used_ = 0;          // payload bytes handed out
    element* next_;                 // next block in the list

public:
    element(std::size_t capacity, element* next)
        : capacity_(capacity)
        , next_(next)
    {
    }

    element*
    next() const
    {
        return next_;
    }

    // Make all payload bytes available again
    void
    clear()
    {
        used_ = 0;
    }

    // Bytes still available in this block
    std::size_t
    remain() const
    {
        return capacity_ - used_;
    }

    std::size_t
    capacity() const
    {
        return capacity_;
    }

    std::uint8_t*
    alloc(std::size_t n);
};
// Bump-allocate n bytes from the payload area that
// immediately follows this header. Returns nullptr
// when the block cannot satisfy the request.
template<class _>
std::uint8_t*
arena_t<_>::element::
alloc(std::size_t n)
{
    if(n > capacity_ - used_)
        return nullptr;
    // This member is non-const, so `this` is already a
    // pointer to non-const: the original const_cast /
    // cast-to-const round-trip was unnecessary. Also use
    // the qualified std::uint8_t for consistency.
    auto const p =
        reinterpret_cast<std::uint8_t*>(this + 1) + used_;
    used_ += n;
    return p;
}
//------------------------------------------------------------------------------
// Construct an arena with a diagnostic label; the label
// is used only by the NUDB_DEBUG_ARENA instrumentation.
template<class _>
arena_t<_>::
arena_t(char const* label)
    : label_(label)
{
}
// Destroy the arena, releasing every owned block.
template<class _>
arena_t<_>::
~arena_t()
{
    clear();
}
// Move construction: takes ownership of the block list
// and counters, leaving `other` empty but reusable.
template<class _>
arena_t<_>::
arena_t(arena_t&& other)
    : label_(other.label_)
    , alloc_(other.alloc_)
    , used_(other.used_)
    , list_(other.list_)
    , when_(other.when_)
{
    // Reset the source so its destructor frees nothing
    other.used_ = 0;
    other.list_ = nullptr;
    other.when_ = clock_type::now();
    other.alloc_ = 0;
}
// Free every block in the list and reset the byte count.
// Blocks were placement-constructed into new[]'d byte
// storage, so each is destroyed explicitly and then the
// raw storage is released with delete[].
template<class _>
void
arena_t<_>::
clear()
{
    used_ = 0;
    for(auto e = list_; e != nullptr;)
    {
        auto const next = e->next();
        e->~element();
        delete[] reinterpret_cast<std::uint8_t*>(e);
        e = next;
    }
    list_ = nullptr;
}
// Adjust alloc_ (the size of newly allocated blocks) to
// track the observed allocation rate in bytes/second.
// Does nothing unless at least 500ms have elapsed since
// the previous adjustment.
template<class _>
void
arena_t<_>::
periodic_activity()
{
    using namespace std::chrono;
    auto const now = clock_type::now();
    auto const elapsed = now - when_;
    if(elapsed < milliseconds{500})
        return;
    when_ = now;
    // Extrapolate bytes/second over the elapsed window
    auto const rate = static_cast<std::size_t>(std::ceil(
        used_ / duration_cast<duration<float>>(elapsed).count()));
#if NUDB_DEBUG_ARENA
    beast::unit_test::dstream dout{std::cout};
    // Counts the blocks in a list (diagnostics only)
    auto const size =
        [](element* e)
        {
            std::size_t n = 0;
            while(e)
            {
                ++n;
                e = e->next();
            }
            return n;
        };
#endif
    if(rate >= alloc_ * 2)
    {
        // adjust up
        alloc_ = std::max(rate, alloc_ * 2);
#if NUDB_DEBUG_ARENA
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ << " UP"
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
#endif
    }
    else if(rate <= alloc_ / 2)
    {
        // adjust down
        alloc_ /= 2;
#if NUDB_DEBUG_ARENA
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ << " DOWN"
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
#endif
    }
    else
    {
#if NUDB_DEBUG_ARENA
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ <<
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
#endif
    }
}
// Return storage for n bytes, rounded up to a multiple
// of 8. Tries the most recent block first; otherwise a
// new block of at least alloc_ bytes is created.
template<class _>
std::uint8_t*
arena_t<_>::
alloc(std::size_t n)
{
    // Undefined behavior: Zero byte allocations
    BOOST_ASSERT(n != 0);
    // Round up to an 8-byte boundary
    n = 8 *((n + 7) / 8);
    std::uint8_t* p;
    if(list_)
    {
        p = list_->alloc(n);
        if(p)
        {
            used_ += n;
            return p;
        }
    }
    // Allocate raw bytes and placement-construct the
    // element header at the front; the payload follows.
    // NOTE(review): payload alignment is whatever follows
    // sizeof(element) in a new[]'d byte array — assumed
    // sufficient for the byte-oriented data stored here.
    auto const size = std::max(alloc_, n);
    auto const e = reinterpret_cast<element*>(
        new std::uint8_t[sizeof(element) + size]);
    list_ = ::new(e) element{size, list_};
    used_ += n;
    return list_->alloc(n);
}
// Exchange the block lists and usage counters of two
// arenas. alloc_ and when_ are deliberately not swapped
// so each arena keeps its own rate-tuning state
// (label_ is likewise left in place).
template<class _>
void
swap(arena_t<_>& lhs, arena_t<_>& rhs)
{
    using std::swap;
    swap(lhs.used_, rhs.used_);
    swap(lhs.list_, rhs.list_);
    // don't swap alloc_ or when_
}
using arena = arena_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,473 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BUCKET_HPP
#define NUDB_DETAIL_BUCKET_HPP
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/field.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdint>
#include <cstring>
namespace nudb {
namespace detail {
// Returns bucket index given hash, buckets, and modulus
//
// Maps the hash into [0, buckets) using the linear-hashing
// scheme: a raw index at or past the current bucket count
// folds back by half the modulus.
inline
nbuck_t
bucket_index(nhash_t h, nbuck_t buckets, std::uint64_t modulus)
{
    BOOST_ASSERT(modulus <= 0x100000000ULL);
    auto result = h % modulus;
    if(result >= buckets)
        result -= modulus / 2;
    return static_cast<nbuck_t>(result);
}
//------------------------------------------------------------------------------
// Tag for constructing empty buckets
// (selects the formatting bucket_t constructor)
struct empty_t
{
    constexpr empty_t() = default;
};
static empty_t constexpr empty{};
// Allows inspection and manipulation of bucket blobs in memory
//
// Blob layout (a "Bucket Record"): a 16-bit entry count,
// a 48-bit spill offset, then `count` fixed-width entries
// sorted by hash. size_ and spill_ cache the blob header.
template<class = void>
class bucket_t
{
    nsize_t block_size_;    // Size of a key file block
    nkey_t size_;           // Current key count
    noff_t spill_;          // Offset of next spill record or 0
    std::uint8_t* p_;       // Pointer to the bucket blob

public:
    // One decoded bucket entry
    struct value_type
    {
        noff_t offset;      // data record position
        nhash_t hash;       // stored hash prefix
        nsize_t size;       // data record size
    };

    bucket_t() = default;
    bucket_t(bucket_t const&) = default;
    bucket_t& operator=(bucket_t const&) = default;

    // Attach to an existing, already-formatted blob
    bucket_t(nsize_t block_size, void* p);

    // Attach to a blob and format it as an empty bucket
    bucket_t(nsize_t block_size, void* p, empty_t);

    nsize_t
    block_size() const
    {
        return block_size_;
    }

    // Serialized bucket size.
    // Excludes empty
    nsize_t
    actual_size() const
    {
        return bucket_size(size_);
    }

    bool
    empty() const
    {
        return size_ == 0;
    }

    bool
    full() const
    {
        return size_ >=
            detail::bucket_capacity(block_size_);
    }

    nkey_t
    size() const
    {
        return size_;
    }

    // Returns offset of next spill record or 0
    //
    noff_t
    spill() const
    {
        return spill_;
    }

    // Set offset of next spill record
    //
    void
    spill(noff_t offset);

    // Clear contents of the bucket
    //
    void
    clear();

    // Returns the record for a key
    // entry without bounds checking.
    //
    value_type const
    at(nkey_t i) const;

    value_type const
    operator[](nkey_t i) const
    {
        return at(i);
    }

    // Returns index of entry with prefix
    // equal to or greater than the given prefix.
    //
    nkey_t
    lower_bound(nhash_t h) const;

    // Insert an entry, keeping entries sorted by hash
    void
    insert(noff_t offset, nsize_t size, nhash_t h);

    // Erase an element by index
    //
    void
    erase(nkey_t i);

    // Read a full bucket from the
    // file at the specified offset.
    //
    template<class File>
    void
    read(File& f, noff_t, error_code& ec);

    // Read a compact bucket
    //
    template<class File>
    void
    read(bulk_reader<File>& r, error_code& ec);

    // Write a compact bucket to the stream.
    // This only writes entries that are not empty.
    //
    void
    write(ostream& os) const;

    // Write a bucket to the file at the specified offset.
    // The full block_size() bytes are written.
    //
    template<class File>
    void
    write(File& f,noff_t offset, error_code& ec) const;

private:
    // Update size and spill in the blob
    void
    update();
};
//------------------------------------------------------------------------------
// Attach to an existing blob and decode its header
// (count and spill offset) into the cached members.
template<class _>
bucket_t<_>::
bucket_t(nsize_t block_size, void* p)
    : block_size_(block_size)
    , p_(reinterpret_cast<std::uint8_t*>(p))
{
    // Bucket Record
    istream is(p_, block_size);
    // Use the qualified std::uint16_t, consistent with
    // every other read/write in this file.
    detail::read<std::uint16_t>(is, size_);     // Count
    detail::read<uint48_t>(is, spill_);         // Spill
}
// Format the blob at p as an empty bucket
template<class _>
bucket_t<_>::
bucket_t(nsize_t block_size, void* p, empty_t)
    : block_size_(block_size)
    , size_(0)
    , spill_(0)
    , p_(reinterpret_cast<std::uint8_t*>(p))
{
    clear();
}
// Set the spill offset and persist it to the blob header
template<class _>
void
bucket_t<_>::
spill(noff_t offset)
{
    spill_ = offset;
    update();
}
// Zero-fill the entire blob and reset the cached header
template<class _>
void
bucket_t<_>::clear()
{
    size_ = 0;
    spill_ = 0;
    std::memset(p_, 0, block_size_);
}
// Decode entry i directly from the blob.
// No bounds checking is performed.
template<class _>
auto
bucket_t<_>::
at(nkey_t i) const ->
    value_type const
{
    value_type result;
    // Bucket Entry (fixed width w)
    auto const w =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Prefix
    // Bucket Record: skip the header, then i entries
    detail::istream is{p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size +         // Spill
        i * w, w};
    // Bucket Entry
    detail::read<uint48_t>(
        is, result.offset);             // Offset
    detail::read_size48(
        is, result.size);               // Size
    detail::read<f_hash>(
        is, result.hash);               // Hash
    return result;
}
// Binary search over the entry hashes (kept sorted
// ascending by insert). Returns the index of the first
// entry whose hash is not less than h, or size_ if none.
template<class _>
nkey_t
bucket_t<_>::
lower_bound(nhash_t h) const
{
    // Bucket Entry (stride between consecutive hashes)
    auto const w =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    // Bucket Record: p addresses the hash of entry 0;
    // entry i's hash is then at p + i * w.
    auto const p = p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size +         // Spill
        // Bucket Entry
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size;          // Size
    nkey_t step;
    nkey_t first = 0;
    nkey_t count = size_;
    while(count > 0)
    {
        step = count / 2;
        nkey_t i = first + step;
        nhash_t h1;
        readp<f_hash>(p + i * w, h1);
        if(h1 < h)
        {
            first = i + 1;
            count -= step + 1;
        }
        else
        {
            count = step;
        }
    }
    return first;
}
// Insert an entry at its sorted position.
// NOTE(review): does not check full(); callers are
// presumed to spill first — confirm at call sites.
template<class _>
void
bucket_t<_>::
insert(
    noff_t offset, nsize_t size, nhash_t h)
{
    auto const i = lower_bound(h);
    // Bucket Record: start of the entry array
    auto const p = p_ +
        field<
            std::uint16_t>::size +  // Count
        field<uint48_t>::size;      // Spill
    // Bucket Entry
    auto const w =
        field<uint48_t>::size +     // Offset
        field<uint48_t>::size +     // Size
        field<f_hash>::size;        // Hash
    // Shift entries [i, size_) up one slot
    std::memmove(
        p +(i + 1) * w,
        p + i * w,
        (size_ - i) * w);
    ++size_;
    update();
    // Bucket Entry
    ostream os{p + i * w, w};
    detail::write<uint48_t>(
        os, offset);                // Offset
    detail::write<uint48_t>(
        os, size);                  // Size
    detail::write<f_hash>(
        os, h);                     // Prefix
}
// Remove entry i, shifting later entries down and
// zeroing the vacated last slot.
template<class _>
void
bucket_t<_>::
erase(nkey_t i)
{
    // Bucket Record: start of the entry array
    auto const p = p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    auto const w =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    --size_;
    if(i < size_)
        std::memmove(
            p + i * w,
            p +(i + 1) * w,
            (size_ - i) * w);
    std::memset(p + size_ * w, 0, w);
    update();
}
// Read a bucket from the key file at the given offset.
// Reads only the meaningful prefix (a full bucket's
// worth of entries), not the block padding, and
// validates the stored count.
template<class _>
template<class File>
void
bucket_t<_>::
read(File& f, noff_t offset, error_code& ec)
{
    auto const cap = bucket_capacity(block_size_);
    // Excludes padding to block size
    f.read(offset, p_, bucket_size(cap), ec);
    if(ec)
        return;
    istream is{p_, block_size_};
    detail::read<std::uint16_t>(is, size_);     // Count
    detail::read<uint48_t>(is, spill_);         // Spill
    if(size_ > cap)
    {
        // A stored count larger than the block
        // can hold means the file is corrupt.
        ec = error::invalid_bucket_size;
        return;
    }
}
// Read a compact (spill) bucket: the header followed by
// exactly size_ entries, with no padding.
template<class _>
template<class File>
void
bucket_t<_>::
read(bulk_reader<File>& r, error_code& ec)
{
    // Bucket Record(compact)
    auto is = r.prepare(
        detail::field<std::uint16_t>::size +
        detail::field<uint48_t>::size, ec);
    if(ec)
        return;
    detail::read<std::uint16_t>(is, size_); // Count
    detail::read<uint48_t>(is, spill_);     // Spill
    update();
    // Excludes empty bucket entries
    auto const w = size_ * (
        field<uint48_t>::size +             // Offset
        field<uint48_t>::size +             // Size
        field<f_hash>::size);               // Hash
    is = r.prepare(w, ec);
    if(ec)
        return;
    std::memcpy(p_ +
        field<std::uint16_t>::size +        // Count
        field<uint48_t>::size,              // Spill
        is.data(w), w);                     // Entries
}
// Write the bucket in compact form (header + entries,
// no block padding); used for spill records.
template<class _>
void
bucket_t<_>::
write(ostream& os) const
{
    // Does not pad up to the block size. This
    // is called to write to the data file.
    auto const size = actual_size();
    // Bucket Record
    std::memcpy(os.data(size), p_, size);
}
// Write the bucket to the key file at the given offset.
// Zero-pads up to the block size so the key file size
// is always a multiple of the block size.
template<class _>
template<class File>
void
bucket_t<_>::
write(File& f, noff_t offset, error_code& ec) const
{
    auto const size = actual_size();
    std::memset(p_ + size, 0, block_size_ - size);
    // Bucket Record
    f.write(offset, p_, block_size_, ec);
    // (removed a dead trailing `if(ec) return;` — the
    // function ends here regardless)
}
// Re-serialize the cached count and spill offset
// into the blob header.
template<class _>
void
bucket_t<_>::
update()
{
    // Bucket Record
    ostream os{p_, block_size_};
    detail::write<std::uint16_t>(os, size_);    // Count
    detail::write<uint48_t>(os, spill_);        // Spill
}
using bucket = bucket_t<>;
//------------------------------------------------------------------------------
// Spill bucket if full.
// The bucket is cleared after it spills, and its spill
// offset is updated to point at the new spill record.
//
template<class File>
void
maybe_spill(
    bucket& b, bulk_writer<File>& w, error_code& ec)
{
    if(b.full())
    {
        // Spill Record
        auto const offset = w.offset();
        auto os = w.prepare(
            field<uint48_t>::size +         // Zero
            // qualified std::uint16_t for consistency
            // with the rest of this file
            field<std::uint16_t>::size +    // Size
            b.actual_size(), ec);
        if(ec)
            return;
        write<uint48_t>(os, 0ULL);          // Zero
        write<std::uint16_t>(
            os, b.actual_size());           // Size
        auto const spill =
            offset + os.size();
        b.write(os);                        // Bucket
        // Update bucket
        b.clear();
        b.spill(spill);
    }
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,86 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BUFFER_HPP
#define NUDB_DETAIL_BUFFER_HPP
#include <atomic>
#include <cstdint>
#include <memory>
namespace nudb {
namespace detail {
// Simple growable memory buffer
//
// Move-only owner of a byte array. size() reports the
// last requested size, which may be smaller than the
// current allocation after a shrinking reserve().
class buffer
{
private:
    std::size_t size_ = 0;
    std::unique_ptr<std::uint8_t[]> buf_;

public:
    ~buffer() = default;
    buffer() = default;
    buffer(buffer const&) = delete;
    buffer& operator=(buffer const&) = delete;

    // Allocate n uninitialized bytes
    explicit
    buffer(std::size_t n)
        : size_(n)
        , buf_(new std::uint8_t[n])
    {
    }

    // Move: the source is left empty
    buffer(buffer&& other)
        : size_(other.size_)
        , buf_(std::move(other.buf_))
    {
        other.size_ = 0;
    }

    buffer&
    operator=(buffer&& other)
    {
        size_ = other.size_;
        buf_ = std::move(other.buf_);
        other.size_ = 0;
        return *this;
    }

    std::size_t
    size() const
    {
        return size_;
    }

    std::uint8_t*
    get() const
    {
        return buf_.get();
    }

    // Ensure at least n bytes of storage. Existing
    // contents are NOT preserved when this reallocates.
    void
    reserve(std::size_t n)
    {
        if(size_ < n)
            buf_.reset(new std::uint8_t[n]);
        size_ = n;
    }

    // BufferFactory
    void*
    operator()(std::size_t n)
    {
        reserve(n);
        return buf_.get();
    }
};
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,196 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BULKIO_HPP
#define NUDB_DETAIL_BULKIO_HPP
#include <nudb/type_traits.hpp>
#include <nudb/detail/buffer.hpp>
#include <nudb/detail/stream.hpp>
#include <nudb/error.hpp>
#include <algorithm>
#include <cstddef>
namespace nudb {
namespace detail {
// Scans a file in sequential large reads
template<class File>
class bulk_reader
{
    File& f_;
    buffer buf_;
    noff_t last_;       // size of file
    noff_t offset_;     // current position
    std::size_t avail_; // bytes left to read in buf
    std::size_t used_;  // bytes consumed in buf

public:
    bulk_reader(File& f, noff_t offset,
        noff_t last, std::size_t buffer_size);

    // Logical position of the next unconsumed byte
    noff_t
    offset() const
    {
        return offset_ - avail_;
    }

    bool
    eof() const
    {
        return offset() >= last_;
    }

    // Returns a stream over the next `needed` bytes,
    // refilling the buffer from the file as required.
    istream
    prepare(std::size_t needed, error_code& ec);
};
// Construct a reader over f covering [offset, last),
// buffering up to buffer_size bytes per read.
template<class File>
bulk_reader<File>::
bulk_reader(File& f, noff_t offset,
    noff_t last, std::size_t buffer_size)
    : f_(f)
    , last_(last)
    , offset_(offset)
    , avail_(0)
    , used_(0)
{
    buf_.reserve(buffer_size);
}
// Return an istream over the next `needed` bytes.
// If the buffer does not hold that many unconsumed
// bytes, shifts the remainder to the front (growing
// the buffer when needed) and refills from the file.
// Sets error::short_read when past end of range.
template<class File>
istream
bulk_reader<File>::
prepare(std::size_t needed, error_code& ec)
{
    if(needed > avail_)
    {
        if(offset_ + needed - avail_ > last_)
        {
            ec = error::short_read;
            return {};
        }
        if(needed > buf_.size())
        {
            // Grow: copy the unconsumed tail into a
            // larger buffer (buffer::reserve discards)
            buffer buf;
            buf.reserve(needed);
            std::memcpy(buf.get(),
                buf_.get() + used_, avail_);
            buf_ = std::move(buf);
        }
        else
        {
            // Compact the unconsumed tail to the front
            std::memmove(buf_.get(),
                buf_.get() + used_, avail_);
        }
        auto const n = std::min(buf_.size() - avail_,
            static_cast<std::size_t>(last_ - offset_));
        f_.read(offset_, buf_.get() + avail_, n, ec);
        if(ec)
            return {};
        offset_ += n;
        avail_ += n;
        used_ = 0;
    }
    istream is{buf_.get() + used_, needed};
    used_ += needed;
    avail_ -= needed;
    return is;
}
//------------------------------------------------------------------------------
// Buffers file writes
// Caller must call flush manually at the end
template<class File>
class bulk_writer
{
    File& f_;
    buffer buf_;
    noff_t offset_;     // current position
    std::size_t used_;  // bytes written to buf

public:
    bulk_writer(File& f, noff_t offset,
        std::size_t buffer_size);

    // Returns an ostream over the next `needed` bytes,
    // flushing buffered data first if necessary.
    ostream
    prepare(std::size_t needed, error_code& ec);

    // Returns the number of bytes buffered
    std::size_t
    size()
    {
        return used_;
    }

    // Return current offset in file. This
    // is advanced with each call to prepare.
    noff_t
    offset() const
    {
        return offset_ + used_;
    }

    // Caller must invoke flush manually in
    // order to handle any error conditions.
    void
    flush(error_code& ec);
};
// Construct a writer appending to f at `offset`,
// buffering up to buffer_size bytes between writes.
template<class File>
bulk_writer<File>::
bulk_writer(File& f,
    noff_t offset, std::size_t buffer_size)
    : f_(f)
    , offset_(offset)
    , used_(0)
{
    buf_.reserve(buffer_size);
}
// Reserve `needed` bytes in the buffer and return an
// ostream over them. Flushes first when the buffer is
// too full, and grows it for oversized requests.
template<class File>
ostream
bulk_writer<File>::
prepare(std::size_t needed, error_code& ec)
{
    if(used_ + needed > buf_.size())
    {
        flush(ec);
        if(ec)
            return{};
    }
    if(needed > buf_.size())
        buf_.reserve(needed);
    ostream os(buf_.get() + used_, needed);
    used_ += needed;
    return os;
}
// Write any buffered bytes to the file.
// The counters are advanced before the write so the
// writer's bookkeeping stays consistent even when the
// write reports an error.
template<class File>
void
bulk_writer<File>::
flush(error_code& ec)
{
    if(used_ == 0)
        return;
    auto const write_offset = offset_;
    auto const write_size = used_;
    offset_ += used_;
    used_ = 0;
    f_.write(write_offset, buf_.get(), write_size, ec);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,236 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_CACHE_HPP
#define NUDB_DETAIL_CACHE_HPP
#include <nudb/detail/arena.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <boost/iterator/transform_iterator.hpp>
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>
#include <unordered_map>
namespace nudb {
namespace detail {
// Associative container storing
// bucket blobs keyed by bucket index.
//
// Blob storage comes from an arena; iteration yields
// (index, bucket) pairs via a transform_iterator over
// the underlying map.
template<class = void>
class cache_t
{
public:
    using value_type = std::pair<nbuck_t, bucket>;

private:
    using map_type =
        std::unordered_map<nbuck_t, void*>;

    // Adapts map entries (index, blob pointer) to
    // (index, bucket) pairs for iteration.
    struct transform
    {
        using argument_type =
            typename map_type::value_type;
        using result_type = value_type;

        cache_t* cache_;

        transform()
            : cache_(nullptr)
        {
        }

        explicit
        transform(cache_t& cache)
            : cache_(&cache)
        {
        }

        value_type
        operator()(argument_type const& e) const
        {
            return std::make_pair(e.first,
                bucket{cache_->block_size_, e.second});
        }
    };

    nsize_t key_size_ = 0;
    nsize_t block_size_ = 0;
    arena arena_;
    map_type map_;

public:
    using iterator = boost::transform_iterator<
        transform, typename map_type::iterator,
        value_type, value_type>;

    cache_t(cache_t const&) = delete;
    cache_t& operator=(cache_t&&) = delete;
    cache_t& operator=(cache_t const&) = delete;

    // Constructs a cache that will never have inserts
    cache_t() = default;

    cache_t(cache_t&& other);

    explicit
    cache_t(nsize_t key_size,
        nsize_t block_size, char const* label);

    std::size_t
    size() const
    {
        return map_.size();
    }

    iterator
    begin()
    {
        return iterator{map_.begin(), transform{*this}};
    }

    iterator
    end()
    {
        return iterator{map_.end(), transform{*this}};
    }

    bool
    empty() const
    {
        return map_.empty();
    }

    // Discard all buckets and their storage
    void
    clear();

    // Pre-size for n buckets
    void
    reserve(std::size_t n);

    void
    periodic_activity();

    iterator
    find(nbuck_t n);

    // Create an empty bucket
    //
    bucket
    create(nbuck_t n);

    // Insert a copy of a bucket.
    //
    iterator
    insert(nbuck_t n, bucket const& b);

    template<class U>
    friend
    void
    swap(cache_t<U>& lhs, cache_t<U>& rhs);
};
// Move construction: takes the arena and map; the
// moved-from arena is left empty by arena_t's move.
template<class _>
cache_t<_>::
cache_t(cache_t&& other)
    : key_size_{other.key_size_}
    , block_size_(other.block_size_)
    , arena_(std::move(other.arena_))
    , map_(std::move(other.map_))
{
}
// Construct a cache for buckets of the given geometry.
// `label` is the arena's diagnostic label.
template<class _>
cache_t<_>::
cache_t(nsize_t key_size,
    nsize_t block_size, char const* label)
    : key_size_(key_size)
    , block_size_(block_size)
    , arena_(label)
{
}
// Pre-size the arena and map for n buckets
template<class _>
void
cache_t<_>::
reserve(std::size_t n)
{
    arena_.hint(n * block_size_);
    map_.reserve(n);
}
// Discard all buckets; the arena releases their storage
template<class _>
void
cache_t<_>::
clear()
{
    arena_.clear();
    map_.clear();
}
// Forward periodic tuning to the arena
template<class _>
void
cache_t<_>::
periodic_activity()
{
    arena_.periodic_activity();
}
// Returns an iterator to the bucket with index n,
// or end() if it is not in the cache.
template<class _>
auto
cache_t<_>::
find(nbuck_t n) ->
    iterator
{
    // A transform_iterator wrapping map_.end() IS the
    // end iterator, so the not-found case needs no
    // separate branch (the original special-cased it
    // with an identical result).
    return iterator{map_.find(n), transform{*this}};
}
// Create an empty bucket with index n, backed by
// freshly arena-allocated storage.
template<class _>
bucket
cache_t<_>::
create(nbuck_t n)
{
    auto const p = arena_.alloc(block_size_);
    map_.emplace(n, p);
    return bucket{block_size_, p, detail::empty};
}
// Insert a copy of bucket b under index n by
// serializing it into arena-owned storage.
template<class _>
auto
cache_t<_>::
insert(nbuck_t n, bucket const& b) ->
    iterator
{
    void* const p = arena_.alloc(b.block_size());
    ostream os{p, b.block_size()};
    b.write(os);
    auto const result = map_.emplace(n, p);
    return iterator{result.first, transform(*this)};
}
// Exchange the contents of two caches, including their
// geometry, arena storage, and bucket maps.
template<class U>
void
swap(cache_t<U>& lhs, cache_t<U>& rhs)
{
    using std::swap;
    swap(lhs.key_size_, rhs.key_size_);
    swap(lhs.block_size_, rhs.block_size_);
    swap(lhs.arena_, rhs.arena_);
    swap(lhs.map_, rhs.map_);
}
using cache = cache_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,93 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_ENDIAN_HPP
#define NUDB_DETAIL_ENDIAN_HPP
#include <cstdint>
#include <cstring>
#include <type_traits>
namespace nudb {
namespace detail {
// This is a modified work, original implementation
// by Howard Hinnant <howard.hinnant@gmail.com>
//
// "This should be standardized" - Howard
// Endian provides answers to the following questions:
// 1. Is this system big or little endian?
// 2. Is the "desired endian" of some class or function the same as the
// native endian?
// Native byte order, expressed as an enum so it can be
// compared at compile time.
enum class endian
{
#ifdef _MSC_VER
    // MSVC has no __BYTE_ORDER__; its targets are
    // treated as little endian here.
    big = 1,
    little = 0,
    native = little
#else
    native = __BYTE_ORDER__,
    little = __ORDER_LITTLE_ENDIAN__,
    big = __ORDER_BIG_ENDIAN__
#endif
};

// Compile-time flag (true_type/false_type compatible)
// used for tag dispatch below.
using is_little_endian =
    std::integral_constant<bool,
        endian::native == endian::little>;

static_assert(
    endian::native == endian::little || endian::native == endian::big,
    "endian::native shall be one of endian::little or endian::big");
static_assert(
    endian::big != endian::little,
    "endian::big and endian::little shall have different values");
// The pepper got baked into the file format as
// the hash of the little endian salt so now we
// need this function.
//
// Convert v to its little-endian representation on a
// big-endian host: serialize the value LSB-first, then
// reinterpret those bytes in native order. std::memcpy
// replaces the original union-based type punning, which
// reads an inactive union member (undefined behavior
// in C++).
template<class = void>
std::uint64_t
to_little_endian(std::uint64_t v, std::false_type)
{
    std::uint8_t va[8];
    va[0] = v & 0xff;
    va[1] = (v >> 8) & 0xff;
    va[2] = (v >> 16) & 0xff;
    va[3] = (v >> 24) & 0xff;
    va[4] = (v >> 32) & 0xff;
    va[5] = (v >> 40) & 0xff;
    va[6] = (v >> 48) & 0xff;
    va[7] = (v >> 56) & 0xff;
    std::uint64_t vi;
    std::memcpy(&vi, va, sizeof(vi));
    return vi;
}
// Identity: the native representation is already
// little endian.
inline
std::uint64_t
to_little_endian(std::uint64_t v, std::true_type)
{
    return v;
}

// Dispatch on the native byte order.
inline
std::uint64_t
to_little_endian(std::uint64_t v)
{
    return to_little_endian(v, is_little_endian{});
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,265 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_FIELD_HPP
#define NUDB_FIELD_HPP
#include <nudb/detail/stream.hpp>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <type_traits>
namespace nudb {
namespace detail {
// A 24-bit integer
struct uint24_t;

// A 48-bit integer
struct uint48_t;

// These metafunctions describe the binary format of fields on disk
//
// `size` is the serialized width in bytes;
// `max` is the largest representable value.
template<class T>
struct field;

template<>
struct field<std::uint8_t>
{
    static std::size_t constexpr size = 1;
    static std::uint64_t constexpr max = 0xff;
};

template<>
struct field<std::uint16_t>
{
    static std::size_t constexpr size = 2;
    static std::uint64_t constexpr max = 0xffff;
};

template<>
struct field<uint24_t>
{
    static std::size_t constexpr size = 3;
    static std::uint64_t constexpr max = 0xffffff;
};

template<>
struct field<std::uint32_t>
{
    static std::size_t constexpr size = 4;
    static std::uint64_t constexpr max = 0xffffffff;
};

template<>
struct field<uint48_t>
{
    static std::size_t constexpr size = 6;
    static std::uint64_t constexpr max = 0x0000ffffffffffff;
};

template<>
struct field<std::uint64_t>
{
    static std::size_t constexpr size = 8;
    static std::uint64_t constexpr max = 0xffffffffffffffff;
};
// read field from memory
// (1-byte field: direct load)
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint8_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto p = reinterpret_cast<std::uint8_t const*>(v);
    u = *p;
}
// Big-endian decode of a 2-byte field from memory.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint16_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const* p = reinterpret_cast<std::uint8_t const*>(v);
    u = static_cast<T>((T(p[0]) << 8) | T(p[1]));
}
// Big-endian decode of a 3-byte field from memory.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint24_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto p = reinterpret_cast<std::uint8_t const*>(v);
    std::uint32_t t;
    t = std::uint32_t(*p++)<<16;
    t = (std::uint32_t(*p++)<< 8) | t;
    t = std::uint32_t(*p ) | t;
    u = t;
}
// Big-endian decode of a 4-byte field from memory.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint32_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const* p = reinterpret_cast<std::uint8_t const*>(v);
    T t = 0;
    for(int i = 0; i < 4; ++i)
        t = static_cast<T>((t << 8) | T(*p++));
    u = t;
}
// Big-endian decode of a 6-byte field from memory.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint48_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto p = reinterpret_cast<std::uint8_t const*>(v);
    std::uint64_t t;
    t = (std::uint64_t(*p++)<<40);
    t = (std::uint64_t(*p++)<<32) | t;
    t = (std::uint64_t(*p++)<<24) | t;
    t = (std::uint64_t(*p++)<<16) | t;
    t = (std::uint64_t(*p++)<< 8) | t;
    t = std::uint64_t(*p ) | t;
    u = t;
}
// Big-endian decode of an 8-byte field from memory.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint64_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const* p = reinterpret_cast<std::uint8_t const*>(v);
    T t = 0;
    for(int i = 0; i < 8; ++i)
        t = (t << 8) | T(*p++);
    u = t;
}
// read field from istream
// Consumes field<T>::size bytes from the stream.
template<class T, class U>
void
read(istream& is, U& u)
{
    readp<T>(is.data(field<T>::size), u);
}
// Read a 48-bit size field into a std::size_t.
// Asserts the value fits in 32 bits so it remains
// representable on 32-bit builds.
inline
void
read_size48(istream& is, std::size_t& u)
{
    std::uint64_t v;
    read<uint48_t>(is, v);
    BOOST_ASSERT(v <= std::numeric_limits<std::uint32_t>::max());
    u = static_cast<std::uint32_t>(v);
}
// write field to ostream
// (1-byte field: direct store)
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint8_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    std::uint8_t* p = os.data(field<T>::size);
    *p = static_cast<std::uint8_t>(u);
}
// Big-endian encode of a 2-byte field to the stream.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint16_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<T>(u);
    std::uint8_t* p = os.data(field<T>::size);
    *p++ = (t>> 8)&0xff;
    *p = t &0xff;
}
// Big-endian encode of a 3-byte field to the stream.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint24_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<std::uint32_t>(u);
    std::uint8_t* p = os.data(field<T>::size);
    *p++ = (t>>16)&0xff;
    *p++ = (t>> 8)&0xff;
    *p = t &0xff;
}
// Big-endian encode of a 4-byte field to the stream.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint32_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<T>(u);
    std::uint8_t* p = os.data(field<T>::size);
    *p++ = (t>>24)&0xff;
    *p++ = (t>>16)&0xff;
    *p++ = (t>> 8)&0xff;
    *p = t &0xff;
}
// Big-endian encode of a 6-byte field to the stream.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint48_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<std::uint64_t>(u);
    std::uint8_t* p = os.data(field<T>::size);
    *p++ = (t>>40)&0xff;
    *p++ = (t>>32)&0xff;
    *p++ = (t>>24)&0xff;
    *p++ = (t>>16)&0xff;
    *p++ = (t>> 8)&0xff;
    *p = t &0xff;
}
// Big-endian encode of an 8-byte field to the stream.
// No range assertion: every value fits in 64 bits.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint64_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    auto const t = static_cast<T>(u);
    std::uint8_t* p = os.data(field<T>::size);
    for(int shift = 56; shift >= 0; shift -= 8)
        *p++ = (t >> shift) & 0xff;
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,629 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_FORMAT_HPP
#define NUDB_DETAIL_FORMAT_HPP
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/buffer.hpp>
#include <nudb/detail/endian.hpp>
#include <nudb/detail/field.hpp>
#include <nudb/detail/stream.hpp>
#include <boost/assert.hpp>
#include <algorithm>
#include <array>
#include <limits>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <type_traits>
namespace nudb {
namespace detail {
// Format of the nudb files:
/*
Integer sizes
block_size less than 32 bits (maybe restrict it to 16 bits)
buckets more than 32 bits
capacity (same as bucket index)
file offsets 63 bits
hash up to 64 bits (48 currently)
item index less than 32 bits (index of item in bucket)
modulus (same as buckets)
value size up to 32 bits (or 32-bit builds can't read it)
*/
// On-disk format version written into new files
static std::size_t constexpr currentVersion = 2;

// Decoded header of the data file
struct dat_file_header
{
    // Serialized size in bytes
    static std::size_t constexpr size =
        8 +     // Type
        2 +     // Version
        8 +     // UID
        8 +     // Appnum
        2 +     // KeySize
        64;     // (Reserved)

    char type[8];
    std::size_t version;
    std::uint64_t uid;
    std::uint64_t appnum;
    nsize_t key_size;
};
// Decoded header of the key file
struct key_file_header
{
    // Serialized size in bytes
    static std::size_t constexpr size =
        8 +     // Type
        2 +     // Version
        8 +     // UID
        8 +     // Appnum
        2 +     // KeySize
        8 +     // Salt
        8 +     // Pepper
        2 +     // BlockSize
        2 +     // LoadFactor
        56;     // (Reserved)

    char type[8];
    std::size_t version;
    std::uint64_t uid;
    std::uint64_t appnum;
    nsize_t key_size;
    std::uint64_t salt;
    std::uint64_t pepper;
    nsize_t block_size;
    std::size_t load_factor;

    // Computed values
    nkey_t capacity;    // Entries per bucket
    nbuck_t buckets;    // Number of buckets
    nbuck_t modulus;    // pow(2,ceil(log2(buckets)))
};
// Decoded header of the log file
struct log_file_header
{
    // Serialized size in bytes
    static std::size_t constexpr size =
        8 +     // Type
        2 +     // Version
        8 +     // UID
        8 +     // Appnum
        2 +     // KeySize
        8 +     // Salt
        8 +     // Pepper
        2 +     // BlockSize
        8 +     // KeyFileSize
        8;      // DataFileSize

    char type[8];
    std::size_t version;
    std::uint64_t uid;
    std::uint64_t appnum;
    nsize_t key_size;
    std::uint64_t salt;
    std::uint64_t pepper;
    nsize_t block_size;
    noff_t key_file_size;
    noff_t dat_file_size;
};
// Type used to store hashes in buckets.
// This can be smaller than the output
// of the hash function.
//
using f_hash = uint48_t;

static_assert(field<f_hash>::size <=
    sizeof(nhash_t), "");

// Narrow a full hash to the stored width
template<class T>
nhash_t
make_hash(nhash_t h);

// uint48_t: keep the middle 48 bits of the 64-bit hash
template<>
inline
nhash_t
make_hash<uint48_t>(nhash_t h)
{
    return(h>>16)&0xffffffffffff;
}
// Returns the hash of a key given the salt.
// Note: The hash is expressed in f_hash units
//
// Returns the hash of a key given the salt.
// Note: The hash is expressed in f_hash units
// (a Hasher is constructed per call).
template<class Hasher>
inline
nhash_t
hash(void const* key, nsize_t key_size, std::uint64_t salt)
{
    Hasher h{salt};
    return make_hash<f_hash>(h(key, key_size));
}
template<class Hasher>
inline
nhash_t
hash(void const* key, nsize_t key_size, Hasher const& h)
{
return make_hash<f_hash>(h(key, key_size));
}
// Computes pepper from salt
//
// The pepper is the keyed hash of the salt's little-endian
// representation. Headers store it so that opening with a
// different Hasher is detected (see verify: hash_mismatch).
template<class Hasher>
std::uint64_t
pepper(std::uint64_t salt)
{
auto const v = to_little_endian(salt);
Hasher h{salt};
return h(&v, sizeof(v));
}
// Returns the actual serialized size of a bucket holding
// `capacity` entries. This can be smaller than the block size.
//
template<class = void>
nsize_t
bucket_size(nkey_t capacity)
{
    // Fixed bucket header: Count + Spill
    auto const header =
        field<std::uint16_t>::size +
        field<uint48_t>::size;
    // Per-entry payload: Offset + Size + Hash
    auto const entry =
        field<uint48_t>::size +
        field<uint48_t>::size +
        field<f_hash>::size;
    return header + capacity * entry;
}
// Returns the number of entries that fit in a bucket
//
template<class = void>
nkey_t
bucket_capacity(nsize_t block_size)
{
// Bucket Record
auto const size =
field<std::uint16_t>::size + // Count
field<uint48_t>::size; // Spill
auto const entry_size =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
field<f_hash>::size; // Hash
// A block must hold at least the key file header and the
// fixed bucket header; otherwise the capacity is zero.
if(block_size < key_file_header::size ||
block_size < size)
return 0;
auto const n =
(block_size - size) / entry_size;
BOOST_ASSERT(n <= std::numeric_limits<nkey_t>::max());
// The clamp is redundant with the assert but protects
// release builds against a narrowing overflow.
return static_cast<nkey_t>(std::min<std::size_t>(
std::numeric_limits<nkey_t>::max(), n));
}
// Returns the number of bytes occupied by a value record:
// a size field followed by the key and the data.
// VFALCO TODO Fix this
inline
std::size_t
value_size(std::size_t size,
    std::size_t key_size)
{
    // Data Record: Size field precedes key and data bytes
    auto const prefix = field<uint48_t>::size;
    return prefix + key_size + size;
}
// Returns the closest power of 2 not less than x
// (ceil_pow2(0) and ceil_pow2(1) both yield 1).
template<class T>
T
ceil_pow2(T x)
{
    // Successively halving masks used to binary-search
    // for the position of the highest set bit.
    static unsigned long long const masks[6] = {
        0xFFFFFFFF00000000ull,
        0x00000000FFFF0000ull,
        0x000000000000FF00ull,
        0x00000000000000F0ull,
        0x000000000000000Cull,
        0x0000000000000002ull
    };
    // Start the exponent at 1 when x is not already a power
    // of two, so the result rounds up rather than down.
    int exp = ((x & (x - 1)) == 0) ? 0 : 1;
    int step = 32;
    for(int i = 0; i < 6; ++i)
    {
        int const shift = ((x & masks[i]) == 0) ? 0 : step;
        exp += shift;
        x >>= shift;
        step >>= 1;
    }
    return T{1} << exp;
}
//------------------------------------------------------------------------------
// Read data file header from stream
template<class = void>
void
read(istream& is, dat_file_header& dh)
{
read(is, dh.type, sizeof(dh.type));
read<std::uint16_t>(is, dh.version);
read<std::uint64_t>(is, dh.uid);
read<std::uint64_t>(is, dh.appnum);
read<std::uint16_t>(is, dh.key_size);
// Consume the reserved area so the stream ends up
// positioned past the fixed-size header.
std::array<std::uint8_t, 64> reserved;
read(is, reserved.data(), reserved.size());
}
// Read data file header from file
template<class File>
void
read(File& f, dat_file_header& dh, error_code& ec)
{
std::array<std::uint8_t, dat_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is(buf);
read(is, dh);
}
// Write data file header to stream
//
// Note: the type field is always serialized as the literal
// "nudb.dat", regardless of the contents of dh.type.
template<class = void>
void
write(ostream& os, dat_file_header const& dh)
{
write(os, "nudb.dat", 8);
write<std::uint16_t>(os, dh.version);
write<std::uint64_t>(os, dh.uid);
write<std::uint64_t>(os, dh.appnum);
write<std::uint16_t>(os, dh.key_size);
std::array<std::uint8_t, 64> reserved;
reserved.fill(0);
write(os, reserved.data(), reserved.size());
}
// Write data file header to file
template<class File>
void
write(File& f, dat_file_header const& dh, error_code& ec)
{
std::array<std::uint8_t, dat_file_header::size> buf;
ostream os(buf);
write(os, dh);
f.write(0, buf.data(), buf.size(), ec);
}
// Read key file header from stream
//
// The derived fields (capacity, buckets, modulus) are
// computed here from the block size and the file size;
// they are never serialized on disk.
template<class = void>
void
read(istream& is, noff_t file_size, key_file_header& kh)
{
read(is, kh.type, sizeof(kh.type));
read<std::uint16_t>(is, kh.version);
read<std::uint64_t>(is, kh.uid);
read<std::uint64_t>(is, kh.appnum);
read<std::uint16_t>(is, kh.key_size);
read<std::uint64_t>(is, kh.salt);
read<std::uint64_t>(is, kh.pepper);
read<std::uint16_t>(is, kh.block_size);
read<std::uint16_t>(is, kh.load_factor);
std::array<std::uint8_t, 56> reserved;
read(is, reserved.data(), reserved.size());
// VFALCO These need to be checked to handle
// when the file size is too small
kh.capacity = bucket_capacity(kh.block_size);
if(file_size > kh.block_size)
{
// Buckets occupy whole blocks following the header block.
if(kh.block_size > 0)
kh.buckets = static_cast<nbuck_t>(
(file_size - kh.block_size) / kh.block_size);
else
// VFALCO Corruption or logic error
kh.buckets = 0;
}
else
{
kh.buckets = 0;
}
kh.modulus = ceil_pow2(kh.buckets);
}
// Read key file header from file
template<class File>
void
read(File& f, key_file_header& kh, error_code& ec)
{
std::array<std::uint8_t, key_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is{buf};
auto const size = f.size(ec);
if(ec)
return;
read(is, size, kh);
}
// Write key file header to stream
//
// Note: the type field is always serialized as the literal
// "nudb.key". The computed fields (capacity, buckets,
// modulus) are intentionally not written.
template<class = void>
void
write(ostream& os, key_file_header const& kh)
{
write(os, "nudb.key", 8);
write<std::uint16_t>(os, kh.version);
write<std::uint64_t>(os, kh.uid);
write<std::uint64_t>(os, kh.appnum);
write<std::uint16_t>(os, kh.key_size);
write<std::uint64_t>(os, kh.salt);
write<std::uint64_t>(os, kh.pepper);
write<std::uint16_t>(os, kh.block_size);
write<std::uint16_t>(os, kh.load_factor);
std::array<std::uint8_t, 56> reserved;
reserved.fill(0);
write(os, reserved.data(), reserved.size());
}
// Write key file header to file
//
// The header is serialized into a zero-filled buffer of
// exactly one block and written at offset 0.
//
// Improvement over the original: the block size is validated
// BEFORE the buffer is allocated, so an invalid (e.g. too
// small) block size no longer triggers a pointless allocation
// of kh.block_size bytes.
template<class File>
void
write(File& f, key_file_header const& kh, error_code& ec)
{
    // The serialized header must fit in a single block.
    if(kh.block_size < key_file_header::size)
    {
        ec = error::invalid_block_size;
        return;
    }
    buffer buf;
    buf.reserve(kh.block_size);
    std::fill(buf.get(), buf.get() + buf.size(), 0);
    ostream os{buf.get(), buf.size()};
    write(os, kh);
    f.write(0, buf.get(), buf.size(), ec);
}
// Read log file header from stream
template<class = void>
void
read(istream& is, log_file_header& lh)
{
read(is, lh.type, sizeof(lh.type));
read<std::uint16_t>(is, lh.version);
read<std::uint64_t>(is, lh.uid);
read<std::uint64_t>(is, lh.appnum);
read<std::uint16_t>(is, lh.key_size);
read<std::uint64_t>(is, lh.salt);
read<std::uint64_t>(is, lh.pepper);
read<std::uint16_t>(is, lh.block_size);
read<std::uint64_t>(is, lh.key_file_size);
read<std::uint64_t>(is, lh.dat_file_size);
}
// Read log file header from file
template<class File>
void
read(File& f, log_file_header& lh, error_code& ec)
{
std::array<std::uint8_t, log_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is{buf};
read(is, lh);
}
// Write log file header to stream
//
// Note: the type field is always serialized as the
// literal "nudb.log".
template<class = void>
void
write(ostream& os, log_file_header const& lh)
{
write(os, "nudb.log", 8);
write<std::uint16_t>(os, lh.version);
write<std::uint64_t>(os, lh.uid);
write<std::uint64_t>(os, lh.appnum);
write<std::uint16_t>(os, lh.key_size);
write<std::uint64_t>(os, lh.salt);
write<std::uint64_t>(os, lh.pepper);
write<std::uint16_t>(os, lh.block_size);
write<std::uint64_t>(os, lh.key_file_size);
write<std::uint64_t>(os, lh.dat_file_size);
}
// Write log file header to file
template<class File>
void
write(File& f, log_file_header const& lh, error_code& ec)
{
std::array<std::uint8_t, log_file_header::size> buf;
ostream os{buf};
write(os, lh);
f.write(0, buf.data(), buf.size(), ec);
}
// Verify contents of data file header
//
// On success ec is left unchanged; on failure it is set to
// the code describing the first check that failed.
template<class = void>
void
verify(dat_file_header const& dh, error_code& ec)
{
    if(std::string{dh.type, 8} != "nudb.dat")
        ec = error::not_data_file;
    else if(dh.version != currentVersion)
        ec = error::different_version;
    else if(dh.key_size < 1)
        ec = error::invalid_key_size;
}
// Verify contents of key file header
//
// Checks the type tag and version, then the internal
// consistency of the fields, including that the stored
// pepper matches the salt under Hasher (hash_mismatch
// indicates the file was written with a different hash
// function or is corrupt).
template<class Hasher>
void
verify(key_file_header const& kh, error_code& ec)
{
std::string const type{kh.type, 8};
if(type != "nudb.key")
{
ec = error::not_key_file;
return;
}
if(kh.version != currentVersion)
{
ec = error::different_version;
return;
}
if(kh.key_size < 1)
{
ec = error::invalid_key_size;
return;
}
if(kh.pepper != pepper<Hasher>(kh.salt))
{
ec = error::hash_mismatch;
return;
}
if(kh.load_factor < 1)
{
ec = error::invalid_load_factor;
return;
}
// capacity and buckets are derived fields computed
// when the header was read.
if(kh.capacity < 1)
{
ec = error::invalid_capacity;
return;
}
if(kh.buckets < 1)
{
ec = error::invalid_bucket_count;
return;
}
}
// Verify contents of log file header
template<class Hasher>
void
verify(log_file_header const& lh, error_code& ec)
{
std::string const type{lh.type, 8};
if(type != "nudb.log")
{
ec = error::not_log_file;
return;
}
if(lh.version != currentVersion)
{
ec = error::different_version;
return;
}
if(lh.pepper != pepper<Hasher>(lh.salt))
{
ec = error::hash_mismatch;
return;
}
if(lh.key_size < 1)
{
ec = error::invalid_key_size;
return;
}
}
// Make sure key file and value file headers match
//
// First validates the key file header on its own, then
// checks that the identifying fields agree between files.
template<class Hasher>
void
verify(dat_file_header const& dh,
key_file_header const& kh, error_code& ec)
{
verify<Hasher>(kh, ec);
if(ec)
return;
if(kh.uid != dh.uid)
{
ec = error::uid_mismatch;
return;
}
if(kh.appnum != dh.appnum)
{
ec = error::appnum_mismatch;
return;
}
if(kh.key_size != dh.key_size)
{
ec = error::key_size_mismatch;
return;
}
}
// Make sure key file and log file headers match
//
// First validates the log file header on its own, then
// checks that all shared fields agree between files.
template<class Hasher>
void
verify(key_file_header const& kh,
log_file_header const& lh, error_code& ec)
{
verify<Hasher>(lh, ec);
if(ec)
return;
if(kh.uid != lh.uid)
{
ec = error::uid_mismatch;
return;
}
if(kh.appnum != lh.appnum)
{
ec = error::appnum_mismatch;
return;
}
if(kh.key_size != lh.key_size)
{
ec = error::key_size_mismatch;
return;
}
if(kh.salt != lh.salt)
{
ec = error::salt_mismatch;
return;
}
if(kh.pepper != lh.pepper)
{
ec = error::pepper_mismatch;
return;
}
if(kh.block_size != lh.block_size)
{
ec = error::block_size_mismatch;
return;
}
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,259 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_GENTEX_HPP
#define NUDB_DETAIL_GENTEX_HPP
#include <boost/assert.hpp>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <system_error>
namespace nudb {
namespace detail {
// Generation counting mutex
//
// Locks are tagged with the generation that was current when
// acquired. start() begins a new generation, moving any
// outstanding locks to the "previous" set; finish() blocks
// until every previous-generation lock has been released.
template<class = void>
class gentex_t
{
private:
std::mutex m_;
std::size_t gen_ = 0;  // current generation number
std::size_t cur_ = 0;  // locks held on the current generation
std::size_t prev_ = 0; // locks held on previous generations
std::condition_variable cond_;
public:
gentex_t() = default;
gentex_t(gentex_t const&) = delete;
gentex_t& operator=(gentex_t const&) = delete;
// Begin a new generation.
void
start();
// Block until all previous-generation locks are released.
void
finish();
// Acquire a lock; returns the generation it belongs to.
std::size_t
lock_gen();
// Release a lock acquired under the given generation.
void
unlock_gen(std::size_t gen);
};
// Begin a new generation: outstanding current-generation
// locks are rolled into the previous count, and the
// generation number advances.
template<class _>
void
gentex_t<_>::
start()
{
    std::lock_guard<std::mutex> guard{m_};
    prev_ += cur_;
    cur_ = 0;
    ++gen_;
}
// Block until every lock belonging to a previous
// generation has been released.
template<class _>
void
gentex_t<_>::
finish()
{
    std::unique_lock<std::mutex> l{m_};
    cond_.wait(l, [this]{ return prev_ == 0; });
}
// Acquire a lock on the current generation and
// return that generation's number.
template<class _>
std::size_t
gentex_t<_>::
lock_gen()
{
    std::lock_guard<std::mutex> guard{m_};
    cur_ = cur_ + 1;
    return gen_;
}
// Release a lock acquired under the given generation.
template<class _>
void
gentex_t<_>::
unlock_gen(std::size_t gen)
{
std::unique_lock<std::mutex> l{m_};
if(gen == gen_)
{
// Current generation: just drop the count.
--cur_;
}
else
{
// Previous generation: wake finish() when the last
// outstanding previous-generation lock is released.
--prev_;
if(prev_ == 0)
cond_.notify_all();
}
}
using gentex = gentex_t<>;
//------------------------------------------------------------------------------
// RAII-style ownership wrapper for a generation lock,
// modeled after std::unique_lock. Movable, not copyable.
template<class GenerationLockable>
class genlock
{
private:
bool owned_ = false;
GenerationLockable* g_ = nullptr;
std::size_t gen_; // generation the lock was acquired under
public:
using mutex_type = GenerationLockable;
genlock() = default;
genlock(genlock const&) = delete;
genlock& operator=(genlock const&) = delete;
genlock(genlock&& other);
genlock& operator=(genlock&& other);
// Acquire the lock on construction.
explicit
genlock(mutex_type& g);
// Associate without locking (std::defer_lock tag).
genlock(mutex_type& g, std::defer_lock_t);
~genlock();
// Returns the associated gentex, or nullptr.
mutex_type*
mutex() noexcept
{
return g_;
}
// Returns true if this object owns the lock.
bool
owns_lock() const noexcept
{
return g_ && owned_;
}
explicit
operator bool() const noexcept
{
return owns_lock();
}
void
lock();
void
unlock();
// Dissociate from the gentex without unlocking.
mutex_type*
release() noexcept;
template<class U>
friend
void
swap(genlock<U>& lhs, genlock<U>& rhs) noexcept;
};
// Move construction: takes over ownership, leaving the
// source dissociated and unowned.
template<class G>
genlock<G>::
genlock(genlock&& other)
: owned_(other.owned_)
, g_(other.g_)
{
other.owned_ = false;
other.g_ = nullptr;
}
// Move assignment: releases any currently held lock first.
// NOTE(review): self-move would unlock and then clear the
// object; callers are presumed not to self-move.
template<class G>
genlock<G>&
genlock<G>::
operator=(genlock&& other)
{
if(owns_lock())
unlock();
owned_ = other.owned_;
g_ = other.g_;
other.owned_ = false;
other.g_ = nullptr;
return *this;
}
// Construct and immediately acquire the lock.
template<class G>
genlock<G>::
genlock(mutex_type& g)
: g_(&g)
{
lock();
}
// Construct associated but unlocked (deferred).
template<class G>
genlock<G>::
genlock(mutex_type& g, std::defer_lock_t)
: g_(&g)
{
}
// Release the lock on destruction if still owned.
template<class G>
genlock<G>::
~genlock()
{
if(owns_lock())
unlock();
}
// Acquire the lock, recording the generation it belongs to.
template<class G>
void
genlock<G>::
lock()
{
// no associated gentex
BOOST_ASSERT(g_ != nullptr);
// gentex is already owned
BOOST_ASSERT(! owned_);
gen_ = g_->lock_gen();
owned_ = true;
}
// Release the lock under the recorded generation.
template<class G>
void
genlock<G>::
unlock()
{
// no associated gentex
BOOST_ASSERT(g_ != nullptr);
// gentex is not owned
BOOST_ASSERT(owned_);
g_->unlock_gen(gen_);
owned_ = false;
}
// Dissociate without unlocking. Note: owned_ is not reset
// here; owns_lock() still reports false afterwards because
// g_ becomes null.
template<class G>
auto
genlock<G>::
release() noexcept ->
mutex_type*
{
mutex_type* const g = g_;
g_ = nullptr;
return g;
}
// Exchange the state of two genlock objects.
template<class G>
void
swap(genlock<G>& lhs, genlock<G>& rhs) noexcept
{
    using std::swap;
    swap(lhs.owned_, rhs.owned_);
    swap(lhs.g_, rhs.g_);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,26 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_MUTEX_HPP
#define NUDB_DETAIL_MUTEX_HPP
#include <boost/thread/lock_types.hpp>
#include <boost/thread/shared_mutex.hpp>
namespace nudb {
namespace detail {
// Shared (reader) and exclusive (writer) lock aliases
// used throughout the implementation.
using shared_lock_type =
boost::shared_lock<boost::shared_mutex>;
using unique_lock_type =
boost::unique_lock<boost::shared_mutex>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,243 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_POOL_HPP
#define NUDB_DETAIL_POOL_HPP
#include <nudb/detail/arena.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <boost/thread/lock_types.hpp>
#include <cstdint>
#include <cstring>
#include <memory>
#include <map>
#include <utility>
namespace nudb {
namespace detail {
// Buffers key/value pairs in a map, associating
// them with a modifiable data file offset.
//
// Keys and values are copied into an arena; the map keys
// are ordered by raw key bytes (fixed key size).
template<class = void>
class pool_t
{
public:
struct value_type;
class compare;
private:
using map_type = std::map<
value_type, noff_t, compare>;
arena arena_;       // owns the copied key/value bytes
nsize_t key_size_;  // fixed size of every key
nsize_t data_size_ = 0; // running sum of value sizes
map_type map_;
public:
using iterator =
typename map_type::iterator;
pool_t(pool_t const&) = delete;
pool_t& operator=(pool_t const&) = delete;
pool_t(pool_t&& other);
pool_t(nsize_t key_size, char const* label);
iterator
begin()
{
return map_.begin();
}
iterator
end()
{
return map_.end();
}
bool
empty() const
{
return map_.size() == 0;
}
// Returns the number of elements in the pool
std::size_t
size() const
{
return map_.size();
}
// Returns the sum of data sizes in the pool
std::size_t
data_size() const
{
return data_size_;
}
void
clear();
void
periodic_activity();
iterator
find(void const* key);
// Insert a value
// @param h The hash of the key
void
insert(nhash_t h, void const* key,
void const* buffer, nsize_t size);
template<class U>
friend
void
swap(pool_t<U>& lhs, pool_t<U>& rhs);
};
// Map key: hash, value size, and pointers into the arena.
template<class _>
struct pool_t<_>::value_type
{
nhash_t hash;
nsize_t size;
void const* key;
void const* data;
value_type(value_type const&) = default;
value_type& operator=(value_type const&) = default;
value_type(nhash_t hash_, nsize_t size_,
void const* key_, void const* data_)
: hash(hash_)
, size(size_)
, key(key_)
, data(data_)
{
}
};
// Strict weak ordering over raw key bytes; the key size is
// fixed per pool and captured at construction.
template<class _>
class pool_t<_>::compare
{
std::size_t key_size_;
public:
using result_type = bool;
using first_argument_type = value_type;
using second_argument_type = value_type;
compare(compare const&) = default;
compare& operator=(compare const&) = default;
explicit
compare(nsize_t key_size)
: key_size_(key_size)
{
}
bool
operator()(value_type const& lhs,
value_type const& rhs) const
{
return std::memcmp(
lhs.key, rhs.key, key_size_) < 0;
}
};
//------------------------------------------------------------------------------
// Move construction: transfers the arena and map.
template<class _>
pool_t<_>::
pool_t(pool_t&& other)
: arena_(std::move(other.arena_))
, key_size_(other.key_size_)
, data_size_(other.data_size_)
, map_(std::move(other.map_))
{
}
// Construct an empty pool for keys of the given fixed size.
// The label is forwarded to the arena (diagnostics).
template<class _>
pool_t<_>::
pool_t(nsize_t key_size, char const* label)
: arena_(label)
, key_size_(key_size)
, map_(compare{key_size})
{
}
// Remove all elements and release arena storage.
template<class _>
void
pool_t<_>::
clear()
{
arena_.clear();
data_size_ = 0;
map_.clear();
}
template<class _>
void
pool_t<_>::
periodic_activity()
{
arena_.periodic_activity();
}
// Find the element with a matching key, or end().
template<class _>
auto
pool_t<_>::
find(void const* key) ->
iterator
{
// VFALCO need is_transparent here
value_type tmp{0, 0, key, nullptr};
auto const iter = map_.find(tmp);
return iter;
}
// Insert a key/value pair; both are copied into the arena.
// The mapped offset starts at zero. The key must not exist.
template<class _>
void
pool_t<_>::
insert(nhash_t h,
void const* key, void const* data, nsize_t size)
{
auto const k = arena_.alloc(key_size_);
auto const d = arena_.alloc(size);
std::memcpy(k, key, key_size_);
std::memcpy(d, data, size);
auto const result = map_.emplace(
std::piecewise_construct,
std::make_tuple(h, size, k, d),
std::make_tuple(0));
(void)result.second;
// Must not already exist!
BOOST_ASSERT(result.second);
data_size_ += size;
}
// Exchange the state of two pools.
template<class _>
void
swap(pool_t<_>& lhs, pool_t<_>& rhs)
{
using std::swap;
swap(lhs.arena_, rhs.arena_);
swap(lhs.key_size_, rhs.key_size_);
swap(lhs.data_size_, rhs.data_size_);
swap(lhs.map_, rhs.map_);
}
using pool = pool_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,149 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_STREAM_HPP
#define NUDB_DETAIL_STREAM_HPP
#include <boost/assert.hpp>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
namespace nudb {
namespace detail {
// Input stream from bytes
//
// Non-owning view over a caller-provided buffer; each call
// to data() consumes bytes from the front.
template<class = void>
class istream_t
{
std::uint8_t const* buf_ = nullptr; // next unread byte
std::size_t size_ = 0;              // bytes remaining
public:
istream_t() = default;
istream_t(istream_t const&) = default;
istream_t& operator=(istream_t const&) = default;
istream_t(void const* data, std::size_t size)
: buf_(reinterpret_cast<std::uint8_t const*>(data))
, size_(size)
{
}
template<std::size_t N>
istream_t(std::array<std::uint8_t, N> const& a)
: buf_(a.data())
, size_(a.size())
{
}
// Consume and return a pointer to the next `bytes` bytes.
std::uint8_t const*
data(std::size_t bytes);
std::uint8_t const*
operator()(std::size_t bytes)
{
return data(bytes);
}
};
// Precondition: bytes <= size_
//
template<class _>
std::uint8_t const*
istream_t<_>::data(std::size_t bytes)
{
BOOST_ASSERT(bytes <= size_);
// The throw guards release builds, where the
// assert above compiles away.
if(size_ < bytes)
throw std::logic_error("short read from istream");
auto const data = buf_;
buf_ = buf_ + bytes;
size_ -= bytes;
return data;
}
using istream = istream_t<>;
//------------------------------------------------------------------------------
// Output stream to bytes
// VFALCO Should this assert on overwriting the buffer?
template<class = void>
class ostream_t
{
std::uint8_t* buf_ = nullptr; // next write position
std::size_t size_ = 0;        // bytes written so far
public:
ostream_t() = default;
ostream_t(ostream_t const&) = default;
ostream_t& operator=(ostream_t const&) = default;
// NOTE(review): the size argument is ignored; the stream
// performs no bounds checking — callers are trusted to
// size the buffer correctly.
ostream_t(void* data, std::size_t)
: buf_(reinterpret_cast<std::uint8_t*>(data))
{
}
template<std::size_t N>
ostream_t(std::array<std::uint8_t, N>& a)
: buf_(a.data())
{
}
// Returns the number of bytes written
std::size_t
size() const
{
return size_;
}
// Advance the write position and return the previous one.
std::uint8_t*
data(std::size_t bytes);
std::uint8_t*
operator()(std::size_t bytes)
{
return data(bytes);
}
};
// Reserve `bytes` bytes for writing; returns a pointer to
// the reserved region. No bounds checking is performed.
template<class _>
std::uint8_t*
ostream_t<_>::data(std::size_t bytes)
{
auto const data = buf_;
buf_ = buf_ + bytes;
size_ += bytes;
return data;
}
using ostream = ostream_t<>;
//------------------------------------------------------------------------------
// Copy a raw span of bytes out of the input stream.
inline
void
read(istream& is, void* buffer, std::size_t bytes)
{
    auto const src = is.data(bytes);
    std::memcpy(buffer, src, bytes);
}
// Copy a raw span of bytes into the output stream.
inline
void
write(ostream& os, void const* buffer, std::size_t bytes)
{
    auto const dst = os.data(bytes);
    std::memcpy(dst, buffer, bytes);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,328 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
//
// This is a derivative work based on xxHash 0.6.2, copyright below:
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
#ifndef NUDB_DETAIL_XXHASH_HPP
#define NUDB_DETAIL_XXHASH_HPP
#include <nudb/detail/endian.hpp>
#include <cstdint>
#include <cstdlib>
#include <cstring>
namespace nudb {
namespace detail {
// Compiler version probe used to select byte-swap intrinsics.
#define NUDB_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// minGW _rotl gives poor performance
#if defined(_MSC_VER)
# define NUDB_XXH_rotl64(x,r) _rotl64(x,r)
#else
# define NUDB_XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
#if defined(_MSC_VER)
# define NUDB_XXH_swap32 _byteswap_ulong
#elif NUDB_GCC_VERSION >= 403
# define NUDB_XXH_swap32 __builtin_bswap32
#endif
#if defined(_MSC_VER)
# define NUDB_XXH_swap64 _byteswap_uint64
#elif NUDB_GCC_VERSION >= 403
# define NUDB_XXH_swap64 __builtin_bswap64
#endif
// Portable fallbacks when no byte-swap intrinsic is available.
#ifndef NUDB_XXH_swap32
inline
std::uint32_t
NUDB_XXH_swap32(std::uint32_t x)
{
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
((x >> 8) & 0x0000ff00 ) |
((x >> 24) & 0x000000ff );
}
#endif
#ifndef NUDB_XXH_swap64
inline
std::uint64_t
NUDB_XXH_swap64(std::uint64_t x)
{
return ((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
((x << 24) & 0x0000ff0000000000ULL) |
((x << 8) & 0x000000ff00000000ULL) |
((x >> 8) & 0x00000000ff000000ULL) |
((x >> 24) & 0x0000000000ff0000ULL) |
((x >> 40) & 0x000000000000ff00ULL) |
((x >> 56) & 0x00000000000000ffULL);
}
#endif
// XXH64 mixing constants (from the xxHash specification).
static std::uint64_t constexpr prime64_1 = 11400714785074694791ULL;
static std::uint64_t constexpr prime64_2 = 14029467366897019727ULL;
static std::uint64_t constexpr prime64_3 = 1609587929392839161ULL;
static std::uint64_t constexpr prime64_4 = 9650029242287828579ULL;
static std::uint64_t constexpr prime64_5 = 2870177450012600261ULL;
// Portable and safe unaligned 32-bit load via memcpy.
// Generally efficient; see http://stackoverflow.com/a/32095106/646947
inline
std::uint32_t
XXH_read32(void const* p)
{
    std::uint32_t result;
    std::memcpy(&result, p, sizeof(result));
    return result;
}
// Portable and safe unaligned 64-bit load via memcpy.
inline
std::uint64_t
XXH_read64(void const* p)
{
    std::uint64_t result;
    std::memcpy(&result, p, sizeof(result));
    return result;
}
// Tag-dispatched little-endian loads. The second parameter
// selects host byte order (true = little endian), the third
// selects aligned (direct dereference) vs. unaligned
// (memcpy) access.
// little endian, aligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::true_type, std::true_type)
{
return *reinterpret_cast<std::uint32_t const*>(p);
}
// little endian, unaligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::true_type, std::false_type)
{
return XXH_read32(p);
}
// big endian, aligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::false_type, std::true_type)
{
return NUDB_XXH_swap32(
*reinterpret_cast<std::uint32_t const*>(p));
}
// big endian, unaligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::false_type, std::false_type)
{
return NUDB_XXH_swap32(XXH_read32(p));
}
// little endian, aligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::true_type, std::true_type)
{
return *reinterpret_cast<std::uint64_t const*>(p);
}
// little endian, unaligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::true_type, std::false_type)
{
return XXH_read64(p);
}
// big endian, aligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::false_type, std::true_type)
{
return NUDB_XXH_swap64(
*reinterpret_cast<std::uint64_t const*>(p));
}
// big endian, unaligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::false_type, std::false_type)
{
return NUDB_XXH_swap64(XXH_read64(p));
}
// One accumulator step of the XXH64 inner loop.
inline
std::uint64_t
XXH64_round(std::uint64_t acc, std::uint64_t input)
{
    auto const mixed = acc + input * prime64_2;
    return NUDB_XXH_rotl64(mixed, 31) * prime64_1;
}
// Fold one accumulator into the final hash state.
inline
std::uint64_t
XXH64_mergeRound(std::uint64_t acc, std::uint64_t val)
{
    acc ^= XXH64_round(0, val);
    return acc * prime64_1 + prime64_4;
}
// Core XXH64 computation, parameterized at compile time on
// host byte order and pointer alignment. Follows the
// reference algorithm: four parallel 64-bit accumulators
// over 32-byte stripes, then a tail loop, then avalanche.
template<bool LittleEndian, bool Aligned>
std::uint64_t
XXH64_endian_align(
void const* input, std::size_t len, std::uint64_t seed,
std::integral_constant<bool, LittleEndian> endian,
std::integral_constant<bool, Aligned> align)
{
const std::uint8_t* p = (const std::uint8_t*)input;
const std::uint8_t* const bEnd = p + len;
std::uint64_t h64;
// Little-endian loads specialized by the tag parameters.
auto const XXH_get32bits =
[](void const* p)
{
return XXH_readLE32_align(p,
decltype(endian){}, decltype(align){});
};
auto const XXH_get64bits =
[](void const* p)
{
return XXH_readLE64_align(p,
decltype(endian){}, decltype(align){});
};
if(len>=32)
{
const std::uint8_t* const limit = bEnd - 32;
std::uint64_t v1 = seed + prime64_1 + prime64_2;
std::uint64_t v2 = seed + prime64_2;
std::uint64_t v3 = seed + 0;
std::uint64_t v4 = seed - prime64_1;
// Main loop: consume 32 bytes per iteration.
do
{
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
}
while(p<=limit);
h64 = NUDB_XXH_rotl64(v1, 1) +
NUDB_XXH_rotl64(v2, 7) +
NUDB_XXH_rotl64(v3, 12) +
NUDB_XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
h64 = XXH64_mergeRound(h64, v2);
h64 = XXH64_mergeRound(h64, v3);
h64 = XXH64_mergeRound(h64, v4);
}
else
{
h64 = seed + prime64_5;
}
h64 += len;
// Tail: remaining 8-byte, 4-byte, then single bytes.
while(p + 8 <= bEnd)
{
std::uint64_t const k1 = XXH64_round(0, XXH_get64bits(p));
h64 ^= k1;
h64 = NUDB_XXH_rotl64(h64,27) * prime64_1 + prime64_4;
p+=8;
}
if(p+4<=bEnd)
{
h64 ^= (std::uint64_t)(XXH_get32bits(p)) * prime64_1;
h64 = NUDB_XXH_rotl64(h64, 23) * prime64_2 + prime64_3;
p+=4;
}
while(p<bEnd)
{
h64 ^= (*p) * prime64_5;
h64 = NUDB_XXH_rotl64(h64, 11) * prime64_1;
p++;
}
// Final avalanche.
h64 ^= h64 >> 33;
h64 *= prime64_2;
h64 ^= h64 >> 29;
h64 *= prime64_3;
h64 ^= h64 >> 32;
return h64;
}
/* Calculate the 64-bit hash of a block of memory.

    @param data A pointer to the buffer to compute the hash on.
    The buffer may be unaligned.

    @note This function runs faster on 64-bits systems, but slower
    on 32-bits systems (see benchmark).

    @param bytes The size of the buffer in bytes.

    @param seed A value which may be used to permute the output.
    Using a different seed with the same input will produce a
    different value.

    @return The 64-bit hash of the input data.
*/
template<class = void>
std::uint64_t
XXH64(void const* data, size_t bytes, std::uint64_t seed)
{
    // Use faster algorithm if aligned
    //
    // Fix: the alignment tags were previously swapped — an
    // 8-byte-aligned pointer selected the unaligned (memcpy)
    // path, while an unaligned pointer selected the direct
    // dereference path, which is undefined behavior on
    // strict-alignment targets. Aligned pointers must pass
    // std::true_type (aligned loads); all others must pass
    // std::false_type (memcpy loads).
    if((reinterpret_cast<std::uintptr_t>(data) & 7) == 0)
        return XXH64_endian_align(data, bytes, seed,
            is_little_endian{}, std::true_type{});
    return XXH64_endian_align(data, bytes, seed,
        is_little_endian{}, std::false_type{});
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,263 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_ERROR_HPP
#define NUDB_ERROR_HPP
#include <boost/system/system_error.hpp>
#include <boost/system/error_code.hpp>
namespace nudb {
/// The type of system-specific error code returned by the implementation
#if GENERATING_DOCS
class error_code{};
#else
using boost::system::error_code;
#endif
/// The type of cross-platform error code used by the implementation
#if GENERATING_DOCS
class error_condition{};
#else
using boost::system::error_condition;
#endif
/// The type of system-specific exception used when throwing
#if GENERATING_DOCS
class system_error{};
#else
using boost::system::system_error;
#endif
/// Returns the category used for system-specific error codes
#if GENERATING_DOCS
error_category const&
system_category();
#else
using boost::system::system_category;
#endif
/// Returns the category used for cross-platform error codes
#if GENERATING_DOCS
error_category const&
generic_category();
#else
using boost::system::generic_category;
#endif
/// The base class used for error categories
#if GENERATING_DOCS
class error_category{};
#else
using boost::system::error_category;
#endif
/// The set of constants used for cross-platform error codes
#if GENERATING_DOCS
enum errc{};
#else
// All aliases above forward to Boost.System; the
// GENERATING_DOCS stubs exist only for the doc generator.
namespace errc = boost::system::errc;
#endif
/// Database error codes.
///
/// These values convert implicitly to @ref error_code via
/// make_error_code (see is_error_code_enum below).
enum class error
{
/** No error.

    The operation completed successfully.
*/
success = 0,

/** The specified key was not found.

    Returned when @ref basic_store::fetch does not
    find the specified key.
*/
key_not_found,

/** The specified key already exists.

    Returned when @ref basic_store::insert finds
    the specified key already in the database.
*/
key_exists,

/** A file read returned less data than expected.

    This can be caused by premature application
    termination during a commit cycle.
*/
short_read,

/** A log file is present.

    Indicates that the database needs to have the
    associated log file applied to perform a recovery.
    This error is returned by functions such as @ref rekey.
*/
log_file_exists,

/** No key file exists.

    This error is returned by the recover process when
    there is no valid key file. It happens when a
    @ref rekey operation prematurely terminates. A
    database without a key file cannot be opened. To
    fix this error, it is necessary for an invocation of
    @ref rekey to complete successfully.
*/
no_key_file,

/// Too many buckets in key file
too_many_buckets,

/// Not a data file
not_data_file,

/// Not a key file
not_key_file,

/// Not a log file
not_log_file,

/// Different version
different_version,

/// Invalid key size
invalid_key_size,

/// Invalid block size
invalid_block_size,

/// Short key file
short_key_file,

/// Short bucket
short_bucket,

/// Short spill
short_spill,

/// Short record
short_data_record,

/// Short value
short_value,

/// Hash mismatch (salt/pepper do not agree with the hasher)
hash_mismatch,

/// Invalid load factor
invalid_load_factor,

/// Invalid capacity
invalid_capacity,

/// Invalid bucket count
invalid_bucket_count,

/// Invalid bucket size
invalid_bucket_size,

/// The data file header was incomplete
incomplete_data_file_header,

/// The key file header was incomplete
incomplete_key_file_header,

/// Invalid log record
invalid_log_record,

/// Invalid spill in log record
invalid_log_spill,

/// Invalid offset in log record
invalid_log_offset,

/// Invalid index in log record
invalid_log_index,

/// Invalid size in spill
invalid_spill_size,

/// UID mismatch between file headers
uid_mismatch,

/// appnum mismatch between file headers
appnum_mismatch,

/// key size mismatch between file headers
key_size_mismatch,

/// salt mismatch between file headers
salt_mismatch,

/// pepper mismatch between file headers
pepper_mismatch,

/// block size mismatch between file headers
block_size_mismatch,

/// orphaned value
orphaned_value,

/// missing value
missing_value,

/// size mismatch
size_mismatch,

/// duplicate value
duplicate_value
};
/// Returns the error category used for database error codes.
error_category const&
nudb_category();
/** Returns a database error code.

    This function is used by the implementation to convert
    @ref error values into @ref error_code objects.
*/
inline
error_code
make_error_code(error ev)
{
    // Pair the enumerator's integral value with the nudb category
    auto const value = static_cast<int>(ev);
    return error_code{value, nudb_category()};
}
} // nudb
namespace boost {
namespace system {
// Tells Boost.System that nudb::error enumerators may be
// implicitly converted to error_code (via the make_error_code
// overload found by argument-dependent lookup).
template<>
struct is_error_code_enum<nudb::error>
{
    static bool const value = true;
};
} // system
} // boost
#include <nudb/impl/error.ipp>
#endif

View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_FILE_HPP
#define NUDB_FILE_HPP
#include <cstddef>
#include <string>
namespace nudb {
/// The type used to hold paths to files
using path_type = std::string;

/** Returns the best guess at the volume's block size.

    @param path A path to a file on the device. The file does
    not need to exist.
*/
inline
std::size_t
block_size(path_type const& path)
{
    // The path is currently unused; 4096 bytes is a
    // sensible default for most SSD devices.
    static_cast<void>(path);
    return 4096;
}
/** File create and open modes.

    These are used by @ref native_file. Implementations map
    each mode to native open flags and an access-pattern hint
    (see posix_file::flags).
*/
enum class file_mode
{
    /// Open the file for sequential reads
    scan,
    /// Open the file for random reads
    read,
    /// Open the file for random reads and appending writes
    append,
    /// Open the file for random reads and writes
    write
};
} // nudb
#endif

View File

@@ -0,0 +1,793 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_BASIC_STORE_IPP
#define NUDB_IMPL_BASIC_STORE_IPP
#include <nudb/concepts.hpp>
#include <nudb/recover.hpp>
#include <boost/assert.hpp>
#include <cmath>
#include <memory>
#ifndef NUDB_DEBUG_LOG
#define NUDB_DEBUG_LOG 0
#endif
#if NUDB_DEBUG_LOG
#include <beast/unit_test/dstream.hpp>
#include <iostream>
#endif
namespace nudb {
// Constructs the shared open-database state.
//
// Takes ownership of the data, key, and log files, records their
// paths, seeds the hasher with the salt from the key file header,
// and constructs the two insert pools (p0, p1) and the bucket
// cache (c1) sized from the header's key and block sizes.
template<class Hasher, class File>
basic_store<Hasher, File>::state::
state(File&& df_, File&& kf_, File&& lf_,
    path_type const& dp_, path_type const& kp_,
    path_type const& lp_,
    detail::key_file_header const& kh_)
    : df(std::move(df_))
    , kf(std::move(kf_))
    , lf(std::move(lf_))
    , dp(dp_)
    , kp(kp_)
    , lp(lp_)
    , hasher(kh_.salt)
    , p0(kh_.key_size, "p0")
    , p1(kh_.key_size, "p1")
    , c1(kh_.key_size, kh_.block_size, "c1")
    , kh(kh_)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
}
//------------------------------------------------------------------------------
// Destructor: closes the database if still open, discarding
// any error. Callers who care about errors should call close()
// explicitly before destruction.
template<class Hasher, class File>
basic_store<Hasher, File>::
~basic_store()
{
    error_code ec;
    // We call close here to make sure data is intact
    // if an exception destroys the basic_store, but callers
    // should always call close manually to receive the
    // error code.
    close(ec);
}
// Returns the path to the data file.
// Preconditions: the database is open.
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
dat_path() const
{
    BOOST_ASSERT(is_open());
    return s_->dp;
}
// Returns the path to the key file.
// Preconditions: the database is open.
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
key_path() const
{
    BOOST_ASSERT(is_open());
    return s_->kp;
}
// Returns the path to the log file.
// Preconditions: the database is open.
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
log_path() const
{
    BOOST_ASSERT(is_open());
    return s_->lp;
}
// Returns the application-defined number stored in the key file header.
// Preconditions: the database is open.
template<class Hasher, class File>
std::uint64_t
basic_store<Hasher, File>::
appnum() const
{
    BOOST_ASSERT(is_open());
    return s_->kh.appnum;
}
// Returns the fixed size of keys in the database, from the key
// file header. Preconditions: the database is open.
template<class Hasher, class File>
std::size_t
basic_store<Hasher, File>::
key_size() const
{
    BOOST_ASSERT(is_open());
    return s_->kh.key_size;
}
// Returns the block size used by the key file, from the key
// file header. Preconditions: the database is open.
template<class Hasher, class File>
std::size_t
basic_store<Hasher, File>::
block_size() const
{
    BOOST_ASSERT(is_open());
    return s_->kh.block_size;
}
// Opens an existing database.
//
// Runs recovery first (applying any leftover log file), then opens
// the data, key, and log files, validates all three headers against
// each other, initializes the split bookkeeping (thresh_, frac_,
// buckets_, modulus_), and finally starts the background commit
// thread. On any error the store remains closed and `ec` is set.
//
// `args` are forwarded to each File constructor.
template<class Hasher, class File>
template<class... Args>
void
basic_store<Hasher, File>::
open(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    error_code& ec,
    Args&&... args)
{
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    using namespace detail;
    BOOST_ASSERT(! is_open());
    // Reset the background-thread error state from any prior session
    ec_ = {};
    ecb_.store(false);
    // Apply any pending log file to restore consistency
    recover<Hasher, File>(
        dat_path, key_path, log_path, ec, args...);
    if(ec)
        return;
    File df(args...);
    File kf(args...);
    File lf(args...);
    df.open(file_mode::append, dat_path, ec);
    if(ec)
        return;
    kf.open(file_mode::write, key_path, ec);
    if(ec)
        return;
    lf.create(file_mode::append, log_path, ec);
    if(ec)
        return;
    // VFALCO TODO Erase empty log file if this
    //             function subsequently fails.
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    key_file_header kh;
    read(kf, kh, ec);
    if(ec)
        return;
    verify<Hasher>(kh, ec);
    if(ec)
        return;
    // Ensure the data and key files belong to the same database
    verify<Hasher>(dh, kh, ec);
    if(ec)
        return;
    boost::optional<state> s;
    s.emplace(std::move(df), std::move(kf), std::move(lf),
        dat_path, key_path, log_path, kh);
    // Accumulated-work threshold that triggers a bucket split
    thresh_ = std::max<std::size_t>(65536UL,
        kh.load_factor * kh.capacity);
    frac_ = thresh_ / 2;
    buckets_ = kh.buckets;
    modulus_ = ceil_pow2(kh.buckets);
    // VFALCO TODO This could be better
    if(buckets_ < 1)
    {
        ec = error::short_key_file;
        return;
    }
    dataWriteSize_ = 32 * nudb::block_size(dat_path);
    logWriteSize_ = 32 * nudb::block_size(log_path);
    s_.emplace(std::move(*s));
    open_ = true;
    // Launch the background commit thread
    t_ = std::thread(&basic_store::run, this);
}
// Closes the database.
//
// Stops the background thread (which performs a final commit of any
// pending inserts on its way out), then removes the log file. If
// the background thread had previously stored an error, that error
// is reported instead and the log file is left in place for recovery.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
close(error_code& ec)
{
    if(open_)
    {
        // Clear the flag, wake the thread, and wait for it to exit
        open_ = false;
        cv_.notify_all();
        t_.join();
        if(ecb_)
        {
            // Report the error captured by the background thread
            ec = ec_;
            return;
        }
        s_->lf.close();
        state s{std::move(*s_)};
        // After a clean shutdown the log file is no longer needed
        File::erase(s.lp, ec_);
        if(ec_)
            ec = ec_;
    }
}
// Fetches the value for a key, invoking `callback(data, size)`
// on success or setting ec to error::key_not_found.
//
// Lookup order: the current insert pool (p1), the pool being
// committed (p0), the bucket cache (c1), and finally the bucket
// read from the key file (following spills in the data file).
template<class Hasher, class File>
template<class Callback>
void
basic_store<Hasher, File>::
fetch(
    void const* key,
    Callback && callback,
    error_code& ec)
{
    using namespace detail;
    BOOST_ASSERT(is_open());
    if(ecb_)
    {
        // The background thread failed; surface its error
        ec = ec_;
        return;
    }
    auto const h =
        hash(key, s_->kh.key_size, s_->hasher);
    shared_lock_type m{m_};
    {
        // Check the in-memory insert pools first
        auto iter = s_->p1.find(key);
        if(iter == s_->p1.end())
        {
            iter = s_->p0.find(key);
            if(iter == s_->p0.end())
                goto cont;
        }
        callback(iter->first.data, iter->first.size);
        return;
    }
cont:
    auto const n = bucket_index(h, buckets_, modulus_);
    auto const iter = s_->c1.find(n);
    if(iter != s_->c1.end())
        return fetch(h, key, iter->second, callback, ec);
    // Not cached: read the bucket from the key file. The genlock
    // coordinates with the commit thread's g_.start()/g_.finish()
    // window so the on-disk bucket is not read mid-update.
    genlock<gentex> g{g_};
    m.unlock();
    buffer buf{s_->kh.block_size};
    // b constructs from uninitialized buf
    bucket b{s_->kh.block_size, buf.get()};
    b.read(s_->kf, (n + 1) * b.block_size(), ec);
    if(ec)
        return;
    fetch(h, key, b, callback, ec);
}
// Inserts a key/value pair.
//
// Sets ec to error::key_exists if the key is already present in
// either insert pool, the bucket cache, or on disk. On success the
// pair is added to pool p1 and will be durably committed later by
// the background thread. Inserters are throttled (25ms sleep) when
// the observed insertion rate exceeds the commit rate measured by
// run(), so the pools cannot grow without bound.
//
// Preconditions: size > 0 and size representable in 32 bits.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
insert(
    void const* key,
    void const* data,
    nsize_t size,
    error_code& ec)
{
    using namespace detail;
    using namespace std::chrono;
    BOOST_ASSERT(is_open());
    if(ecb_)
    {
        // The background thread failed; surface its error
        ec = ec_;
        return;
    }
    // Data Record
    BOOST_ASSERT(size > 0); // zero disallowed
    BOOST_ASSERT(size <= field<uint32_t>::max); // too large
    auto const h =
        hash(key, s_->kh.key_size, s_->hasher);
    // u_ serializes inserters against each other
    std::lock_guard<std::mutex> u{u_};
    {
        shared_lock_type m{m_};
        // Duplicate check against both in-memory pools
        if(s_->p1.find(key) != s_->p1.end() ||
            s_->p0.find(key) != s_->p0.end())
        {
            ec = error::key_exists;
            return;
        }
        auto const n = bucket_index(h, buckets_, modulus_);
        auto const iter = s_->c1.find(n);
        if(iter != s_->c1.end())
        {
            // Duplicate check against the cached bucket
            auto const found = exists(
                h, key, &m, iter->second, ec);
            if(ec)
                return;
            if(found)
            {
                ec = error::key_exists;
                return;
            }
            // m is now unlocked
        }
        else
        {
            // Duplicate check against the on-disk bucket
            // VFALCO Audit for concurrency
            genlock<gentex> g{g_};
            m.unlock();
            buffer buf;
            buf.reserve(s_->kh.block_size);
            bucket b{s_->kh.block_size, buf.get()};
            b.read(s_->kf,
                static_cast<noff_t>(n + 1) * s_->kh.block_size, ec);
            if(ec)
                return;
            auto const found = exists(h, key, nullptr, b, ec);
            if(ec)
                return;
            if(found)
            {
                ec = error::key_exists;
                return;
            }
        }
    }
    // Perform insert
    unique_lock_type m{m_};
    s_->p1.insert(h, key, data, size);
    // Estimate this round's insertion rate to decide on throttling
    auto const now = clock_type::now();
    auto const elapsed = duration_cast<duration<float>>(
        now > s_->when ? now - s_->when : clock_type::duration{1});
    auto const work = s_->p1.data_size() +
        3 * s_->p1.size() * s_->kh.block_size;
    auto const rate = static_cast<std::size_t>(
        std::ceil(work / elapsed.count()));
    auto const sleep =
        s_->rate && rate > s_->rate;
    m.unlock();
    // Throttle when inserts outpace the measured commit rate
    if(sleep)
        std::this_thread::sleep_for(milliseconds{25});
}
// Fetch key in loaded bucket b or its spills.
//
// Scans entries whose hash matches h, reading each candidate's key
// from the data file and comparing it byte-wise. Follows the chain
// of spill records until the key is found or the chain ends, in
// which case ec is set to error::key_not_found.
template<class Hasher, class File>
template<class Callback>
void
basic_store<Hasher, File>::
fetch(
    detail::nhash_t h,
    void const* key,
    detail::bucket b,
    Callback&& callback,
    error_code& ec)
{
    using namespace detail;
    buf0;  // holds key + value read from the data file
    buf1;  // holds spill bucket contents
    for(;;)
    {
        for(auto i = b.lower_bound(h); i < b.size(); ++i)
        {
            auto const item = b[i];
            if(item.hash != h)
                break;
            // Data Record
            auto const len =
                s_->kh.key_size + // Key
                item.size; // Value
            buf0.reserve(len);
            s_->df.read(item.offset +
                field<uint48_t>::size, // Size
                buf0.get(), len, ec);
            if(ec)
                return;
            if(std::memcmp(buf0.get(), key,
                s_->kh.key_size) == 0)
            {
                // Key matches; hand the value to the caller
                callback(
                    buf0.get() + s_->kh.key_size, item.size);
                return;
            }
        }
        auto const spill = b.spill();
        if(! spill)
            break;
        // Continue the search in the spill bucket
        buf1.reserve(s_->kh.block_size);
        b = bucket(s_->kh.block_size,
            buf1.get());
        b.read(s_->df, spill, ec);
        if(ec)
            return;
    }
    ec = error::key_not_found;
}
// Returns `true` if the key exists
// lock is unlocked after the first bucket processed
//
// Like fetch, but only tests for presence: each entry with a
// matching hash has its key read back from the data file and
// compared. The optional shared lock is released after the first
// bucket so readers are not blocked while spills are followed.
template<class Hasher, class File>
bool
basic_store<Hasher, File>::
exists(
    detail::nhash_t h,
    void const* key,
    detail::shared_lock_type* lock,
    detail::bucket b,
    error_code& ec)
{
    using namespace detail;
    // Single allocation: key scratch (pk) followed by
    // spill-bucket scratch (pb)
    buffer buf{s_->kh.key_size + s_->kh.block_size};
    void* pk = buf.get();
    void* pb = buf.get() + s_->kh.key_size;
    for(;;)
    {
        for(auto i = b.lower_bound(h); i < b.size(); ++i)
        {
            auto const item = b[i];
            if(item.hash != h)
                break;
            // Data Record
            s_->df.read(item.offset +
                field<uint48_t>::size, // Size
                pk, s_->kh.key_size, ec); // Key
            if(ec)
                return false;
            if(std::memcmp(pk, key, s_->kh.key_size) == 0)
                return true;
        }
        auto spill = b.spill();
        // Release the caller's lock after the first bucket
        if(lock && lock->owns_lock())
            lock->unlock();
        if(! spill)
            break;
        // Continue the search in the spill bucket
        b = bucket(s_->kh.block_size, pb);
        b.read(s_->df, spill, ec);
        if(ec)
            return false;
    }
    return false;
}
// Split the bucket in b1 to b2
// b1 must be loaded
// tmp is used as a temporary buffer
// splits are written but not the new buckets
//
// Entries are re-hashed with the new bucket count/modulus; every
// entry must land in either n1 (stay) or n2 (move). Entries held
// in b1's spill chain are re-read through tmp and redistributed,
// with b1's spill pointer cleared first.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
split(
    detail::bucket& b1,
    detail::bucket& b2,
    detail::bucket& tmp,
    nbuck_t n1,
    nbuck_t n2,
    nbuck_t buckets,
    nbuck_t modulus,
    detail::bulk_writer<File>& w,
    error_code& ec)
{
    using namespace detail;
    // Trivial case: split empty bucket
    if(b1.empty())
        return;
    // Split
    for(std::size_t i = 0; i < b1.size();)
    {
        auto const e = b1[i];
        auto const n = bucket_index(e.hash, buckets, modulus);
        (void)n1;
        (void)n2;
        BOOST_ASSERT(n==n1 || n==n2);
        if(n == n2)
        {
            b2.insert(e.offset, e.size, e.hash);
            b1.erase(i);
        }
        else
        {
            ++i;
        }
    }
    noff_t spill = b1.spill();
    if(spill)
    {
        // Walk the spill chain and redistribute its entries
        b1.spill(0);
        do
        {
            // If any part of the spill record is
            // in the write buffer then flush first
            if(spill + bucket_size(s_->kh.capacity) >
                w.offset() - w.size())
            {
                w.flush(ec);
                if(ec)
                    return;
            }
            tmp.read(s_->df, spill, ec);
            if(ec)
                return;
            for(std::size_t i = 0; i < tmp.size(); ++i)
            {
                auto const e = tmp[i];
                auto const n = bucket_index(
                    e.hash, buckets, modulus);
                BOOST_ASSERT(n==n1 || n==n2);
                if(n == n2)
                {
                    maybe_spill(b2, w, ec);
                    if(ec)
                        return;
                    b2.insert(e.offset, e.size, e.hash);
                }
                else
                {
                    maybe_spill(b1, w, ec);
                    if(ec)
                        return;
                    b1.insert(e.offset, e.size, e.hash);
                }
            }
            spill = tmp.spill();
        }
        while(spill);
    }
}
// Returns the bucket for index n, consulting the caches.
//
// Lookup order: cache c1, then cache c0 (promoting a hit into c1),
// then the key file, in which case the freshly read bucket is
// inserted into both caches. `buf` is caller-provided scratch of
// at least block_size bytes used for the disk read. On a read
// error a default-constructed bucket is returned and ec is set.
template<class Hasher, class File>
detail::bucket
basic_store<Hasher, File>::
load(
    nbuck_t n,
    detail::cache& c1,
    detail::cache& c0,
    void* buf,
    error_code& ec)
{
    using namespace detail;
    auto iter = c1.find(n);
    if(iter != c1.end())
        return iter->second;
    iter = c0.find(n);
    if(iter != c0.end())
        return c1.insert(n, iter->second)->second;
    // Bucket index n lives at file offset (n + 1) * block_size,
    // past the key file header block
    bucket tmp{s_->kh.block_size, buf};
    tmp.read(s_->kf,
        static_cast<noff_t>(n + 1) * s_->kh.block_size, ec);
    if(ec)
        return {};
    c0.insert(n, tmp);
    return c1.insert(n, tmp)->second;
}
// Commits the accumulated insert pool to disk.
//
// Called from the background thread with the unique lock held.
// Ordering is crash-safety critical:
//   1. Swap p1 into p0 so inserters can keep filling p1.
//   2. Write a log file header (rollback info) and sync it.
//   3. Append data records and spills to the data file.
//   4. Publish the new bucket view to readers (swap c1, bump
//      buckets_/modulus_, g_.start()).
//   5. Write the pre-image ("clean") buckets to the log and sync.
//   6. Write the new buckets to the key file, then sync data and
//      key files and truncate the log.
// `work` returns a byte count used by run() to measure commit rate.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
commit(detail::unique_lock_type& m,
    std::size_t& work, error_code& ec)
{
    using namespace detail;
    BOOST_ASSERT(m.owns_lock());
    BOOST_ASSERT(! s_->p1.empty());
    swap(s_->p0, s_->p1);
    m.unlock();
    work = s_->p0.data_size();
    // c0 collects the pre-modification ("clean") buckets for the
    // log; c1 collects the modified buckets for the key file
    cache c0(s_->kh.key_size, s_->kh.block_size, "c0");
    cache c1(s_->kh.key_size, s_->kh.block_size, "c1");
    // 0.63212 ~= 1 - 1/e
    {
        auto const size = static_cast<std::size_t>(
            std::ceil(0.63212 * s_->p0.size()));
        c0.reserve(size);
        c1.reserve(size);
    }
    buffer buf1{s_->kh.block_size};
    buffer buf2{s_->kh.block_size};
    bucket tmp{s_->kh.block_size, buf1.get()};
    // Prepare rollback information
    log_file_header lh;
    lh.version = currentVersion; // Version
    lh.uid = s_->kh.uid; // UID
    lh.appnum = s_->kh.appnum; // Appnum
    lh.key_size = s_->kh.key_size; // Key Size
    lh.salt = s_->kh.salt; // Salt
    lh.pepper = pepper<Hasher>(lh.salt); // Pepper
    lh.block_size = s_->kh.block_size; // Block Size
    lh.key_file_size = s_->kf.size(ec); // Key File Size
    if(ec)
        return;
    lh.dat_file_size = s_->df.size(ec); // Data File Size
    if(ec)
        return;
    write(s_->lf, lh, ec);
    if(ec)
        return;
    // Checkpoint
    s_->lf.sync(ec);
    if(ec)
        return;
    // Append data and spills to data file
    auto modulus = modulus_;
    auto buckets = buckets_;
    {
        // Bulk write to avoid write amplification
        auto const size = s_->df.size(ec);
        if(ec)
            return;
        bulk_writer<File> w{s_->df, size, dataWriteSize_};
        // Write inserted data to the data file
        for(auto& e : s_->p0)
        {
            // VFALCO This could be UB since other
            //        threads are reading other data members
            //        of this object in memory
            e.second = w.offset();
            auto os = w.prepare(value_size(
                e.first.size, s_->kh.key_size), ec);
            if(ec)
                return;
            // Data Record
            write<uint48_t>(os, e.first.size); // Size
            write(os, e.first.key, s_->kh.key_size); // Key
            write(os, e.first.data, e.first.size); // Data
        }
        // Do inserts, splits, and build view
        // of original and modified buckets
        for(auto const e : s_->p0)
        {
            // VFALCO Should this be >= or > ?
            if((frac_ += 65536) >= thresh_)
            {
                // split
                frac_ -= thresh_;
                if(buckets == modulus)
                    modulus *= 2;
                auto const n1 = buckets - (modulus / 2);
                auto const n2 = buckets++;
                auto b1 = load(n1, c1, c0, buf2.get(), ec);
                if(ec)
                    return;
                auto b2 = c1.create(n2);
                // If split spills, the writer is
                // flushed which can amplify writes.
                split(b1, b2, tmp, n1, n2,
                    buckets, modulus, w, ec);
                if(ec)
                    return;
            }
            // Insert
            auto const n = bucket_index(
                e.first.hash, buckets, modulus);
            auto b = load(n, c1, c0, buf2.get(), ec);
            if(ec)
                return;
            // This can amplify writes if it spills.
            maybe_spill(b, w, ec);
            if(ec)
                return;
            b.insert(e.second, e.first.size, e.first.hash);
        }
        w.flush(ec);
        if(ec)
            return;
    }
    work += s_->kh.block_size * (2 * c0.size() + c1.size());
    // Give readers a view of the new buckets.
    // This might be slightly better than the old
    // view since there could be fewer spills.
    m.lock();
    swap(c1, s_->c1);
    s_->p0.clear();
    buckets_ = buckets;
    modulus_ = modulus;
    g_.start();
    m.unlock();
    // Write clean buckets to log file
    {
        auto const size = s_->lf.size(ec);
        if(ec)
            return;
        bulk_writer<File> w{s_->lf, size, logWriteSize_};
        for(auto const e : c0)
        {
            // Log Record
            auto os = w.prepare(
                field<std::uint64_t>::size + // Index
                e.second.actual_size(), ec); // Bucket
            if(ec)
                return;
            // Log Record
            write<std::uint64_t>(os, e.first); // Index
            e.second.write(os); // Bucket
        }
        c0.clear();
        w.flush(ec);
        if(ec)
            return;
        s_->lf.sync(ec);
        if(ec)
            return;
    }
    g_.finish();
    // Write new buckets to key file
    for(auto const e : s_->c1)
    {
        e.second.write(s_->kf,
            (e.first + 1) * s_->kh.block_size, ec);
        if(ec)
            return;
    }
    // Finalize the commit
    s_->df.sync(ec);
    if(ec)
        return;
    s_->kf.sync(ec);
    if(ec)
        return;
    s_->lf.trunc(0, ec);
    if(ec)
        return;
    s_->lf.sync(ec);
    if(ec)
        return;
    // Cache is no longer needed, all fetches will go straight
    // to disk again. Do this after the sync, otherwise readers
    // might get blocked longer due to the extra I/O.
    m.lock();
    s_->c1.clear();
}
// Background thread: commits pending inserts about once per second.
//
// After each commit it computes the achieved commit rate (bytes of
// work per second), which insert() uses to throttle writers. On a
// commit error the error is stored in ec_/ecb_ and the thread
// exits; close() reports it. A final commit is performed on
// shutdown before the thread returns.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
run()
{
    using namespace std::chrono;
    using namespace detail;
#if NUDB_DEBUG_LOG
    beast::unit_test::dstream dout{std::cout};
#endif
    for(;;)
    {
        unique_lock_type m{m_};
        if(! s_->p1.empty())
        {
            std::size_t work;
            commit(m, work, ec_);
            if(ec_)
            {
                // Record the failure for the foreground threads
                ecb_.store(true);
                return;
            }
            BOOST_ASSERT(m.owns_lock());
            // Measure the commit rate used to throttle inserters
            auto const now = clock_type::now();
            auto const elapsed = duration_cast<duration<float>>(
                now > s_->when ? now - s_->when : clock_type::duration{1});
            s_->rate = static_cast<std::size_t>(
                std::ceil(work / elapsed.count()));
#if NUDB_DEBUG_LOG
            dout <<
                "work=" << work <<
                ", time=" << elapsed.count() <<
                ", rate=" << s_->rate <<
                "\n";
#endif
        }
        s_->p1.periodic_activity();
        // Sleep up to one second, or until close() signals shutdown
        cv_.wait_until(m, s_->when + seconds{1},
            [this]{ return ! open_; });
        if(! open_)
            break;
        s_->when = clock_type::now();
    }
    // Final commit of anything inserted since the last cycle
    {
        unique_lock_type m{m_};
        std::size_t work;
        if(! s_->p1.empty())
            commit(m, work, ec_);
    }
    if(ec_)
    {
        ecb_.store(true);
        return;
    }
}
} // nudb
#endif

View File

@@ -0,0 +1,163 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_CREATE_IPP
#define NUDB_IMPL_CREATE_IPP
#include <nudb/concepts.hpp>
#include <nudb/native_file.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstring>
#include <random>
#include <stdexcept>
#include <utility>
namespace nudb {
namespace detail {
// Returns a random 64-bit unique database id.
//
// The uid ties the data, key, and log files of one database
// together so mismatched files are detected.
template<class = void>
std::uint64_t
make_uid()
{
    std::random_device rng;
    std::mt19937_64 gen {rng()};
    // Draw from a 64-bit distribution explicitly: the previous
    // std::size_t distribution produced only 32 bits of entropy
    // on platforms where size_t is 32 bits, weakening uniqueness.
    std::uniform_int_distribution<std::uint64_t> dist;
    return dist(gen);
}
} // detail
// Returns a random 64-bit salt.
//
// The salt seeds the hasher so bucket distributions differ
// between databases.
template<class>
std::uint64_t
make_salt()
{
    std::random_device rng;
    std::mt19937_64 gen {rng()};
    // Use a 64-bit distribution explicitly: the previous
    // std::size_t distribution produced only 32 bits of entropy
    // on platforms where size_t is 32 bits.
    std::uniform_int_distribution<std::uint64_t> dist;
    return dist(gen);
}
// Creates a new, empty database.
//
// Validates the parameters, creates the three files (failing if any
// already exists), writes the data and key file headers plus one
// empty root bucket, and syncs everything. On any failure, every
// file created so far is erased so no partial database remains.
//
// `args` are forwarded to each File constructor.
template<
    class Hasher,
    class File,
    class... Args
>
void
create(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    std::uint64_t appnum,
    std::uint64_t salt,
    nsize_t key_size,
    nsize_t blockSize,
    float load_factor,
    error_code& ec,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    using namespace detail;
    // Parameter validation
    if(key_size < 1)
    {
        ec = error::invalid_key_size;
        return;
    }
    if(blockSize > field<std::uint16_t>::max)
    {
        ec = error::invalid_block_size;
        return;
    }
    if(load_factor <= 0.f || load_factor >= 1.f)
    {
        ec = error::invalid_load_factor;
        return;
    }
    auto const capacity =
        bucket_capacity(blockSize);
    if(capacity < 1)
    {
        ec = error::invalid_block_size;
        return;
    }
    // Track which files exist so they can be erased on failure
    bool edf = false;
    bool ekf = false;
    bool elf = false;
    {
        File df(args...);
        File kf(args...);
        File lf(args...);
        df.create(file_mode::append, dat_path, ec);
        if(ec)
            goto fail;
        edf = true;
        kf.create(file_mode::append, key_path, ec);
        if(ec)
            goto fail;
        ekf = true;
        lf.create(file_mode::append, log_path, ec);
        if(ec)
            goto fail;
        elf = true;
        dat_file_header dh;
        dh.version = currentVersion;
        dh.uid = make_uid();
        dh.appnum = appnum;
        dh.key_size = key_size;
        // The key file header shares the uid and appnum so the
        // pair of files can be matched up later
        key_file_header kh;
        kh.version = currentVersion;
        kh.uid = dh.uid;
        kh.appnum = appnum;
        kh.key_size = key_size;
        kh.salt = salt;
        kh.pepper = pepper<Hasher>(salt);
        kh.block_size = blockSize;
        // Store the load factor as a 16-bit fixed-point fraction
        kh.load_factor = std::min<std::size_t>(
            static_cast<std::size_t>(
                65536.0 * load_factor), 65535);
        write(df, dh, ec);
        if(ec)
            goto fail;
        write(kf, kh, ec);
        if(ec)
            goto fail;
        // Write the single empty root bucket
        buffer buf{blockSize};
        std::memset(buf.get(), 0, blockSize);
        bucket b(blockSize, buf.get(), empty);
        b.write(kf, blockSize, ec);
        if(ec)
            goto fail;
        // VFALCO Leave log file empty?
        df.sync(ec);
        if(ec)
            goto fail;
        kf.sync(ec);
        if(ec)
            goto fail;
        lf.sync(ec);
        if(ec)
            goto fail;
        // Success
        return;
    }
fail:
    // Remove any files created before the failure
    if(edf)
        erase_file(dat_path);
    if(ekf)
        erase_file(key_path);
    if(elf)
        erase_file(log_path);
}
} // nudb
#endif

View File

@@ -0,0 +1,180 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_ERROR_IPP
#define NUDB_IMPL_ERROR_IPP
namespace nudb {
// Returns the singleton error category for database error codes.
//
// The category maps each error enumerator to a human-readable
// message and provides the standard equivalence tests.
inline
error_category const&
nudb_category()
{
    struct cat_t : public error_category
    {
        char const*
        name() const noexcept override
        {
            return "nudb";
        }

        std::string
        message(int ev) const override
        {
            switch(static_cast<error>(ev))
            {
            case error::success:
                return "the operation completed successfully";
            case error::key_not_found:
                return "key not found";
            case error::key_exists:
                return "key already exists";
            case error::short_read:
                return "short read";
            case error::log_file_exists:
                return "a log file exists";
            case error::no_key_file:
                return "no key file";
            case error::too_many_buckets:
                return "too many buckets";
            case error::not_data_file:
                return "not a data file";
            case error::not_key_file:
                return "not a key file";
            case error::not_log_file:
                return "not a log file";
            case error::different_version:
                return "different version";
            case error::invalid_key_size:
                return "invalid key size";
            case error::invalid_block_size:
                return "invalid block size";
            case error::short_key_file:
                return "short key file";
            case error::short_bucket:
                return "short bucket";
            case error::short_spill:
                return "short spill";
            case error::short_data_record:
                return "short data record";
            case error::short_value:
                return "short value";
            case error::hash_mismatch:
                return "hash mismatch";
            case error::invalid_load_factor:
                return "invalid load factor";
            case error::invalid_capacity:
                return "invalid capacity";
            case error::invalid_bucket_count:
                return "invalid bucket count";
            case error::invalid_bucket_size:
                // Fixed: was "invalid_bucket_size" (underscores),
                // inconsistent with every other message here.
                return "invalid bucket size";
            case error::incomplete_data_file_header:
                return "incomplete data file header";
            case error::incomplete_key_file_header:
                return "incomplete key file header";
            case error::invalid_log_record:
                return "invalid log record";
            case error::invalid_log_spill:
                return "invalid spill in log";
            case error::invalid_log_offset:
                return "invalid offset in log";
            case error::invalid_log_index:
                return "invalid index in log";
            case error::invalid_spill_size:
                return "invalid size in spill";
            case error::uid_mismatch:
                return "uid mismatch";
            case error::appnum_mismatch:
                return "appnum mismatch";
            case error::key_size_mismatch:
                return "key size mismatch";
            case error::salt_mismatch:
                return "salt mismatch";
            case error::pepper_mismatch:
                return "pepper mismatch";
            case error::block_size_mismatch:
                return "block size mismatch";
            case error::orphaned_value:
                return "orphaned value";
            case error::missing_value:
                return "missing value";
            case error::size_mismatch:
                return "size mismatch";
            case error::duplicate_value:
                return "duplicate value";
            default:
                return "nudb error";
            }
        }

        error_condition
        default_error_condition(int ev) const noexcept override
        {
            return error_condition{ev, *this};
        }

        bool
        equivalent(int ev,
            error_condition const& ec) const noexcept override
        {
            return ec.value() == ev && &ec.category() == this;
        }

        bool
        equivalent(error_code const& ec, int ev) const noexcept override
        {
            return ec.value() == ev && &ec.category() == this;
        }
    };
    // Function-local static: constructed once, thread-safe in C++11
    static cat_t const cat{};
    return cat;
}
} // nudb
#endif

View File

@@ -0,0 +1,259 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_POSIX_FILE_IPP
#define NUDB_IMPL_POSIX_FILE_IPP
#include <boost/assert.hpp>
#include <limits.h>
namespace nudb {
// Destructor: closes the file descriptor, if open.
inline
posix_file::
~posix_file()
{
    close();
}
// Move constructor: takes ownership of the other object's
// descriptor, leaving it in the closed state.
inline
posix_file::
posix_file(posix_file &&other)
    : fd_(other.fd_)
{
    other.fd_ = -1;
}
// Move assignment: closes any currently owned descriptor, then
// takes ownership of the other object's descriptor, leaving the
// source in the closed state. Self-assignment is a no-op.
inline
posix_file&
posix_file::
operator=(posix_file&& other)
{
    if(this != &other)
    {
        close();
        fd_ = other.fd_;
        other.fd_ = -1;
    }
    return *this;
}
// Closes the native file handle if one is open.
// The descriptor is reset so the object may be reused.
inline
void
posix_file::
close()
{
    if(fd_ == -1)
        return;
    ::close(fd_);
    fd_ = -1;
}
// Creates a new file at the given path, failing if it already
// exists, and applies the access-pattern advice for `mode`.
//
// Uses O_CREAT|O_EXCL so the existence check and creation are a
// single atomic operation; the previous probe-open-then-create
// sequence had a window in which another process could create the
// file between the two calls (TOCTOU race).
inline
void
posix_file::
create(file_mode mode, path_type const& path, error_code& ec)
{
    BOOST_ASSERT(! is_open());
    auto const result = flags(mode);
    fd_ = ::open(path.c_str(), result.first | O_CREAT | O_EXCL, 0644);
    if(fd_ == -1)
    {
        int const errnum = errno;
        if(errnum == EEXIST)
        {
            // Preserve the original contract: an existing file
            // is reported as errc::file_exists
            ec = error_code{errc::file_exists, generic_category()};
            return;
        }
        return err(errnum, ec);
    }
#ifndef __APPLE__
    if(::posix_fadvise(fd_, 0, 0, result.second) != 0)
        return last_err(ec);
#endif
}
// Opens an existing file at the given path with the flags for
// `mode`, and applies the corresponding posix_fadvise hint
// (except on macOS, which lacks posix_fadvise).
inline
void
posix_file::
open(file_mode mode, path_type const& path, error_code& ec)
{
    BOOST_ASSERT(! is_open());
    auto const result = flags(mode);
    fd_ = ::open(path.c_str(), result.first);
    if(fd_ == -1)
        return last_err(ec);
#ifndef __APPLE__
    if(::posix_fadvise(fd_, 0, 0, result.second) != 0)
        return last_err(ec);
#endif
}
// Removes the file at the given path, reporting any
// failure through ec.
inline
void
posix_file::
erase(path_type const& path, error_code& ec)
{
    if(::unlink(path.c_str()) == 0)
        return;
    int const ev = errno;
    err(ev, ec);
}
// Returns the size of the file in bytes, or 0 with ec set
// if fstat fails.
inline
std::uint64_t
posix_file::
size(error_code& ec) const
{
    // st_size must be wide enough to hold any 64-bit size
    static_assert(sizeof(stat::st_size) == sizeof(std::uint64_t), "");
    struct stat st;
    if(::fstat(fd_, &st) != 0)
    {
        last_err(ec);
        return 0;
    }
    return st.st_size;
}
// Reads exactly `bytes` bytes starting at `offset` into `buffer`.
//
// Loops over pread to handle partial reads and EINTR. A read of
// zero bytes means end of file before the requested amount was
// available, reported as error::short_read.
inline
void
posix_file::
read(std::uint64_t offset,
    void* buffer, std::size_t bytes, error_code& ec)
{
    static_assert(sizeof(off_t) >= sizeof(offset), "");
    while(bytes > 0)
    {
        // Clamp each request to what pread can return in one call
        auto const amount = static_cast<ssize_t>(
            std::min(bytes, static_cast<std::size_t>(SSIZE_MAX)));
        auto const n = ::pread(fd_, buffer, amount, offset);
        if(n == -1)
        {
            auto const ev = errno;
            if(ev == EINTR)
                continue;
            return err(ev, ec);
        }
        if(n == 0)
        {
            // End of file before the full amount was read
            ec = error::short_read;
            return;
        }
        offset += n;
        bytes -= n;
        buffer = reinterpret_cast<char*>(buffer) + n;
    }
}
// Writes exactly `bytes` bytes from `buffer` starting at `offset`.
//
// Loops over pwrite to handle partial writes and EINTR.
inline
void
posix_file::
write(std::uint64_t offset,
    void const* buffer, std::size_t bytes, error_code& ec)
{
    static_assert(sizeof(off_t) >= sizeof(offset), "");
    while(bytes > 0)
    {
        // Clamp each request to what pwrite can accept in one call
        auto const amount = static_cast<ssize_t>(
            std::min(bytes, static_cast<std::size_t>(SSIZE_MAX)));
        auto const n = ::pwrite(fd_, buffer, amount, offset);
        if(n == -1)
        {
            auto const ev = errno;
            if(ev == EINTR)
                continue;
            return err(ev, ec);
        }
        offset += n;
        bytes -= n;
        buffer = reinterpret_cast<char const*>(buffer) + n;
    }
}
// Flushes buffered data and metadata to permanent storage,
// retrying on EINTR.
inline
void
posix_file::
sync(error_code& ec)
{
    while(::fsync(fd_) != 0)
    {
        int const ev = errno;
        if(ev != EINTR)
            return err(ev, ec);
    }
}
// Sets the file size to exactly `length` bytes,
// retrying on EINTR.
inline
void
posix_file::
trunc(std::uint64_t length, error_code& ec)
{
    while(::ftruncate(fd_, length) != 0)
    {
        int const ev = errno;
        if(ev != EINTR)
            return err(ev, ec);
    }
}
// Maps a file_mode to native flags.
//
// Returns a pair: .first holds the open(2) flags, .second holds
// the posix_fadvise access-pattern hint (unset and unused on
// macOS, where posix_fadvise does not exist; the pair's members
// are value-initialized to 0).
inline
std::pair<int, int>
posix_file::
flags(file_mode mode)
{
    std::pair<int, int> result;
    switch(mode)
    {
    case file_mode::scan:
        result.first =
            O_RDONLY;
#ifndef __APPLE__
        result.second =
            POSIX_FADV_SEQUENTIAL;
#endif
        break;
    case file_mode::read:
        result.first =
            O_RDONLY;
#ifndef __APPLE__
        result.second =
            POSIX_FADV_RANDOM;
#endif
        break;
    case file_mode::append:
        result.first =
            O_RDWR |
            O_APPEND;
#ifndef __APPLE__
        result.second =
            POSIX_FADV_RANDOM;
#endif
        break;
    case file_mode::write:
        result.first =
            O_RDWR;
#ifndef __APPLE__
        result.second =
            POSIX_FADV_NORMAL;
#endif
        break;
    }
    return result;
}
} // nudb
#endif

View File

@@ -0,0 +1,209 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_RECOVER_IPP
#define NUDB_IMPL_RECOVER_IPP
#include <nudb/concepts.hpp>
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <algorithm>
#include <cstddef>
#include <string>
namespace nudb {
// Applies the log file to restore database consistency after an
// unclean shutdown.
//
// Outline:
//   - Open and verify the data file, then the key file. A key file
//     no larger than its header means a rekey died early: erase it
//     (and the log) and report error::no_key_file.
//   - If no log file exists, nothing needs to be done.
//   - If the log header is incomplete (short read), the previous
//     commit never checkpointed; just clear the log.
//   - Otherwise replay each log record, restoring the pre-commit
//     bucket images into the key file, then truncate both files
//     back to the sizes recorded in the log header.
//   - Finally truncate, sync, and erase the log file.
template<
    class Hasher,
    class File,
    class... Args>
void
recover(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    error_code& ec,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    using namespace detail;
    // Open data file
    File df{args...};
    df.open(file_mode::write, dat_path, ec);
    if(ec)
        return;
    auto const dataFileSize = df.size(ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    // Open key file
    File kf{args...};
    kf.open(file_mode::write, key_path, ec);
    if(ec)
        return;
    auto const keyFileSize = kf.size(ec);
    if(ec)
        return;
    if(keyFileSize <= key_file_header::size)
    {
        // Incomplete key file from an interrupted rekey:
        // discard it together with the log
        kf.close();
        erase_file(log_path, ec);
        if(ec)
            return;
        File::erase(key_path, ec);
        if(ec)
            return;
        ec = error::no_key_file;
        return;
    }
    // Open log file
    File lf{args...};
    lf.open(file_mode::append, log_path, ec);
    if(ec == errc::no_such_file_or_directory)
    {
        // No log file means there is nothing to recover
        ec = {};
        return;
    }
    if(ec)
        return;
    auto const logFileSize = lf.size(ec);
    if(ec)
        return;
    // Read log file header
    log_file_header lh;
    read(lf, lh, ec);
    if(ec == error::short_read)
    {
        // Header never made it to disk: no commit was in
        // progress, so only the log needs clearing
        BOOST_ASSERT(keyFileSize > key_file_header::size);
        ec = {};
        goto clear_log;
    }
    if(ec)
        return;
    verify<Hasher>(lh, ec);
    if(ec)
        return;
    if(lh.key_file_size == 0)
        goto trunc_files;
    {
        // Read key file header
        key_file_header kh;
        read(kf, kh, ec);
        if(ec)
            return;
        verify<Hasher>(kh, ec);
        if(ec)
            return;
        verify<Hasher>(dh, kh, ec);
        if(ec)
            return;
        verify<Hasher>(kh, lh, ec);
        if(ec)
            return;
        // Replay the log: restore each saved bucket image
        auto const readSize = 1024 * kh.block_size;
        auto const bucketSize = bucket_size(kh.capacity);
        buffer buf{kh.block_size};
        bucket b{kh.block_size, buf.get()};
        bulk_reader<File> r{lf,
            log_file_header::size, logFileSize, readSize};
        while(! r.eof())
        {
            // Log Record
            auto is = r.prepare(field<std::uint64_t>::size, ec);
            // Log file is incomplete, so roll back.
            if(ec == error::short_read)
            {
                ec = {};
                break;
            }
            if(ec)
                return;
            nsize_t n;
            {
                std::uint64_t v;
                // VFALCO This should have been a uint32_t
                read<std::uint64_t>(is, v); // Index
                BOOST_ASSERT(v <= std::numeric_limits<std::uint32_t>::max());
                n = static_cast<nsize_t>(v);
            }
            b.read(r, ec); // Bucket
            if(ec == error::short_read)
            {
                ec = {};
                break;
            }
            // Sanity-check the record before applying it
            if(b.spill() && b.spill() + bucketSize > dataFileSize)
            {
                ec = error::invalid_log_spill;
                return;
            }
            if(n > kh.buckets)
            {
                ec = error::invalid_log_index;
                return;
            }
            b.write(kf, static_cast<noff_t>(n + 1) * kh.block_size, ec);
            if(ec)
                return;
        }
    }
trunc_files:
    // Roll both files back to their pre-commit sizes
    df.trunc(lh.dat_file_size, ec);
    if(ec)
        return;
    df.sync(ec);
    if(ec)
        return;
    if(lh.key_file_size != 0)
    {
        kf.trunc(lh.key_file_size, ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }
    else
    {
        // A zero recorded size means the key file was new:
        // remove it entirely
        kf.close();
        File::erase(key_path, ec);
        if(ec)
            return;
    }
clear_log:
    // Recovery complete; dispose of the log file
    lf.trunc(0, ec);
    if(ec)
        return;
    lf.sync(ec);
    if(ec)
        return;
    lf.close();
    File::erase(log_path, ec);
    if(ec)
        return;
}
} // nudb
#endif

View File

@@ -0,0 +1,248 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_REKEY_IPP
#define NUDB_IMPL_REKEY_IPP
#include <nudb/concepts.hpp>
#include <nudb/create.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <cmath>
namespace nudb {
// VFALCO Should this delete the key file on an error?
/*  Rebuild the key file for the data file at dat_path.

    The data file is scanned in multiple passes. On each pass a
    contiguous range of buckets (a "chunk") is rendered in memory,
    keys falling in that range are inserted, then the chunk is
    written to the key file. Inserting may append spill records to
    the data file via a bulk writer, which is why a log file is
    created first: a partial rekey can be undone by recover().

    Preconditions enforced here:
    * the data file must exist with a valid header
    * no log file may exist (error::log_file_exists otherwise)

    @param blockSize  Size of a key file block.
    @param loadFactor Target bucket occupancy in (0, 1].
    @param itemCount  Expected number of keys; sizes the key file.
    @param bufferSize Bytes of memory for the chunk buffer; larger
                      values mean fewer passes over the data file.
    @param ec         Set to the error, if any occurred.
    @param progress   Invoked as progress(amount, total).
    @param args       Forwarded to each File constructor.
*/
template<
    class Hasher,
    class File,
    class Progress,
    class... Args
>
void
rekey(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    std::size_t blockSize,
    float loadFactor,
    std::uint64_t itemCount,
    std::size_t bufferSize,
    error_code& ec,
    Progress&& progress,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    using namespace detail;
    auto const readSize = 1024 * block_size(dat_path);
    auto const writeSize = 16 * block_size(key_path);
    // Open data file for reading and appending
    File df{args...};
    df.open(file_mode::append, dat_path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    auto const dataFileSize = df.size(ec);
    if(ec)
        return;
    // Make sure log file doesn't exist
    File lf{args...};
    lf.open(file_mode::read, log_path, ec);
    if(! ec)
        ec = error::log_file_exists;
    if(ec != errc::no_such_file_or_directory)
        return;
    ec = {};
    // Set up key file header
    key_file_header kh;
    kh.version = currentVersion;
    kh.uid = dh.uid;
    kh.appnum = dh.appnum;
    kh.key_size = dh.key_size;
    kh.salt = make_salt();
    kh.pepper = pepper<Hasher>(kh.salt);
    kh.block_size = blockSize;
    // load_factor is stored as a 16-bit fixed point fraction
    kh.load_factor = std::min<std::size_t>(
        static_cast<std::size_t>(65536.0 * loadFactor), 65535);
    kh.buckets = static_cast<std::size_t>(
        std::ceil(itemCount /(
            bucket_capacity(kh.block_size) * loadFactor)));
    kh.modulus = ceil_pow2(kh.buckets);
    // Create key file
    File kf{args...};
    kf.create(file_mode::write, key_path, ec);
    if(ec)
        return;
    // Write key file header
    // Note, file size is less than any valid block_size here
    {
        std::array<std::uint8_t, key_file_header::size> buf;
        ostream os{buf.data(), buf.size()};
        write(os, kh);
        kf.write(0, buf.data(), buf.size(), ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }
    // Create log file
    lf.create(file_mode::append, log_path, ec);
    if(ec)
        return;
    // Write log file header
    {
        log_file_header lh;
        lh.version = currentVersion;            // Version
        lh.uid = kh.uid;                        // UID
        lh.appnum = kh.appnum;                  // Appnum
        lh.key_size = kh.key_size;              // Key Size
        lh.salt = kh.salt;                      // Salt
        lh.pepper = pepper<Hasher>(kh.salt);    // Pepper
        lh.block_size = kh.block_size;          // Block Size
        lh.key_file_size = 0;                   // Key File Size
        lh.dat_file_size = dataFileSize;        // Data File Size
        write(lf, lh, ec);
        if(ec)
            return;
        lf.sync(ec);
        if(ec)
            return;
    }
    // Create full key file
    buffer buf{kh.block_size};
    {
        // Write key file header
        std::memset(buf.get(), 0, kh.block_size);
        ostream os{buf.get(), kh.block_size};
        write(os, kh);
        kf.write(0, buf.get(), buf.size(), ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
        // Pre-allocate space for the entire key file by
        // writing a single byte at its final offset.
        std::uint8_t zero = 0;
        kf.write(
            static_cast<noff_t>(kh.buckets + 1) * kh.block_size - 1,
            &zero, 1, ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }
    // Build contiguous sequential sections of the
    // key file using multiple passes over the data.
    //
    auto const chunkSize = std::max<std::size_t>(1,
        bufferSize / kh.block_size);
    // Calculate work required
    auto const passes =
        (kh.buckets + chunkSize - 1) / chunkSize;
    auto const nwork = passes * dataFileSize;
    progress(0, nwork);
    buf.reserve(chunkSize * kh.block_size);
    bulk_writer<File> dw{df, dataFileSize, writeSize};
    for(nbuck_t b0 = 0; b0 < kh.buckets; b0 += chunkSize)
    {
        auto const b1 = std::min<std::size_t>(b0 + chunkSize, kh.buckets);
        // Buffered range is [b0, b1)
        auto const bn = b1 - b0;
        // Create empty buckets: constructing with the `empty` tag
        // formats each block in the chunk buffer.
        for(std::size_t i = 0; i < bn; ++i)
            bucket b{kh.block_size,
                buf.get() + i * kh.block_size, empty};
        // Insert all keys into buckets
        // Iterate Data File
        bulk_reader<File> r{df,
            dat_file_header::size, dataFileSize, readSize};
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            nsize_t size;
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec)
                return;
            progress((b0 / chunkSize) * dataFileSize + r.offset(), nwork);
            read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    dh.key_size +           // Key
                    size, ec);              // Data
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(dh.key_size);
                auto const h = hash<Hasher>(
                    key, dh.key_size, kh.salt);
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                // Skip keys outside the buffered bucket range
                if(n < b0 || n >= b1)
                    continue;
                bucket b{kh.block_size, buf.get() +
                    (n - b0) * kh.block_size};
                maybe_spill(b, dw, ec);
                if(ec)
                    return;
                b.insert(offset, size, h);
            }
            else
            {
                // VFALCO Should never get here
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec)
                    return;
                read<std::uint16_t>(is, size);  // Size
                r.prepare(size, ec);            // skip
                if(ec)
                    return;
            }
        }
        // FIX: compute the destination offset in 64-bit arithmetic;
        // the product previously overflowed std::size_t on 32-bit
        // platforms for large key files (matches the casts used at
        // the pre-allocation write above).
        kf.write(static_cast<noff_t>(b0 + 1) * kh.block_size, buf.get(),
            static_cast<std::size_t>(bn * kh.block_size), ec);
        if(ec)
            return;
    }
    dw.flush(ec);
    if(ec)
        return;
    lf.close();
    File::erase(log_path, ec);
    if(ec)
        return;
}
} // nudb
#endif

View File

@@ -0,0 +1,630 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_VERIFY_IPP
#define NUDB_IMPL_VERIFY_IPP
#include <nudb/concepts.hpp>
#include <nudb/native_file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstddef>
#include <limits>
#include <string>
namespace nudb {
namespace detail {
// Normal verify that does not require a buffer
//
/*  Verify database consistency without a large working buffer.

    Phase 1 walks the data file: every data record's key is hashed
    and located in the key file (following spill chains) to prove
    the record is reachable. Phase 2 walks the key file: every
    bucket entry is read back from the data file and its size and
    hash are checked. Statistics are accumulated into `info`.
*/
template<
    class Hasher,
    class File,
    class Progress>
void
verify_normal(
    verify_info& info,
    File& df,
    File& kf,
    dat_file_header& dh,
    key_file_header& kh,
    Progress&& progress,
    error_code& ec)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    // Algorithm 0 identifies the unbuffered verify in verify_info
    info.algorithm = 0;
    auto const readSize = 1024 * kh.block_size;
    // This ratio balances the 2 work phases.
    // The number is determined empirically.
    auto const adjust = 1.75;
    // Calculate the work required
    auto const keys = static_cast<std::uint64_t>(
        double(kh.load_factor) / 65536.0 * kh.buckets * kh.capacity);
    std::uint64_t const nwork = static_cast<std::uint64_t>(
        info.dat_file_size + keys * kh.block_size +
        adjust * (info.key_file_size + keys * kh.block_size));
    std::uint64_t work = 0;
    progress(0, nwork);
    // Iterate Data File
    // Data Record
    auto const dh_len =
        field<uint48_t>::size + // Size
        kh.key_size;            // Key
    std::uint64_t fetches = 0;
    // One block for bucket reads plus room for a record prefix
    buffer buf{kh.block_size + dh_len};
    bucket b{kh.block_size, buf.get()};
    std::uint8_t* pd = buf.get() + kh.block_size;
    {
        bulk_reader<File> r{df, dat_file_header::size,
            info.dat_file_size, readSize};
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec)
                return;
            nsize_t size;
            read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    kh.key_size +           // Key
                    size, ec);              // Data
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(kh.key_size);
                std::uint8_t const* const data =
                    is.data(size);
                (void)data;
                auto const h = hash<Hasher>(
                    key, kh.key_size, kh.salt);
                // Check bucket and spills: the record must be
                // reachable from its home bucket or a spill chain.
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                // Bucket 0 in the key file is the header block
                b.read(kf,
                    static_cast<noff_t>(n + 1) * kh.block_size, ec);
                if(ec)
                    return;
                work += kh.block_size;
                ++fetches;
                for(;;)
                {
                    // Entries with equal hash are contiguous
                    for(auto i = b.lower_bound(h);
                        i < b.size(); ++i)
                    {
                        auto const item = b[i];
                        if(item.hash != h)
                            break;
                        if(item.offset == offset)
                            goto found;
                        ++fetches;
                    }
                    auto const spill = b.spill();
                    if(! spill)
                    {
                        // Record exists but no bucket points at it
                        ec = error::orphaned_value;
                        return;
                    }
                    b.read(df, spill, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::short_spill;
                        return;
                    }
                    if(ec)
                        return;
                    ++fetches;
                }
            found:
                // Update
                ++info.value_count;
                info.value_bytes += size;
            }
            else
            {
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                read<std::uint16_t>(is, size); // Size
                if(size != info.bucket_size)
                {
                    ec = error::invalid_spill_size;
                    return;
                }
                if(ec)
                    return;
                b.read(r, ec); // Bucket
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                ++info.spill_count_tot;
                info.spill_bytes_tot +=
                    field<uint48_t>::size + // Zero
                    field<uint16_t>::size + // Size
                    b.actual_size();        // Bucket
            }
            progress(work + offset, nwork);
        }
        work += info.dat_file_size;
    }
    // Iterate Key File: every entry must point at a matching record
    {
        for(std::size_t n = 0; n < kh.buckets; ++n)
        {
            std::size_t nspill = 0;
            b.read(kf, static_cast<noff_t>(
                n + 1) * kh.block_size, ec);
            if(ec)
                return;
            work += static_cast<std::uint64_t>(
                adjust * kh.block_size);
            bool spill = false;
            for(;;)
            {
                info.key_count += b.size();
                for(nkey_t i = 0; i < b.size(); ++i)
                {
                    auto const e = b[i];
                    // Read just the record prefix (size + key)
                    df.read(e.offset, pd, dh_len, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::missing_value;
                        return;
                    }
                    if(ec)
                        return;
                    if(! spill)
                        work += static_cast<std::uint64_t>(
                            adjust * kh.block_size);
                    // Data Record
                    istream is{pd, dh_len};
                    std::uint64_t size;
                    // VFALCO This should really be a 32-bit field
                    read<uint48_t>(is, size);   // Size
                    void const* key =
                        is.data(kh.key_size);   // Key
                    if(size != e.size)
                    {
                        ec = error::size_mismatch;
                        return;
                    }
                    auto const h = hash<Hasher>(key,
                        kh.key_size, kh.salt);
                    if(h != e.hash)
                    {
                        ec = error::hash_mismatch;
                        return;
                    }
                }
                if(! b.spill())
                    break;
                b.read(df, b.spill(), ec);
                if(ec)
                    return;
                spill = true;
                ++nspill;
                ++info.spill_count;
                info.spill_bytes +=
                    field<uint48_t>::size + // Zero
                    field<uint16_t>::size + // Size
                    b.actual_size();        // SpillBucket
            }
            // Spill-chain length histogram; last slot is open-ended
            if(nspill >= info.hist.size())
                nspill = info.hist.size() - 1;
            ++info.hist[nspill];
            progress(work, nwork);
        }
    }
    // NOTE(review): `sum` is computed but never used — dead code
    float sum = 0;
    for(size_t i = 0; i < info.hist.size(); ++i)
        sum += info.hist[i] * (i + 1);
    if(info.value_count)
        info.avg_fetch =
            float(fetches) / info.value_count;
    else
        info.avg_fetch = 0;
    info.waste = (info.spill_bytes_tot - info.spill_bytes) /
        float(info.dat_file_size);
    if(info.value_count)
        info.overhead =
            float(info.key_file_size + info.dat_file_size) /
            (
                info.value_bytes +
                info.key_count *
                (info.key_size +
                    // Data Record
                    field<uint48_t>::size) // Size
            ) - 1;
    else
        info.overhead = 0;
    info.actual_load = info.key_count / float(
        info.capacity * info.buckets);
}
// Fast version of verify that uses a buffer
//
/*  Buffered ("fast") verify.

    Loads contiguous chunks of the key file into memory and makes
    one pass over the data file per chunk. For each chunk, the
    number of keys per bucket (including spills) is counted first;
    each data record reachable from the chunk then decrements its
    bucket's count. A nonzero count at the end of a pass means a
    key has no corresponding record.
*/
template<class Hasher, class File, class Progress>
void
verify_fast(
    verify_info& info,
    File& df,
    File& kf,
    dat_file_header& dh,
    key_file_header& kh,
    std::size_t bufferSize,
    Progress&& progress,
    error_code& ec)
{
    // Algorithm 1 identifies the buffered verify in verify_info
    info.algorithm = 1;
    auto const readSize = 1024 * kh.block_size;
    // Counts unverified keys per bucket
    if(kh.buckets > std::numeric_limits<nbuck_t>::max())
    {
        ec = error::too_many_buckets;
        return;
    }
    std::unique_ptr<nkey_t[]> nkeys(
        new nkey_t[kh.buckets]);
    // Verify contiguous sequential sections of the
    // key file using multiple passes over the data.
    //
    // Caller is responsible for choosing this algorithm only
    // when the buffer is large enough.
    if(bufferSize < 2 * kh.block_size + sizeof(nkey_t))
        throw std::logic_error("invalid buffer size");
    auto chunkSize = std::min(kh.buckets,
        (bufferSize - kh.block_size) /
        (kh.block_size + sizeof(nkey_t)));
    auto const passes =
        (kh.buckets + chunkSize - 1) / chunkSize;
    // Calculate the work required
    std::uint64_t work = 0;
    std::uint64_t const nwork =
        passes * info.dat_file_size + info.key_file_size;
    progress(0, nwork);
    std::uint64_t fetches = 0;
    // One extra block at the end of the buffer is scratch
    // space for reading spill buckets.
    buffer buf{(chunkSize + 1) * kh.block_size};
    bucket tmp{kh.block_size,
        buf.get() + chunkSize * kh.block_size};
    for(nsize_t b0 = 0; b0 < kh.buckets; b0 += chunkSize)
    {
        // Load key file chunk to buffer
        auto const b1 = std::min(b0 + chunkSize, kh.buckets);
        // Buffered range is [b0, b1)
        auto const bn = b1 - b0;
        kf.read(
            static_cast<noff_t>(b0 + 1) * kh.block_size,
            buf.get(),
            static_cast<noff_t>(bn * kh.block_size),
            ec);
        if(ec)
            return;
        work += bn * kh.block_size;
        progress(work, nwork);
        // Count keys in buckets, including spills.
        // nkeys[] is indexed relative to the chunk start b0.
        for(nbuck_t i = 0; i < bn; ++i)
        {
            bucket b{kh.block_size,
                buf.get() + i * kh.block_size};
            nkeys[i] = b.size();
            std::size_t nspill = 0;
            auto spill = b.spill();
            while(spill != 0)
            {
                tmp.read(df, spill, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                nkeys[i] += tmp.size();
                spill = tmp.spill();
                ++nspill;
                ++info.spill_count;
                info.spill_bytes +=
                    field<uint48_t>::size + // Zero
                    field<uint16_t>::size + // Size
                    tmp.actual_size();      // SpillBucket
            }
            // Spill-chain length histogram; last slot is open-ended
            if(nspill >= info.hist.size())
                nspill = info.hist.size() - 1;
            ++info.hist[nspill];
            info.key_count += nkeys[i];
        }
        // Iterate Data File
        bulk_reader<File> r(df, dat_file_header::size,
            info.dat_file_size, readSize);
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec == error::short_read)
            {
                ec = error::short_data_record;
                return;
            }
            if(ec)
                return;
            nsize_t size;
            detail::read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    kh.key_size +           // Key
                    size, ec);              // Data
                if(ec == error::short_read)
                {
                    ec = error::short_value;
                    return;
                }
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(kh.key_size);
                std::uint8_t const* const data =
                    is.data(size);
                (void)data;
                auto const h = hash<Hasher>(
                    key, kh.key_size, kh.salt);
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                // Only verify keys homed in the buffered range
                if(n < b0 || n >= b1)
                    continue;
                // Check bucket and spills
                bucket b{kh.block_size, buf.get() +
                    (n - b0) * kh.block_size};
                ++fetches;
                for(;;)
                {
                    // Entries with equal hash are contiguous
                    for(auto i = b.lower_bound(h);
                        i < b.size(); ++i)
                    {
                        auto const item = b[i];
                        if(item.hash != h)
                            break;
                        if(item.offset == offset)
                            goto found;
                        ++fetches;
                    }
                    auto const spill = b.spill();
                    if(! spill)
                    {
                        ec = error::orphaned_value;
                        return;
                    }
                    // Re-point b at the scratch block so the spill
                    // read does not overwrite the chunk buffer
                    b = tmp;
                    b.read(df, spill, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::short_spill;
                        return;
                    }
                    if(ec)
                        return;
                    ++fetches;
                }
            found:
                // Update
                ++info.value_count;
                info.value_bytes += size;
                if(nkeys[n - b0]-- == 0)
                {
                    // More records map here than keys counted
                    ec = error::orphaned_value;
                    return;
                }
            }
            else
            {
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                read<std::uint16_t>(is, size); // Size
                if(bucket_size(
                    bucket_capacity(size)) != size)
                {
                    ec = error::invalid_spill_size;
                    return;
                }
                r.prepare(size, ec); // Bucket
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                // Count totals only on the first pass
                if(b0 == 0)
                {
                    ++info.spill_count_tot;
                    // NOTE(review): tmp was not loaded from this
                    // record; actual_size() reflects the last spill
                    // bucket read — confirm against upstream intent
                    info.spill_bytes_tot +=
                        field<uint48_t>::size + // Zero
                        field<uint16_t>::size + // Size
                        tmp.actual_size();      // Bucket
                }
            }
            progress(work + offset, nwork);
        }
        // Make sure every key in every bucket was visited
        for(std::size_t i = 0; i < bn; ++i)
        {
            if(nkeys[i] != 0)
            {
                ec = error::missing_value;
                return;
            }
        }
        work += info.dat_file_size;
    }
    // NOTE(review): `sum` is computed but never used — dead code
    float sum = 0;
    for(std::size_t i = 0; i < info.hist.size(); ++i)
        sum += info.hist[i] * (i + 1);
    if(info.value_count)
        info.avg_fetch =
            float(fetches) / info.value_count;
    else
        info.avg_fetch = 0;
    info.waste = (info.spill_bytes_tot - info.spill_bytes) /
        float(info.dat_file_size);
    if(info.value_count)
        info.overhead =
            float(info.key_file_size + info.dat_file_size) /
            (
                info.value_bytes +
                info.key_count *
                (info.key_size +
                    // Data Record
                    field<uint48_t>::size) // Size
            ) - 1;
    else
        info.overhead = 0;
    info.actual_load = info.key_count / float(
        info.capacity * info.buckets);
}
} // detail
/*  Verify a database.

    Opens the data and key files, validates their headers, fills
    in the descriptive fields of `info`, then dispatches to either
    the unbuffered or the buffered algorithm — whichever is
    estimated to perform less file I/O for the given buffer size.
*/
template<class Hasher, class Progress>
void
verify(
    verify_info& info,
    path_type const& dat_path,
    path_type const& key_path,
    std::size_t bufferSize,
    Progress&& progress,
    error_code& ec)
{
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    info = {};
    using namespace detail;
    using File = native_file;
    // Open both files and validate every header field.
    File df;
    df.open(file_mode::scan, dat_path, ec);
    if(ec)
        return;
    File kf;
    kf.open(file_mode::read, key_path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    key_file_header kh;
    read(kf, kh, ec);
    if(ec)
        return;
    verify<Hasher>(kh, ec);
    if(ec)
        return;
    verify<Hasher>(dh, kh, ec);
    if(ec)
        return;
    // Record the descriptive portion of the report.
    info.dat_path = dat_path;
    info.key_path = key_path;
    info.version = dh.version;
    info.uid = dh.uid;
    info.appnum = dh.appnum;
    info.key_size = dh.key_size;
    info.salt = kh.salt;
    info.pepper = kh.pepper;
    info.block_size = kh.block_size;
    info.load_factor = kh.load_factor / 65536.f;
    info.capacity = kh.capacity;
    info.buckets = kh.buckets;
    info.bucket_size = bucket_size(kh.capacity);
    info.key_file_size = kf.size(ec);
    if(ec)
        return;
    info.dat_file_size = df.size(ec);
    if(ec)
        return;
    // Determine which algorithm requires the least amount
    // of file I/O given the available buffer size.
    std::size_t bucketsPerChunk = 0;
    if(bufferSize >= 2 * kh.block_size + sizeof(nkey_t))
        bucketsPerChunk = std::min(kh.buckets,
            (bufferSize - kh.block_size) /
            (kh.block_size + sizeof(nkey_t)));
    std::size_t nPasses = 0;
    if(bucketsPerChunk > 0)
        nPasses = (kh.buckets + bucketsPerChunk - 1) / bucketsPerChunk;
    // Fast verify is usable only with a nonzero chunk size, and
    // worthwhile only when its estimated I/O cost is not worse.
    bool useFast = bucketsPerChunk != 0;
    if(useFast)
    {
        auto const normalCost =
            info.dat_file_size +
            (kh.buckets * kh.load_factor * kh.capacity * kh.block_size) +
            info.key_file_size;
        auto const fastCost =
            nPasses * info.dat_file_size + info.key_file_size;
        if(normalCost < fastCost)
            useFast = false;
    }
    if(useFast)
        detail::verify_fast<Hasher>(info,
            df, kf, dh, kh, bufferSize, progress, ec);
    else
        detail::verify_normal<Hasher>(info,
            df, kf, dh, kh, progress, ec);
}
} // nudb
#endif

View File

@@ -0,0 +1,96 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_VISIT_IPP
#define NUDB_IMPL_VISIT_IPP
#include <nudb/concepts.hpp>
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/native_file.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstddef>
#include <string>
namespace nudb {
/*  Visit each key/data pair in a data file.

    Iterates the data file sequentially, invoking the callback for
    every data record as callback(key, key_size, data, data_size, ec).
    Spill records are skipped. The progress function is invoked as
    progress(bytes_read, file_size).
*/
template<
    class Callback,
    class Progress>
void
visit(
    path_type const& path,
    Callback&& callback,
    Progress&& progress,
    error_code& ec)
{
    // VFALCO Need concept check for Callback
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    using namespace detail;
    using File = native_file;
    auto const readSize = 1024 * block_size(path);
    File df;
    df.open(file_mode::scan, path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    auto const fileSize = df.size(ec);
    if(ec)
        return;
    bulk_reader<File> r(df,
        dat_file_header::size, fileSize, readSize);
    progress(0, fileSize);
    while(! r.eof())
    {
        // Data Record or Spill Record
        nsize_t size;
        auto is = r.prepare(
            field<uint48_t>::size, ec); // Size
        if(ec)
            return;
        detail::read_size48(is, size);
        if(size > 0)
        {
            // Data Record
            is = r.prepare(
                dh.key_size +           // Key
                size, ec);              // Data
            // FIX: previously missing — on a short read the stream
            // was consumed while invalid; every sibling algorithm
            // checks ec immediately after prepare().
            if(ec)
                return;
            std::uint8_t const* const key =
                is.data(dh.key_size);
            callback(key, dh.key_size,
                is.data(size), size, ec);
            if(ec)
                return;
        }
        else
        {
            // Spill Record
            is = r.prepare(
                field<std::uint16_t>::size, ec);
            if(ec)
                return;
            read<std::uint16_t>(is, size);  // Size
            r.prepare(size, ec);            // skip bucket
            if(ec)
                return;
        }
        progress(r.offset(), fileSize);
    }
}
} // nudb
#endif

View File

@@ -0,0 +1,264 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_WIN32_FILE_IPP
#define NUDB_IMPL_WIN32_FILE_IPP
#include <boost/assert.hpp>
namespace nudb {
inline
win32_file::
~win32_file()
{
    // Release the OS handle, if any, when the wrapper is destroyed.
    close();
}
inline
win32_file::
win32_file(win32_file&& other)
    : hf_(other.hf_)
{
    // Leave the moved-from object as if default constructed.
    other.hf_ = INVALID_HANDLE_VALUE;
}
inline
win32_file&
win32_file::
operator=(win32_file&& other)
{
    // Self-move is a no-op; otherwise close our handle and
    // take ownership of the other's, leaving it closed.
    if(this != &other)
    {
        close();
        hf_ = other.hf_;
        other.hf_ = INVALID_HANDLE_VALUE;
    }
    return *this;
}
inline
void
win32_file::
close()
{
    // Idempotent: closing an already-closed file does nothing.
    if(hf_ != INVALID_HANDLE_VALUE)
    {
        ::CloseHandle(hf_);
        hf_ = INVALID_HANDLE_VALUE;
    }
}
inline
void
win32_file::
create(file_mode mode, path_type const& path, error_code& ec)
{
    // The file must not already be open on this object.
    BOOST_ASSERT(! is_open());
    // flags() maps the mode to (desired access, flags/attributes).
    auto const f = flags(mode);
    // CREATE_NEW fails if a file with this name already exists.
    hf_ = ::CreateFileA(path.c_str(),
        f.first,
        0,
        NULL,
        CREATE_NEW,
        f.second,
        NULL);
    if(hf_ == INVALID_HANDLE_VALUE)
        return last_err(ec);
}
inline
void
win32_file::
open(file_mode mode, path_type const& path, error_code& ec)
{
    // The file must not already be open on this object.
    BOOST_ASSERT(! is_open());
    // flags() maps the mode to (desired access, flags/attributes).
    auto const f = flags(mode);
    // OPEN_EXISTING fails if the file does not exist.
    hf_ = ::CreateFileA(path.c_str(),
        f.first,
        0,
        NULL,
        OPEN_EXISTING,
        f.second,
        NULL);
    if(hf_ == INVALID_HANDLE_VALUE)
        return last_err(ec);
}
inline
void
win32_file::
erase(path_type const& path, error_code& ec)
{
    // On failure, report the Win32 error; on success ec is untouched.
    if(::DeleteFileA(path.c_str()) == 0)
        last_err(ec);
}
inline
std::uint64_t
win32_file::
size(error_code& ec) const
{
    BOOST_ASSERT(is_open());
    LARGE_INTEGER fileSize;
    if(! ::GetFileSizeEx(hf_, &fileSize))
    {
        // Report the Win32 error and return a dummy size.
        last_err(ec);
        return 0;
    }
    return fileSize.QuadPart;
}
inline
void
win32_file::
read(std::uint64_t offset, void* buffer, std::size_t bytes, error_code& ec)
{
    // Loop because ReadFile takes a DWORD count and may transfer
    // fewer bytes than requested.
    while(bytes > 0)
    {
        DWORD bytesRead;
        // Split the 64-bit offset into OVERLAPPED low/high parts.
        LARGE_INTEGER li;
        li.QuadPart = static_cast<LONGLONG>(offset);
        OVERLAPPED ov;
        ov.Offset = li.LowPart;
        ov.OffsetHigh = li.HighPart;
        ov.hEvent = NULL;
        // Clamp the request to what a DWORD can express.
        DWORD amount;
        if(bytes > std::numeric_limits<DWORD>::max())
            amount = std::numeric_limits<DWORD>::max();
        else
            amount = static_cast<DWORD>(bytes);
        BOOL const bSuccess = ::ReadFile(
            hf_, buffer, amount, &bytesRead, &ov);
        if(! bSuccess)
        {
            // Reading past end of file maps to short_read; any
            // other failure maps to the system error code.
            DWORD const dwError = ::GetLastError();
            if(dwError != ERROR_HANDLE_EOF)
                return err(dwError, ec);
            ec = make_error_code(error::short_read);
            return;
        }
        if(bytesRead == 0)
        {
            // Zero bytes transferred also means end of file.
            ec = make_error_code(error::short_read);
            return;
        }
        offset += bytesRead;
        bytes -= bytesRead;
        buffer = reinterpret_cast<char*>(
            buffer) + bytesRead;
    }
}
/*  Write a buffer to the file at the given offset.

    Loops because WriteFile takes a DWORD count and may transfer
    fewer bytes than requested. A "successful" write of zero bytes
    is reported as errc::no_space_on_device.
*/
inline
void
win32_file::
write(std::uint64_t offset,
    void const* buffer, std::size_t bytes, error_code& ec)
{
    while(bytes > 0)
    {
        // Split the 64-bit offset into OVERLAPPED low/high parts.
        LARGE_INTEGER li;
        li.QuadPart = static_cast<LONGLONG>(offset);
        OVERLAPPED ov;
        ov.Offset = li.LowPart;
        ov.OffsetHigh = li.HighPart;
        ov.hEvent = NULL;
        // Clamp the request to what a DWORD can express.
        DWORD amount;
        if(bytes > std::numeric_limits<DWORD>::max())
            amount = std::numeric_limits<DWORD>::max();
        else
            amount = static_cast<DWORD>(bytes);
        DWORD bytesWritten;
        BOOL const bSuccess = ::WriteFile(
            hf_, buffer, amount, &bytesWritten, &ov);
        if(! bSuccess)
            return last_err(ec);
        if(bytesWritten == 0)
        {
            // FIX: removed stray double semicolon after the brace
            ec = error_code{errc::no_space_on_device,
                generic_category()};
            return;
        }
        offset += bytesWritten;
        bytes -= bytesWritten;
        buffer = reinterpret_cast<char const*>(
            buffer) + bytesWritten;
    }
}
inline
void
win32_file::
sync(error_code& ec)
{
    // Flush kernel buffers for this handle to the device.
    if(::FlushFileBuffers(hf_) == 0)
        last_err(ec);
}
inline
void
win32_file::
trunc(std::uint64_t length, error_code& ec)
{
    // Move the file pointer to the new length, then cut the
    // file off at the current pointer position.
    LARGE_INTEGER li;
    li.QuadPart = length;
    BOOL bSuccess;
    bSuccess = ::SetFilePointerEx(
        hf_, li, NULL, FILE_BEGIN);
    if(bSuccess)
        bSuccess = ::SetEndOfFile(hf_);
    if(! bSuccess)
        return last_err(ec);
}
inline
std::pair<DWORD, DWORD>
win32_file::
flags(file_mode mode)
{
    // Map a file_mode onto the (dwDesiredAccess,
    // dwFlagsAndAttributes) pair passed to CreateFileA.
    switch(mode)
    {
    case file_mode::scan:
        return {GENERIC_READ,
            FILE_FLAG_SEQUENTIAL_SCAN};
    case file_mode::read:
        return {GENERIC_READ,
            FILE_FLAG_RANDOM_ACCESS};
    case file_mode::append:
        return {GENERIC_READ | GENERIC_WRITE,
            FILE_FLAG_RANDOM_ACCESS
            //| FILE_FLAG_NO_BUFFERING
            //| FILE_FLAG_WRITE_THROUGH
            };
    case file_mode::write:
        return {GENERIC_READ | GENERIC_WRITE,
            FILE_FLAG_RANDOM_ACCESS};
    }
    // Unknown mode: no access, no attributes (matches the
    // original's default-constructed result).
    return {0, 0};
}
} // nudb
#endif

View File

@@ -0,0 +1,76 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_NATIVE_FILE_HPP
#define NUDB_NATIVE_FILE_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <nudb/posix_file.hpp>
#include <nudb/win32_file.hpp>
#include <string>
namespace nudb {
/** A native file handle.
This type is set to the appropriate platform-specific
implementation to meet the file wrapper requirements.
*/
using native_file =
#ifdef _MSC_VER
win32_file;
#else
posix_file;
#endif
/** Erase a file if it exists.
This function attempts to erase the specified file.
No error is generated if the file does not already
exist.
@param path The path to the file to erase.
@param ec Set to the error, if any occurred.
@tparam File A type meeting the requirements of @b File.
If this type is unspecified, @ref native_file is used.
*/
template<class File = native_file>
inline
void
erase_file(path_type const& path, error_code& ec)
{
    // FIX: use the caller-specified File implementation; the
    // original called native_file::erase unconditionally, silently
    // ignoring the File template parameter documented above.
    File::erase(path, ec);
    // A missing file is not an error for this convenience wrapper.
    if(ec == errc::no_such_file_or_directory)
        ec = {};
}
/** Erase a file without returning an error.
This function attempts to erase the specified file.
Any errors are ignored, including if the file does
not exist.
@param path The path to the file to erase.
@tparam File A type meeting the requirements of @b File.
If this type is unspecified, @ref native_file is used.
*/
template<class File = native_file>
inline
void
erase_file(path_type const& path)
{
    // Swallow any error, including file-not-found.
    error_code ignored;
    File::erase(path, ignored);
}
} // nudb
#endif

View File

@@ -0,0 +1,27 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_HPP
#define NUDB_HPP
#include <nudb/concepts.hpp>
#include <nudb/create.hpp>
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <nudb/posix_file.hpp>
#include <nudb/progress.hpp>
#include <nudb/recover.hpp>
#include <nudb/rekey.hpp>
#include <nudb/store.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/verify.hpp>
#include <nudb/version.hpp>
#include <nudb/visit.hpp>
#include <nudb/win32_file.hpp>
#include <nudb/xxhasher.hpp>
#endif

View File

@@ -0,0 +1,228 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_POSIX_FILE_HPP
#define NUDB_DETAIL_POSIX_FILE_HPP
#include <nudb/file.hpp>
#include <nudb/error.hpp>
#include <cerrno>
#include <cstring>
#include <string>
#include <utility>
#ifndef NUDB_POSIX_FILE
# ifdef _MSC_VER
# define NUDB_POSIX_FILE 0
# else
# define NUDB_POSIX_FILE 1
# endif
#endif
#if NUDB_POSIX_FILE
# include <fcntl.h>
# include <sys/types.h>
# include <sys/uio.h>
# include <sys/stat.h>
# include <unistd.h>
#endif
#if NUDB_POSIX_FILE
namespace nudb {
/** A File implementation backed by a POSIX file descriptor.

    Declarations only; the definitions are provided by
    <nudb/impl/posix_file.ipp>, included at the bottom of
    this header.
*/
class posix_file
{
    // The file descriptor, or -1 when no file is open.
    int fd_ = -1;

public:
    /// Constructor
    posix_file() = default;

    /// Copy constructor (disallowed)
    posix_file(posix_file const&) = delete;

    // Copy assignment (disallowed)
    posix_file& operator=(posix_file const&) = delete;

    /** Destructor.

        If open, the file is closed.
    */
    ~posix_file();

    /** Move constructor.

        @note The state of the moved-from object is as if default constructed.
    */
    posix_file(posix_file&&);

    /** Move assignment.

        @note The state of the moved-from object is as if default constructed.
    */
    posix_file&
    operator=(posix_file&& other);

    /// Returns `true` if the file is open.
    bool
    is_open() const
    {
        return fd_ != -1;
    }

    /// Close the file if it is open.
    void
    close();

    /** Create a new file.

        After the file is created, it is opened as if by `open(mode, path, ec)`.

        @par Requirements

        The file must not already exist, or else `errc::file_exists`
        is returned.

        @param mode The open mode, which must be a valid @ref file_mode.

        @param path The path of the file to create.

        @param ec Set to the error, if any occurred.
    */
    void
    create(file_mode mode, path_type const& path, error_code& ec);

    /** Open a file.

        @par Requirements

        The file must not already be open.

        @param mode The open mode, which must be a valid @ref file_mode.

        @param path The path of the file to open.

        @param ec Set to the error, if any occurred.
    */
    void
    open(file_mode mode, path_type const& path, error_code& ec);

    /** Remove a file from the file system.

        It is not an error to attempt to erase a file that does not exist.

        @param path The path of the file to remove.

        @param ec Set to the error, if any occurred.
    */
    static
    void
    erase(path_type const& path, error_code& ec);

    /** Return the size of the file.

        @par Requirements

        The file must be open.

        @param ec Set to the error, if any occurred.

        @return The size of the file, in bytes.
    */
    std::uint64_t
    size(error_code& ec) const;

    /** Read data from a location in the file.

        @par Requirements

        The file must be open.

        @param offset The position in the file to read from,
        expressed as a byte offset from the beginning.

        @param buffer The location to store the data.

        @param bytes The number of bytes to read.

        @param ec Set to the error, if any occurred.
    */
    void
    read(std::uint64_t offset,
        void* buffer, std::size_t bytes, error_code& ec);

    /** Write data to a location in the file.

        @par Requirements

        The file must be open with a mode allowing writes.

        @param offset The position in the file to write from,
        expressed as a byte offset from the beginning.

        @param buffer The data the write.

        @param bytes The number of bytes to write.

        @param ec Set to the error, if any occurred.
    */
    void
    write(std::uint64_t offset,
        void const* buffer, std::size_t bytes, error_code& ec);

    /** Perform a low level file synchronization.

        @par Requirements

        The file must be open with a mode allowing writes.

        @param ec Set to the error, if any occurred.
    */
    void
    sync(error_code& ec);

    /** Truncate the file at a specific size.

        @par Requirements

        The file must be open with a mode allowing writes.

        @param length The new file size.

        @param ec Set to the error, if any occurred.
    */
    void
    trunc(std::uint64_t length, error_code& ec);

private:
    // Map a raw errno value into an error_code.
    static
    void
    err(int ev, error_code& ec)
    {
        ec = error_code{ev, system_category()};
    }

    // Map the current errno into an error_code.
    static
    void
    last_err(error_code& ec)
    {
        err(errno, ec);
    }

    // Map a file_mode onto the (flags, advice) passed to open(2).
    static
    std::pair<int, int>
    flags(file_mode mode);
};
} // nudb
#include <nudb/impl/posix_file.ipp>
#endif
#endif

View File

@@ -0,0 +1,32 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_PROGRESS_HPP
#define NUDB_PROGRESS_HPP
namespace nudb {
/** Progress function that does nothing.
This type meets the requirements of @b Progress,
and does nothing when invoked.
*/
struct
no_progress
{
    no_progress() = default;

    /// Called to indicate progress; both arguments are ignored.
    void
    operator()(std::uint64_t, std::uint64_t) const noexcept
    {
    }
    // FIX: removed stray semicolon after the function body.
    // NOTE(review): std::uint64_t needs <cstdint>; this header
    // currently relies on a transitive include — confirm and add
    // #include <cstdint> at the top of the file.
};
} // nudb
#endif

View File

@@ -0,0 +1,73 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_RECOVER_HPP
#define NUDB_RECOVER_HPP
#include <nudb/error.hpp>
#include <nudb/native_file.hpp>
namespace nudb {
/** Perform recovery on a database.
This implements the recovery algorithm by rolling back
any partially committed data. If no log file is present,
the function does nothing.
During the commit phase of a NuDB database, a log file
is generated with information that may be used to roll
back the results of a partial commit. This function
checks for the presence of a log file. If present, the
log file is replayed on the key and data files belonging
to the database, restoring the database to its state
before the partial commit. When @ref recover is
successful, it erases the log file.
It is normally not necessary to call this function
directly, it is called automatically when a database is
opened in a call to @ref basic_store::open. Callers may
use this function to implement auxiliary tools for
manipulating the database.
@par Template Parameters
@tparam Hasher The hash function to use. This type must
meet the requirements of @b Hasher. The hash function
must be the same as that used to create the database, or
else an error is returned.
@tparam File The type of file to use. Use the default of
@ref native_file unless customizing the file behavior.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param log_path The path to the log file.
@param args Optional parameters passed to File constructors.
@param ec Set to the error, if any occurred.
*/
// Declaration only; the definition is provided by
// <nudb/impl/recover.ipp>, included at the bottom of this header.
template<
    class Hasher,
    class File = native_file,
    class... Args>
void
recover(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    error_code& ec,
    Args&&... args);
} // nudb
#include <nudb/impl/recover.ipp>
#endif

View File

@@ -0,0 +1,110 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_REKEY_HPP
#define NUDB_REKEY_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <cstddef>
#include <cstdint>
namespace nudb {
/** Create a new key file from a data file.

    This algorithm rebuilds a key file for the given data file.
    It works efficiently by iterating the data file multiple times.
    During the iteration, a contiguous block of the key file is
    rendered in memory, then flushed to disk when the iteration is
    complete. The size of this memory buffer is controlled by the
    `bufferSize` parameter, larger is better. The algorithm works
    the fastest when `bufferSize` is large enough to hold the entire
    key file in memory; only a single iteration of the data file
    is needed in this case.

    During the rekey, spill records may be appended to the data
    file. If the rekey operation is abnormally terminated, this
    would normally result in a corrupted data file. To prevent this,
    the function creates a log file using the specified path so
    that the database can be fixed in a subsequent call to
    @ref recover.

    @note If a log file is already present, this function will
    fail with @ref error::log_file_exists.

    @par Template Parameters

    @tparam Hasher The hash function to use. This type must
    meet the requirements of @b Hasher. The hash function
    must be the same as that used to create the database, or
    else an error is returned.

    @tparam File The type of file to use. This type must meet
    the requirements of @b File.

    @param dat_path The path to the data file.

    @param key_path The path to the key file.

    @param log_path The path to the log file.

    @param blockSize The size of a key file block. Larger
    blocks hold more keys but require more I/O cycles per
    operation. The ideal block size is the largest size that
    may be read in a single I/O cycle, and is device dependent.
    The return value of @ref block_size returns a suitable
    value for the volume of a given path.

    @param loadFactor A number between zero and one
    representing the average bucket occupancy (number of
    items). A value of 0.5 is perfect. Lower numbers
    waste space, and higher numbers produce negligible
    savings at the cost of increased I/O cycles.

    @param itemCount The number of items in the data file.

    @param bufferSize The number of bytes to allocate for the buffer.

    @param ec Set to the error if any occurred.

    @param progress A function which will be called periodically
    as the algorithm proceeds. The equivalent signature of the
    progress function must be:
    @code
    void progress(
        std::uint64_t amount,   // Amount of work done so far
        std::uint64_t total     // Total amount of work to do
    );
    @endcode

    @param args Optional arguments passed to @b File constructors.
*/
template<
class Hasher,
class File,
class Progress,
class... Args
>
void
rekey(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
std::size_t blockSize,
float loadFactor,
std::uint64_t itemCount,
std::size_t bufferSize,
error_code& ec,
Progress&& progress,
Args&&... args);
} // nudb
#include <nudb/impl/rekey.ipp>
#endif

View File

@@ -0,0 +1,27 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_STORE_HPP
#define NUDB_STORE_HPP
#include <nudb/basic_store.hpp>
#include <nudb/native_file.hpp>
#include <nudb/xxhasher.hpp>
namespace nudb {
/** A key/value database.

    This alias provides the default database configuration.
    The @b Hasher used is @ref xxhasher, which works very
    well for almost all cases. The @b File is @ref native_file which
    works on Windows and POSIX platforms.
*/
using store = basic_store<xxhasher, native_file>;
} // nudb
#endif

View File

@@ -0,0 +1,63 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TYPE_TRAITS_HPP
#define NUDB_TYPE_TRAITS_HPP
#include <cstddef>
#include <cstdint>
namespace nudb {
#if ! GENERATING_DOCS
namespace detail {
// Holds a full digest
using nhash_t = std::uint64_t;
} // detail
/** Holds a bucket index or bucket count.

    The maximum number of buckets in a key file is 2^32-1.
*/
// The commented-out narrower type documents the on-disk limit;
// std::size_t is used for the in-memory representation.
//using nbuck_t = std::uint32_t;
using nbuck_t = std::size_t;
/** Holds a key index or count in bucket.

    A bucket is limited to 2^16-1 items. The practical
    limit is lower, since a bucket cannot be larger than
    the block size.
*/
// As above: the commented-out type is the on-disk limit only.
//using nkey_t = std::uint16_t;
using nkey_t = std::size_t;
/** Holds a file size or offset.

    Operating system support for large files is required.
    Practically, data files cannot exceed 2^48 since offsets
    are stored as 48 bit unsigned values.
*/
using noff_t = std::uint64_t;
/** Holds a block, key, or value size.

    Block size is limited to 2^16.
    Key file blocks are limited to the block size.
    Value sizes are limited to 2^31-1.
*/
using nsize_t = std::size_t;
#endif
} // nudb
#endif

View File

@@ -0,0 +1,200 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VERIFY_HPP
#define NUDB_VERIFY_HPP
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
namespace nudb {
/// Describes database statistics calculated by @ref verify.
struct verify_info
{
/** Indicates the verify algorithm used.

    @li @b 0 Normal algorithm

    @li @b 1 Fast algorithm
*/
int algorithm; // 0 = normal, 1 = fast
/// The path to the data file
path_type dat_path;
/// The path to the key file
path_type key_path;
/// The API version used to create the database
std::size_t version = 0;
/// The unique identifier
std::uint64_t uid = 0;
/// The application-defined constant
std::uint64_t appnum = 0;
/// The size of each key, in bytes
nsize_t key_size = 0;
/// The salt used in the key file
std::uint64_t salt = 0;
/// The salt fingerprint
std::uint64_t pepper = 0;
/// The block size used in the key file
nsize_t block_size = 0;
/// The target load factor used in the key file
float load_factor = 0;
/// The maximum number of keys each bucket can hold
nkey_t capacity = 0;
/// The number of buckets in the key file
nbuck_t buckets = 0;
/// The size of a bucket in bytes
nsize_t bucket_size = 0;
/// The size of the key file
noff_t key_file_size = 0;
/// The size of the data file
noff_t dat_file_size = 0;
/// The number of keys found
std::uint64_t key_count = 0;
/// The number of values found
std::uint64_t value_count = 0;
/// The total number of bytes occupied by values
std::uint64_t value_bytes = 0;
/// The number of spill records in use
std::uint64_t spill_count = 0;
/// The total number of spill records
std::uint64_t spill_count_tot = 0;
/// The number of bytes occupied by spill records in use
std::uint64_t spill_bytes = 0;
/// The number of bytes occupied by all spill records
std::uint64_t spill_bytes_tot = 0;
/// Average number of key file reads per fetch
float avg_fetch = 0;
/// The fraction of the data file that is wasted
float waste = 0;
/// The data amplification ratio
float overhead = 0;
/// The measured bucket load fraction
float actual_load = 0;
/// A histogram of the number of buckets having N spill records
std::array<nbuck_t, 10> hist;
/// Default constructor, zero-fills the histogram
verify_info()
{
hist.fill(0);
}
};
/** Verify consistency of the key and data files.

    This function opens the key and data files, and
    performs the following checks on the contents:

    @li Data file header validity

    @li Key file header validity

    @li Data and key file header agreements

    @li Check that each value is contained in a bucket

    @li Check that each bucket item reflects a value

    @li Ensure no values with duplicate keys

    Undefined behavior results when verifying a database
    that still has a log file. Use @ref recover on such
    databases first.

    This function selects one of two algorithms to use, the
    normal version, and a faster version that can take advantage
    of a buffer of sufficient size. Depending on the value of
    the bufferSize argument, the appropriate algorithm is chosen.
    A good value of bufferSize is one that is a large fraction
    of the key file size. For example, 20% of the size of the
    key file. Larger is better, with the highest usable value
    depending on the size of the key file. If presented with
    a buffer size that is too large to be of extra use, the
    fast algorithm will simply allocate what it needs.

    @par Template Parameters

    @tparam Hasher The hash function to use. This type must
    meet the requirements of @b Hasher. The hash function
    must be the same as that used to create the database, or
    else an error is returned.

    @param info A structure which will be default constructed
    inside this function, and filled in if the operation completes
    successfully. If an error is indicated, the contents of this
    variable are undefined.

    @param dat_path The path to the data file.

    @param key_path The path to the key file.

    @param bufferSize The number of bytes to allocate for the buffer.
    If this number is too small, or zero, a slower algorithm will be
    used that does not require a buffer.

    @param progress A function which will be called periodically
    as the algorithm proceeds. The equivalent signature of the
    progress function must be:
    @code
    void progress(
        std::uint64_t amount,   // Amount of work done so far
        std::uint64_t total     // Total amount of work to do
    );
    @endcode

    @param ec Set to the error, if any occurred.
*/
template<class Hasher, class Progress>
void
verify(
verify_info& info,
path_type const& dat_path,
path_type const& key_path,
std::size_t bufferSize,
Progress&& progress,
error_code& ec);
} // nudb
#include <nudb/impl/verify.ipp>
#endif

View File

@@ -0,0 +1,21 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VERSION_HPP
#define NUDB_VERSION_HPP
// follows http://semver.org
// NUDB_VERSION % 100 is the patch level
// NUDB_VERSION / 100 % 1000 is the minor version
// NUDB_VERSION / 100000 is the major version
//
#define NUDB_VERSION 100000
#define NUDB_VERSION_STRING "1.0.0-b6"
#endif

View File

@@ -0,0 +1,63 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VISIT_HPP
#define NUDB_VISIT_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
namespace nudb {
/** Visit each key/data pair in a data file.

    This function will open and iterate the contents of a
    data file, invoking the callback for each key/value
    pair found. Only a data file is necessary, the key
    file may be omitted.

    @tparam Callback The type of the callback function object.

    @tparam Progress The type of the progress function object.

    @param path The path to the data file.

    @param callback A function which will be called with
    each item found in the data file. The equivalent signature
    of the callback must be:
    @code
    void callback(
        void const* key,        // A pointer to the item key
        std::size_t key_size,   // The size of the key (always the same)
        void const* data,       // A pointer to the item data
        std::size_t data_size,  // The size of the item data
        error_code& ec          // Indicates an error (out parameter)
    );
    @endcode
    If the callback sets ec to an error, the visit is terminated.

    @param progress A function which will be called periodically
    as the algorithm proceeds. The equivalent signature of the
    progress function must be:
    @code
    void progress(
        std::uint64_t amount,   // Amount of work done so far
        std::uint64_t total     // Total amount of work to do
    );
    @endcode

    @param ec Set to the error, if any occurred.
*/
template<class Callback, class Progress>
void
visit(
path_type const& path,
Callback&& callback,
Progress&& progress,
error_code& ec);
} // nudb
#include <nudb/impl/visit.ipp>
#endif

View File

@@ -0,0 +1,246 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_WIN32_FILE_HPP
#define NUDB_DETAIL_WIN32_FILE_HPP
#include <nudb/file.hpp>
#include <nudb/error.hpp>
#include <cstddef>
#include <cstdint>
#include <string>
#ifndef NUDB_WIN32_FILE
# ifdef _MSC_VER
# define NUDB_WIN32_FILE 1
# else
# define NUDB_WIN32_FILE 0
# endif
#endif
#if NUDB_WIN32_FILE
#pragma push_macro("NOMINMAX")
#pragma push_macro("UNICODE")
#pragma push_macro("STRICT")
# ifndef NOMINMAX
# define NOMINMAX
# endif
# ifndef UNICODE
# define UNICODE
# endif
# ifndef STRICT
# define STRICT
# endif
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# include <Windows.h>
#pragma pop_macro("STRICT")
#pragma pop_macro("UNICODE")
#pragma pop_macro("NOMINMAX")
#endif
#if NUDB_WIN32_FILE
namespace nudb {
/** A descriptor to a Win32 file.

    This class provides a Win32 implementation of the @b File
    concept.
*/
class win32_file
{
// The underlying Win32 handle; INVALID_HANDLE_VALUE means closed.
HANDLE hf_ = INVALID_HANDLE_VALUE;
public:
/// Constructor
win32_file() = default;
/// Copy constructor (disallowed)
win32_file(win32_file const&) = delete;
// Copy assignment (disallowed)
win32_file& operator=(win32_file const&) = delete;
/** Destructor.

    If open, the file is closed.
*/
~win32_file();
/** Move constructor.

    @note The state of the moved-from object is as if default constructed.
*/
win32_file(win32_file&&);
/** Move assignment.

    @note The state of the moved-from object is as if default constructed.
*/
win32_file&
operator=(win32_file&& other);
/// Returns `true` if the file is open.
bool
is_open() const
{
return hf_ != INVALID_HANDLE_VALUE;
}
/// Close the file if it is open.
void
close();
/** Create a new file.

    After the file is created, it is opened as if by `open(mode, path, ec)`.

    @par Requirements

    The file must not already exist, or else `errc::file_exists`
    is returned.

    @param mode The open mode, which must be a valid @ref file_mode.

    @param path The path of the file to create.

    @param ec Set to the error, if any occurred.
*/
void
create(file_mode mode, path_type const& path, error_code& ec);
/** Open a file.

    @par Requirements

    The file must not already be open.

    @param mode The open mode, which must be a valid @ref file_mode.

    @param path The path of the file to open.

    @param ec Set to the error, if any occurred.
*/
void
open(file_mode mode, path_type const& path, error_code& ec);
/** Remove a file from the file system.

    It is not an error to attempt to erase a file that does not exist.

    @param path The path of the file to remove.

    @param ec Set to the error, if any occurred.
*/
static
void
erase(path_type const& path, error_code& ec);
/** Return the size of the file.

    @par Requirements

    The file must be open.

    @param ec Set to the error, if any occurred.

    @return The size of the file, in bytes.
*/
std::uint64_t
size(error_code& ec) const;
/** Read data from a location in the file.

    @par Requirements

    The file must be open.

    @param offset The position in the file to read from,
    expressed as a byte offset from the beginning.

    @param buffer The location to store the data.

    @param bytes The number of bytes to read.

    @param ec Set to the error, if any occurred.
*/
void
read(std::uint64_t offset,
void* buffer, std::size_t bytes, error_code& ec);
/** Write data to a location in the file.

    @par Requirements

    The file must be open with a mode allowing writes.

    @param offset The position in the file to write to,
    expressed as a byte offset from the beginning.

    @param buffer The data to write.

    @param bytes The number of bytes to write.

    @param ec Set to the error, if any occurred.
*/
void
write(std::uint64_t offset,
void const* buffer, std::size_t bytes, error_code& ec);
/** Perform a low level file synchronization.

    @par Requirements

    The file must be open with a mode allowing writes.

    @param ec Set to the error, if any occurred.
*/
void
sync(error_code& ec);
/** Truncate the file at a specific size.

    @par Requirements

    The file must be open with a mode allowing writes.

    @param length The new file size.

    @param ec Set to the error, if any occurred.
*/
void
trunc(std::uint64_t length, error_code& ec);
private:
// Convert a Win32 error code into an error_code in the system category.
static
void
err(DWORD dwError, error_code& ec)
{
ec = error_code{static_cast<int>(dwError), system_category()};
}
// Set ec from the calling thread's last Win32 error.
static
void
last_err(error_code& ec)
{
err(::GetLastError(), ec);
}
// Map a file_mode to the (dwDesiredAccess, dwFlagsAndAttributes) pair
// used by the Win32 CreateFile call.
static
std::pair<DWORD, DWORD>
flags(file_mode mode);
};
} // nudb
#include <nudb/impl/win32_file.ipp>
#endif
#endif

View File

@@ -0,0 +1,45 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_XXHASHER_HPP
#define NUDB_XXHASHER_HPP
#include <nudb/detail/xxhash.hpp>
#include <cstddef>
#include <cstdint>
#include <type_traits>
namespace nudb {
/** A Hasher that uses xxHash.

    This object meets the requirements of @b Hasher. It is
    the default hash function unless otherwise specified.
*/
class xxhasher
{
// Seed mixed into every digest computation.
std::uint64_t seed_;
public:
using result_type = std::uint64_t;
/// Construct the hasher with the given seed.
explicit
xxhasher(std::uint64_t seed)
: seed_(seed)
{
}
/// Return the 64-bit xxHash digest of the given buffer.
result_type
operator()(void const* data, std::size_t bytes) const noexcept
{
return detail::XXH64(data, bytes, seed_);
}
};
} // nudb
#endif

View File

@@ -0,0 +1,38 @@
# Remember that this blacklist file is GLOBAL to all sanitizers
# Be therefore extremely careful when considering to add a sanitizer
# filter here instead of using a runtime suppression
#
# Remember also that filters here quite literally completely
# remove instrumentation altogether, so filtering here means
# that sanitizers such as tsan will false positive on problems
# introduced by code filtered here.
#
# The main use for this file is ubsan, as it's the only sanitizer
# without a runtime suppression facility.
#
# Be ESPECIALLY careful when filtering out entire source files!
# Try if at all possible to filter only functions using fun:regex
# Remember you must use mangled symbol names with fun:regex
#### Compile time filters for ubsan ####
## The well known ubsan failure in libstdc++ extant for years :)
# Line 96:24: runtime error: load of value 4294967221, which is not a valid value for type 'std::_Ios_Fmtflags'
fun:*_Ios_Fmtflags*
# boost/any.hpp:259:16: runtime error: downcast of address 0x000004392e70 which does not point to an object of type 'any::holder<int>'
fun:*any_cast*
# boost/lexical_cast.hpp:1625:43: runtime error: downcast of address 0x7fbb4fffbce8 which does not point to an object of type 'buffer_t' (aka 'parser_buf<std::basic_streambuf<char, char_traits<char> >, char>')
fun:*shl_input_streamable*
#### Compile time filters for asan ####
#### Compile time filters for msan ####
#### Compile time filters for tsan ####

View File

@@ -0,0 +1,150 @@
#!/usr/bin/env bash
set -euxo pipefail
# The above bash options do the following:
# -e When this option is on, if a simple command fails for any of the reasons
# listed in Consequences of Shell Errors or returns an exit status value >0,
# and is not part of the compound list following a while, until, or if
# keyword, and is not a part of an AND or OR list, and is not a pipeline
# preceded by the ! reserved word, then the shell shall immediately exit.
# -u The shell shall write a message to standard error when it tries to expand a
# variable that is not set and immediately exit. An interactive shell shall
# not exit.
# -x The shell shall write to standard error a trace for each command after it
# expands the command and before it executes it. It is unspecified
# whether the command that turns tracing off is traced.
# -o pipefail
# Pipelines fail on the first command which fails instead of dying later on
# down the pipeline.
# Enable ** recursive globbing, used by the run_* helpers below.
shopt -s globstar
################################## ENVIRONMENT #################################
# If not CI, then set some defaults
if [[ -z ${CI:-} ]]; then
: ${TRAVIS_BRANCH:=feature}
: ${CC:=gcc}
: ${ADDRESS_MODEL:=64}
: ${VARIANT:=debug}
# If running locally we assume we have lcov/valgrind on PATH
else
export PATH=${VALGRIND_ROOT}/bin:${LCOV_ROOT}/usr/bin:${PATH}
fi
MAIN_BRANCH=0
# For builds not triggered by a pull request TRAVIS_BRANCH is the name of the
# branch currently being built; whereas for builds triggered by a pull request
# it is the name of the branch targeted by the pull request (in many cases this
# will be master).
if [[ ${TRAVIS_BRANCH} == master || ${TRAVIS_BRANCH} == develop ]]; then
MAIN_BRANCH=1
fi
# Determine the build parallelism from the number of physical cores.
num_jobs=1
if [[ $(uname) == Darwin ]]; then
num_jobs=$(sysctl -n hw.physicalcpu)
elif [[ $(uname -s) == Linux ]]; then
# CircleCI returns 32 phys procs, but 2 virt proc
num_proc_units=$(nproc)
# Physical cores
num_jobs=$(lscpu -p | grep -v '^#' | sort -u -t, -k 2,4 | wc -l)
if ((${num_proc_units} < ${num_jobs})); then
num_jobs=${num_proc_units}
fi
fi
# Echo the effective configuration for the CI log.
echo "using toolset: ${CC}"
echo "using variant: ${VARIANT}"
echo "using address-model: ${ADDRESS_MODEL}"
echo "using PATH: ${PATH}"
echo "using MAIN_BRANCH: ${MAIN_BRANCH}"
echo "using BOOST_ROOT: ${BOOST_ROOT}"
#################################### HELPERS ###################################
# Run every built test binary under the debugger wrapper script so a
# crash produces a backtrace. ${VARIANT} is quoted so an unexpected
# value cannot be word-split or glob-expanded outside the pattern.
function run_tests_with_debugger {
  for x in bin/**/"${VARIANT}"/**/test-all; do
    scripts/run-with-debugger.sh "${x}"
  done
}
# Run every built test binary directly; 'set -e' aborts on the first
# failing binary. The path is quoted — the original unquoted ${x}
# would word-split a path containing spaces.
function run_tests {
  for x in bin/**/"${VARIANT}"/**/test-all; do
    "${x}"
  done
}
# Run every built bench binary with a small fixed workload. The path is
# quoted — the original unquoted ${x} would word-split a path
# containing spaces.
function run_benchmark {
  for x in bin/**/"${VARIANT}"/**/bench; do
    "${x}" --inserts=10000
  done
}
# Run every built test binary under valgrind; --error-exitcode makes a
# detected error fail the script (via set -e).
function run_tests_with_valgrind {
for x in bin/**/${VARIANT}/**/test-all; do
# TODO --max-stackframe=8388608
# see: https://travis-ci.org/vinniefalco/Beast/jobs/132486245
valgrind --error-exitcode=1 "${x}"
done
}
# Build with Boost.Build (bjam) using the toolset, variant and
# address-model selected by the environment, across ${num_jobs} jobs.
function build_bjam {
${BOOST_ROOT}/bjam toolset=${CC} \
variant=${VARIANT} \
address-model=${ADDRESS_MODEL} \
-j${num_jobs}
}
# Configure and build out-of-source with CMake, then copy the produced
# executables into bin/${VARIANT} so the run_* helpers can find them.
function build_cmake {
mkdir -p build
pushd build > /dev/null
cmake -DVARIANT=${VARIANT} ..
make -j${num_jobs}
mkdir -p ../bin/${VARIANT}
# NOTE(review): 'find -executable' is GNU-specific; fine on Linux CI,
# not portable to BSD find — confirm if macOS cmake builds are added.
find . -executable -type f -exec cp {} ../bin/${VARIANT}/. \;
popd > /dev/null
}
##################################### BUILD ####################################
# Select the build system from the environment (cmake or bjam).
if [[ ${BUILD_SYSTEM:-} == cmake ]]; then
build_cmake
else
build_bjam
fi
##################################### TESTS ####################################
# Coverage builds run the tests under lcov and upload the results;
# all other variants run the tests under a debugger plus the benchmark.
if [[ ${VARIANT} == coverage ]]; then
# Remove stale coverage counters from previous runs
find . -name "*.gcda" | xargs rm -f
rm *.info -f
# Create baseline coverage data file
lcov --no-external -c -i -d . -o baseline.info > /dev/null
# Perform test
if [[ ${MAIN_BRANCH} == 1 ]]; then
run_tests_with_valgrind
else
run_tests
fi
# Create test coverage data file
lcov --no-external -c -d . -o testrun.info > /dev/null
# Combine baseline and test coverage data
lcov -a baseline.info -a testrun.info -o lcov-all.info > /dev/null
# Extract only include/*, and don\'t report on examples or test
lcov -e "lcov-all.info" "${PWD}/include/nudb/*" -o lcov.info > /dev/null
# Upload to codecov and coveralls
~/.local/bin/codecov -X gcov
cat lcov.info | node_modules/.bin/coveralls
# Clean up these stragglers so BOOST_ROOT cache is clean
find ${BOOST_ROOT}/bin.v2 -name "*.gcda" | xargs rm -f
else
run_tests_with_debugger
run_benchmark
fi

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Build and install Boost into BOOST_ROOT unless a cached build exists.
#
# Assumptions:
# 1) BOOST_ROOT and BOOST_URL are already defined,
#    and contain valid values.
# 2) The last namepart of BOOST_ROOT matches the
#    folder name internal to boost's .tar.gz
# When testing you can force a boost build by clearing travis caches:
# https://travis-ci.org/ripple/rippled/caches
set -eu

if [ ! -d "$BOOST_ROOT/lib" ]
then
  # Quote the URL: BOOST_URL contains '&' and '?' characters and an
  # unquoted expansion is subject to word splitting and globbing.
  wget "$BOOST_URL" -O /tmp/boost.tar.gz
  cd "$(dirname "$BOOST_ROOT")"
  rm -fr "${BOOST_ROOT}"
  tar xzf /tmp/boost.tar.gz
  # params is deliberately expanded UNQUOTED below so that it word-splits
  # into separate b2 arguments.
  params="define=_GLIBCXX_USE_CXX11_ABI=0 \
      address-model=$ADDRESS_MODEL --with-program_options \
      --with-system --with-coroutine --with-filesystem"
  cd "$BOOST_ROOT" && \
    ./bootstrap.sh --prefix="$BOOST_ROOT" && \
    ./b2 -d1 $params && \
    ./b2 -d0 $params install
else
  echo "Using cached boost at $BOOST_ROOT"
fi

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env bash
set -euxo pipefail
# The above bash options do the following:
#   -e           exit immediately if a simple command fails
#   -u           expanding an unset variable is an error
#   -x           trace each command to standard error before executing it
#   -o pipefail  a pipeline fails as soon as any stage of it fails
HERE=${PWD}

# Override gcc version to $GCC_VER.
# Put an appropriate symlink at the front of the path.
# -p makes this idempotent: a second run (or a restored CI cache) must
# not abort under 'set -e' merely because the directory already exists.
mkdir -pv "${HOME}/bin"
for g in gcc g++ gcov gcc-ar gcc-nm gcc-ranlib
do
    test -x "$(type -p ${g}-${GCC_VER})"
    # -f replaces a stale symlink left over from a previous run
    ln -sfv "$(type -p ${g}-${GCC_VER})" "${HOME}/bin/${g}"
done

if [[ -n ${CLANG_VER:-} ]]; then
    # There are cases where the directory exists, but the exe is not available.
    # Use this workaround for now.
    if [[ ! -x llvm-${LLVM_VERSION}/bin/llvm-config ]] && [[ -d llvm-${LLVM_VERSION} ]]; then
        rm -fr llvm-${LLVM_VERSION}
    fi
    if [[ ! -d llvm-${LLVM_VERSION} ]]; then
        mkdir llvm-${LLVM_VERSION}
        LLVM_URL="http://llvm.org/releases/${LLVM_VERSION}/clang+llvm-${LLVM_VERSION}-x86_64-linux-gnu-ubuntu-14.04.tar.xz"
        wget -O - ${LLVM_URL} | tar -Jxvf - --strip 1 -C llvm-${LLVM_VERSION}
    fi
    llvm-${LLVM_VERSION}/bin/llvm-config --version;
    export LLVM_CONFIG="llvm-${LLVM_VERSION}/bin/llvm-config";
fi

# There are cases where the directory exists, but the exe is not available.
# Use this workaround for now.
if [[ ! -x cmake/bin/cmake && -d cmake ]]; then
    rm -fr cmake
fi
if [[ ! -d cmake && ${BUILD_SYSTEM:-} == cmake ]]; then
    CMAKE_URL="http://www.cmake.org/files/v3.5/cmake-3.5.2-Linux-x86_64.tar.gz"
    mkdir cmake && wget --no-check-certificate -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake
fi

# NOTE, changed from PWD -> HOME
export PATH=${HOME}/bin:${PATH}

# What versions are we ACTUALLY running?
if [ -x "${HOME}/bin/g++" ]; then
    ${HOME}/bin/g++ -v
fi
if [ -x "${HOME}/bin/clang" ]; then
    ${HOME}/bin/clang -v
fi

# Avoid `spurious errors` caused by ~/.npm permission issues
# Does it already exist? Who owns? What permissions?
ls -lah ~/.npm || mkdir ~/.npm
# Make sure we own it
chown -Rc ${USER} ~/.npm

# We use this so we can filter the subtrees from our coverage report
pip install --user https://github.com/codecov/codecov-python/archive/master.zip

bash scripts/install-boost.sh
bash scripts/install-valgrind.sh

# Install lcov
# Download the archive
wget http://downloads.sourceforge.net/ltp/lcov-1.12.tar.gz
# Extract to ~/lcov-1.12
tar xfvz lcov-1.12.tar.gz -C ${HOME}
# Set install path
mkdir -p ${LCOV_ROOT}
cd ${HOME}/lcov-1.12 && make install PREFIX=${LCOV_ROOT}

# Install coveralls reporter
cd ${HERE}
mkdir -p node_modules
npm install coveralls

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Build and install valgrind from source into VALGRIND_ROOT unless a
# cached installation already exists there.
#
# Assumptions:
# 1) VALGRIND_ROOT is already defined, and contains a valid value
set -eu
if [ ! -d "$VALGRIND_ROOT/bin" ]
then
# These are specified in the addons/apt section of .travis.yml
# sudo apt-get install subversion automake autotools-dev libc6-dbg
export PATH=$PATH:$VALGRIND_ROOT/bin
svn co svn://svn.valgrind.org/valgrind/trunk valgrind-co
cd valgrind-co
./autogen.sh
./configure --prefix=$VALGRIND_ROOT
make
make install
# test it
valgrind ls -l
else
echo "Using cached valgrind at $VALGRIND_ROOT"
fi

View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Run the given program under a debugger (lldb on Darwin, gdb elsewhere)
# so that a crash produces a full backtrace and a nonzero exit status.
# "$@" is quoted throughout: the original unquoted $@ word-split
# arguments containing spaces.
set -eu
if [[ $(uname) == "Darwin" ]]; then
    # -o runs after loading the binary
    # -k runs after any crash
    # We use a ghetto approximation of --return-child-result, exiting with
    # 1 on a crash
    lldb --batch \
         -o 'run' \
         -k 'thread backtrace all' \
         -k 'script import os; os._exit(1)' \
         "$@"
else
    gdb --silent \
        --batch \
        --return-child-result \
        -ex="set print thread-events off" \
        -ex=run \
        -ex="thread apply all bt full" \
        --args "$@"
fi

View File

@@ -0,0 +1,9 @@
#!/bin/bash -u
# Run the given program under gdb in batch mode: on a crash, print a
# full backtrace of every thread, and propagate the child's exit
# status (--return-child-result).
set -e
# "$@" is quoted: the original unquoted $@ word-split arguments
# containing spaces.
gdb --silent \
    --batch \
    --return-child-result \
    -ex="set print thread-events off" \
    -ex=run \
    -ex="thread apply all bt full" \
    --args "$@"

View File

@@ -0,0 +1,38 @@
# Part of nudb
#
# Builds the unified unit-test runner. GroupSources is a helper defined
# by the parent project; it arranges sources into IDE source groups.
GroupSources(test "/")
GroupSources(include/nudb nudb)
GroupSources(extras/nudb extras)
GroupSources(extras/beast/include/beast beast)
GroupSources(extras/beast/extras/beast beast)

add_executable(test-all
    ${EXTRAS_INCLUDES}
    ${NUDB_INCLUDES}
    ${BEAST_INCLUDES}
    ../extras/beast/extras/beast/unit_test/main.cpp
    basic_store.cpp
    buffer.cpp
    callgrind_test.cpp
    concepts.cpp
    create.cpp
    error.cpp
    file.cpp
    native_file.cpp
    posix_file.cpp
    recover.cpp
    rekey.cpp
    store.cpp
    type_traits.cpp
    verify.cpp
    version.cpp
    visit.cpp
    win32_file.cpp
    xxhasher.cpp
)

# Use an explicit visibility keyword; the keyword-less signature of
# target_link_libraries has legacy semantics. A test executable has no
# consumers, so PRIVATE is correct. On POSIX, librt and the Threads
# imported target supply the timer and pthread dependencies.
if (WIN32)
    target_link_libraries(test-all PRIVATE ${Boost_LIBRARIES})
else ()
    target_link_libraries(test-all PRIVATE ${Boost_LIBRARIES} rt Threads::Threads)
endif ()

30
src/nudb/test/Jamfile Normal file
View File

@@ -0,0 +1,30 @@
#
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#
import os ;

# Build the unified test runner from every test translation unit plus
# Beast's unit_test main. Mirrors the source list in CMakeLists.txt.
unit-test test-all :
../extras/beast/extras/beast/unit_test/main.cpp
basic_store.cpp
buffer.cpp
callgrind_test.cpp
concepts.cpp
create.cpp
error.cpp
file.cpp
native_file.cpp
posix_file.cpp
recover.cpp
rekey.cpp
store.cpp
type_traits.cpp
verify.cpp
version.cpp
visit.cpp
win32_file.cpp
xxhasher.cpp
;

View File

@@ -0,0 +1,250 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/basic_store.hpp>
#include <nudb/test/test_store.hpp>
#include <nudb/detail/arena.hpp>
#include <nudb/detail/cache.hpp>
#include <nudb/detail/pool.hpp>
#include <nudb/progress.hpp>
#include <nudb/verify.hpp>
#include <beast/unit_test/suite.hpp>
#include <limits>
#include <type_traits>
namespace nudb {
namespace detail {
// Verify that arena, cache, and pool are move-constructible but not
// copyable, copy-assignable, or move-assignable.
static_assert(!std::is_copy_constructible <arena>{}, "");
static_assert(!std::is_copy_assignable <arena>{}, "");
static_assert( std::is_move_constructible <arena>{}, "");
static_assert(!std::is_move_assignable <arena>{}, "");
static_assert(!std::is_copy_constructible <cache>{}, "");
static_assert(!std::is_copy_assignable <cache>{}, "");
static_assert( std::is_move_constructible <cache>{}, "");
static_assert(!std::is_move_assignable <cache>{}, "");
static_assert(!std::is_copy_constructible <pool>{}, "");
static_assert(!std::is_copy_assignable <pool>{}, "");
static_assert( std::is_move_constructible <pool>{}, "");
static_assert(!std::is_move_assignable <pool>{}, "");
} // detail
namespace test {
class basic_store_test : public beast::unit_test::suite
{
public:
// Exercise the basic_store accessors: opening a missing database must
// fail, then create/open must succeed and the accessors must reflect
// the parameters used at creation.
void
test_members()
{
std::size_t const keySize = 4;
std::size_t const blockSize = 4096;
float loadFactor = 0.5f;
error_code ec;
test_store ts{keySize, blockSize, loadFactor};
// Files not found
ts.open(ec);
if(! BEAST_EXPECTS(ec ==
errc::no_such_file_or_directory, ec.message()))
return;
ec = {};
ts.create(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
ts.open(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
// Accessors must report the creation parameters
BEAST_EXPECT(ts.db.dat_path() == ts.dp);
BEAST_EXPECT(ts.db.key_path() == ts.kp);
BEAST_EXPECT(ts.db.log_path() == ts.lp);
BEAST_EXPECT(ts.db.appnum() == ts.appnum);
BEAST_EXPECT(ts.db.key_size() == ts.keySize);
BEAST_EXPECT(ts.db.block_size() == ts.blockSize);
}
// Inserts a bunch of values then fetches them
void
do_insert_fetch(
std::size_t N,
std::size_t keySize,
std::size_t blockSize,
float loadFactor,
bool sleep)
{
testcase <<
"N=" << N << ", "
"keySize=" << keySize << ", "
"blockSize=" << blockSize;
error_code ec;
test_store ts{keySize, blockSize, loadFactor};
ts.create(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
ts.open(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
// Insert
for(std::size_t n = 0; n < N; ++n)
{
auto const item = ts[n];
ts.db.insert(item.key, item.data, item.size, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
// Fetch
for(std::size_t n = 0; n < N; ++n)
{
auto const item = ts[n];
ts.db.fetch(item.key,
[&](void const* data, std::size_t size)
{
if(! BEAST_EXPECT(size == item.size))
return;
BEAST_EXPECT(
std::memcmp(data, item.data, size) == 0);
}, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
// Insert Duplicate
for(std::size_t n = 0; n < N; ++n)
{
auto const item = ts[n];
ts.db.insert(item.key, item.data, item.size, ec);
if(! BEAST_EXPECTS(
ec == error::key_exists, ec.message()))
return;
ec = {};
}
// Insert and Fetch
if(keySize > 1)
{
for(std::size_t n = 0; n < N; ++n)
{
auto item = ts[n];
ts.db.fetch(item.key,
[&](void const* data, std::size_t size)
{
if(! BEAST_EXPECT(size == item.size))
return;
BEAST_EXPECT(
std::memcmp(data, item.data, size) == 0);
}, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
item = ts[N + n];
ts.db.insert(item.key, item.data, item.size, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
ts.db.fetch(item.key,
[&](void const* data, std::size_t size)
{
if(! BEAST_EXPECT(size == item.size))
return;
BEAST_EXPECT(
std::memcmp(data, item.data, size) == 0);
}, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
}
if(sleep)
{
// Make sure we run periodic activity
std::this_thread::sleep_for(
std::chrono::milliseconds{3000});
}
ts.close(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
// Perform insert/fetch test across a range of parameters
void
test_insert_fetch()
{
for(auto const keySize : {
1, 2, 3, 31, 32, 33, 63, 64, 65, 95, 96, 97 })
{
std::size_t N;
std::size_t constexpr blockSize = 4096;
float loadFactor = 0.95f;
switch(keySize)
{
case 1: N = 10; break;
case 2: N = 100; break;
case 3: N = 250; break;
default:
N = 5000;
break;
};
do_insert_fetch(N, keySize, blockSize, loadFactor,
keySize == 97);
}
}
void
test_bulk_insert(std::size_t N, std::size_t keySize,
std::size_t blockSize, float loadFactor)
{
testcase <<
"bulk_insert N=" << N << ", "
"keySize=" << keySize << ", "
"blockSize=" << blockSize;
error_code ec;
test_store ts{keySize, blockSize, loadFactor};
ts.create(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
ts.open(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
// Insert
for(std::size_t n = 0; n < N; ++n)
{
auto const item = ts[n];
ts.db.insert(item.key, item.data, item.size, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
ts.close(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
verify_info info;
verify<xxhasher>(info, ts.dp, ts.kp,
64 * 1024 * 1024 , no_progress{}, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
log << info;
}
void
run() override
{
#if 1
test_members();
test_insert_fetch();
#else
// bulk-insert performance test
test_bulk_insert(10000000, 8, 4096, 0.5f);
#endif
}
};
BEAST_DEFINE_TESTSUITE(basic_store, test, nudb);
} // test
} // nudb

[New file: src/nudb/test/buffer.cpp — 77 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/detail/buffer.hpp>
#include <beast/unit_test/suite.hpp>
#include <type_traits>
namespace nudb {
namespace test {
// Verifies the type traits and move semantics of detail::buffer:
// default-constructible, move-only, and moving-from leaves the
// source empty.
class buffer_test : public beast::unit_test::suite
{
public:
    void
    run() override
    {
        using buffer = nudb::detail::buffer;
        static_assert(std::is_default_constructible<buffer>::value, "");
#if 0
        // Enable this branch if buffer ever becomes copyable
        static_assert(std::is_copy_constructible<buffer>::value, "");
        static_assert(std::is_copy_assignable<buffer>::value, "");
#else
        static_assert(! std::is_copy_constructible<buffer>::value, "");
        static_assert(! std::is_copy_assignable<buffer>::value, "");
#endif
        static_assert(std::is_move_constructible<buffer>::value, "");
        static_assert(std::is_move_assignable<buffer>::value, "");
        // Default construction
        {
            buffer b;
        }
        // Move construction empties the source
        {
            buffer b1(1024);
            BEAST_EXPECT(b1.size() == 1024);
            buffer b2(std::move(b1));
            BEAST_EXPECT(b1.size() == 0);
            BEAST_EXPECT(b2.size() == 1024);
        }
        // Move assignment empties the source
        {
            buffer b1(1024);
            BEAST_EXPECT(b1.size() == 1024);
            buffer b2;
            b2 = std::move(b1);
            BEAST_EXPECT(b1.size() == 0);
            BEAST_EXPECT(b2.size() == 1024);
        }
#if 0
        // Copy tests, disabled while buffer is move-only
        {
            buffer b1(1024);
            BEAST_EXPECT(b1.size() == 1024);
            buffer b2(b1);
            BEAST_EXPECT(b1.size() == 1024);
            BEAST_EXPECT(b2.size() == 1024);
        }
        {
            buffer b1(1024);
            BEAST_EXPECT(b1.size() == 1024);
            buffer b2;
            b2 = b1;
            BEAST_EXPECT(b1.size() == 1024);
            BEAST_EXPECT(b2.size() == 1024);
        }
#endif
    }
};

BEAST_DEFINE_TESTSUITE(buffer, test, nudb);
} // test
} // nudb

[New file (name not shown in this excerpt) — 92 lines added: callgrind test suite]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <nudb/test/test_store.hpp>
#include <beast/unit_test/suite.hpp>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <utility>
namespace nudb {
namespace test {
// This test is designed for callgrind runs to find hotspots
class callgrind_test : public beast::unit_test::suite
{
public:
// Creates and opens a database, performs a bunch
// of inserts, then alternates fetching all the keys
// with keys not present.
//
void
testCallgrind(std::size_t N)
{
using key_type = std::uint64_t;
std::size_t const blockSize = 4096;
float const loadFactor = 0.5;
error_code ec;
test_store ts{sizeof(key_type), blockSize, loadFactor};
ts.create(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
ts.open(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
for(std::size_t i = 0; i < N; ++i)
{
auto const item = ts[i];
ts.db.insert(item.key, item.data, item.size, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
Buffer b;
for(std::size_t i = 0; i < N * 2; ++i)
{
if(! (i%2))
{
auto const item = ts[i/2];
ts.db.fetch(item.key, b, ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
if(! BEAST_EXPECT(b.size() == item.size))
return;
if(! BEAST_EXPECT(std::memcmp(b.data(),
item.data, item.size) == 0))
return;
}
else
{
auto const item = ts[N + i/2];
ts.db.fetch(item.key, b, ec);
if(! BEAST_EXPECTS(ec ==
error::key_not_found, ec.message()))
return;
ec = {};
}
}
ts.close(ec);
if(! BEAST_EXPECTS(! ec, ec.message()))
return;
}
void run()
{
// higher numbers, more pain
std::size_t constexpr N = 100000;
testCallgrind(N);
}
};
BEAST_DEFINE_TESTSUITE(callgrind, test, nudb);
} // test
} // nudb

[New file (name not shown in this excerpt) — 9 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/concepts.hpp>

[New file: src/nudb/test/create.cpp — 49 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/create.hpp>
#include <nudb/test/test_store.hpp>
#include <nudb/create.hpp>
#include <beast/unit_test/suite.hpp>
namespace nudb {
namespace test {
// Verifies the behavior of create(): the first creation succeeds,
// and attempting to create the same store again fails with
// errc::file_exists.
class create_test : public beast::unit_test::suite
{
public:
    void
    test_create()
    {
        std::size_t const keyBytes = 8;
        std::size_t const blockBytes = 256;
        float const load = 0.5f;
        error_code ec;
        test_store ts{keyBytes, blockBytes, load};
        // First creation succeeds
        ts.create(ec);
        if(! BEAST_EXPECTS(! ec, ec.message()))
            return;
        // Second creation fails: the files already exist
        ts.create(ec);
        if(! BEAST_EXPECTS(
                ec == errc::file_exists, ec.message()))
            return;
    }

    void
    run() override
    {
        test_create();
    }
};

BEAST_DEFINE_TESTSUITE(create, test, nudb);
} // test
} // nudb

[New file: src/nudb/test/error.cpp — 83 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/error.hpp>
#include <beast/unit_test/suite.hpp>
namespace nudb {
namespace test {
class error_test : public beast::unit_test::suite
{
public:
void check(char const* name, error ev)
{
auto const ec = make_error_code(ev);
BEAST_EXPECT(std::string{ec.category().name()} == name);
BEAST_EXPECT(! ec.message().empty());
BEAST_EXPECT(std::addressof(ec.category()) ==
std::addressof(nudb_category()));
BEAST_EXPECT(nudb_category().equivalent(static_cast<int>(ev),
ec.category().default_error_condition(static_cast<int>(ev))));
BEAST_EXPECT(nudb_category().equivalent(
ec, static_cast<int>(ev)));
}
void run() override
{
nudb_category().message(0);
nudb_category().message(99999);
check("nudb", error::success);
check("nudb", error::key_not_found);
check("nudb", error::key_exists);
check("nudb", error::short_read);
check("nudb", error::log_file_exists);
check("nudb", error::no_key_file);
check("nudb", error::too_many_buckets);
check("nudb", error::not_data_file);
check("nudb", error::not_key_file);
check("nudb", error::not_log_file);
check("nudb", error::different_version);
check("nudb", error::invalid_key_size);
check("nudb", error::invalid_block_size);
check("nudb", error::short_key_file);
check("nudb", error::short_bucket);
check("nudb", error::short_spill);
check("nudb", error::short_data_record);
check("nudb", error::short_value);
check("nudb", error::hash_mismatch);
check("nudb", error::invalid_load_factor);
check("nudb", error::invalid_capacity);
check("nudb", error::invalid_bucket_count);
check("nudb", error::invalid_bucket_size);
check("nudb", error::incomplete_data_file_header);
check("nudb", error::incomplete_key_file_header);
check("nudb", error::invalid_log_record);
check("nudb", error::invalid_log_spill);
check("nudb", error::invalid_log_offset);
check("nudb", error::invalid_log_index);
check("nudb", error::invalid_spill_size);
check("nudb", error::uid_mismatch);
check("nudb", error::appnum_mismatch);
check("nudb", error::key_size_mismatch);
check("nudb", error::salt_mismatch);
check("nudb", error::pepper_mismatch);
check("nudb", error::block_size_mismatch);
check("nudb", error::orphaned_value);
check("nudb", error::missing_value);
check("nudb", error::size_mismatch);
check("nudb", error::duplicate_value);
}
};
BEAST_DEFINE_TESTSUITE(error, test, nudb);
} // test
} // nudb

[New file: src/nudb/test/file.cpp — 9 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/file.hpp>

[New file (name not shown in this excerpt) — 9 lines added]
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Test that header file is self-contained
#include <nudb/native_file.hpp>

Some files were not shown because too many files changed in this diff.