Squashed 'src/nudb/' content from commit 00adc6a

git-subtree-dir: src/nudb
git-subtree-split: 00adc6a4f16679a376f40c967f77dfa544c179c1
This commit is contained in:
Vinnie Falco
2016-09-29 19:24:12 -04:00
commit 79159ffd87
113 changed files with 15806 additions and 0 deletions

View File

@@ -0,0 +1,436 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_BASIC_STORE_HPP
#define NUDB_BASIC_STORE_HPP
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/cache.hpp>
#include <nudb/detail/gentex.hpp>
#include <nudb/detail/mutex.hpp>
#include <nudb/detail/pool.hpp>
#include <boost/optional.hpp>
#include <chrono>
#include <mutex>
#include <thread>
namespace nudb {
/** A high performance, insert-only key/value database for SSDs.
To create a database first call the @ref create
free function. Then construct a @ref basic_store and
call @ref open on it:
@code
error_code ec;
create<xxhasher>(
"db.dat", "db.key", "db.log",
1, make_salt(), 8, 4096, 0.5f, ec);
basic_store<xxhasher, native_file> db;
db.open("db.dat", "db.key", "db.log", ec);
@endcode
@tparam Hasher The hash function to use. This type
must meet the requirements of @b Hasher.
@tparam File The type of File object to use. This type
must meet the requirements of @b File.
*/
template<class Hasher, class File>
class basic_store
{
public:
/// The type passed as the `Hasher` template argument.
using hash_type = Hasher;
/// The type passed as the `File` template argument.
using file_type = File;
private:
// Clock and time point types used for the `when` timestamp in `state`.
using clock_type =
std::chrono::steady_clock;
using time_point =
typename clock_type::time_point;
// Per-database state, held in `s_` (see below).
// NOTE(review): df/kf/lf and dp/kp/lp are presumably the data, key and
// log files and their paths, matching dat_path()/key_path()/log_path();
// confirm against <nudb/impl/basic_store.ipp>.
struct state
{
File df;
File kf;
File lf;
path_type dp;
path_type kp;
path_type lp;
Hasher hasher;
detail::pool p0;
detail::pool p1;
detail::cache c1;
detail::key_file_header kh;
std::size_t rate = 0;
time_point when = clock_type::now();
// Move-only.
state(state const&) = delete;
state& operator=(state const&) = delete;
state(state&&) = default;
state& operator=(state&&) = default;
state(File&& df_, File&& kf_, File&& lf_,
path_type const& dp_, path_type const& kp_,
path_type const& lp_,
detail::key_file_header const& kh_);
};
bool open_ = false; // value returned by is_open()
// Use optional because some
// members cannot be default-constructed.
//
boost::optional<state> s_; // State of an open database
// NOTE(review): the members below have no in-class initializer and the
// constructor is defaulted; presumably they are assigned when the
// database is opened -- confirm in the implementation file.
std::size_t frac_; // accumulates load
std::size_t thresh_; // split threshold
nbuck_t buckets_; // number of buckets
nbuck_t modulus_; // hash modulus
std::mutex u_; // serializes insert()
detail::gentex g_;
boost::shared_mutex m_;
std::thread t_;
std::condition_variable_any cv_;
error_code ec_;
std::atomic<bool> ecb_; // `true` when ec_ set
std::size_t dataWriteSize_;
std::size_t logWriteSize_;
public:
/** Default constructor.
A default constructed database is initially closed.
*/
basic_store() = default;
/// Copy constructor (disallowed)
basic_store(basic_store const&) = delete;
/// Copy assignment (disallowed)
basic_store& operator=(basic_store const&) = delete;
/** Destroy the database.
Files are closed, memory is freed, and data that has not been
committed is discarded. To ensure that all inserted data is
written, it is necessary to call @ref close before destroying
the @ref basic_store.
This function ignores errors returned by @ref close; to receive
those errors it is necessary to call @ref close before the
@ref basic_store is destroyed.
*/
~basic_store();
/** Returns `true` if the database is open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
*/
bool
is_open() const
{
return open_;
}
/** Return the path to the data file.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The data file path.
*/
path_type const&
dat_path() const;
/** Return the path to the key file.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The key file path.
*/
path_type const&
key_path() const;
/** Return the path to the log file.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The log file path.
*/
path_type const&
log_path() const;
/** Return the appnum associated with the database.
This is an unsigned 64-bit integer associated with the
database and defined by the application. It is set
once when the database is created in a call to
@ref create.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The appnum.
*/
std::uint64_t
appnum() const;
/** Return the key size associated with the database.
The key size is defined by the application when the
database is created in a call to @ref create. The
key size cannot be changed on an existing database.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The size of keys in the database.
*/
std::size_t
key_size() const;
/** Return the block size associated with the database.
The block size is defined by the application when the
database is created in a call to @ref create or when a
key file is regenerated in a call to @ref rekey. The
block size cannot be changed on an existing key file.
Instead, a new key file may be created with a different
block size.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function
except @ref open or @ref close.
@return The size of blocks in the key file.
*/
std::size_t
block_size() const;
/** Close the database.
All data is committed before closing.
If an error occurs, the database is still closed.
@par Requirements
The database must be open.
@par Thread safety
Not thread safe. The caller is responsible for
ensuring that no other member functions are
called concurrently.
@param ec Set to the error, if any occurred.
*/
void
close(error_code& ec);
/** Open a database.
The database identified by the specified data, key, and
log file paths is opened. If a log file is present, the
recovery mechanism is invoked to restore database integrity
before the function returns.
@par Requirements
The database must not be open.
@par Thread safety
Not thread safe. The caller is responsible for
ensuring that no other member functions are
called concurrently.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param log_path The path to the log file.
@param ec Set to the error, if any occurred.
@param args Optional arguments passed to @b File constructors.
*/
template<class... Args>
void
open(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
error_code& ec,
Args&&... args);
/** Fetch a value.
The function checks the database for the specified
key, and invokes the callback if it is found. If
the key is not found, `ec` is set to @ref error::key_not_found.
If any other errors occur, `ec` is set to the
corresponding error.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function except
@ref close.
@note If the implementation encounters an error while
committing data to the database, this function will
immediately return with `ec` set to the error which
occurred. All subsequent calls to @ref fetch will
return the same error until the database is closed.
@param key A pointer to a memory buffer of at least
@ref key_size() bytes, containing the key to be searched
for.
@param callback A function which will be called with the
value data if the fetch is successful. The equivalent
signature must be:
@code
void callback(
void const* buffer, // A buffer holding the value
std::size_t size // The size of the value in bytes
);
@endcode
The buffer provided to the callback remains valid
until the callback returns; ownership is not transferred.
@param ec Set to the error, if any occurred.
*/
template<class Callback>
void
fetch(void const* key, Callback && callback, error_code& ec);
/** Insert a value.
This function attempts to insert the specified key/value
pair into the database. If the key already exists,
`ec` is set to @ref error::key_exists. If an error
occurs, `ec` is set to the corresponding error.
@par Requirements
The database must be open.
@par Thread safety
Safe to call concurrently with any function except
@ref close.
@note If the implementation encounters an error while
committing data to the database, this function will
immediately return with `ec` set to the error which
occurred. All subsequent calls to @ref insert will
return the same error until the database is closed.
@param key A buffer holding the key to be inserted. The
size of the buffer should be at least the `key_size`
associated with the open database.
@param data A buffer holding the value to be inserted.
@param bytes The size of the buffer holding the value
data. This value must be greater than 0 and no more
than 0xffffffff.
@param ec Set to the error, if any occurred.
*/
void
insert(void const* key, void const* data,
nsize_t bytes, error_code& ec);
private:
// Implementation helpers. Only the declarations are visible in this
// header; the definitions presumably live in the implementation file
// included at the bottom of the header -- confirm there before relying
// on any behavior.
template<class Callback>
void
fetch(detail::nhash_t h, void const* key,
detail::bucket b, Callback && callback, error_code& ec);
bool
exists(detail::nhash_t h, void const* key,
detail::shared_lock_type* lock, detail::bucket b, error_code& ec);
void
split(detail::bucket& b1, detail::bucket& b2,
detail::bucket& tmp, nbuck_t n1, nbuck_t n2,
nbuck_t buckets, nbuck_t modulus,
detail::bulk_writer<File>& w, error_code& ec);
detail::bucket
load(nbuck_t n, detail::cache& c1,
detail::cache& c0, void* buf, error_code& ec);
void
commit(detail::unique_lock_type& m,
std::size_t& work, error_code& ec);
// NOTE(review): presumably the entry point executed by the thread `t_`
// -- confirm in the implementation file.
void
run();
};
} // nudb
#include <nudb/impl/basic_store.ipp>
#endif

205
include/nudb/concepts.hpp Normal file
View File

@@ -0,0 +1,205 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_CONCEPTS_HPP
#define NUDB_CONCEPTS_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <cstddef>
#include <cstdint>
#include <type_traits>
namespace nudb {
namespace detail {
template<class T>
class check_is_File
{
    // Every requirement is probed with a pair of overloads. The `int`
    // overload takes part in overload resolution only when the
    // expression in its default template argument is well-formed;
    // otherwise the variadic fallback is selected and the probe
    // yields std::false_type.

    // `f.is_open()` on a const object, convertible to bool
    template<class U, class R = std::is_convertible<
        decltype(std::declval<U const>().is_open()), bool>>
    static R probe_is_open(int);
    template<class>
    static std::false_type probe_is_open(...);
    using has_is_open = decltype(probe_is_open<T>(0));

    // `f.close()`
    template<class U, class R = decltype(
        std::declval<U>().close(),
        std::true_type{})>
    static R probe_close(int);
    template<class>
    static std::false_type probe_close(...);
    using has_close = decltype(probe_close<T>(0));

    // `f.create(mode, path, ec)`
    template<class U, class R = decltype(
        std::declval<U>().create(
            std::declval<file_mode>(),
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_create(int);
    template<class>
    static std::false_type probe_create(...);
    using has_create = decltype(probe_create<T>(0));

    // `f.open(mode, path, ec)`
    template<class U, class R = decltype(
        std::declval<U>().open(
            std::declval<file_mode>(),
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_open(int);
    template<class>
    static std::false_type probe_open(...);
    using has_open = decltype(probe_open<T>(0));

    // `File::erase(path, ec)`, called without an object
    template<class U, class R = decltype(
        U::erase(
            std::declval<path_type>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_erase(int);
    template<class>
    static std::false_type probe_erase(...);
    using has_erase = decltype(probe_erase<T>(0));

    // `f.size(ec)` on a const object, convertible to std::uint64_t
    template<class U, class R = std::is_convertible<
        decltype(std::declval<U const>().size(
            std::declval<error_code&>())),
        std::uint64_t>>
    static R probe_size(int);
    template<class>
    static std::false_type probe_size(...);
    using has_size = decltype(probe_size<T>(0));

    // `f.read(offset, buffer, bytes, ec)`
    template<class U, class R = decltype(
        std::declval<U>().read(
            std::declval<std::uint64_t>(),
            std::declval<void*>(),
            std::declval<std::size_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_read(int);
    template<class>
    static std::false_type probe_read(...);
    using has_read = decltype(probe_read<T>(0));

    // `f.write(offset, buffer, bytes, ec)`
    template<class U, class R = decltype(
        std::declval<U>().write(
            std::declval<std::uint64_t>(),
            std::declval<void const*>(),
            std::declval<std::size_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_write(int);
    template<class>
    static std::false_type probe_write(...);
    using has_write = decltype(probe_write<T>(0));

    // `f.sync(ec)`
    template<class U, class R = decltype(
        std::declval<U>().sync(
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_sync(int);
    template<class>
    static std::false_type probe_sync(...);
    using has_sync = decltype(probe_sync<T>(0));

    // `f.trunc(length, ec)`
    template<class U, class R = decltype(
        std::declval<U>().trunc(
            std::declval<std::uint64_t>(),
            std::declval<error_code&>()),
        std::true_type{})>
    static R probe_trunc(int);
    template<class>
    static std::false_type probe_trunc(...);
    using has_trunc = decltype(probe_trunc<T>(0));

public:
    // `true` only when T is move constructible and every probe succeeds
    using type = std::integral_constant<bool,
        std::is_move_constructible<T>::value &&
        has_is_open::value &&
        has_close::value &&
        has_create::value &&
        has_open::value &&
        has_erase::value &&
        has_size::value &&
        has_read::value &&
        has_write::value &&
        has_sync::value &&
        has_trunc::value
    >;
};
template<class T>
class check_is_Hasher
{
    // Constructible from a std::uint64_t
    template<class U, class R =
        std::is_constructible<U, std::uint64_t>>
    static R probe_construct(int);
    template<class>
    static std::false_type probe_construct(...);
    using has_construct = decltype(probe_construct<T>(0));

    // Callable on a const object with (void const*, std::size_t),
    // giving a result convertible to std::uint64_t
    template<class U, class R = std::is_convertible<
        decltype(std::declval<U const>().operator()(
            std::declval<void const*>(),
            std::declval<std::size_t>())),
        std::uint64_t>>
    static R probe_call(int);
    template<class>
    static std::false_type probe_call(...);
    using has_call = decltype(probe_call<T>(0));

public:
    using type = std::integral_constant<bool,
        has_construct::value && has_call::value>;
};
template<class T>
class check_is_Progress
{
    // Selected only when an object of type U can be called with
    // two std::uint64_t arguments through its operator()
    template<class U, class R = decltype(
        std::declval<U>().operator()(
            std::declval<std::uint64_t>(),
            std::declval<std::uint64_t>()),
        std::true_type{})>
    static R probe_call(int);

    // Fallback when the call expression is ill-formed
    template<class>
    static std::false_type probe_call(...);

public:
    using type = decltype(probe_call<T>(0));
};
} // detail
/// Determine if `T` meets the requirements of @b `File`
///
/// This is an alias for the `type` member of `detail::check_is_File<T>`.
/// When `GENERATING_DOCS` is set, a placeholder declaration is presented
/// to the documentation tool instead of the alias.
template<class T>
#if GENERATING_DOCS
struct is_File : std::integral_constant<bool, ...>{};
#else
using is_File = typename detail::check_is_File<T>::type;
#endif
/// Determine if `T` meets the requirements of @b `Hasher`
///
/// This is an alias for the `type` member of `detail::check_is_Hasher<T>`.
/// When `GENERATING_DOCS` is set, a placeholder declaration is presented
/// to the documentation tool instead of the alias.
template<class T>
#if GENERATING_DOCS
struct is_Hasher : std::integral_constant<bool, ...>{};
#else
using is_Hasher = typename detail::check_is_Hasher<T>::type;
#endif
/// Determine if `T` meets the requirements of @b `Progress`
///
/// This is an alias for the `type` member of `detail::check_is_Progress<T>`.
/// When `GENERATING_DOCS` is set, a placeholder declaration is presented
/// to the documentation tool instead of the alias.
template<class T>
#if GENERATING_DOCS
struct is_Progress : std::integral_constant<bool, ...>{};
#else
using is_Progress = typename detail::check_is_Progress<T>::type;
#endif
} // nudb
#endif

117
include/nudb/create.hpp Normal file
View File

@@ -0,0 +1,117 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_CREATE_HPP
#define NUDB_CREATE_HPP
#include <nudb/native_file.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstring>
#include <random>
#include <stdexcept>
#include <utility>
namespace nudb {
/** Return a random salt.
This function will use the system provided random
number device to generate a uniformly distributed
64-bit unsigned value suitable for use as the salt
value in a call to @ref create.
@return The generated salt value.
*/
template<class = void>
std::uint64_t
make_salt();
/** Create a new database.
This function creates a set of new database files with
the given parameters. The files must not already exist or
else an error is returned.
If an error occurs while the files are being created,
the function attempts to remove the files before
returning.
@par Example
@code
error_code ec;
create<xxhasher>(
"db.dat", "db.key", "db.log",
1, make_salt(), 8, 4096, 0.5f, ec);
@endcode
@par Template Parameters
@tparam Hasher The hash function to use. This type must
meet the requirements of @b Hasher. The same hash
function must be used every time the database is opened,
or else an error is returned. The provided @ref xxhasher
is a suitable general purpose hash function.
@tparam File The type of file to use. Use the default of
@ref native_file unless customizing the file behavior.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param log_path The path to the log file.
@param appnum A caller-defined value stored in the file
headers. When opening the database, the same value is
preserved and returned to the caller.
@param salt A random unsigned integer used to permute
the hash function to make it unpredictable. The function
@ref make_salt returns a suitable value.
@param key_size The number of bytes in each key.
@param blockSize The size of a key file block. Larger
blocks hold more keys but require more I/O cycles per
operation. The ideal block size is the largest size that
may be read in a single I/O cycle, and is device dependent.
The function @ref block_size returns a suitable
value for the volume of a given path.
@param load_factor A number between zero and one
representing the average bucket occupancy (number of
items). A value of 0.5 is perfect. Lower numbers
waste space, and higher numbers produce negligible
savings at the cost of increased I/O cycles.
@param ec Set to the error, if any occurred.
@param args Optional arguments passed to @b File constructors.
*/
template<
class Hasher,
class File = native_file,
class... Args
>
void
create(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
std::uint64_t appnum,
std::uint64_t salt,
nsize_t key_size,
nsize_t blockSize,
float load_factor,
error_code& ec,
Args&&... args);
} // nudb
#include <nudb/impl/create.ipp>
#endif

View File

@@ -0,0 +1,296 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_ARENA_HPP
#define NUDB_DETAIL_ARENA_HPP
#include <boost/assert.hpp>
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <memory>
#if NUDB_DEBUG_ARENA
#include <beast/unit_test/dstream.hpp>
#include <iostream>
#endif
namespace nudb {
namespace detail {
/* Memory manager that hands out pieces of large blocks.

   The allocation rate, in bytes per second, is measured and
   the block size is tuned so that one block holds about one
   second's worth of allocations.
*/
template<class = void>
class arena_t
{
    using clock_type = std::chrono::steady_clock;
    using time_point = typename clock_type::time_point;

    class element;

    char const* label_;                     // diagnostic
    std::size_t alloc_ = 0;                 // block size
    std::size_t used_ = 0;                  // bytes allocated
    element* list_ = nullptr;               // list of blocks
    time_point when_ = clock_type::now();

public:
    explicit
    arena_t(char const* label = "");

    // Move construction is the only supported transfer
    arena_t(arena_t&& other);
    arena_t(arena_t const&) = delete;
    arena_t& operator=(arena_t&&) = delete;
    arena_t& operator=(arena_t const&) = delete;

    ~arena_t();

    // Free all memory
    void
    clear();

    // Set the allocation size
    void
    hint(std::size_t alloc)
    {
        alloc_ = alloc;
    }

    void
    periodic_activity();

    std::uint8_t*
    alloc(std::size_t n);

    template<class U>
    friend
    void
    swap(arena_t<U>& lhs, arena_t<U>& rhs);
};
//------------------------------------------------------------------------------
// Header placed at the front of each block. The bytes handed
// out by alloc() are the ones that follow this object.
template<class _>
class arena_t<_>::element
{
    std::size_t const capacity_;    // bytes available after the header
    std::size_t used_ = 0;          // bytes handed out so far
    element* next_;                 // next block in the list

public:
    element(std::size_t capacity, element* next)
        : capacity_(capacity)
        , next_(next)
    {
    }

    element*
    next() const
    {
        return next_;
    }

    std::size_t
    capacity() const
    {
        return capacity_;
    }

    // Bytes still available in this block
    std::size_t
    remain() const
    {
        return capacity_ - used_;
    }

    // Mark the whole block as available again
    void
    clear()
    {
        used_ = 0;
    }

    std::uint8_t*
    alloc(std::size_t n);
};
// Take n bytes from this block, or return nullptr when
// fewer than n bytes remain.
template<class _>
std::uint8_t*
arena_t<_>::element::
alloc(std::size_t n)
{
    if(remain() < n)
        return nullptr;
    // The usable storage begins immediately after this header
    auto const base =
        reinterpret_cast<std::uint8_t*>(this + 1);
    auto const result = base + used_;
    used_ += n;
    return result;
}
//------------------------------------------------------------------------------
// Construct an empty arena; the label is kept for diagnostics.
template<class _>
arena_t<_>::
arena_t(char const* label)
    : label_(label)
{
}
// Release every block still owned by the arena.
template<class _>
arena_t<_>::
~arena_t()
{
    clear();
}
// Take over the blocks and tuning state of `other`.
template<class _>
arena_t<_>::
arena_t(arena_t&& other)
    : label_(other.label_)
    , alloc_(other.alloc_)
    , used_(other.used_)
    , list_(other.list_)
    , when_(other.when_)
{
    // Leave the source empty, with its timing restarted
    other.list_ = nullptr;
    other.used_ = 0;
    other.alloc_ = 0;
    other.when_ = clock_type::now();
}
// Destroy and free every block, and reset the byte count.
template<class _>
void
arena_t<_>::
clear()
{
    used_ = 0;
    for(auto e = list_; e != nullptr; e = list_)
    {
        // Unlink first, the block is gone after the delete
        list_ = e->next();
        e->~element();
        delete[] reinterpret_cast<std::uint8_t*>(e);
    }
}
// Re-tune the block size from the measured allocation rate.
// Does nothing when called less than 500ms after the previous
// adjustment.
template<class _>
void
arena_t<_>::
periodic_activity()
{
    auto const now = clock_type::now();
    auto const elapsed = now - when_;
    if(elapsed < std::chrono::milliseconds{500})
        return;
    when_ = now;
    // used_ divided by the elapsed time in seconds, rounded up
    auto const seconds = std::chrono::duration_cast<
        std::chrono::duration<float>>(elapsed).count();
    auto const rate = static_cast<std::size_t>(
        std::ceil(used_ / seconds));
#if NUDB_DEBUG_ARENA
    beast::unit_test::dstream dout{std::cout};
    // Counts the blocks in a list
    auto const size =
        [](element* e)
        {
            std::size_t n = 0;
            for(; e; e = e->next())
                ++n;
            return n;
        };
#endif
    if(rate >= alloc_ * 2)
    {
        // adjust up
        alloc_ = std::max(rate, alloc_ * 2);
#if NUDB_DEBUG_ARENA
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ << " UP"
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
#endif
    }
    else if(rate <= alloc_ / 2)
    {
        // adjust down
        alloc_ /= 2;
#if NUDB_DEBUG_ARENA
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ << " DOWN"
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
#endif
    }
#if NUDB_DEBUG_ARENA
    else
    {
        // unchanged, report only
        dout << label_ << ": "
            "rate=" << rate <<
            ", alloc=" << alloc_ <<
            ", nused=" << used_ <<
            ", used=" << size(list_) <<
            "\n";
    }
#endif
}
// Allocate n bytes, rounded up to a multiple of 8. The newest
// block is tried first; when it has no room a new block is
// pushed onto the front of the list.
template<class _>
std::uint8_t*
arena_t<_>::
alloc(std::size_t n)
{
    // Undefined behavior: Zero byte allocations
    BOOST_ASSERT(n != 0);
    n = (n + 7) & ~static_cast<std::size_t>(7);
    if(list_ != nullptr)
    {
        if(auto const p = list_->alloc(n))
        {
            used_ += n;
            return p;
        }
    }
    // The new block must hold at least this request
    auto const capacity = std::max(alloc_, n);
    auto const raw =
        new std::uint8_t[sizeof(element) + capacity];
    list_ = ::new(raw) element{capacity, list_};
    used_ += n;
    return list_->alloc(n);
}
// Exchange the blocks and byte counts of two arenas.
// alloc_ and when_ are deliberately left with their arenas.
template<class _>
void
swap(arena_t<_>& lhs, arena_t<_>& rhs)
{
    using std::swap;
    swap(lhs.list_, rhs.list_);
    swap(lhs.used_, rhs.used_);
}
// Convenience alias for arena_t with its default template argument
using arena = arena_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,473 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BUCKET_HPP
#define NUDB_DETAIL_BUCKET_HPP
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/field.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdint>
#include <cstring>
namespace nudb {
namespace detail {
// Returns bucket index given hash, buckets, and modulus
//
// The hash is reduced by the modulus; a result that is not below
// the bucket count is lowered by half the modulus.
//
inline
nbuck_t
bucket_index(nhash_t h, nbuck_t buckets, std::uint64_t modulus)
{
    BOOST_ASSERT(modulus <= 0x100000000ULL);
    auto const r = h % modulus;
    return static_cast<nbuck_t>(
        r < buckets ? r : r - modulus / 2);
}
//------------------------------------------------------------------------------
// Tag type selecting the constructor that makes an empty bucket
struct empty_t
{
    constexpr empty_t() = default;
};

// The tag value to pass to that constructor
static constexpr empty_t empty{};
// Allows inspection and manipulation of bucket blobs in memory
//
// The object does not own the blob; it keeps a pointer to it
// together with cached copies of the count and spill fields.
template<class = void>
class bucket_t
{
    nsize_t block_size_;        // Size of a key file block
    nkey_t size_;               // Current key count
    noff_t spill_;              // Offset of next spill record or 0
    std::uint8_t* p_;           // Pointer to the bucket blob

public:
    // A decoded bucket entry
    struct value_type
    {
        noff_t offset;
        nhash_t hash;
        nsize_t size;
    };

    bucket_t() = default;
    bucket_t(bucket_t const&) = default;
    bucket_t& operator=(bucket_t const&) = default;

    // Attach to an existing blob and load its count and spill
    bucket_t(nsize_t block_size, void* p);

    // Attach to a blob and clear it
    bucket_t(nsize_t block_size, void* p, empty_t);

    nsize_t
    block_size() const
    {
        return block_size_;
    }

    // Serialized bucket size.
    // Excludes empty
    nsize_t
    actual_size() const
    {
        return bucket_size(size_);
    }

    bool
    empty() const
    {
        return size_ == 0;
    }

    bool
    full() const
    {
        return size_ >=
            detail::bucket_capacity(block_size_);
    }

    nkey_t
    size() const
    {
        return size_;
    }

    // Returns offset of next spill record or 0
    //
    noff_t
    spill() const
    {
        return spill_;
    }

    // Set offset of next spill record
    //
    void
    spill(noff_t offset);

    // Clear contents of the bucket
    //
    void
    clear();

    // Returns the record for a key
    // entry without bounds checking.
    //
    value_type const
    at(nkey_t i) const;

    value_type const
    operator[](nkey_t i) const
    {
        return at(i);
    }

    // Returns index of entry with prefix
    // equal to or greater than the given prefix.
    //
    nkey_t
    lower_bound(nhash_t h) const;

    // Add an entry at the position given by lower_bound
    //
    void
    insert(noff_t offset, nsize_t size, nhash_t h);

    // Erase an element by index
    //
    void
    erase(nkey_t i);

    // Read a full bucket from the
    // file at the specified offset.
    //
    template<class File>
    void
    read(File& f, noff_t, error_code& ec);

    // Read a compact bucket
    //
    template<class File>
    void
    read(bulk_reader<File>& r, error_code& ec);

    // Write a compact bucket to the stream.
    // This only writes entries that are not empty.
    //
    void
    write(ostream& os) const;

    // Write a bucket to the file at the specified offset.
    // The full block_size() bytes are written.
    //
    template<class File>
    void
    write(File& f,noff_t offset, error_code& ec) const;

private:
    // Update size and spill in the blob
    void
    update();
};
//------------------------------------------------------------------------------
// Attach to an existing blob and load the cached
// count and spill fields from its header.
template<class _>
bucket_t<_>::
bucket_t(nsize_t block_size, void* p)
    : block_size_(block_size)
    , p_(reinterpret_cast<std::uint8_t*>(p))
{
    // Bucket Record
    istream header(p_, block_size);
    detail::read<uint16_t>(header, size_);      // Count
    detail::read<uint48_t>(header, spill_);     // Spill
}
// Attach to a blob and make it an empty bucket:
// the whole block is zeroed by clear().
template<class _>
bucket_t<_>::
bucket_t(nsize_t block_size, void* p, empty_t)
    : block_size_(block_size)
    , size_(0)
    , spill_(0)
    , p_(reinterpret_cast<std::uint8_t*>(p))
{
    clear();
}
// Record the offset of the next spill record and
// write the header fields back to the blob.
template<class _>
void
bucket_t<_>::
spill(noff_t offset)
{
    spill_ = offset;
    update();
}
// Zero the entire block and reset the cached header fields.
template<class _>
void
bucket_t<_>::
clear()
{
    std::memset(p_, 0, block_size_);
    size_ = 0;
    spill_ = 0;
}
// Decode entry i from the blob. The index is not checked.
template<class _>
auto
bucket_t<_>::
at(nkey_t i) const ->
    value_type const
{
    // Bucket Record header
    auto const header =
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    // Bucket Entry
    auto const entry =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Prefix
    detail::istream is{p_ + header + i * entry, entry};
    value_type v;
    detail::read<uint48_t>(is, v.offset);   // Offset
    detail::read_size48(is, v.size);        // Size
    detail::read<f_hash>(is, v.hash);       // Hash
    return v;
}
// Binary search for the first entry whose stored hash
// is not less than h. Returns size_ when there is none.
template<class _>
nkey_t
bucket_t<_>::
lower_bound(nhash_t h) const
{
    // Distance between consecutive bucket entries
    auto const stride =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    // Address of the hash field of entry 0
    auto const hashes = p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size +         // Spill
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size;          // Size
    nkey_t lo = 0;
    nkey_t len = size_;
    while(len > 0)
    {
        nkey_t const half = len / 2;
        nkey_t const mid = lo + half;
        nhash_t probe;
        readp<f_hash>(hashes + mid * stride, probe);
        if(probe < h)
        {
            // Continue in the upper part, past mid
            lo = mid + 1;
            len -= half + 1;
        }
        else
        {
            len = half;
        }
    }
    return lo;
}
// Insert an entry at the position given by lower_bound(h),
// moving the entries after it up by one slot.
// No check is made for available space.
template<class _>
void
bucket_t<_>::
insert(
    noff_t offset, nsize_t size, nhash_t h)
{
    auto const pos = lower_bound(h);
    // Start of the entries, past the Bucket Record header
    auto const entries = p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    // Bucket Entry
    auto const w =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    auto const slot = entries + pos * w;
    // Open a gap at the insertion point
    std::memmove(slot + w, slot, (size_ - pos) * w);
    ++size_;
    update();
    // Fill in the new entry
    ostream os{slot, w};
    detail::write<uint48_t>(os, offset);    // Offset
    detail::write<uint48_t>(os, size);      // Size
    detail::write<f_hash>(os, h);           // Prefix
}
// Remove entry i, moving the later entries down by one
// slot and zeroing the slot that becomes unused.
template<class _>
void
bucket_t<_>::
erase(nkey_t i)
{
    // Start of the entries, past the Bucket Record header
    auto const entries = p_ +
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    // Bucket Entry
    auto const w =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    --size_;
    // Nothing to move when the last entry is removed
    if(i < size_)
    {
        std::memmove(
            entries + i * w,
            entries + (i + 1) * w,
            (size_ - i) * w);
    }
    std::memset(entries + size_ * w, 0, w);
    update();
}
// Read a bucket from the file at the given offset, then load
// the count and spill fields. Sets ec to
// error::invalid_bucket_size when the count exceeds the capacity.
template<class _>
template<class File>
void
bucket_t<_>::
read(File& f, noff_t offset, error_code& ec)
{
    auto const capacity = bucket_capacity(block_size_);
    // Excludes padding to block size
    f.read(offset, p_, bucket_size(capacity), ec);
    if(ec)
        return;
    istream is{p_, block_size_};
    detail::read<std::uint16_t>(is, size_);     // Count
    detail::read<uint48_t>(is, spill_);         // Spill
    if(size_ > capacity)
        ec = error::invalid_bucket_size;
}
// Read a compact bucket from the bulk reader.
//
// The count and spill fields are read first, then the entries
// they announce are copied into the blob.
//
// @param r  The reader positioned at a compact bucket record.
// @param ec Set to the error, if any occurred. Set to
//           error::invalid_bucket_size when the stored count
//           exceeds the capacity for block_size(), matching the
//           overload that reads from a File.
template<class _>
template<class File>
void
bucket_t<_>::
read(bulk_reader<File>& r, error_code& ec)
{
    // Bucket Record(compact)
    auto is = r.prepare(
        detail::field<std::uint16_t>::size +
        detail::field<uint48_t>::size, ec);
    if(ec)
        return;
    detail::read<std::uint16_t>(is, size_); // Count
    detail::read<uint48_t>(is, spill_);     // Spill
    update();
    // The entries are copied into a blob of block_size_ bytes.
    // A count read from the stream that exceeds the capacity
    // would write past the end of the blob, so reject it here.
    if(size_ > bucket_capacity(block_size_))
    {
        ec = error::invalid_bucket_size;
        return;
    }
    // Excludes empty bucket entries
    auto const w = size_ * (
        field<uint48_t>::size +             // Offset
        field<uint48_t>::size +             // Size
        field<f_hash>::size);               // Hash
    is = r.prepare(w, ec);
    if(ec)
        return;
    std::memcpy(p_ +
        field<std::uint16_t>::size +        // Count
        field<uint48_t>::size,              // Spill
        is.data(w), w);                     // Entries
}
// Copy the bucket record into the stream without padding
// up to the block size. This is called to write to the
// data file.
template<class _>
void
bucket_t<_>::
write(ostream& os) const
{
    auto const n = actual_size();
    // Bucket Record
    std::memcpy(os.data(n), p_, n);
}
// Write the full block to the file at the given offset.
// The part of the block past the bucket record is zeroed
// first, to make the key file size always a multiple of
// the block size.
template<class _>
template<class File>
void
bucket_t<_>::
write(File& f, noff_t offset, error_code& ec) const
{
    auto const used = actual_size();
    std::memset(p_ + used, 0, block_size_ - used);
    // Bucket Record
    f.write(offset, p_, block_size_, ec);
}
// Write the cached count and spill fields to the
// start of the blob.
template<class _>
void
bucket_t<_>::
update()
{
    // Bucket Record
    ostream header{p_, block_size_};
    detail::write<std::uint16_t>(header, size_);    // Count
    detail::write<uint48_t>(header, spill_);        // Spill
}
// Convenience alias for bucket_t with its default template argument
using bucket = bucket_t<>;
//------------------------------------------------------------------------------
// Spill bucket if full.
// A spill record holding the bucket is written through the
// writer, then the bucket is cleared and its spill offset is
// set to the position of the bucket inside that record.
//
template<class File>
void
maybe_spill(
    bucket& b, bulk_writer<File>& w, error_code& ec)
{
    if(! b.full())
        return;
    // Spill Record
    auto const offset = w.offset();
    auto os = w.prepare(
        field<uint48_t>::size +         // Zero
        field<uint16_t>::size +         // Size
        b.actual_size(), ec);
    if(ec)
        return;
    write<uint48_t>(os, 0ULL);                      // Zero
    write<std::uint16_t>(os, b.actual_size());      // Size
    // Position of the bucket, just past the two fields above
    auto const spill = offset + os.size();
    b.write(os);                                    // Bucket
    // Update bucket
    b.clear();
    b.spill(spill);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,86 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BUFFER_HPP
#define NUDB_DETAIL_BUFFER_HPP
#include <atomic>
#include <cstdint>
#include <memory>
namespace nudb {
namespace detail {
// Simple growable memory buffer
//
// Owns a heap array of bytes and remembers the size most
// recently requested. The type is move-only; the move
// operations are noexcept and leave the source with size 0.
class buffer
{
private:
    std::size_t size_ = 0;
    std::unique_ptr<std::uint8_t[]> buf_;

public:
    ~buffer() = default;
    buffer() = default;
    buffer(buffer const&) = delete;
    buffer& operator=(buffer const&) = delete;

    // Allocate a buffer of n bytes. The contents are uninitialized.
    explicit
    buffer(std::size_t n)
        : size_(n)
        , buf_(new std::uint8_t[n])
    {
    }

    // Take ownership of the storage of `other`.
    buffer(buffer&& other) noexcept
        : size_(other.size_)
        , buf_(std::move(other.buf_))
    {
        other.size_ = 0;
    }

    // Replace this buffer with the storage of `other`.
    // Assigning a buffer to itself leaves it unchanged.
    buffer&
    operator=(buffer&& other) noexcept
    {
        // Without this check a self-move would set size_ to 0
        // while the allocation is kept.
        if(this != &other)
        {
            size_ = other.size_;
            buf_ = std::move(other.buf_);
            other.size_ = 0;
        }
        return *this;
    }

    // Returns the size most recently requested.
    std::size_t
    size() const
    {
        return size_;
    }

    // Returns a pointer to the storage, or nullptr if there is none.
    std::uint8_t*
    get() const
    {
        return buf_.get();
    }

    // Make room for at least n bytes and set size() to n.
    // When the current size is smaller than n a new array is
    // allocated and the previous contents are discarded.
    void
    reserve(std::size_t n)
    {
        if(size_ < n)
            buf_.reset(new std::uint8_t[n]);
        size_ = n;
    }

    // BufferFactory
    // Returns storage for n bytes, reallocating as reserve() does.
    void*
    operator()(std::size_t n)
    {
        reserve(n);
        return buf_.get();
    }
};
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,196 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_BULKIO_HPP
#define NUDB_DETAIL_BULKIO_HPP
#include <nudb/type_traits.hpp>
#include <nudb/detail/buffer.hpp>
#include <nudb/detail/stream.hpp>
#include <nudb/error.hpp>
#include <algorithm>
#include <cstddef>
namespace nudb {
namespace detail {
// Scans a file in sequential large reads
//
// Reads from a File into an internal buffer and hands out
// istream views onto that buffer through prepare().
template<class File>
class bulk_reader
{
File& f_; // file being scanned (not owned)
buffer buf_; // holds the bytes most recently read from f_
noff_t last_; // size of file
noff_t offset_; // current position
std::size_t avail_; // bytes left to read in buf
std::size_t used_; // bytes consumed in buf
public:
// Construct a reader over `f`; see the definition for details
bulk_reader(File& f, noff_t offset,
noff_t last, std::size_t buffer_size);
// Returns the file position of the next byte prepare() will
// hand out: the position read up to, less what is buffered
// but not yet consumed.
noff_t
offset() const
{
return offset_ - avail_;
}
// Returns true once offset() has reached or passed `last`
bool
eof() const
{
return offset() >= last_;
}
// Returns a stream over the next `needed` bytes, reading
// from the file when the buffer does not hold enough.
istream
prepare(std::size_t needed, error_code& ec);
};
// Construct a reader over `f` that starts at file position
// `offset` and treats `last` as the end of readable data.
// The buffer is sized to `buffer_size` and starts out empty.
template<class File>
bulk_reader<File>::
bulk_reader(File& f, noff_t offset,
noff_t last, std::size_t buffer_size)
: f_(f)
, last_(last)
, offset_(offset)
, avail_(0)
, used_(0)
{
buf_.reserve(buffer_size);
}
// Returns a stream over the next `needed` bytes of the file.
//
// If fewer than `needed` bytes are buffered, the unconsumed
// bytes are moved to the front of the buffer (growing it when
// `needed` exceeds its size) and the rest of the buffer is
// filled from the file.
//
// On failure `ec` is set and a default constructed stream is
// returned: error::short_read when the request extends past
// `last_`, otherwise whatever the file read reported.
template<class File>
istream
bulk_reader<File>::
prepare(std::size_t needed, error_code& ec)
{
if(needed > avail_)
{
// The bytes still missing would have to come from beyond last_
if(offset_ + needed - avail_ > last_)
{
ec = error::short_read;
return {};
}
if(needed > buf_.size())
{
// Request is larger than the buffer: switch to a bigger
// one, carrying over the unconsumed bytes.
buffer buf;
buf.reserve(needed);
std::memcpy(buf.get(),
buf_.get() + used_, avail_);
buf_ = std::move(buf);
}
else
{
// Slide the unconsumed bytes to the front (regions may overlap)
std::memmove(buf_.get(),
buf_.get() + used_, avail_);
}
// Fill the free space, but never read past last_
auto const n = std::min(buf_.size() - avail_,
static_cast<std::size_t>(last_ - offset_));
f_.read(offset_, buf_.get() + avail_, n, ec);
if(ec)
return {};
offset_ += n;
avail_ += n;
used_ = 0;
}
// Hand out the next `needed` bytes and mark them consumed
istream is{buf_.get() + used_, needed};
used_ += needed;
avail_ -= needed;
return is;
}
//------------------------------------------------------------------------------
// Buffers file writes
// Caller must call flush manually at the end
//
// prepare() hands out ostream views onto an internal buffer;
// the buffered bytes are written to the file by flush().
template<class File>
class bulk_writer
{
File& f_; // file being written (not owned)
buffer buf_; // bytes waiting to be written
noff_t offset_; // current position
std::size_t used_; // bytes written to buf
public:
// Construct a writer over `f`; see the definition for details
bulk_writer(File& f, noff_t offset,
std::size_t buffer_size);
// Returns a stream over `needed` bytes of buffer space,
// flushing first when the buffer cannot hold them.
ostream
prepare(std::size_t needed, error_code& ec);
// Returns the number of bytes buffered
std::size_t
size()
{
return used_;
}
// Return current offset in file. This
// is advanced with each call to prepare.
noff_t
offset() const
{
return offset_ + used_;
}
// Caller must invoke flush manually in
// order to handle any error conditions.
void
flush(error_code& ec);
};
// Construct a writer that will place its first byte at file
// position `offset` of `f`. The buffer is sized to
// `buffer_size` and starts out empty.
template<class File>
bulk_writer<File>::
bulk_writer(File& f,
noff_t offset, std::size_t buffer_size)
: f_(f)
, offset_(offset)
, used_(0)
{
buf_.reserve(buffer_size);
}
// Returns a stream over `needed` bytes of buffer space.
//
// When the request does not fit behind the bytes already
// buffered, the buffer is flushed to the file first. If the
// flush fails, `ec` is set and a default constructed stream
// is returned.
template<class File>
ostream
bulk_writer<File>::
prepare(std::size_t needed, error_code& ec)
{
if(used_ + needed > buf_.size())
{
flush(ec);
if(ec)
return{};
}
// A request bigger than the whole buffer grows it. The flush
// above has already emptied the buffer in that case, so no
// pending bytes are lost when the storage is replaced.
if(needed > buf_.size())
buf_.reserve(needed);
ostream os(buf_.get() + used_, needed);
used_ += needed;
return os;
}
// Write all buffered bytes to the file.
//
// Does nothing (and leaves `ec` untouched) when the buffer
// is empty. The position and byte count are updated before
// the write is issued, so they advance even when the write
// reports an error through `ec`.
template<class File>
void
bulk_writer<File>::
flush(error_code& ec)
{
    if(! used_)
        return;
    auto const where = offset_;
    auto const count = used_;
    offset_ += used_;
    used_ = 0;
    f_.write(where, buf_.get(), count, ec);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,236 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_CACHE_HPP
#define NUDB_DETAIL_CACHE_HPP
#include <nudb/detail/arena.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <boost/iterator/transform_iterator.hpp>
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>
#include <unordered_map>
namespace nudb {
namespace detail {
// Associative container storing
// bucket blobs keyed by bucket index.
//
// Internally a map from bucket index to a raw pointer into
// memory obtained from an arena; iteration presents each
// entry as a (bucket index, bucket) pair.
//
template<class = void>
class cache_t
{
public:
using value_type = std::pair<nbuck_t, bucket>;
private:
using map_type =
std::unordered_map<nbuck_t, void*>;
// Function object used by the iterator to turn a stored
// (index, pointer) entry into a (index, bucket) pair.
struct transform
{
using argument_type =
typename map_type::value_type;
using result_type = value_type;
cache_t* cache_;
transform()
: cache_(nullptr)
{
}
explicit
transform(cache_t& cache)
: cache_(&cache)
{
}
// Wraps the stored pointer in a bucket of the cache's block size
value_type
operator()(argument_type const& e) const
{
return std::make_pair(e.first,
bucket{cache_->block_size_, e.second});
}
};
nsize_t key_size_ = 0;
nsize_t block_size_ = 0;
arena arena_;
map_type map_;
public:
using iterator = boost::transform_iterator<
transform, typename map_type::iterator,
value_type, value_type>;
cache_t(cache_t const&) = delete;
cache_t& operator=(cache_t&&) = delete;
cache_t& operator=(cache_t const&) = delete;
// Constructs a cache that will never have inserts
cache_t() = default;
cache_t(cache_t&& other);
explicit
cache_t(nsize_t key_size,
nsize_t block_size, char const* label);
// Returns the number of buckets stored
std::size_t
size() const
{
return map_.size();
}
iterator
begin()
{
return iterator{map_.begin(), transform{*this}};
}
iterator
end()
{
return iterator{map_.end(), transform{*this}};
}
// Returns true when no buckets are stored
bool
empty() const
{
return map_.empty();
}
// Remove all buckets
void
clear();
// Prepare for `n` buckets
void
reserve(std::size_t n);
// Forwards to the arena's periodic_activity
void
periodic_activity();
// Returns an iterator to bucket `n`, or end() if absent
iterator
find(nbuck_t n);
// Create an empty bucket
//
bucket
create(nbuck_t n);
// Insert a copy of a bucket.
//
iterator
insert(nbuck_t n, bucket const& b);
template<class U>
friend
void
swap(cache_t<U>& lhs, cache_t<U>& rhs);
};
// Move constructor: copies the sizes and takes over the
// arena and the map of `other`.
template<class _>
cache_t<_>::
cache_t(cache_t&& other)
: key_size_{other.key_size_}
, block_size_(other.block_size_)
, arena_(std::move(other.arena_))
, map_(std::move(other.map_))
{
}
// Construct an empty cache for the given key and block sizes.
// `label` is passed through to the arena.
template<class _>
cache_t<_>::
cache_t(nsize_t key_size,
nsize_t block_size, char const* label)
: key_size_(key_size)
, block_size_(block_size)
, arena_(label)
{
}
// Prepare for `n` buckets: hints the arena with the total
// number of bytes (n blocks) and reserves map space for n entries.
template<class _>
void
cache_t<_>::
reserve(std::size_t n)
{
arena_.hint(n * block_size_);
map_.reserve(n);
}
// Remove every bucket: clears both the arena that backs the
// bucket storage and the index map that points into it.
template<class _>
void
cache_t<_>::
clear()
{
arena_.clear();
map_.clear();
}
// Forwards to the arena's periodic_activity(); the cache
// itself does no additional work here.
template<class _>
void
cache_t<_>::
periodic_activity()
{
arena_.periodic_activity();
}
// Look up bucket `n`.
//
// Returns an iterator to the entry, or an iterator equal
// to end() when the bucket is not in the cache.
template<class _>
auto
cache_t<_>::
find(nbuck_t n) ->
    iterator
{
    // On a miss the map already yields its end iterator,
    // so the result can be wrapped directly.
    return iterator{map_.find(n), transform(*this)};
}
// Create an empty bucket for index `n`.
//
// One block is allocated from the arena and recorded under
// `n`; the returned bucket refers to that block.
// NOTE(review): if `n` is already present the map entry is
// not replaced (emplace), yet a bucket over the newly
// allocated block is still returned - confirm callers never
// create an index twice.
template<class _>
bucket
cache_t<_>::
create(nbuck_t n)
{
auto const p = arena_.alloc(block_size_);
map_.emplace(n, p);
return bucket{block_size_, p, detail::empty};
}
// Insert a copy of bucket `b` under index `n`.
//
// Storage of b.block_size() bytes is taken from the arena and
// the bucket is serialized into it. Returns an iterator to
// the map entry for `n`.
template<class _>
auto
cache_t<_>::
insert(nbuck_t n, bucket const& b) ->
    iterator
{
    auto const bytes = b.block_size();
    void* const storage = arena_.alloc(bytes);
    ostream os{storage, bytes};
    b.write(os);
    return iterator{
        map_.emplace(n, storage).first, transform(*this)};
}
// Exchange the complete state (sizes, arena and map) of two caches
template<class U>
void
swap(cache_t<U>& lhs, cache_t<U>& rhs)
{
// Bring std::swap into scope while still allowing
// swap overloads of the member types to be found
using std::swap;
swap(lhs.key_size_, rhs.key_size_);
swap(lhs.block_size_, rhs.block_size_);
swap(lhs.arena_, rhs.arena_);
swap(lhs.map_, rhs.map_);
}
// Convenience alias for the default instantiation of cache_t
using cache = cache_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,93 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_ENDIAN_HPP
#define NUDB_DETAIL_ENDIAN_HPP
#include <cstdint>
#include <type_traits>
namespace nudb {
namespace detail {
// This is a modified work, original implementation
// by Howard Hinnant <howard.hinnant@gmail.com>
//
// "This should be standardized" - Howard
// Endian provides answers to the following questions:
// 1. Is this system big or little endian?
// 2. Is the "desired endian" of some class or function the same as the
// native endian?
// Byte order constants. `native` always compares equal to
// either `little` or `big` (enforced by the assertions below).
enum class endian
{
#ifdef _MSC_VER
// With MSVC the native order is hard-coded as little endian
big = 1,
little = 0,
native = little
#else
// Otherwise the values come from the compiler's
// predefined byte order macros
native = __BYTE_ORDER__,
little = __ORDER_LITTLE_ENDIAN__,
big = __ORDER_BIG_ENDIAN__
#endif
};
// Compile-time boolean type: true_type on little endian
// targets, false_type otherwise. Used for tag dispatch.
using is_little_endian =
std::integral_constant<bool,
endian::native == endian::little>;
// Mixed or unknown byte orders are rejected at compile time
static_assert(
endian::native == endian::little || endian::native == endian::big,
"endian::native shall be one of endian::little or endian::big");
static_assert(
endian::big != endian::little,
"endian::big and endian::little shall have different values");
// The pepper got baked into the file format as
// the hash of the little endian salt so now we
// need this function.
//
// Overload selected when the target is NOT little endian.
// Returns an integer whose in-memory bytes, from lowest
// address to highest, are the bytes of `v` from least
// significant to most significant.
//
// The bytes are stored through an unsigned char pointer.
// That is a permitted way to write an object's
// representation, unlike reading back a union member
// other than the one last written.
//
template<class = void>
std::uint64_t
to_little_endian(std::uint64_t v, std::false_type)
{
    std::uint64_t result = 0;
    auto const out = reinterpret_cast<unsigned char*>(&result);
    for(int i = 0; i < 8; ++i)
        out[i] = static_cast<unsigned char>((v >> (8 * i)) & 0xff);
    return result;
}
// Overload selected on little endian targets: the value is
// already laid out as required and is returned unchanged.
inline
std::uint64_t
to_little_endian(std::uint64_t v, std::true_type)
{
return v;
}
// Returns `v` arranged so that its in-memory bytes are in
// little endian order. Picks one of the two overloads above
// at compile time, based on the target's byte order.
inline
std::uint64_t
to_little_endian(std::uint64_t v)
{
return to_little_endian(v, is_little_endian{});
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,265 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_FIELD_HPP
#define NUDB_FIELD_HPP
#include <nudb/detail/stream.hpp>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <type_traits>
namespace nudb {
namespace detail {
// A 24-bit integer (tag type, only ever named, never defined here)
struct uint24_t;

// A 48-bit integer (tag type, only ever named, never defined here)
struct uint48_t;

// These metafunctions describe the binary format of fields on disk
//
// For each supported type:
//      size    the number of bytes the field occupies
//      max     the largest value the field can hold
//
template<class T>
struct field;

template<>
struct field<std::uint8_t>
{
    static constexpr std::size_t size = 1;
    static constexpr std::uint64_t max = (std::uint64_t{1} << 8) - 1;
};

template<>
struct field<std::uint16_t>
{
    static constexpr std::size_t size = 2;
    static constexpr std::uint64_t max = (std::uint64_t{1} << 16) - 1;
};

template<>
struct field<uint24_t>
{
    static constexpr std::size_t size = 3;
    static constexpr std::uint64_t max = (std::uint64_t{1} << 24) - 1;
};

template<>
struct field<std::uint32_t>
{
    static constexpr std::size_t size = 4;
    static constexpr std::uint64_t max = (std::uint64_t{1} << 32) - 1;
};

template<>
struct field<uint48_t>
{
    static constexpr std::size_t size = 6;
    static constexpr std::uint64_t max = (std::uint64_t{1} << 48) - 1;
};

template<>
struct field<std::uint64_t>
{
    static constexpr std::size_t size = 8;
    static constexpr std::uint64_t max = ~std::uint64_t{0};
};
// read field from memory
// Read an 8-bit field from the memory at `v` into `u`
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint8_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    u = *static_cast<std::uint8_t const*>(v);
}
// Read a 16-bit field from the memory at `v` into `u`.
// The most significant byte is stored first.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint16_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const b = static_cast<std::uint8_t const*>(v);
    T const t = static_cast<T>((T(b[0]) << 8) | T(b[1]));
    u = t;
}
// Read a 24-bit field from the memory at `v` into `u`.
// The most significant byte is stored first.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint24_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const b = static_cast<std::uint8_t const*>(v);
    std::uint32_t t =
        (std::uint32_t(b[0]) << 16) |
        (std::uint32_t(b[1]) <<  8) |
         std::uint32_t(b[2]);
    u = t;
}
// Read a 32-bit field from the memory at `v` into `u`.
// The most significant byte is stored first.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint32_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const b = static_cast<std::uint8_t const*>(v);
    T t = 0;
    for(int i = 0; i < 4; ++i)
        t = (t << 8) | T(b[i]);
    u = t;
}
// Read a 48-bit field from the memory at `v` into `u`.
// The most significant byte is stored first.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint48_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const b = static_cast<std::uint8_t const*>(v);
    std::uint64_t t = 0;
    for(int i = 0; i < 6; ++i)
        t = (t << 8) | std::uint64_t(b[i]);
    u = t;
}
// Read a 64-bit field from the memory at `v` into `u`.
// The most significant byte is stored first.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint64_t>::value>::type* = nullptr>
void
readp(void const* v, U& u)
{
    auto const b = static_cast<std::uint8_t const*>(v);
    T t = 0;
    for(int i = 0; i < 8; ++i)
        t = (t << 8) | T(b[i]);
    u = t;
}
// read field from istream
// Reads one field of on-disk type T from the stream into `u`.
// The stream is asked for field<T>::size bytes, which are then
// decoded by the matching readp overload.
template<class T, class U>
void
read(istream& is, U& u)
{
readp<T>(is.data(field<T>::size), u);
}
// Reads a 48-bit field from the stream into a std::size_t.
//
// The value is asserted to fit in 32 bits and is then narrowed
// through std::uint32_t. NOTE(review): if the assertion is
// compiled out, a larger value silently loses its upper bits.
inline
void
read_size48(istream& is, std::size_t& u)
{
std::uint64_t v;
read<uint48_t>(is, v);
BOOST_ASSERT(v <= std::numeric_limits<std::uint32_t>::max());
u = static_cast<std::uint32_t>(v);
}
// write field to ostream
// Write `u` to the stream as an 8-bit field.
// Asserts that the value fits in the field.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint8_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    std::uint8_t* const out = os.data(field<T>::size);
    out[0] = static_cast<std::uint8_t>(u);
}
// Write `u` to the stream as a 16-bit field, most
// significant byte first. Asserts that the value fits.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint16_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<T>(u);
    std::uint8_t* const out = os.data(field<T>::size);
    out[0] = (t >> 8) & 0xff;
    out[1] =  t       & 0xff;
}
// Write `u` to the stream as a 24-bit field, most
// significant byte first. Asserts that the value fits.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint24_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<std::uint32_t>(u);
    std::uint8_t* const out = os.data(field<T>::size);
    out[0] = (t >> 16) & 0xff;
    out[1] = (t >>  8) & 0xff;
    out[2] =  t        & 0xff;
}
// Write `u` to the stream as a 32-bit field, most
// significant byte first. Asserts that the value fits.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint32_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<T>(u);
    std::uint8_t* out = os.data(field<T>::size);
    for(int shift = 24; shift >= 0; shift -= 8)
        *out++ = (t >> shift) & 0xff;
}
// Write `u` to the stream as a 48-bit field, most
// significant byte first. Asserts that the value fits.
template<class T, class U, typename std::enable_if<
    std::is_same<T, uint48_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    BOOST_ASSERT(u <= field<T>::max);
    auto const t = static_cast<std::uint64_t>(u);
    std::uint8_t* out = os.data(field<T>::size);
    for(int shift = 40; shift >= 0; shift -= 8)
        *out++ = (t >> shift) & 0xff;
}
// Write `u` to the stream as a 64-bit field, most
// significant byte first. No range assertion is made
// for this width.
template<class T, class U, typename std::enable_if<
    std::is_same<T, std::uint64_t>::value>::type* = nullptr>
void
write(ostream& os, U u)
{
    auto const t = static_cast<T>(u);
    std::uint8_t* out = os.data(field<T>::size);
    for(int shift = 56; shift >= 0; shift -= 8)
        *out++ = (t >> shift) & 0xff;
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,629 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_FORMAT_HPP
#define NUDB_DETAIL_FORMAT_HPP
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/buffer.hpp>
#include <nudb/detail/endian.hpp>
#include <nudb/detail/field.hpp>
#include <nudb/detail/stream.hpp>
#include <boost/assert.hpp>
#include <algorithm>
#include <array>
#include <limits>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <type_traits>
namespace nudb {
namespace detail {
// Format of the nudb files:
/*
Integer sizes
block_size less than 32 bits (maybe restrict it to 16 bits)
buckets more than 32 bits
capacity (same as bucket index)
file offsets 63 bits
hash up to 64 bits (48 currently)
item index less than 32 bits (index of item in bucket)
modulus (same as buckets)
value size up to 32 bits (or 32-bit builds can't read it)
*/
// File format version. The verify() functions reject any
// header whose version field differs from this value.
static std::size_t constexpr currentVersion = 2;
// In-memory form of the data file header.
//
// `size` is the number of bytes the header occupies on disk,
// which is unrelated to sizeof(dat_file_header).
struct dat_file_header
{
static std::size_t constexpr size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
2 + // KeySize
64; // (Reserved)
char type[8]; // file type tag; "nudb.dat" for a data file
std::size_t version; // stored on disk as a 16-bit field
std::uint64_t uid; // must match the key file (see verify)
std::uint64_t appnum; // must match the key file (see verify)
nsize_t key_size; // stored on disk as a 16-bit field; must be at least 1
};
// In-memory form of the key file header.
//
// `size` is the number of bytes the header fields occupy on
// disk. The members after "Computed values" are not stored
// in the file; they are derived when the header is read.
struct key_file_header
{
static std::size_t constexpr size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
2 + // KeySize
8 + // Salt
8 + // Pepper
2 + // BlockSize
2 + // LoadFactor
56; // (Reserved)
char type[8]; // file type tag; "nudb.key" for a key file
std::size_t version; // stored on disk as a 16-bit field
std::uint64_t uid;
std::uint64_t appnum;
nsize_t key_size; // stored on disk as a 16-bit field
std::uint64_t salt;
std::uint64_t pepper; // must equal pepper<Hasher>(salt) (see verify)
nsize_t block_size; // stored on disk as a 16-bit field
std::size_t load_factor; // stored on disk as a 16-bit field
// Computed values
nkey_t capacity; // Entries per bucket
nbuck_t buckets; // Number of buckets
nbuck_t modulus; // pow(2,ceil(log2(buckets)))
};
// In-memory form of the log file header.
//
// `size` is the number of bytes the header occupies on disk,
// which is unrelated to sizeof(log_file_header).
struct log_file_header
{
static std::size_t constexpr size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
2 + // KeySize
8 + // Salt
8 + // Pepper
2 + // BlockSize
8 + // KeyFileSize
8; // DataFileSize
char type[8]; // file type tag; "nudb.log" for a log file
std::size_t version; // stored on disk as a 16-bit field
std::uint64_t uid;
std::uint64_t appnum;
nsize_t key_size; // stored on disk as a 16-bit field
std::uint64_t salt;
std::uint64_t pepper; // must equal pepper<Hasher>(salt) (see verify)
nsize_t block_size; // stored on disk as a 16-bit field
noff_t key_file_size; // stored on disk as a 64-bit field
noff_t dat_file_size; // stored on disk as a 64-bit field
};
// Type used to store hashes in buckets.
// This can be smaller than the output
// of the hash function.
//
using f_hash = uint48_t;
// The stored hash must not be wider than the in-memory hash type
static_assert(field<f_hash>::size <=
sizeof(nhash_t), "");
// Reduces a full hash value to the width of stored type T.
// Only declared here; each supported width is a specialization.
template<class T>
nhash_t
make_hash(nhash_t h);
// 48-bit specialization: discards the low 16 bits of `h`
// and keeps the next 48 bits.
template<>
inline
nhash_t
make_hash<uint48_t>(nhash_t h)
{
return(h>>16)&0xffffffffffff;
}
// Returns the hash of a key given the salt.
// Note: The hash is expressed in f_hash units
//
// A Hasher is constructed from `salt` for this one call.
template<class Hasher>
inline
nhash_t
hash(void const* key, nsize_t key_size, std::uint64_t salt)
{
Hasher h{salt};
return make_hash<f_hash>(h(key, key_size));
}
// Returns the hash of a key using an existing hasher `h`.
// As with the salt overload, the result is in f_hash units.
template<class Hasher>
inline
nhash_t
hash(void const* key, nsize_t key_size, Hasher const& h)
{
return make_hash<f_hash>(h(key, key_size));
}
// Computes pepper from salt
//
// The pepper is the full (unreduced) output of a Hasher
// seeded with `salt`, applied to the 8 bytes of the salt
// in little endian order.
template<class Hasher>
std::uint64_t
pepper(std::uint64_t salt)
{
// Fix the byte order so the result is the same on every target
auto const v = to_little_endian(salt);
Hasher h{salt};
return h(&v, sizeof(v));
}
// Returns the actual size of a bucket.
// This can be smaller than the block size.
//
// The size is that of the fixed bucket header plus
// `capacity` entries.
//
template<class = void>
nsize_t
bucket_size(nkey_t capacity)
{
    // Bucket Record
    auto const header_size =
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    auto const entry_size =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    return header_size + capacity * entry_size;
}
// Returns the number of entries that fit in a bucket
//
// Returns 0 when the block is smaller than the key file
// header or smaller than the fixed bucket header. The
// result is clamped to the largest value nkey_t can hold.
//
template<class = void>
nkey_t
bucket_capacity(nsize_t block_size)
{
    // Bucket Record
    auto const header_size =
        field<std::uint16_t>::size +    // Count
        field<uint48_t>::size;          // Spill
    auto const entry_size =
        field<uint48_t>::size +         // Offset
        field<uint48_t>::size +         // Size
        field<f_hash>::size;            // Hash
    if(block_size < key_file_header::size ||
            block_size < header_size)
        return 0;
    auto const n = (block_size - header_size) / entry_size;
    auto const limit = std::numeric_limits<nkey_t>::max();
    BOOST_ASSERT(n <= limit);
    return static_cast<nkey_t>(
        std::min<std::size_t>(limit, n));
}
// Returns the number of bytes occupied by a value record
// VFALCO TODO Fix this
//
// `size` is the number of data bytes and `key_size` the
// number of key bytes; a 48-bit size field precedes them.
inline
std::size_t
value_size(std::size_t size,
std::size_t key_size)
{
// Data Record
return
field<uint48_t>::size + // Size
key_size + // Key
size; // Data
}
// Returns the closest power of 2 not less than x
//
// An input of 0 yields 1. The result is only meaningful
// when that power of two is representable in T.
template<class T>
T
ceil_pow2(T x)
{
    // Each mask selects the upper half of the bit range that is
    // still under consideration: 32, 16, 8, 4, 2 and 1 bits wide.
    static const unsigned long long masks[6] = {
        0xFFFFFFFF00000000ull,
        0x00000000FFFF0000ull,
        0x000000000000FF00ull,
        0x00000000000000F0ull,
        0x000000000000000Cull,
        0x0000000000000002ull
    };
    // Round up by one extra doubling unless x has at most one bit set
    int exponent = ((x & (x - 1)) == 0) ? 0 : 1;
    // Binary search for the position of the highest set bit
    int width = 32;
    for(auto const mask : masks)
    {
        int const step = ((x & mask) == 0) ? 0 : width;
        exponent += step;
        x >>= step;
        width >>= 1;
    }
    return T{1} << exponent;
}
//------------------------------------------------------------------------------
// Read data file header from stream
//
// The fields are consumed in their on-disk order. The
// trailing 64 reserved bytes are read and discarded.
template<class = void>
void
read(istream& is, dat_file_header& dh)
{
read(is, dh.type, sizeof(dh.type));
read<std::uint16_t>(is, dh.version);
read<std::uint64_t>(is, dh.uid);
read<std::uint64_t>(is, dh.appnum);
read<std::uint16_t>(is, dh.key_size);
std::array<std::uint8_t, 64> reserved;
read(is, reserved.data(), reserved.size());
}
// Read data file header from file
//
// Reads dat_file_header::size bytes from offset 0 of `f`
// and decodes them. On a read error `ec` is set and `dh`
// is left untouched.
template<class File>
void
read(File& f, dat_file_header& dh, error_code& ec)
{
std::array<std::uint8_t, dat_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is(buf);
read(is, dh);
}
// Write data file header to stream
//
// The type tag written is always "nudb.dat"; dh.type is
// not consulted. The 64 reserved bytes are written as zero.
template<class = void>
void
write(ostream& os, dat_file_header const& dh)
{
write(os, "nudb.dat", 8);
write<std::uint16_t>(os, dh.version);
write<std::uint64_t>(os, dh.uid);
write<std::uint64_t>(os, dh.appnum);
write<std::uint16_t>(os, dh.key_size);
std::array<std::uint8_t, 64> reserved;
reserved.fill(0);
write(os, reserved.data(), reserved.size());
}
// Write data file header to file
//
// Encodes the header and writes dat_file_header::size bytes
// at offset 0 of `f`. Errors are reported through `ec`.
template<class File>
void
write(File& f, dat_file_header const& dh, error_code& ec)
{
std::array<std::uint8_t, dat_file_header::size> buf;
ostream os(buf);
write(os, dh);
f.write(0, buf.data(), buf.size(), ec);
}
// Read key file header from stream
//
// Decodes the stored fields in their on-disk order, then
// derives the computed members (capacity, buckets, modulus)
// from the block size and from `file_size`, the size of the
// key file in bytes.
template<class = void>
void
read(istream& is, noff_t file_size, key_file_header& kh)
{
    read(is, kh.type, sizeof(kh.type));
    read<std::uint16_t>(is, kh.version);
    read<std::uint64_t>(is, kh.uid);
    read<std::uint64_t>(is, kh.appnum);
    read<std::uint16_t>(is, kh.key_size);
    read<std::uint64_t>(is, kh.salt);
    read<std::uint64_t>(is, kh.pepper);
    read<std::uint16_t>(is, kh.block_size);
    read<std::uint16_t>(is, kh.load_factor);
    // The reserved area is consumed and discarded
    std::array<std::uint8_t, 56> reserved;
    read(is, reserved.data(), reserved.size());

    // VFALCO These need to be checked to handle
    //        when the file size is too small
    kh.capacity = bucket_capacity(kh.block_size);

    // One block is subtracted from the file size before
    // counting buckets. A file no larger than one block
    // has no buckets, and neither does a block size of zero
    // (VFALCO Corruption or logic error).
    kh.buckets = 0;
    if(kh.block_size > 0 && file_size > kh.block_size)
        kh.buckets = static_cast<nbuck_t>(
            (file_size - kh.block_size) / kh.block_size);

    kh.modulus = ceil_pow2(kh.buckets);
}
// Read key file header from file
//
// Reads key_file_header::size bytes from offset 0 of `f`,
// queries the file size, and decodes the header. If either
// file operation fails, `ec` is set and `kh` is untouched.
template<class File>
void
read(File& f, key_file_header& kh, error_code& ec)
{
std::array<std::uint8_t, key_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is{buf};
// The file size is needed to derive the bucket count
auto const size = f.size(ec);
if(ec)
return;
read(is, size, kh);
}
// Write key file header to stream
//
// The type tag written is always "nudb.key"; kh.type is not
// consulted. Only the stored fields are written (the computed
// members are not), followed by 56 reserved zero bytes.
template<class = void>
void
write(ostream& os, key_file_header const& kh)
{
write(os, "nudb.key", 8);
write<std::uint16_t>(os, kh.version);
write<std::uint64_t>(os, kh.uid);
write<std::uint64_t>(os, kh.appnum);
write<std::uint16_t>(os, kh.key_size);
write<std::uint64_t>(os, kh.salt);
write<std::uint64_t>(os, kh.pepper);
write<std::uint16_t>(os, kh.block_size);
write<std::uint16_t>(os, kh.load_factor);
std::array<std::uint8_t, 56> reserved;
reserved.fill(0);
write(os, reserved.data(), reserved.size());
}
// Write key file header to file
//
// The header occupies one whole block at offset 0 of `f`;
// the bytes after the header fields are zero.
//
// If the block size is too small to hold the header, `ec` is
// set to error::invalid_block_size and nothing is written.
// Other failures are reported through `ec` by the file write.
template<class File>
void
write(File& f, key_file_header const& kh, error_code& ec)
{
    // Validate before allocating, so the error path
    // does not pay for a scratch buffer it never uses.
    if(kh.block_size < key_file_header::size)
    {
        ec = error::invalid_block_size;
        return;
    }
    buffer buf;
    buf.reserve(kh.block_size);
    std::fill(buf.get(), buf.get() + buf.size(), 0);
    ostream os{buf.get(), buf.size()};
    write(os, kh);
    f.write(0, buf.get(), buf.size(), ec);
}
// Read log file header from stream
//
// The fields are consumed in their on-disk order.
template<class = void>
void
read(istream& is, log_file_header& lh)
{
read(is, lh.type, sizeof(lh.type));
read<std::uint16_t>(is, lh.version);
read<std::uint64_t>(is, lh.uid);
read<std::uint64_t>(is, lh.appnum);
read<std::uint16_t>(is, lh.key_size);
read<std::uint64_t>(is, lh.salt);
read<std::uint64_t>(is, lh.pepper);
read<std::uint16_t>(is, lh.block_size);
read<std::uint64_t>(is, lh.key_file_size);
read<std::uint64_t>(is, lh.dat_file_size);
}
// Read log file header from file
//
// Reads log_file_header::size bytes from offset 0 of `f`
// and decodes them. On a read error `ec` is set and `lh`
// is left untouched.
template<class File>
void
read(File& f, log_file_header& lh, error_code& ec)
{
std::array<std::uint8_t, log_file_header::size> buf;
f.read(0, buf.data(), buf.size(), ec);
if(ec)
return;
istream is{buf};
read(is, lh);
}
// Write log file header to stream
//
// The type tag written is always "nudb.log"; lh.type is
// not consulted.
template<class = void>
void
write(ostream& os, log_file_header const& lh)
{
write(os, "nudb.log", 8);
write<std::uint16_t>(os, lh.version);
write<std::uint64_t>(os, lh.uid);
write<std::uint64_t>(os, lh.appnum);
write<std::uint16_t>(os, lh.key_size);
write<std::uint64_t>(os, lh.salt);
write<std::uint64_t>(os, lh.pepper);
write<std::uint16_t>(os, lh.block_size);
write<std::uint64_t>(os, lh.key_file_size);
write<std::uint64_t>(os, lh.dat_file_size);
}
// Write log file header to file
//
// Encodes the header and writes log_file_header::size bytes
// at offset 0 of `f`. Errors are reported through `ec`.
template<class File>
void
write(File& f, log_file_header const& lh, error_code& ec)
{
std::array<std::uint8_t, log_file_header::size> buf;
ostream os{buf};
write(os, lh);
f.write(0, buf.data(), buf.size(), ec);
}
// Verify contents of data file header
//
// Checks, in order: the type tag, the version and the key
// size. `ec` is set to the error for the first check that
// fails and is left untouched when all of them pass.
template<class = void>
void
verify(dat_file_header const& dh, error_code& ec)
{
    if(std::memcmp(dh.type, "nudb.dat", 8) != 0)
        ec = error::not_data_file;
    else if(dh.version != currentVersion)
        ec = error::different_version;
    else if(dh.key_size < 1)
        ec = error::invalid_key_size;
}
// Verify contents of key file header
//
// Checks, in order: the type tag, the version, the key size,
// that the pepper matches the salt for this Hasher, the load
// factor, the bucket capacity and the bucket count. `ec` is
// set to the error for the first check that fails and is
// left untouched when all of them pass.
template<class Hasher>
void
verify(key_file_header const& kh, error_code& ec)
{
    if(std::memcmp(kh.type, "nudb.key", 8) != 0)
        ec = error::not_key_file;
    else if(kh.version != currentVersion)
        ec = error::different_version;
    else if(kh.key_size < 1)
        ec = error::invalid_key_size;
    else if(kh.pepper != pepper<Hasher>(kh.salt))
        ec = error::hash_mismatch;
    else if(kh.load_factor < 1)
        ec = error::invalid_load_factor;
    else if(kh.capacity < 1)
        ec = error::invalid_capacity;
    else if(kh.buckets < 1)
        ec = error::invalid_bucket_count;
}
// Verify contents of log file header
//
// Checks, in order: the type tag, the version, that the
// pepper matches the salt for this Hasher, and the key size.
// `ec` is set to the error for the first check that fails
// and is left untouched when all of them pass.
template<class Hasher>
void
verify(log_file_header const& lh, error_code& ec)
{
    if(std::memcmp(lh.type, "nudb.log", 8) != 0)
        ec = error::not_log_file;
    else if(lh.version != currentVersion)
        ec = error::different_version;
    else if(lh.pepper != pepper<Hasher>(lh.salt))
        ec = error::hash_mismatch;
    else if(lh.key_size < 1)
        ec = error::invalid_key_size;
}
// Make sure key file and value file headers match
//
// The key file header is verified on its own first. Then the
// uid, appnum and key size of the two headers are compared,
// in that order. `ec` is set to the first error found. The
// data file header is not verified by itself here.
template<class Hasher>
void
verify(dat_file_header const& dh,
    key_file_header const& kh, error_code& ec)
{
    verify<Hasher>(kh, ec);
    if(ec)
        return;
    if(kh.uid != dh.uid)
        ec = error::uid_mismatch;
    else if(kh.appnum != dh.appnum)
        ec = error::appnum_mismatch;
    else if(kh.key_size != dh.key_size)
        ec = error::key_size_mismatch;
}
// Make sure key file and log file headers match
//
// The log file header is verified on its own first. Then the
// uid, appnum, key size, salt, pepper and block size of the
// two headers are compared, in that order. `ec` is set to
// the first error found. The key file header is not verified
// by itself here.
template<class Hasher>
void
verify(key_file_header const& kh,
    log_file_header const& lh, error_code& ec)
{
    verify<Hasher>(lh, ec);
    if(ec)
        return;
    if(kh.uid != lh.uid)
        ec = error::uid_mismatch;
    else if(kh.appnum != lh.appnum)
        ec = error::appnum_mismatch;
    else if(kh.key_size != lh.key_size)
        ec = error::key_size_mismatch;
    else if(kh.salt != lh.salt)
        ec = error::salt_mismatch;
    else if(kh.pepper != lh.pepper)
        ec = error::pepper_mismatch;
    else if(kh.block_size != lh.block_size)
        ec = error::block_size_mismatch;
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,259 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_GENTEX_HPP
#define NUDB_DETAIL_GENTEX_HPP
#include <boost/assert.hpp>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <system_error>
namespace nudb {
namespace detail {
// Generation counting mutex
//
// Holders acquire with lock_gen(), which returns the current
// generation number, and release with unlock_gen(), passing
// that number back. start() opens a new generation; finish()
// blocks until every lock taken in an earlier generation has
// been released.
//
template<class = void>
class gentex_t
{
private:
    std::mutex m_;
    std::size_t gen_ = 0;   // current generation number
    std::size_t cur_ = 0;   // locks held from the current generation
    std::size_t prev_ = 0;  // locks still held from earlier generations
    std::condition_variable cond_;

public:
    gentex_t() = default;
    gentex_t(gentex_t const&) = delete;
    gentex_t& operator=(gentex_t const&) = delete;

    // Begin a new generation
    void
    start();

    // Wait for locks from earlier generations to be released
    void
    finish();

    // Acquire; returns the generation to pass to unlock_gen
    std::size_t
    lock_gen();

    // Release a lock acquired in generation `gen`
    void
    unlock_gen(std::size_t gen);
};

// Begin a new generation. Locks held at this point are
// from now on counted as belonging to an earlier generation.
template<class _>
void
gentex_t<_>::
start()
{
    std::lock_guard<std::mutex> guard{m_};
    prev_ += cur_;
    cur_ = 0;
    ++gen_;
}

// Block until no lock from an earlier generation remains
template<class _>
void
gentex_t<_>::
finish()
{
    std::unique_lock<std::mutex> guard{m_};
    cond_.wait(guard,
        [this]
        {
            return prev_ == 0;
        });
}

// Count one more lock in the current generation and
// return that generation's number.
template<class _>
std::size_t
gentex_t<_>::
lock_gen()
{
    std::lock_guard<std::mutex> guard{m_};
    ++cur_;
    return gen_;
}

// Release a lock taken in generation `gen`. Releasing the
// last lock of the earlier generations wakes all waiters
// in finish().
template<class _>
void
gentex_t<_>::
unlock_gen(std::size_t gen)
{
    std::lock_guard<std::mutex> guard{m_};
    if(gen == gen_)
    {
        --cur_;
        return;
    }
    if(--prev_ == 0)
        cond_.notify_all();
}

using gentex = gentex_t<>;
//------------------------------------------------------------------------------
// Movable scoped lock for a generation lockable object.
//
// GenerationLockable must provide lock_gen(), returning a
// generation number, and unlock_gen(), accepting that number.
// The lock remembers the generation returned by lock_gen()
// and hands it back on unlock().
template<class GenerationLockable>
class genlock
{
private:
    bool owned_ = false;
    GenerationLockable* g_ = nullptr;
    // Generation returned by the last lock_gen() call.
    // Initialized so that it never holds an indeterminate value.
    std::size_t gen_ = 0;

public:
    using mutex_type = GenerationLockable;

    // Construct with no associated mutex
    genlock() = default;
    genlock(genlock const&) = delete;
    genlock& operator=(genlock const&) = delete;
    genlock(genlock&& other);
    genlock& operator=(genlock&& other);

    // Associate with `g` and lock it
    explicit
    genlock(mutex_type& g);

    // Associate with `g` without locking it
    genlock(mutex_type& g, std::defer_lock_t);

    // Unlocks if the lock is owned
    ~genlock();

    // Returns the associated mutex, or nullptr if there is none
    mutex_type*
    mutex() noexcept
    {
        return g_;
    }

    // Returns true if a mutex is associated and currently locked
    bool
    owns_lock() const noexcept
    {
        return g_ && owned_;
    }

    explicit
    operator bool() const noexcept
    {
        return owns_lock();
    }

    void
    lock();

    void
    unlock();

    // Drop the association without unlocking
    mutex_type*
    release() noexcept;

    template<class U>
    friend
    void
    swap(genlock<U>& lhs, genlock<U>& rhs) noexcept;
};
// Move constructor: takes over the association and, if held,
// the lock of `other`. `other` is left with no mutex.
template<class G>
genlock<G>::
genlock(genlock&& other)
    : owned_(other.owned_)
    , g_(other.g_)
    // The generation must travel with the lock, otherwise the
    // eventual unlock_gen() would be passed a garbage value.
    // It is only read from `other` when a lock was taken, so
    // that a never-assigned value is never touched.
    , gen_(other.owned_ ? other.gen_ : 0)
{
    other.owned_ = false;
    other.g_ = nullptr;
}
// Move assignment: releases any lock currently held, then
// takes over the association and, if held, the lock of
// `other`. `other` is left with no mutex.
template<class G>
genlock<G>&
genlock<G>::
operator=(genlock&& other)
{
    // Self-assignment must not unlock and drop the mutex
    if(this == &other)
        return *this;
    if(owns_lock())
        unlock();
    owned_ = other.owned_;
    g_ = other.g_;
    // The generation must travel with the lock so that the
    // eventual unlock_gen() receives the right value.
    if(other.owned_)
        gen_ = other.gen_;
    other.owned_ = false;
    other.g_ = nullptr;
    return *this;
}
// Associate with `g` and acquire it immediately
template<class G>
genlock<G>::
genlock(mutex_type& g)
: g_(&g)
{
lock();
}
// Associate with `g` without acquiring it; call lock() later
template<class G>
genlock<G>::
genlock(mutex_type& g, std::defer_lock_t)
: g_(&g)
{
}
// Releases the lock if it is still held
template<class G>
genlock<G>::
~genlock()
{
if(owns_lock())
unlock();
}
// Acquire the associated mutex and remember the generation
// it reports. Requires an associated mutex that this lock
// does not already hold (both are asserted).
template<class G>
void
genlock<G>::
lock()
{
// no associated gentex
BOOST_ASSERT(g_ != nullptr);
// gentex is already owned
BOOST_ASSERT(! owned_);
gen_ = g_->lock_gen();
owned_ = true;
}
// Release the associated mutex, handing back the generation
// recorded by lock(). Requires an associated mutex that this
// lock currently holds (both are asserted).
template<class G>
void
genlock<G>::
unlock()
{
// no associated gentex
BOOST_ASSERT(g_ != nullptr);
// gentex is not owned
BOOST_ASSERT(owned_);
g_->unlock_gen(gen_);
owned_ = false;
}
// Drop the association with the mutex without unlocking it
// and return the mutex (nullptr if there was none).
// NOTE(review): owned_ is left as it was, and the recorded
// generation is not returned to the caller - confirm how a
// caller is expected to unlock a mutex released while held.
template<class G>
auto
genlock<G>::
release() noexcept ->
mutex_type*
{
mutex_type* const g = g_;
g_ = nullptr;
return g;
}
// Exchange the complete state of two locks: the associated
// mutex, whether it is held, and the recorded generation.
template<class G>
void
swap(genlock<G>& lhs, genlock<G>& rhs) noexcept
{
    using std::swap;
    // The generation must follow the lock it belongs to,
    // otherwise a later unlock_gen() receives the wrong value.
    // It is only read from a side that actually took a lock.
    std::size_t const lhs_gen = lhs.owned_ ? lhs.gen_ : 0;
    std::size_t const rhs_gen = rhs.owned_ ? rhs.gen_ : 0;
    lhs.gen_ = rhs_gen;
    rhs.gen_ = lhs_gen;
    swap(lhs.owned_, rhs.owned_);
    swap(lhs.g_, rhs.g_);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,26 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_MUTEX_HPP
#define NUDB_DETAIL_MUTEX_HPP
#include <boost/thread/lock_types.hpp>
#include <boost/thread/shared_mutex.hpp>
namespace nudb {
namespace detail {
// Shared (reader) lock on a boost::shared_mutex
using shared_lock_type =
boost::shared_lock<boost::shared_mutex>;
// Exclusive (writer) lock on a boost::shared_mutex
using unique_lock_type =
boost::unique_lock<boost::shared_mutex>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,243 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_POOL_HPP
#define NUDB_DETAIL_POOL_HPP
#include <nudb/detail/arena.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <boost/thread/lock_types.hpp>
#include <cstdint>
#include <cstring>
#include <memory>
#include <map>
#include <utility>
namespace nudb {
namespace detail {
// A buffer of key/value pairs.
//
// Entries are kept in a map ordered by the raw key bytes. Each
// entry is associated with a data file offset which may be modified
// through the iterator.
template<class = void>
class pool_t
{
public:
    struct value_type;
    class compare;

private:
    using map_type = std::map<
        value_type, noff_t, compare>;

    arena arena_;
    nsize_t key_size_;
    nsize_t data_size_ = 0;
    map_type map_;

public:
    using iterator =
        typename map_type::iterator;

    // Not copyable
    pool_t(pool_t const&) = delete;
    pool_t& operator=(pool_t const&) = delete;

    pool_t(pool_t&& other);

    // Construct an empty pool for keys of `key_size` bytes.
    // `label` is passed on to the arena.
    pool_t(nsize_t key_size, char const* label);

    iterator begin() { return map_.begin(); }

    iterator end() { return map_.end(); }

    // Returns `true` if the pool holds no elements
    bool empty() const { return map_.empty(); }

    // Returns the number of elements in the pool
    std::size_t size() const { return map_.size(); }

    // Returns the sum of data sizes in the pool
    std::size_t data_size() const { return data_size_; }

    // Remove all elements and reset the data size to zero
    void
    clear();

    // Forwards to the arena's periodic_activity
    void
    periodic_activity();

    // Returns an iterator to the element with the given key,
    // or end() if there is none.
    iterator
    find(void const* key);

    // Insert a value
    // @param h The hash of the key
    void
    insert(nhash_t h, void const* key,
        void const* buffer, nsize_t size);

    template<class U>
    friend
    void
    swap(pool_t<U>& lhs, pool_t<U>& rhs);
};
// The key type of the pool's map: one buffered key/value pair.
// The pointers are non-owning; pool_t::insert points them at
// copies it makes in the pool's arena.
template<class _>
struct pool_t<_>::value_type
{
// Hash of the key
nhash_t hash;
// Size of the value in bytes
nsize_t size;
// Pointer to the key bytes
void const* key;
// Pointer to the value bytes
void const* data;
value_type(value_type const&) = default;
value_type& operator=(value_type const&) = default;
// Memberwise constructor. The argument order (hash, size, key, data)
// is relied on by pool_t::insert, which constructs the element
// piecewise from a tuple in this order.
value_type(nhash_t hash_, nsize_t size_,
void const* key_, void const* data_)
: hash(hash_)
, size(size_)
, key(key_)
, data(data_)
{
}
};
// Strict weak ordering over value_type.
//
// Two elements are ordered by a bytewise comparison of the first
// key_size bytes of their keys; hash, size and data do not
// participate.
template<class _>
class pool_t<_>::compare
{
    std::size_t key_size_;

public:
    using result_type = bool;
    using first_argument_type = value_type;
    using second_argument_type = value_type;

    compare(compare const&) = default;
    compare& operator=(compare const&) = default;

    // @param key_size The number of key bytes to compare
    explicit
    compare(nsize_t key_size)
        : key_size_(key_size)
    {
    }

    // Returns `true` if the key of lhs sorts before the key of rhs
    bool
    operator()(value_type const& lhs,
        value_type const& rhs) const
    {
        auto const order = std::memcmp(
            lhs.key, rhs.key, key_size_);
        return order < 0;
    }
};
//------------------------------------------------------------------------------
// Move constructor: moves the arena and the map out of `rhs` and
// copies the key size and accumulated data size.
template<class _>
pool_t<_>::
pool_t(pool_t&& rhs)
    : arena_(std::move(rhs.arena_))
    , key_size_(rhs.key_size_)
    , data_size_(rhs.data_size_)
    , map_(std::move(rhs.map_))
{
}
// Constructs an empty pool.
// @param key_size The size in bytes of every key; also used
//                 by the map's comparison function.
// @param label    Passed to the arena constructor.
template<class _>
pool_t<_>::
pool_t(nsize_t key_size, char const* label)
    : arena_(label)
    , key_size_(key_size)
    , map_(compare(key_size))
{
}
// Removes every element, clears the arena, and resets the
// accumulated data size to zero.
template<class _>
auto
pool_t<_>::
clear() ->
    void
{
    // Drop the map entries first: they only hold non-owning
    // pointers, so nothing refers to the arena afterwards.
    map_.clear();
    data_size_ = 0;
    arena_.clear();
}
// Forwards the call to the arena's periodic_activity().
template<class _>
auto
pool_t<_>::
periodic_activity() ->
    void
{
    arena_.periodic_activity();
}
// Looks up an element by key.
// Returns an iterator to the element, or end() if not found.
template<class _>
auto
pool_t<_>::
find(void const* key) ->
    iterator
{
    // VFALCO need is_transparent here
    // The comparison only examines the key, so the remaining
    // fields of the probe value are placeholders.
    return map_.find(value_type{0, 0, key, nullptr});
}
// Inserts a key/value pair.
//
// The key and the value are copied into storage allocated from the
// arena, and a new map element referring to those copies is created
// with an associated offset of zero.
//
// @param h    The hash of the key
// @param key  Pointer to key_size bytes of key
// @param data Pointer to the value
// @param size Size of the value in bytes
//
// Precondition (checked with BOOST_ASSERT): the key must not
// already exist in the pool.
template<class _>
void
pool_t<_>::
insert(nhash_t h,
    void const* key, void const* data, nsize_t size)
{
    auto const k = arena_.alloc(key_size_);
    auto const d = arena_.alloc(size);
    std::memcpy(k, key, key_size_);
    std::memcpy(d, data, size);
    auto const inserted = map_.emplace(
        std::piecewise_construct,
        std::make_tuple(h, size, k, d),
        std::make_tuple(0)).second;
    // Keeps `inserted` referenced when assertions are compiled out
    (void)inserted;
    // Must not already exist!
    BOOST_ASSERT(inserted);
    data_size_ += size;
}
// Exchanges the complete contents of two pools.
template<class _>
void
swap(pool_t<_>& lhs, pool_t<_>& rhs)
{
    using std::swap;
    swap(lhs.map_, rhs.map_);
    swap(lhs.arena_, rhs.arena_);
    swap(lhs.data_size_, rhs.data_size_);
    swap(lhs.key_size_, rhs.key_size_);
}
using pool = pool_t<>;
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,149 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_STREAM_HPP
#define NUDB_DETAIL_STREAM_HPP
#include <boost/assert.hpp>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
namespace nudb {
namespace detail {
// Input stream from bytes
//
// A non-owning cursor over a byte buffer. Each call to data()
// (or operator()) consumes the requested number of bytes from
// the front of the remaining range.
template<class = void>
class istream_t
{
    std::uint8_t const* buf_ = nullptr;
    std::size_t size_ = 0;

public:
    istream_t() = default;
    istream_t(istream_t const&) = default;
    istream_t& operator=(istream_t const&) = default;

    // Read from `size` bytes starting at `data`
    istream_t(void const* data, std::size_t size)
        : buf_(static_cast<std::uint8_t const*>(data))
        , size_(size)
    {
    }

    // Read from the entire contents of an array
    template<std::size_t N>
    istream_t(std::array<std::uint8_t, N> const& a)
        : buf_(a.data())
        , size_(a.size())
    {
    }

    // Consume `bytes` bytes, returning a pointer to the first
    std::uint8_t const*
    data(std::size_t bytes);

    // Same as data(bytes)
    std::uint8_t const*
    operator()(std::size_t bytes)
    {
        return this->data(bytes);
    }
};
// Consumes `bytes` bytes from the front of the stream and returns
// a pointer to the first consumed byte.
//
// Precondition: bytes <= size_
// The precondition is asserted; when the assertion is compiled
// out, a violation throws std::logic_error instead.
//
template<class _>
std::uint8_t const*
istream_t<_>::data(std::size_t bytes)
{
    BOOST_ASSERT(bytes <= size_);
    if(size_ < bytes)
        throw std::logic_error("short read from istream");
    auto const first = buf_;
    buf_ += bytes;
    size_ -= bytes;
    return first;
}
using istream = istream_t<>;
//------------------------------------------------------------------------------
// Output stream to bytes
//
// A non-owning cursor over a writable byte buffer which counts the
// number of bytes handed out. The capacity of the buffer is not
// recorded, so writes are not bounds checked.
// VFALCO Should this assert on overwriting the buffer?
template<class = void>
class ostream_t
{
    std::uint8_t* buf_ = nullptr;
    std::size_t size_ = 0;

public:
    ostream_t() = default;
    ostream_t(ostream_t const&) = default;
    ostream_t& operator=(ostream_t const&) = default;

    // Write starting at `data`. The size argument is ignored.
    ostream_t(void* data, std::size_t)
        : buf_(static_cast<std::uint8_t*>(data))
    {
    }

    // Write starting at the beginning of an array
    template<std::size_t N>
    ostream_t(std::array<std::uint8_t, N>& a)
        : buf_(a.data())
    {
    }

    // Returns the number of bytes written
    std::size_t size() const { return size_; }

    // Reserve `bytes` bytes, returning a pointer to the first
    std::uint8_t*
    data(std::size_t bytes);

    // Same as data(bytes)
    std::uint8_t*
    operator()(std::size_t bytes)
    {
        return this->data(bytes);
    }
};
// Reserves the next `bytes` bytes of the output and returns a
// pointer to the first of them. The running byte count returned
// by size() grows by `bytes`.
template<class _>
std::uint8_t*
ostream_t<_>::data(std::size_t bytes)
{
    auto const first = buf_;
    buf_ += bytes;
    size_ += bytes;
    return first;
}
using ostream = ostream_t<>;
//------------------------------------------------------------------------------
// read blob
//
// Consumes `bytes` bytes from the stream and copies them
// to `buffer`.
inline
void
read(istream& is, void* buffer, std::size_t bytes)
{
    auto const src = is.data(bytes);
    std::memcpy(buffer, src, bytes);
}
// write blob
//
// Reserves `bytes` bytes in the stream and copies them
// from `buffer`.
inline
void
write(ostream& os, void const* buffer, std::size_t bytes)
{
    auto const dest = os.data(bytes);
    std::memcpy(dest, buffer, bytes);
}
} // detail
} // nudb
#endif

View File

@@ -0,0 +1,328 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
//
// This is a derivative work based on xxHash 0.6.2, copyright below:
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
#ifndef NUDB_DETAIL_XXHASH_HPP
#define NUDB_DETAIL_XXHASH_HPP
#include <nudb/detail/endian.hpp>
#include <cstdint>
#include <cstdlib>
#include <cstring>
namespace nudb {
namespace detail {
// Compiler version as major * 100 + minor, used to detect the
// availability of the byte swap builtins below.
#define NUDB_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
// 64-bit rotate left.
// minGW _rotl gives poor performance
// The portable form parenthesizes its arguments so that expression
// arguments (for example `a | b`) are rotated as a whole.
#if defined(_MSC_VER)
# define NUDB_XXH_rotl64(x,r) _rotl64(x,r)
#else
# define NUDB_XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
// 32-bit byte swap: compiler intrinsic where available. When this
// macro is left undefined a portable function is supplied instead.
#if defined(_MSC_VER)
# define NUDB_XXH_swap32 _byteswap_ulong
#elif NUDB_GCC_VERSION >= 403
# define NUDB_XXH_swap32 __builtin_bswap32
#endif
// 64-bit byte swap: compiler intrinsic where available. When this
// macro is left undefined a portable function is supplied instead.
#if defined(_MSC_VER)
# define NUDB_XXH_swap64 _byteswap_uint64
#elif NUDB_GCC_VERSION >= 403
# define NUDB_XXH_swap64 __builtin_bswap64
#endif
#ifndef NUDB_XXH_swap32
// Portable fallback: returns `x` with the order of its
// four bytes reversed.
inline
std::uint32_t
NUDB_XXH_swap32(std::uint32_t x)
{
    std::uint32_t out = 0;
    // Peel bytes off the low end of x and push them
    // onto the low end of out.
    for(int i = 0; i < 4; ++i)
    {
        out = (out << 8) | (x & 0xffu);
        x >>= 8;
    }
    return out;
}
#endif
#ifndef NUDB_XXH_swap64
// Portable fallback: returns `x` with the order of its
// eight bytes reversed.
inline
std::uint64_t
NUDB_XXH_swap64(std::uint64_t x)
{
    std::uint64_t out = 0;
    // Peel bytes off the low end of x and push them
    // onto the low end of out.
    for(int i = 0; i < 8; ++i)
    {
        out = (out << 8) | (x & 0xffULL);
        x >>= 8;
    }
    return out;
}
#endif
// The five 64-bit constants used by the XXH64 mixing steps
// in this file (rounds, merge rounds, tail handling and
// the final avalanche).
static std::uint64_t constexpr prime64_1 = 11400714785074694791ULL;
static std::uint64_t constexpr prime64_2 = 14029467366897019727ULL;
static std::uint64_t constexpr prime64_3 = 1609587929392839161ULL;
static std::uint64_t constexpr prime64_4 = 9650029242287828579ULL;
static std::uint64_t constexpr prime64_5 = 2870177450012600261ULL;
// Reads a 32-bit value in native byte order from memory which
// may be unaligned.
// Portable and safe solution. Generally efficient.
// see : http://stackoverflow.com/a/32095106/646947
inline
std::uint32_t
XXH_read32(void const* p)
{
    std::uint32_t v;
    // <cstring> only guarantees the name in namespace std
    std::memcpy(&v, p, sizeof(v));
    return v;
}
// Reads a 64-bit value in native byte order from memory which
// may be unaligned.
inline
std::uint64_t
XXH_read64(void const* p)
{
    std::uint64_t v;
    // <cstring> only guarantees the name in namespace std
    std::memcpy(&v, p, sizeof(v));
    return v;
}
// Read a 32-bit little endian value.
//
// The overload is selected by two tags: the first is true_type when
// the host is little endian, the second is true_type when `p` may be
// dereferenced directly as a std::uint32_t.

// little endian, aligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::true_type, std::true_type)
{
    auto const word = static_cast<std::uint32_t const*>(p);
    return *word;
}
// little endian, unaligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::true_type, std::false_type)
{
    auto const native = XXH_read32(p);
    return native;
}
// big endian, aligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::false_type, std::true_type)
{
    auto const word = static_cast<std::uint32_t const*>(p);
    return NUDB_XXH_swap32(*word);
}
// big endian, unaligned
inline
std::uint32_t
XXH_readLE32_align(void const* p, std::false_type, std::false_type)
{
    auto const native = XXH_read32(p);
    return NUDB_XXH_swap32(native);
}
// Read a 64-bit little endian value.
//
// The overload is selected by two tags: the first is true_type when
// the host is little endian, the second is true_type when `p` may be
// dereferenced directly as a std::uint64_t.

// little endian, aligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::true_type, std::true_type)
{
    auto const word = static_cast<std::uint64_t const*>(p);
    return *word;
}
// little endian, unaligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::true_type, std::false_type)
{
    auto const native = XXH_read64(p);
    return native;
}
// big endian, aligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::false_type, std::true_type)
{
    auto const word = static_cast<std::uint64_t const*>(p);
    return NUDB_XXH_swap64(*word);
}
// big endian, unaligned
inline
std::uint64_t
XXH_readLE64_align(void const* p, std::false_type, std::false_type)
{
    auto const native = XXH_read64(p);
    return NUDB_XXH_swap64(native);
}
// One accumulator round: fold `input` into `acc`, rotate left
// by 31 bits, then multiply by prime64_1.
inline
std::uint64_t
XXH64_round(std::uint64_t acc, std::uint64_t input)
{
    std::uint64_t const mixed = acc + input * prime64_2;
    std::uint64_t const rotated = NUDB_XXH_rotl64(mixed, 31);
    return rotated * prime64_1;
}
// Merges one lane value into the running hash: the lane is put
// through a round starting from zero, xor'ed into `acc`, and the
// result is scaled by prime64_1 and offset by prime64_4.
inline
std::uint64_t
XXH64_mergeRound(std::uint64_t acc, std::uint64_t val)
{
    std::uint64_t const lane = XXH64_round(0, val);
    return (acc ^ lane) * prime64_1 + prime64_4;
}
// Computes the 64-bit hash of `len` bytes at `input` using `seed`.
//
// The two tag arguments only select how words are read from the
// input (see XXH_readLE32_align / XXH_readLE64_align); their values
// are not otherwise used.
//
// Structure:
//   1. Inputs of 32 bytes or more are consumed in 32-byte stripes
//      by four accumulators which are then merged.
//   2. The length is added.
//   3. The remainder is consumed 8 bytes, then 4 bytes, then
//      1 byte at a time.
//   4. A final shift/multiply sequence scrambles the result.
template<bool LittleEndian, bool Aligned>
std::uint64_t
XXH64_endian_align(
    void const* input, std::size_t len, std::uint64_t seed,
    std::integral_constant<bool, LittleEndian> endian,
    std::integral_constant<bool, Aligned> align)
{
    using endian_tag = decltype(endian);
    using align_tag = decltype(align);

    auto const read32 =
        [](void const* src)
        {
            return XXH_readLE32_align(
                src, endian_tag{}, align_tag{});
        };
    auto const read64 =
        [](void const* src)
        {
            return XXH_readLE64_align(
                src, endian_tag{}, align_tag{});
        };

    auto p = static_cast<std::uint8_t const*>(input);
    std::uint8_t const* const bEnd = p + len;
    std::uint64_t h64;

    if(len < 32)
    {
        h64 = seed + prime64_5;
    }
    else
    {
        // Last position at which a full 32-byte stripe begins
        std::uint8_t const* const limit = bEnd - 32;
        std::uint64_t v1 = seed + prime64_1 + prime64_2;
        std::uint64_t v2 = seed + prime64_2;
        std::uint64_t v3 = seed + 0;
        std::uint64_t v4 = seed - prime64_1;
        do
        {
            v1 = XXH64_round(v1, read64(p));
            p += 8;
            v2 = XXH64_round(v2, read64(p));
            p += 8;
            v3 = XXH64_round(v3, read64(p));
            p += 8;
            v4 = XXH64_round(v4, read64(p));
            p += 8;
        }
        while(p <= limit);

        h64 = NUDB_XXH_rotl64(v1, 1) +
              NUDB_XXH_rotl64(v2, 7) +
              NUDB_XXH_rotl64(v3, 12) +
              NUDB_XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);
    }

    h64 += len;

    // Remaining whole 8-byte words
    for(; p + 8 <= bEnd; p += 8)
    {
        std::uint64_t const k1 = XXH64_round(0, read64(p));
        h64 ^= k1;
        h64 = NUDB_XXH_rotl64(h64, 27) * prime64_1 + prime64_4;
    }

    // At most one remaining 4-byte word
    if(p + 4 <= bEnd)
    {
        h64 ^= static_cast<std::uint64_t>(read32(p)) * prime64_1;
        h64 = NUDB_XXH_rotl64(h64, 23) * prime64_2 + prime64_3;
        p += 4;
    }

    // Remaining single bytes
    for(; p < bEnd; ++p)
    {
        h64 ^= static_cast<std::uint64_t>(*p) * prime64_5;
        h64 = NUDB_XXH_rotl64(h64, 11) * prime64_1;
    }

    // Final scramble
    h64 ^= h64 >> 33;
    h64 *= prime64_2;
    h64 ^= h64 >> 29;
    h64 *= prime64_3;
    h64 ^= h64 >> 32;
    return h64;
}
/*  Calculate the 64-bit hash of a block of memory.

    @param data A pointer to the buffer to compute the hash on.
    The buffer may be unaligned.

    @param bytes The size of the buffer in bytes.

    @param seed A value which may be used to permute the output.
    Using a different seed with the same input will produce a
    different value.

    @return The 64-bit hash of the input data.

    @note This function runs faster on 64-bit systems, but slower
    on 32-bit systems (see benchmark).

    @note The aligned and unaligned code paths read the same values
    and therefore produce the same hash; only the way words are
    loaded from memory differs.
*/
template<class = void>
std::uint64_t
XXH64(void const* data, size_t bytes, std::uint64_t seed)
{
    // Words are read 8 bytes at a time, so the direct-dereference
    // path may only be used when the buffer is 8-byte aligned.
    // Anything else must go through the memcpy-based path.
    auto const aligned =
        (reinterpret_cast<std::uintptr_t>(data) & 7) == 0;
    if(aligned)
        return XXH64_endian_align(data, bytes, seed,
            is_little_endian{}, std::true_type{});
    return XXH64_endian_align(data, bytes, seed,
        is_little_endian{}, std::false_type{});
}
} // detail
} // nudb
#endif

263
include/nudb/error.hpp Normal file
View File

@@ -0,0 +1,263 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_ERROR_HPP
#define NUDB_ERROR_HPP
#include <boost/system/system_error.hpp>
#include <boost/system/error_code.hpp>
namespace nudb {
// Error handling vocabulary imported from Boost.System.
//
// Each name below is brought into namespace nudb with a using
// declaration (or a namespace alias for errc). When GENERATING_DOCS
// is set, an empty placeholder declaration is used instead so that
// the documentation tool has something to attach the comment to.

/// The type of system-specific error code returned by the implementation
#if GENERATING_DOCS
class error_code{};
#else
using boost::system::error_code;
#endif
/// The type of cross-platform error code used by the implementation
#if GENERATING_DOCS
class error_condition{};
#else
using boost::system::error_condition;
#endif
/// The type of system-specific exception used when throwing
#if GENERATING_DOCS
class system_error{};
#else
using boost::system::system_error;
#endif
/// Returns the category used for system-specific error codes
#if GENERATING_DOCS
error_category const&
system_category();
#else
using boost::system::system_category;
#endif
/// Returns the category used for cross-platform error codes
#if GENERATING_DOCS
error_category const&
generic_category();
#else
using boost::system::generic_category;
#endif
/// The base class used for error categories
#if GENERATING_DOCS
class error_category{};
#else
using boost::system::error_category;
#endif
/// The set of constants used for cross-platform error codes
#if GENERATING_DOCS
enum errc{};
#else
namespace errc = boost::system::errc;
#endif
/** Database error codes.
These values are converted into @ref error_code objects by
@ref make_error_code, which stores the enumerator's integer
value. Reordering or inserting enumerators therefore changes
the integer value associated with each error.
*/
enum class error
{
/** No error.
The operation completed successfully.
*/
success = 0,
/** The specified key was not found.
Returned when @ref basic_store::fetch does not
find the specified key.
*/
key_not_found,
/** The specified key already exists.
Returned when @ref basic_store::insert finds
the specified key already in the database.
*/
key_exists,
/** A file read returned less data than expected.
This can be caused by premature application
termination during a commit cycle.
*/
short_read,
/** A log file is present.
Indicates that the database needs to have the
associated log file applied to perform a recovery.
This error is returned by functions such as @ref rekey.
*/
log_file_exists,
/** No key file exists.
This error is returned by the recover process when
there is no valid key file. It happens when a
@ref rekey operation prematurely terminates. A
database without a key file cannot be opened. To
fix this error, it is necessary for an invocation of
@ref rekey to complete successfully.
*/
no_key_file,
/// Too many buckets in key file
too_many_buckets,
/// Not a data file
not_data_file,
/// Not a key file
not_key_file,
/// Not a log file
not_log_file,
/// Different version
different_version,
/// Invalid key size
invalid_key_size,
/// Invalid block size
invalid_block_size,
/** Short key file.
Also returned by @ref basic_store::open when the key
file header reports a bucket count less than one.
*/
short_key_file,
/// Short bucket
short_bucket,
/// Short spill
short_spill,
/// Short record
short_data_record,
/// Short value
short_value,
/// Hash mismatch
hash_mismatch,
/// Invalid load factor
invalid_load_factor,
/// Invalid capacity
invalid_capacity,
/// Invalid bucket count
invalid_bucket_count,
/// Invalid bucket size
invalid_bucket_size,
/// The data file header was incomplete
incomplete_data_file_header,
/// The key file header was incomplete
incomplete_key_file_header,
/// Invalid log record
invalid_log_record,
/// Invalid spill in log record
invalid_log_spill,
/// Invalid offset in log record
invalid_log_offset,
/// Invalid index in log record
invalid_log_index,
/// Invalid size in spill
invalid_spill_size,
/// UID mismatch
uid_mismatch,
/// Appnum mismatch
appnum_mismatch,
/// Key size mismatch
key_size_mismatch,
/// Salt mismatch
salt_mismatch,
/// Pepper mismatch
pepper_mismatch,
/// Block size mismatch
block_size_mismatch,
/// Orphaned value
orphaned_value,
/// Missing value
missing_value,
/// Size mismatch
size_mismatch,
/// Duplicate value
duplicate_value
};
/// Returns the error category used for database error codes.
// Only declared here. The definition is presumably supplied by
// <nudb/impl/error.ipp>, which this header includes at the end
// (confirm against that file).
error_category const&
nudb_category();
/** Returns a database error code.

    This function is used by the implementation to convert
    @ref error values into @ref error_code objects. The
    resulting code holds the integer value of `ev` together
    with the category returned by @ref nudb_category.
*/
inline
error_code
make_error_code(error ev)
{
    auto const value = static_cast<int>(ev);
    return error_code(value, nudb_category());
}
} // nudb
namespace boost {
namespace system {
// Marks nudb::error as an error code enumeration for Boost.System
// by specializing the trait with a true value. Presumably this is
// what allows statements such as `ec = error::key_exists;` to
// compile via make_error_code (see the Boost.System documentation).
template<>
struct is_error_code_enum<nudb::error>
{
static bool const value = true;
};
} // system
} // boost
#include <nudb/impl/error.ipp>
#endif

53
include/nudb/file.hpp Normal file
View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_FILE_HPP
#define NUDB_FILE_HPP
#include <cstddef>
#include <string>
namespace nudb {
/// The type used to hold paths to files
// Currently a plain std::string.
using path_type = std::string;
/** Returns the best guess at the volume's block size.

    The current implementation does not inspect the device;
    it always returns 4096.

    @param path A path to a file on the device. The file does
    not need to exist.
*/
inline
std::size_t
block_size(path_type const& path)
{
    // The path is not consulted by this implementation
    static_cast<void>(path);
    // A reasonable default for many SSD devices
    std::size_t const guess = 4096;
    return guess;
}
/** File create and open modes.
These are used by @ref native_file. A mode is passed as the
first argument when opening or creating a File.
*/
enum class file_mode
{
/// Open the file for sequential reads
scan,
/// Open the file for random reads
read,
/// Open the file for random reads and appending writes
append,
/// Open the file for random reads and writes
write
};
} // nudb
#endif

View File

@@ -0,0 +1,793 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_BASIC_STORE_IPP
#define NUDB_IMPL_BASIC_STORE_IPP
#include <nudb/concepts.hpp>
#include <nudb/recover.hpp>
#include <boost/assert.hpp>
#include <cmath>
#include <memory>
#ifndef NUDB_DEBUG_LOG
#define NUDB_DEBUG_LOG 0
#endif
#if NUDB_DEBUG_LOG
#include <beast/unit_test/dstream.hpp>
#include <iostream>
#endif
namespace nudb {
// Constructs the state of an open store.
//
// Takes ownership of the three open files, records their paths,
// seeds the hasher with the salt from the key file header, and
// creates the pools and the cache sized from the header's key
// size and block size.
template<class Hasher, class File>
basic_store<Hasher, File>::state::
state(File&& dat_file, File&& key_file, File&& log_file,
    path_type const& dat_path_, path_type const& key_path_,
        path_type const& log_path_,
            detail::key_file_header const& header)
    : df(std::move(dat_file))
    , kf(std::move(key_file))
    , lf(std::move(log_file))
    , dp(dat_path_)
    , kp(key_path_)
    , lp(log_path_)
    , hasher(header.salt)
    , p0(header.key_size, "p0")
    , p1(header.key_size, "p1")
    , c1(header.key_size, header.block_size, "c1")
    , kh(header)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
}
//------------------------------------------------------------------------------
// Destructor. Closes the store, discarding any error.
template<class Hasher, class File>
basic_store<Hasher, File>::
~basic_store()
{
    // We call close here to make sure data is intact
    // if an exception destroys the basic_store, but callers
    // should always call close manually to receive the
    // error code.
    error_code ignored;
    close(ignored);
}
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
dat_path() const
{
BOOST_ASSERT(is_open());
return s_->dp;
}
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
key_path() const
{
BOOST_ASSERT(is_open());
return s_->kp;
}
template<class Hasher, class File>
path_type const&
basic_store<Hasher, File>::
log_path() const
{
BOOST_ASSERT(is_open());
return s_->lp;
}
// Returns the appnum field of the key file header.
// Precondition (asserted): the store is open.
template<class Hasher, class File>
auto
basic_store<Hasher, File>::
appnum() const ->
    std::uint64_t
{
    BOOST_ASSERT(is_open());
    return (*s_).kh.appnum;
}
// Returns the key size recorded in the key file header.
// Precondition (asserted): the store is open.
template<class Hasher, class File>
auto
basic_store<Hasher, File>::
key_size() const ->
    std::size_t
{
    BOOST_ASSERT(is_open());
    return (*s_).kh.key_size;
}
// Returns the block size recorded in the key file header.
// Precondition (asserted): the store is open.
template<class Hasher, class File>
auto
basic_store<Hasher, File>::
block_size() const ->
    std::size_t
{
    BOOST_ASSERT(is_open());
    return (*s_).kh.block_size;
}
// Opens a database.
//
// @param dat_path The path to the data file.
// @param key_path The path to the key file.
// @param log_path The path to the log file.
// @param ec       Set to the error, if any occurred. Every step
//                 below returns immediately when it reports an error.
// @param args     Arguments passed to recover() and to the
//                 constructor of each of the three File objects.
//
// Precondition (asserted): the store is not already open.
template<class Hasher, class File>
template<class... Args>
void
basic_store<Hasher, File>::
open(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
error_code& ec,
Args&&... args)
{
static_assert(is_Hasher<Hasher>::value,
"Hasher requirements not met");
using namespace detail;
BOOST_ASSERT(! is_open());
// Reset the stored error and its flag
ec_ = {};
ecb_.store(false);
// Run recovery before opening the files. The args are used again
// below for each File, so they are passed as lvalues rather than
// being forwarded.
recover<Hasher, File>(
dat_path, key_path, log_path, ec, args...);
if(ec)
return;
File df(args...);
File kf(args...);
File lf(args...);
// The data file is opened in append mode, the key file in write
// mode, and a new log file is created in append mode.
df.open(file_mode::append, dat_path, ec);
if(ec)
return;
kf.open(file_mode::write, key_path, ec);
if(ec)
return;
lf.create(file_mode::append, log_path, ec);
if(ec)
return;
// VFALCO TODO Erase empty log file if this
// function subsequently fails.
// Read and verify the data file header
dat_file_header dh;
read(df, dh, ec);
if(ec)
return;
verify(dh, ec);
if(ec)
return;
// Read and verify the key file header
key_file_header kh;
read(kf, kh, ec);
if(ec)
return;
verify<Hasher>(kh, ec);
if(ec)
return;
// Verify the two headers against each other
verify<Hasher>(dh, kh, ec);
if(ec)
return;
// Build the state in a local first; it is only moved into s_
// once the remaining checks have passed.
boost::optional<state> s;
s.emplace(std::move(df), std::move(kf), std::move(lf),
dat_path, key_path, log_path, kh);
// thresh_ is load_factor * capacity, but never less than 65536.
// frac_ starts at half the threshold.
thresh_ = std::max<std::size_t>(65536UL,
kh.load_factor * kh.capacity);
frac_ = thresh_ / 2;
buckets_ = kh.buckets;
modulus_ = ceil_pow2(kh.buckets);
// VFALCO TODO This could be better
if(buckets_ < 1)
{
ec = error::short_key_file;
return;
}
// Bulk write sizes: 32 blocks of the respective volume
dataWriteSize_ = 32 * nudb::block_size(dat_path);
logWriteSize_ = 32 * nudb::block_size(log_path);
s_.emplace(std::move(*s));
open_ = true;
// Start the thread which executes basic_store::run
t_ = std::thread(&basic_store::run, this);
}
// Closes the database.
//
// Does nothing if the store is not open. Otherwise the worker
// thread is notified and joined. If an error was recorded (ecb_
// is set), it is reported through `ec` and nothing further is done.
// Otherwise the log file is closed, the state is moved out of s_
// and destroyed on return, and the log file is erased.
//
// @param ec Set to the error, if any occurred.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
close(error_code& ec)
{
    if(! open_)
        return;
    open_ = false;
    cv_.notify_all();
    t_.join();
    if(ecb_)
    {
        ec = ec_;
        return;
    }
    s_->lf.close();
    // Moving the state into a local destroys it on return
    state s{std::move(*s_)};
    File::erase(s.lp, ec_);
    if(ec_)
        ec = ec_;
}
// Fetches the value for a key.
//
// The lookup proceeds through three tiers:
//   1. The in-memory pools p1 and then p0.
//   2. The cached bucket for the key's hash, if one is in c1.
//   3. The bucket read from the key file.
// Tiers 2 and 3 continue in the bucket overload of fetch, which
// also follows spill records in the data file.
//
// @param key      Pointer to the key; key_size() bytes are used.
// @param callback Invoked as callback(data, size) when the key
//                 is found. For a pool hit it is invoked while
//                 the shared lock is held.
// @param ec       Set to the error, if any occurred. If a
//                 previous error was recorded (ecb_ is set),
//                 that error is reported and nothing is done.
//
// Precondition (asserted): the store is open.
template<class Hasher, class File>
template<class Callback>
void
basic_store<Hasher, File>::
fetch(
    void const* key,
    Callback && callback,
    error_code& ec)
{
    using namespace detail;
    BOOST_ASSERT(is_open());
    if(ecb_)
    {
        ec = ec_;
        return;
    }
    auto const h =
        hash(key, s_->kh.key_size, s_->hasher);
    shared_lock_type m{m_};
    {
        auto iter = s_->p1.find(key);
        if(iter == s_->p1.end())
        {
            iter = s_->p0.find(key);
            if(iter == s_->p0.end())
                goto cont;
        }
        callback(iter->first.data, iter->first.size);
        return;
    }
cont:
    auto const n = bucket_index(h, buckets_, modulus_);
    auto const iter = s_->c1.find(n);
    if(iter != s_->c1.end())
        return fetch(h, key, iter->second, callback, ec);
    // Take the generation lock before dropping the shared lock
    genlock<gentex> g{g_};
    m.unlock();
    buffer buf{s_->kh.block_size};
    // b constructs from uninitialized buf
    bucket b{s_->kh.block_size, buf.get()};
    // Bucket n lives at block n + 1 of the key file. Widen to
    // noff_t before multiplying, as insert() and load() do, so the
    // offset cannot wrap if the operands are narrower than noff_t.
    b.read(s_->kf,
        static_cast<noff_t>(n + 1) * b.block_size(), ec);
    if(ec)
        return;
    fetch(h, key, b, callback, ec);
}
// Inserts a key/value pair.
//
// @param key  Pointer to the key; key_size() bytes are used.
// @param data Pointer to the value.
// @param size Size of the value in bytes. Must be greater than
//             zero and no larger than field<uint32_t>::max (asserted).
// @param ec   Set to the error, if any occurred. Set to
//             error::key_exists if the key is already present in a
//             pool, a cached bucket, or the key file. If a previous
//             error was recorded (ecb_ is set), that error is
//             reported and nothing is done.
//
// Precondition (asserted): the store is open.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
insert(
void const* key,
void const* data,
nsize_t size,
error_code& ec)
{
using namespace detail;
using namespace std::chrono;
BOOST_ASSERT(is_open());
if(ecb_)
{
ec = ec_;
return;
}
// Data Record
BOOST_ASSERT(size > 0); // zero disallowed
BOOST_ASSERT(size <= field<uint32_t>::max); // too large
auto const h =
hash(key, s_->kh.key_size, s_->hasher);
// u_ is held for the whole call, including the existence check,
// the insertion and the optional sleep at the end.
std::lock_guard<std::mutex> u{u_};
{
// Existence check, performed under the shared lock
shared_lock_type m{m_};
if(s_->p1.find(key) != s_->p1.end() ||
s_->p0.find(key) != s_->p0.end())
{
ec = error::key_exists;
return;
}
auto const n = bucket_index(h, buckets_, modulus_);
auto const iter = s_->c1.find(n);
if(iter != s_->c1.end())
{
// The bucket is cached. exists() releases the shared lock
// once it has processed the first bucket.
auto const found = exists(
h, key, &m, iter->second, ec);
if(ec)
return;
if(found)
{
ec = error::key_exists;
return;
}
// m is now unlocked
}
else
{
// The bucket is not cached: read it from the key file.
// The generation lock is taken before the shared lock
// is released.
// VFALCO Audit for concurrency
genlock<gentex> g{g_};
m.unlock();
buffer buf;
buf.reserve(s_->kh.block_size);
bucket b{s_->kh.block_size, buf.get()};
// Bucket n is read from block n + 1 of the key file
b.read(s_->kf,
static_cast<noff_t>(n + 1) * s_->kh.block_size, ec);
if(ec)
return;
auto const found = exists(h, key, nullptr, b, ec);
if(ec)
return;
if(found)
{
ec = error::key_exists;
return;
}
}
}
// Perform insert
unique_lock_type m{m_};
s_->p1.insert(h, key, data, size);
// Estimate the rate of incoming work: the pending data size plus
// three blocks per pending element, divided by the time elapsed
// since s_->when (at least one clock tick).
auto const now = clock_type::now();
auto const elapsed = duration_cast<duration<float>>(
now > s_->when ? now - s_->when : clock_type::duration{1});
auto const work = s_->p1.data_size() +
3 * s_->p1.size() * s_->kh.block_size;
auto const rate = static_cast<std::size_t>(
std::ceil(work / elapsed.count()));
// Throttle only when a rate has been recorded in the state
// (non-zero) and the estimate exceeds it.
auto const sleep =
s_->rate && rate > s_->rate;
m.unlock();
if(sleep)
std::this_thread::sleep_for(milliseconds{25});
}
// Fetch key in loaded bucket b or its spills.
//
// Every entry of the bucket whose hash equals h is examined: its
// data record is read from the data file and the stored key is
// compared with `key`. On a match the callback is invoked with the
// value and the function returns. When the bucket is exhausted, the
// search continues in the bucket's spill record, if there is one.
//
// @param ec Set to error::key_not_found if the key is in neither
//           the bucket nor its spills, or to the error reported
//           by a failed read.
//
template<class Hasher, class File>
template<class Callback>
void
basic_store<Hasher, File>::
fetch(
    detail::nhash_t h,
    void const* key,
    detail::bucket b,
    Callback&& callback,
    error_code& ec)
{
    using namespace detail;
    buffer record;      // holds one key + value
    buffer spillBuf;    // holds one spill bucket
    for(;;)
    {
        for(auto i = b.lower_bound(h); i < b.size(); ++i)
        {
            auto const item = b[i];
            if(item.hash != h)
                break;
            // Data Record
            auto const len =
                s_->kh.key_size +       // Key
                item.size;              // Value
            record.reserve(len);
            s_->df.read(item.offset +
                field<uint48_t>::size,  // Size
                    record.get(), len, ec);
            if(ec)
                return;
            if(std::memcmp(record.get(), key,
                    s_->kh.key_size) != 0)
                continue;
            callback(
                record.get() + s_->kh.key_size, item.size);
            return;
        }
        auto const spill = b.spill();
        if(! spill)
        {
            ec = error::key_not_found;
            return;
        }
        spillBuf.reserve(s_->kh.block_size);
        b = bucket(s_->kh.block_size,
            spillBuf.get());
        b.read(s_->df, spill, ec);
        if(ec)
            return;
    }
}
// Returns `true` if the key exists
// lock is unlocked after the first bucket processed
//
// Every entry of the bucket whose hash equals h is examined by
// reading just the key portion of its data record and comparing it
// with `key`. When the bucket is exhausted the search continues in
// its spill record, if there is one.
//
// @param lock May be nullptr. Otherwise, if it still owns the
//             lock once the first bucket has been examined without
//             a match, it is unlocked.
// @param ec   Set to the error reported by a failed read, in
//             which case `false` is returned.
//
template<class Hasher, class File>
bool
basic_store<Hasher, File>::
exists(
    detail::nhash_t h,
    void const* key,
    detail::shared_lock_type* lock,
    detail::bucket b,
    error_code& ec)
{
    using namespace detail;
    // One allocation: room for a key followed by a spill bucket
    buffer buf{s_->kh.key_size + s_->kh.block_size};
    void* pk = buf.get();
    void* pb = buf.get() + s_->kh.key_size;
    for(;;)
    {
        for(auto i = b.lower_bound(h); i < b.size(); ++i)
        {
            auto const item = b[i];
            if(item.hash != h)
                break;
            // Data Record
            s_->df.read(item.offset +
                field<uint48_t>::size,      // Size
                    pk, s_->kh.key_size, ec);   // Key
            if(ec)
                return false;
            auto const same =
                std::memcmp(pk, key, s_->kh.key_size) == 0;
            if(same)
                return true;
        }
        auto spill = b.spill();
        if(lock && lock->owns_lock())
            lock->unlock();
        if(! spill)
            return false;
        b = bucket(s_->kh.block_size, pb);
        b.read(s_->df, spill, ec);
        if(ec)
            return false;
    }
}
// Split the bucket in b1 to b2
// b1 must be loaded
// tmp is used as a temporary buffer
// splits are written but not the new buckets
//
// @param b1      The bucket being split (index n1)
// @param b2      The bucket receiving entries (index n2)
// @param tmp     Scratch bucket used to read spill records
// @param buckets The bucket count used to compute each entry's index
// @param modulus The modulus used to compute each entry's index
// @param w       The bulk writer for the data file; passed to
//                maybe_spill and flushed before reading a spill
//                record that may still be in its buffer
// @param ec      Set to the error, if any occurred
//
template<class Hasher, class File>
void
basic_store<Hasher, File>::
split(
detail::bucket& b1,
detail::bucket& b2,
detail::bucket& tmp,
nbuck_t n1,
nbuck_t n2,
nbuck_t buckets,
nbuck_t modulus,
detail::bulk_writer<File>& w,
error_code& ec)
{
using namespace detail;
// Trivial case: split empty bucket
if(b1.empty())
return;
// Split
// Entries whose index is n2 move to b2. The index i only advances
// when the current entry stays in b1, because erase removes the
// entry at position i.
for(std::size_t i = 0; i < b1.size();)
{
auto const e = b1[i];
auto const n = bucket_index(e.hash, buckets, modulus);
// n1 and n2 are otherwise only used by the assertion
(void)n1;
(void)n2;
BOOST_ASSERT(n==n1 || n==n2);
if(n == n2)
{
b2.insert(e.offset, e.size, e.hash);
b1.erase(i);
}
else
{
++i;
}
}
// Redistribute the entries of every spill record chained from b1
noff_t spill = b1.spill();
if(spill)
{
// Detach the chain from b1; entries are re-inserted below
b1.spill(0);
do
{
// If any part of the spill record is
// in the write buffer then flush first
if(spill + bucket_size(s_->kh.capacity) >
w.offset() - w.size())
{
w.flush(ec);
if(ec)
return;
}
tmp.read(s_->df, spill, ec);
if(ec)
return;
for(std::size_t i = 0; i < tmp.size(); ++i)
{
auto const e = tmp[i];
auto const n = bucket_index(
e.hash, buckets, modulus);
BOOST_ASSERT(n==n1 || n==n2);
// maybe_spill is called on the destination bucket
// before each insertion.
if(n == n2)
{
maybe_spill(b2, w, ec);
if(ec)
return;
b2.insert(e.offset, e.size, e.hash);
}
else
{
maybe_spill(b1, w, ec);
if(ec)
return;
b1.insert(e.offset, e.size, e.hash);
}
}
// Continue with the next spill record in the chain
spill = tmp.spill();
}
while(spill);
}
}
// Returns bucket n, loading it if necessary.
//
// The bucket is looked for in c1, then in c0 (in which case it is
// also inserted into c1), and finally read from the key file into
// `buf`, after which it is inserted into both caches. In every
// case the bucket returned is the entry held by c1.
//
// @param ec Set to the error reported by a failed read, in which
//           case a default constructed bucket is returned.
template<class Hasher, class File>
detail::bucket
basic_store<Hasher, File>::
load(
    nbuck_t n,
    detail::cache& c1,
    detail::cache& c0,
    void* buf,
    error_code& ec)
{
    using namespace detail;
    auto hit = c1.find(n);
    if(hit != c1.end())
        return hit->second;
    hit = c0.find(n);
    if(hit != c0.end())
        return c1.insert(n, hit->second)->second;
    // Not cached: bucket n is read from block n + 1 of the key file
    bucket fresh{s_->kh.block_size, buf};
    auto const offset =
        static_cast<noff_t>(n + 1) * s_->kh.block_size;
    fresh.read(s_->kf, offset, ec);
    if(ec)
        return {};
    c0.insert(n, fresh);
    return c1.insert(n, fresh)->second;
}
// Write all pending inserts to disk as a single recoverable step.
//
// Sequence, as implemented below:
//   1. Swap the pools so `p0` holds the batch to commit, then drop the lock.
//   2. Write and sync a log file header recording the current key and data
//      file sizes (the rollback information).
//   3. Append every value in `p0` to the data file, then insert each entry
//      into its bucket, splitting buckets when `frac_` reaches `thresh_`.
//      Buckets as read from the key file are kept in `c0`, the modified
//      view in `c1`.
//   4. Under the lock, publish `c1`, clear `p0`, and update the bucket
//      count and modulus.
//   5. Write the buckets held in `c0` to the log file and sync it.
//   6. Write the buckets in the published cache to the key file, sync the
//      data and key files, then truncate and sync the log file.
//   7. Re-acquire the lock and clear the published cache.
//
// @param m    Lock on the store mutex. Must be owned on entry. It is owned
//             again on a successful return; every error return happens
//             while it is released.
// @param work Receives the pool's data size plus the bucket bytes handled
//             (block_size * (2 * c0.size() + c1.size())). It is assigned
//             before the first possible error return.
// @param ec   Set to the first error encountered.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
commit(detail::unique_lock_type& m,
    std::size_t& work, error_code& ec)
{
    using namespace detail;
    BOOST_ASSERT(m.owns_lock());
    BOOST_ASSERT(! s_->p1.empty());
    swap(s_->p0, s_->p1);
    m.unlock();
    work = s_->p0.data_size();
    cache c0(s_->kh.key_size, s_->kh.block_size, "c0");
    cache c1(s_->kh.key_size, s_->kh.block_size, "c1");
    // 0.63212 ~= 1 - 1/e
    {
        auto const size = static_cast<std::size_t>(
            std::ceil(0.63212 * s_->p0.size()));
        c0.reserve(size);
        c1.reserve(size);
    }
    buffer buf1{s_->kh.block_size};
    buffer buf2{s_->kh.block_size};
    bucket tmp{s_->kh.block_size, buf1.get()};
    // Prepare rollback information
    log_file_header lh;
    lh.version = currentVersion;            // Version
    lh.uid = s_->kh.uid;                    // UID
    lh.appnum = s_->kh.appnum;              // Appnum
    lh.key_size = s_->kh.key_size;          // Key Size
    lh.salt = s_->kh.salt;                  // Salt
    lh.pepper = pepper<Hasher>(lh.salt);    // Pepper
    lh.block_size = s_->kh.block_size;      // Block Size
    lh.key_file_size = s_->kf.size(ec);     // Key File Size
    if(ec)
        return;
    lh.dat_file_size = s_->df.size(ec);     // Data File Size
    if(ec)
        return;
    write(s_->lf, lh, ec);
    if(ec)
        return;
    // Checkpoint
    s_->lf.sync(ec);
    if(ec)
        return;
    // Append data and spills to data file
    auto modulus = modulus_;
    auto buckets = buckets_;
    {
        // Bulk write to avoid write amplification
        auto const size = s_->df.size(ec);
        if(ec)
            return;
        bulk_writer<File> w{s_->df, size, dataWriteSize_};
        // Write inserted data to the data file
        for(auto& e : s_->p0)
        {
            // VFALCO This could be UB since other
            //        threads are reading other data members
            //        of this object in memory
            e.second = w.offset();
            auto os = w.prepare(value_size(
                e.first.size, s_->kh.key_size), ec);
            if(ec)
                return;
            // Data Record
            write<uint48_t>(os, e.first.size);          // Size
            write(os, e.first.key, s_->kh.key_size);    // Key
            write(os, e.first.data, e.first.size);      // Data
        }
        // Do inserts, splits, and build view
        // of original and modified buckets.
        // The pool is not modified in this loop, so bind by
        // reference instead of copying every element.
        for(auto const& e : s_->p0)
        {
            // VFALCO Should this be >= or > ?
            if((frac_ += 65536) >= thresh_)
            {
                // split
                frac_ -= thresh_;
                if(buckets == modulus)
                    modulus *= 2;
                auto const n1 = buckets - (modulus / 2);
                auto const n2 = buckets++;
                auto b1 = load(n1, c1, c0, buf2.get(), ec);
                if(ec)
                    return;
                auto b2 = c1.create(n2);
                // If split spills, the writer is
                // flushed which can amplify writes.
                split(b1, b2, tmp, n1, n2,
                    buckets, modulus, w, ec);
                if(ec)
                    return;
            }
            // Insert
            auto const n = bucket_index(
                e.first.hash, buckets, modulus);
            auto b = load(n, c1, c0, buf2.get(), ec);
            if(ec)
                return;
            // This can amplify writes if it spills.
            maybe_spill(b, w, ec);
            if(ec)
                return;
            b.insert(e.second, e.first.size, e.first.hash);
        }
        w.flush(ec);
        if(ec)
            return;
    }
    work += s_->kh.block_size * (2 * c0.size() + c1.size());
    // Give readers a view of the new buckets.
    // This might be slightly better than the old
    // view since there could be fewer spills.
    m.lock();
    swap(c1, s_->c1);
    s_->p0.clear();
    buckets_ = buckets;
    modulus_ = modulus;
    g_.start();
    m.unlock();
    // Write clean buckets to log file
    {
        auto const size = s_->lf.size(ec);
        if(ec)
            return;
        bulk_writer<File> w{s_->lf, size, logWriteSize_};
        for(auto const& e : c0)
        {
            // Log Record
            auto os = w.prepare(
                field<std::uint64_t>::size +    // Index
                e.second.actual_size(), ec);    // Bucket
            if(ec)
                return;
            // Log Record
            write<std::uint64_t>(os, e.first);  // Index
            e.second.write(os);                 // Bucket
        }
        c0.clear();
        w.flush(ec);
        if(ec)
            return;
        s_->lf.sync(ec);
        if(ec)
            return;
    }
    g_.finish();
    // Write new buckets to key file
    for(auto const& e : s_->c1)
    {
        // Widen before multiplying, exactly as load() does, so the
        // byte offset cannot wrap when the index and block size are
        // narrower than a file offset.
        e.second.write(s_->kf,
            static_cast<noff_t>(e.first + 1) * s_->kh.block_size, ec);
        if(ec)
            return;
    }
    // Finalize the commit
    s_->df.sync(ec);
    if(ec)
        return;
    s_->kf.sync(ec);
    if(ec)
        return;
    s_->lf.trunc(0, ec);
    if(ec)
        return;
    s_->lf.sync(ec);
    if(ec)
        return;
    // Cache is no longer needed, all fetches will go straight
    // to disk again. Do this after the sync, otherwise readers
    // might get blocked longer due to the extra I/O.
    m.lock();
    s_->c1.clear();
}
// Periodic commit loop.
//
// Each iteration takes the store mutex and, if the insert pool `p1` is
// not empty, commits it and updates `s_->rate` from the amount of work
// done and the time elapsed since `s_->when`. It then waits on `cv_`
// until one second past `s_->when`, or until `open_` becomes false.
// When `open_` is false the loop exits and one last commit is made for
// anything still in `p1`.
//
// On any commit failure the error is left in `ec_`, `ecb_` is set to
// true, and the function returns.
// NOTE(review): presumably this is the body of the store's background
// thread - confirm against the code that starts it.
template<class Hasher, class File>
void
basic_store<Hasher, File>::
run()
{
using namespace std::chrono;
using namespace detail;
#if NUDB_DEBUG_LOG
beast::unit_test::dstream dout{std::cout};
#endif
for(;;)
{
unique_lock_type m{m_};
if(! s_->p1.empty())
{
// commit() assigns `work` before any of its error returns
std::size_t work;
commit(m, work, ec_);
if(ec_)
{
ecb_.store(true);
return;
}
// A successful commit returns with the lock held again
BOOST_ASSERT(m.owns_lock());
auto const now = clock_type::now();
// Fall back to a single clock tick so the division below
// never sees a zero or negative interval
auto const elapsed = duration_cast<duration<float>>(
now > s_->when ? now - s_->when : clock_type::duration{1});
// Work per second, rounded up
s_->rate = static_cast<std::size_t>(
std::ceil(work / elapsed.count()));
#if NUDB_DEBUG_LOG
dout <<
"work=" << work <<
", time=" << elapsed.count() <<
", rate=" << s_->rate <<
"\n";
#endif
}
s_->p1.periodic_activity();
// Sleep until one second after the last timestamp, waking
// early only if the store is no longer open
cv_.wait_until(m, s_->when + seconds{1},
[this]{ return ! open_; });
if(! open_)
break;
s_->when = clock_type::now();
}
// Final commit of whatever is still pending
{
unique_lock_type m{m_};
std::size_t work;
if(! s_->p1.empty())
commit(m, work, ec_);
}
if(ec_)
{
ecb_.store(true);
return;
}
}
} // nudb
#endif

View File

@@ -0,0 +1,163 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_CREATE_IPP
#define NUDB_IMPL_CREATE_IPP
#include <nudb/concepts.hpp>
#include <nudb/native_file.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstring>
#include <random>
#include <stdexcept>
#include <utility>
namespace nudb {
namespace detail {
// Generate a random 64-bit unique identifier.
//
// The value is drawn from a uniform distribution over the full range of
// std::uint64_t, independent of the width of std::size_t on the target.
//
// @return A random 64-bit value.
template<class = void>
std::uint64_t
make_uid()
{
    std::random_device rng;
    // random_device produces unsigned-int sized values, so draw twice to
    // give the 64-bit engine a full-width seed.
    auto const hi = static_cast<std::uint64_t>(rng());
    auto const lo = static_cast<std::uint64_t>(rng());
    std::mt19937_64 gen {(hi << 32) ^ lo};
    std::uniform_int_distribution <std::uint64_t> dist;
    return dist(gen);
}
} // detail
// Generate a random 64-bit salt value.
//
// The value is drawn from a uniform distribution over the full range of
// std::uint64_t, independent of the width of std::size_t on the target.
//
// @return A random 64-bit value.
template<class>
std::uint64_t
make_salt()
{
    std::random_device rng;
    // random_device produces unsigned-int sized values, so draw twice to
    // give the 64-bit engine a full-width seed.
    auto const hi = static_cast<std::uint64_t>(rng());
    auto const lo = static_cast<std::uint64_t>(rng());
    std::mt19937_64 gen {(hi << 32) ^ lo};
    std::uniform_int_distribution <std::uint64_t> dist;
    return dist(gen);
}
// Create a new, empty database consisting of a data file, a key file
// and a log file.
//
// The arguments are validated first; nothing is created if validation
// fails. The three files are then created in the order data, key, log.
// The data and key file headers are written, one empty bucket is written
// to the key file, and all three files are synced. If any step after
// validation fails, every file this call managed to create is erased.
//
// @param dat_path    Path of the data file to create.
// @param key_path    Path of the key file to create.
// @param log_path    Path of the log file to create.
// @param appnum      Application-defined number stored in both headers.
// @param salt        Salt stored in the key file header.
// @param key_size    Size of a key; must be at least 1.
// @param blockSize   Key file block size; must fit in 16 bits and hold
//                    at least one bucket entry.
// @param load_factor Target bucket load; must be in the open range (0, 1).
// @param ec          Set to the error, if any.
// @param args        Arguments passed to the constructor of each File.
template<
    class Hasher,
    class File,
    class... Args
>
void
create(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    std::uint64_t appnum,
    std::uint64_t salt,
    nsize_t key_size,
    nsize_t blockSize,
    float load_factor,
    error_code& ec,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    using namespace detail;

    // Argument validation
    if(key_size < 1)
    {
        ec = error::invalid_key_size;
        return;
    }
    if(blockSize > field<std::uint16_t>::max)
    {
        ec = error::invalid_block_size;
        return;
    }
    if(load_factor <= 0.f || load_factor >= 1.f)
    {
        ec = error::invalid_load_factor;
        return;
    }
    if(bucket_capacity(blockSize) < 1)
    {
        ec = error::invalid_block_size;
        return;
    }

    // Remember which files exist because of us, for cleanup
    bool dat_created = false;
    bool key_created = false;
    bool log_created = false;
    {
        File df(args...);
        File kf(args...);
        File lf(args...);
        // Single-pass loop: `break` abandons the attempt and falls
        // through to the cleanup below, after the files are closed.
        do
        {
            df.create(file_mode::append, dat_path, ec);
            if(ec)
                break;
            dat_created = true;

            kf.create(file_mode::append, key_path, ec);
            if(ec)
                break;
            key_created = true;

            lf.create(file_mode::append, log_path, ec);
            if(ec)
                break;
            log_created = true;

            dat_file_header dh;
            dh.version = currentVersion;
            dh.uid = make_uid();
            dh.appnum = appnum;
            dh.key_size = key_size;

            key_file_header kh;
            kh.version = currentVersion;
            kh.uid = dh.uid;
            kh.appnum = appnum;
            kh.key_size = key_size;
            kh.salt = salt;
            kh.pepper = pepper<Hasher>(salt);
            kh.block_size = blockSize;
            kh.load_factor = std::min<std::size_t>(
                static_cast<std::size_t>(
                    65536.0 * load_factor), 65535);

            write(df, dh, ec);
            if(ec)
                break;
            write(kf, kh, ec);
            if(ec)
                break;

            // One zero-filled, empty bucket right after the header block
            buffer buf{blockSize};
            std::memset(buf.get(), 0, blockSize);
            bucket b(blockSize, buf.get(), empty);
            b.write(kf, blockSize, ec);
            if(ec)
                break;

            // VFALCO Leave log file empty?
            df.sync(ec);
            if(ec)
                break;
            kf.sync(ec);
            if(ec)
                break;
            lf.sync(ec);
            if(ec)
                break;

            // Success
            return;
        }
        while(false);
    }
    // Failure: remove whatever this call created
    if(dat_created)
        erase_file(dat_path);
    if(key_created)
        erase_file(key_path);
    if(log_created)
        erase_file(log_path);
}
} // nudb
#endif

180
include/nudb/impl/error.ipp Normal file
View File

@@ -0,0 +1,180 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_ERROR_IPP
#define NUDB_IMPL_ERROR_IPP
namespace nudb {
// Return the error category used for all nudb error codes.
//
// The category is a function-local static, so every call returns a
// reference to the same object. Its name is "nudb". Values that do not
// match a known `error` enumerator produce the message "nudb error".
inline
error_category const&
nudb_category()
{
    struct cat_t : public error_category
    {
        char const*
        name() const noexcept override
        {
            return "nudb";
        }

        // Human readable text for the error value `ev`
        std::string
        message(int ev) const override
        {
            switch(static_cast<error>(ev))
            {
            case error::success:
                return "the operation completed successfully";
            case error::key_not_found:
                return "key not found";
            case error::key_exists:
                return "key already exists";
            case error::short_read:
                return "short read";
            case error::log_file_exists:
                return "a log file exists";
            case error::no_key_file:
                return "no key file";
            case error::too_many_buckets:
                return "too many buckets";
            case error::not_data_file:
                return "not a data file";
            case error::not_key_file:
                return "not a key file";
            case error::not_log_file:
                return "not a log file";
            case error::different_version:
                return "different version";
            case error::invalid_key_size:
                return "invalid key size";
            case error::invalid_block_size:
                return "invalid block size";
            case error::short_key_file:
                return "short key file";
            case error::short_bucket:
                return "short bucket";
            case error::short_spill:
                return "short spill";
            case error::short_data_record:
                return "short data record";
            case error::short_value:
                return "short value";
            case error::hash_mismatch:
                return "hash mismatch";
            case error::invalid_load_factor:
                return "invalid load factor";
            case error::invalid_capacity:
                return "invalid capacity";
            case error::invalid_bucket_count:
                return "invalid bucket count";
            case error::invalid_bucket_size:
                // Worded like every other message (was the raw
                // enumerator name with underscores).
                return "invalid bucket size";
            case error::incomplete_data_file_header:
                return "incomplete data file header";
            case error::incomplete_key_file_header:
                return "incomplete key file header";
            case error::invalid_log_record:
                return "invalid log record";
            case error::invalid_log_spill:
                return "invalid spill in log";
            case error::invalid_log_offset:
                return "invalid offset in log";
            case error::invalid_log_index:
                return "invalid index in log";
            case error::invalid_spill_size:
                return "invalid size in spill";
            case error::uid_mismatch:
                return "uid mismatch";
            case error::appnum_mismatch:
                return "appnum mismatch";
            case error::key_size_mismatch:
                return "key size mismatch";
            case error::salt_mismatch:
                return "salt mismatch";
            case error::pepper_mismatch:
                return "pepper mismatch";
            case error::block_size_mismatch:
                return "block size mismatch";
            case error::orphaned_value:
                return "orphaned value";
            case error::missing_value:
                return "missing value";
            case error::size_mismatch:
                return "size mismatch";
            case error::duplicate_value:
                return "duplicate value";
            default:
                return "nudb error";
            }
        }

        // Every value maps to a condition in this same category
        error_condition
        default_error_condition(int ev) const noexcept override
        {
            return error_condition{ev, *this};
        }

        // Equivalent only to the same value in this same category
        bool
        equivalent(int ev,
            error_condition const& ec) const noexcept override
        {
            return ec.value() == ev && &ec.category() == this;
        }

        bool
        equivalent(error_code const& ec, int ev) const noexcept override
        {
            return ec.value() == ev && &ec.category() == this;
        }
    };
    static cat_t const cat{};
    return cat;
}
} // nudb
#endif

View File

@@ -0,0 +1,259 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_POSIX_FILE_IPP
#define NUDB_IMPL_POSIX_FILE_IPP
#include <boost/assert.hpp>
#include <limits.h>
namespace nudb {
// Destructor: closes the descriptor if the file is still open.
inline
posix_file::
~posix_file()
{
    this->close();
}
// Move constructor: takes over the descriptor held by `other` and
// leaves `other` without one (-1).
inline
posix_file::
posix_file(posix_file&& other)
    : fd_(-1)
{
    fd_ = other.fd_;
    other.fd_ = -1;
}
// Move assignment: closes the current descriptor, then takes over the
// one held by `other`, leaving `other` without one (-1).
// Self-assignment is a no-op.
inline
posix_file&
posix_file::
operator=(posix_file&& other)
{
    if(this != &other)
    {
        close();
        fd_ = other.fd_;
        other.fd_ = -1;
    }
    return *this;
}
// Close the descriptor if there is one and mark the file as not open.
// Calling this on a file that is not open does nothing.
inline
void
posix_file::
close()
{
    if(fd_ == -1)
        return;
    ::close(fd_);
    fd_ = -1;
}
// Create a new file at `path` and open it with the given mode.
//
// The path is first opened without O_CREAT to detect an existing file;
// if that succeeds the call fails with errc::file_exists. Any probe
// failure other than ENOENT is reported as is. Otherwise the file is
// created with permissions 0644.
//
// On every error return the object is left not open. If the access
// pattern hint cannot be applied, the file stays on disk but the
// descriptor is closed.
//
// @param mode Determines the open flags and access pattern hint.
// @param path Path of the file to create.
// @param ec   Set to the error, if any.
inline
void
posix_file::
create(file_mode mode, path_type const& path, error_code& ec)
{
    auto const result = flags(mode);
    BOOST_ASSERT(! is_open());
    // Probe for an existing file
    fd_ = ::open(path.c_str(), result.first);
    if(fd_ != -1)
    {
        ::close(fd_);
        fd_ = -1;
        ec = error_code{errc::file_exists, generic_category()};
        return;
    }
    int errnum = errno;
    if(errnum != ENOENT)
        return err(errnum, ec);
    fd_ = ::open(path.c_str(), result.first | O_CREAT, 0644);
    if(fd_ == -1)
        return last_err(ec);
#ifndef __APPLE__
    {
        // posix_fadvise reports failure through its return value and
        // does not set errno, so pass the returned number on directly.
        int const rc = ::posix_fadvise(fd_, 0, 0, result.second);
        if(rc != 0)
        {
            close();
            return err(rc, ec);
        }
    }
#endif
}
// Open the existing file at `path` with the given mode.
//
// On every error return the object is left not open.
//
// @param mode Determines the open flags and access pattern hint.
// @param path Path of the file to open.
// @param ec   Set to the error, if any.
inline
void
posix_file::
open(file_mode mode, path_type const& path, error_code& ec)
{
    BOOST_ASSERT(! is_open());
    auto const result = flags(mode);
    fd_ = ::open(path.c_str(), result.first);
    if(fd_ == -1)
        return last_err(ec);
#ifndef __APPLE__
    {
        // posix_fadvise reports failure through its return value and
        // does not set errno, so pass the returned number on directly.
        int const rc = ::posix_fadvise(fd_, 0, 0, result.second);
        if(rc != 0)
        {
            close();
            return err(rc, ec);
        }
    }
#endif
}
// Remove the file at `path` using unlink().
//
// @param path Path of the file to remove.
// @param ec   Receives the errno value if unlink() fails.
inline
void
posix_file::
erase(path_type const& path, error_code& ec)
{
    if(::unlink(path.c_str()) == 0)
        return;
    // Capture errno before anything else can change it
    int const ev = errno;
    err(ev, ec);
}
// Return the size of the file in bytes, as reported by fstat().
//
// @param ec Set if fstat() fails.
// @return   The file size, or 0 on error.
inline
std::uint64_t
posix_file::
size(error_code& ec) const
{
    static_assert(sizeof(stat::st_size) == sizeof(std::uint64_t), "");
    struct stat st;
    if(::fstat(fd_, &st) == 0)
        return st.st_size;
    last_err(ec);
    return 0;
}
// Read exactly `bytes` bytes starting at `offset` into `buffer`.
//
// pread() is called repeatedly until the whole range has been read.
// Calls interrupted by a signal (EINTR) are retried. Reaching the end
// of the file before `bytes` bytes were read yields error::short_read.
//
// @param offset File offset of the first byte to read.
// @param buffer Destination memory, at least `bytes` bytes long.
// @param bytes  Number of bytes to read.
// @param ec     Set to the error, if any.
inline
void
posix_file::
read(std::uint64_t offset,
    void* buffer, std::size_t bytes, error_code& ec)
{
    static_assert(sizeof(off_t) >= sizeof(offset), "");
    while(bytes > 0)
    {
        // A single pread() cannot transfer more than SSIZE_MAX bytes
        auto const chunk = static_cast<ssize_t>(
            std::min(bytes, static_cast<std::size_t>(SSIZE_MAX)));
        auto const got = ::pread(fd_, buffer, chunk, offset);
        if(got == 0)
        {
            ec = error::short_read;
            return;
        }
        if(got == -1)
        {
            auto const ev = errno;
            if(ev != EINTR)
                return err(ev, ec);
            continue;
        }
        offset += got;
        bytes -= got;
        buffer = static_cast<char*>(buffer) + got;
    }
}
// Write exactly `bytes` bytes from `buffer` starting at `offset`.
//
// pwrite() is called repeatedly until the whole range has been written.
// Calls interrupted by a signal (EINTR) are retried.
//
// @param offset File offset of the first byte to write.
// @param buffer Source memory, at least `bytes` bytes long.
// @param bytes  Number of bytes to write.
// @param ec     Set to the error, if any.
inline
void
posix_file::
write(std::uint64_t offset,
    void const* buffer, std::size_t bytes, error_code& ec)
{
    static_assert(sizeof(off_t) >= sizeof(offset), "");
    while(bytes > 0)
    {
        // A single pwrite() cannot transfer more than SSIZE_MAX bytes
        auto const chunk = static_cast<ssize_t>(
            std::min(bytes, static_cast<std::size_t>(SSIZE_MAX)));
        auto const put = ::pwrite(fd_, buffer, chunk, offset);
        if(put == -1)
        {
            auto const ev = errno;
            if(ev != EINTR)
                return err(ev, ec);
            continue;
        }
        offset += put;
        bytes -= put;
        buffer = static_cast<char const*>(buffer) + put;
    }
}
// Flush the file to storage with fsync(), retrying when the call is
// interrupted by a signal (EINTR).
//
// @param ec Set to the error, if any.
inline
void
posix_file::
sync(error_code& ec)
{
    while(::fsync(fd_) != 0)
    {
        auto const ev = errno;
        if(ev != EINTR)
            return err(ev, ec);
    }
}
// Set the file size to `length` bytes with ftruncate(), retrying when
// the call is interrupted by a signal (EINTR).
//
// @param length New size of the file in bytes.
// @param ec     Set to the error, if any.
inline
void
posix_file::
trunc(std::uint64_t length, error_code& ec)
{
    while(::ftruncate(fd_, length) != 0)
    {
        auto const ev = errno;
        if(ev != EINTR)
            return err(ev, ec);
    }
}
// Translate a file_mode into the pair (open() flags, posix_fadvise advice).
//
//   scan   -> O_RDONLY,            POSIX_FADV_SEQUENTIAL
//   read   -> O_RDONLY,            POSIX_FADV_RANDOM
//   append -> O_RDWR | O_APPEND,   POSIX_FADV_RANDOM
//   write  -> O_RDWR,              POSIX_FADV_NORMAL
//
// On __APPLE__ the advice element is always 0. A mode not listed above
// yields {0, 0}.
inline
std::pair<int, int>
posix_file::
flags(file_mode mode)
{
    int oflag = 0;
    int advice = 0;
    switch(mode)
    {
    case file_mode::scan:
        oflag = O_RDONLY;
#ifndef __APPLE__
        advice = POSIX_FADV_SEQUENTIAL;
#endif
        break;

    case file_mode::read:
        oflag = O_RDONLY;
#ifndef __APPLE__
        advice = POSIX_FADV_RANDOM;
#endif
        break;

    case file_mode::append:
        oflag = O_RDWR | O_APPEND;
#ifndef __APPLE__
        advice = POSIX_FADV_RANDOM;
#endif
        break;

    case file_mode::write:
        oflag = O_RDWR;
#ifndef __APPLE__
        advice = POSIX_FADV_NORMAL;
#endif
        break;
    }
    return {oflag, advice};
}
} // nudb
#endif

View File

@@ -0,0 +1,209 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_RECOVER_IPP
#define NUDB_IMPL_RECOVER_IPP
#include <nudb/concepts.hpp>
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <boost/assert.hpp>
#include <algorithm>
#include <cstddef>
#include <string>
namespace nudb {
// Roll a database back to the state recorded in its log file.
//
// Behaviour, as implemented below:
//   - If the log file does not exist there is nothing to do and the call
//     succeeds.
//   - If the key file is no larger than a key file header, the log file
//     and key file are erased and error::no_key_file is returned.
//   - If the log file is too short to hold a header, it is simply removed.
//   - Otherwise every complete bucket record in the log is written back
//     to the key file, the data and key files are truncated to the sizes
//     stored in the log header, and the log file is removed. A log header
//     with a key file size of 0 causes the key file to be erased instead.
//
// A truncated trailing log record ends the replay without error.
//
// @param dat_path Path to the data file.
// @param key_path Path to the key file.
// @param log_path Path to the log file.
// @param ec       Set to the error, if any.
// @param args     Arguments passed to the constructor of each File.
template<
    class Hasher,
    class File,
    class... Args>
void
recover(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    error_code& ec,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    using namespace detail;

    // Open data file
    File df{args...};
    df.open(file_mode::write, dat_path, ec);
    if(ec)
        return;
    auto const dataFileSize = df.size(ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;

    // Open key file
    File kf{args...};
    kf.open(file_mode::write, key_path, ec);
    if(ec)
        return;
    auto const keyFileSize = kf.size(ec);
    if(ec)
        return;
    if(keyFileSize <= key_file_header::size)
    {
        // The key file holds no buckets: discard it and the log
        kf.close();
        erase_file(log_path, ec);
        if(ec)
            return;
        File::erase(key_path, ec);
        if(ec)
            return;
        ec = error::no_key_file;
        return;
    }

    // Open log file
    File lf{args...};
    lf.open(file_mode::append, log_path, ec);
    if(ec == errc::no_such_file_or_directory)
    {
        // No log file means nothing to roll back
        ec = {};
        return;
    }
    if(ec)
        return;
    auto const logFileSize = lf.size(ec);
    if(ec)
        return;

    // Read log file header
    log_file_header lh;
    read(lf, lh, ec);
    if(ec == error::short_read)
    {
        BOOST_ASSERT(keyFileSize > key_file_header::size);
        ec = {};
        goto clear_log;
    }
    if(ec)
        return;
    verify<Hasher>(lh, ec);
    if(ec)
        return;
    if(lh.key_file_size == 0)
        goto trunc_files;
    {
        // Read key file header
        key_file_header kh;
        read(kf, kh, ec);
        if(ec)
            return;
        verify<Hasher>(kh, ec);
        if(ec)
            return;
        verify<Hasher>(dh, kh, ec);
        if(ec)
            return;
        verify<Hasher>(kh, lh, ec);
        if(ec)
            return;

        auto const readSize = 1024 * kh.block_size;
        auto const bucketSize = bucket_size(kh.capacity);
        buffer buf{kh.block_size};
        bucket b{kh.block_size, buf.get()};
        bulk_reader<File> r{lf,
            log_file_header::size, logFileSize, readSize};
        while(! r.eof())
        {
            // Log Record
            auto is = r.prepare(field<std::uint64_t>::size, ec);
            // Log file is incomplete, so roll back.
            if(ec == error::short_read)
            {
                ec = {};
                break;
            }
            if(ec)
                return;
            nsize_t n;
            {
                std::uint64_t v;
                // VFALCO This should have been a uint32_t
                read<std::uint64_t>(is, v);             // Index
                n = static_cast<nsize_t>(v);
                // The index comes from the file, so a value that does
                // not survive the narrowing is reported as corruption
                // rather than silently truncated.
                if(n != v)
                {
                    ec = error::invalid_log_index;
                    return;
                }
            }
            b.read(r, ec);                              // Bucket
            if(ec == error::short_read)
            {
                ec = {};
                break;
            }
            // Any other read failure must stop the replay; continuing
            // would write an unread bucket into the key file.
            if(ec)
                return;
            if(b.spill() && b.spill() + bucketSize > dataFileSize)
            {
                ec = error::invalid_log_spill;
                return;
            }
            if(n > kh.buckets)
            {
                ec = error::invalid_log_index;
                return;
            }
            b.write(kf, static_cast<noff_t>(n + 1) * kh.block_size, ec);
            if(ec)
                return;
        }
    }
trunc_files:
    df.trunc(lh.dat_file_size, ec);
    if(ec)
        return;
    df.sync(ec);
    if(ec)
        return;
    if(lh.key_file_size != 0)
    {
        kf.trunc(lh.key_file_size, ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }
    else
    {
        kf.close();
        File::erase(key_path, ec);
        if(ec)
            return;
    }
clear_log:
    lf.trunc(0, ec);
    if(ec)
        return;
    lf.sync(ec);
    if(ec)
        return;
    lf.close();
    File::erase(log_path, ec);
    if(ec)
        return;
}
} // nudb
#endif

248
include/nudb/impl/rekey.ipp Normal file
View File

@@ -0,0 +1,248 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_REKEY_IPP
#define NUDB_IMPL_REKEY_IPP
#include <nudb/concepts.hpp>
#include <nudb/create.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <cmath>
namespace nudb {
// VFALCO Should this delete the key file on an error?
// Build a new key file from an existing data file.
//
// The call fails with error::log_file_exists if a log file is already
// present. Otherwise it creates the key file, writes a log file whose
// header records a key file size of 0 and the current data file size,
// pre-allocates the key file, and then fills it in passes. Each pass
// scans the whole data file and builds the buckets of one contiguous
// chunk in memory before writing the chunk out. The log file is erased
// at the end.
//
// @param dat_path   Path to the existing data file.
// @param key_path   Path of the key file to create.
// @param log_path   Path of the log file used while rekeying.
// @param blockSize  Block size for the new key file.
// @param loadFactor Target bucket load used to size the key file.
// @param itemCount  Number of items used to compute the bucket count.
// @param bufferSize Memory budget in bytes; bufferSize / blockSize
//                   buckets (at least one) are built per pass.
// @param ec         Set to the error, if any.
// @param progress   Called as progress(amount, total) while working.
// @param args       Arguments passed to the constructor of each File.
template<
    class Hasher,
    class File,
    class Progress,
    class... Args
>
void
rekey(
    path_type const& dat_path,
    path_type const& key_path,
    path_type const& log_path,
    std::size_t blockSize,
    float loadFactor,
    std::uint64_t itemCount,
    std::size_t bufferSize,
    error_code& ec,
    Progress&& progress,
    Args&&... args)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    using namespace detail;
    auto const readSize = 1024 * block_size(dat_path);
    auto const writeSize = 16 * block_size(key_path);

    // Open data file for reading and appending
    File df{args...};
    df.open(file_mode::append, dat_path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    auto const dataFileSize = df.size(ec);
    if(ec)
        return;

    // Make sure log file doesn't exist
    File lf{args...};
    lf.open(file_mode::read, log_path, ec);
    if(! ec)
        ec = error::log_file_exists;
    if(ec != errc::no_such_file_or_directory)
        return;
    ec = {};

    // Set up key file header
    key_file_header kh;
    kh.version = currentVersion;
    kh.uid = dh.uid;
    kh.appnum = dh.appnum;
    kh.key_size = dh.key_size;
    kh.salt = make_salt();
    kh.pepper = pepper<Hasher>(kh.salt);
    kh.block_size = blockSize;
    kh.load_factor = std::min<std::size_t>(
        static_cast<std::size_t>(65536.0 * loadFactor), 65535);
    kh.buckets = static_cast<std::size_t>(
        std::ceil(itemCount /(
            bucket_capacity(kh.block_size) * loadFactor)));
    kh.modulus = ceil_pow2(kh.buckets);

    // Create key file
    File kf{args...};
    kf.create(file_mode::write, key_path, ec);
    if(ec)
        return;
    // Write key file header
    // Note, file size is less than any valid block_size here
    {
        std::array<std::uint8_t, key_file_header::size> buf;
        ostream os{buf.data(), buf.size()};
        write(os, kh);
        kf.write(0, buf.data(), buf.size(), ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }

    // Create log file
    lf.create(file_mode::append, log_path, ec);
    if(ec)
        return;
    // Write log file header
    {
        log_file_header lh;
        lh.version = currentVersion;            // Version
        lh.uid = kh.uid;                        // UID
        lh.appnum = kh.appnum;                  // Appnum
        lh.key_size = kh.key_size;              // Key Size
        lh.salt = kh.salt;                      // Salt
        lh.pepper = pepper<Hasher>(kh.salt);    // Pepper
        lh.block_size = kh.block_size;          // Block Size
        lh.key_file_size = 0;                   // Key File Size
        lh.dat_file_size = dataFileSize;        // Data File Size
        write(lf, lh, ec);
        if(ec)
            return;
        lf.sync(ec);
        if(ec)
            return;
    }

    // Create full key file
    buffer buf{kh.block_size};
    {
        // Write key file header
        std::memset(buf.get(), 0, kh.block_size);
        ostream os{buf.get(), kh.block_size};
        write(os, kh);
        kf.write(0, buf.get(), buf.size(), ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
        // Pre-allocate space for the entire key file
        std::uint8_t zero = 0;
        kf.write(
            static_cast<noff_t>(kh.buckets + 1) * kh.block_size - 1,
                &zero, 1, ec);
        if(ec)
            return;
        kf.sync(ec);
        if(ec)
            return;
    }

    // Build contiguous sequential sections of the
    // key file using multiple passes over the data.
    //
    auto const chunkSize = std::max<std::size_t>(1,
        bufferSize / kh.block_size);
    // Calculate work required
    auto const passes =
        (kh.buckets + chunkSize - 1) / chunkSize;
    auto const nwork = passes * dataFileSize;
    progress(0, nwork);

    buf.reserve(chunkSize * kh.block_size);
    bulk_writer<File> dw{df, dataFileSize, writeSize};
    for(nbuck_t b0 = 0; b0 < kh.buckets; b0 += chunkSize)
    {
        auto const b1 = std::min<std::size_t>(b0 + chunkSize, kh.buckets);
        // Buffered range is [b0, b1)
        auto const bn = b1 - b0;
        // Create empty buckets
        for(std::size_t i = 0; i < bn; ++i)
            bucket b{kh.block_size,
                buf.get() + i * kh.block_size, empty};
        // Insert all keys into buckets
        // Iterate Data File
        bulk_reader<File> r{df,
            dat_file_header::size, dataFileSize, readSize};
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            nsize_t size;
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec)
                return;
            progress((b0 / chunkSize) * dataFileSize + r.offset(), nwork);
            read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    dh.key_size +           // Key
                    size, ec);              // Data
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(dh.key_size);
                auto const h = hash<Hasher>(
                    key, dh.key_size, kh.salt);
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                // Keys belonging to another chunk are handled in
                // that chunk's pass
                if(n < b0 || n >= b1)
                    continue;
                bucket b{kh.block_size, buf.get() +
                    (n - b0) * kh.block_size};
                maybe_spill(b, dw, ec);
                if(ec)
                    return;
                b.insert(offset, size, h);
            }
            else
            {
                // VFALCO Should never get here
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec)
                    return;
                read<std::uint16_t>(is, size);  // Size
                r.prepare(size, ec); // skip
                if(ec)
                    return;
            }
        }
        // Widen before multiplying, as the pre-allocation write above
        // does, so the byte offset cannot wrap when the index and block
        // size are narrower than a file offset.
        kf.write(static_cast<noff_t>(b0 + 1) * kh.block_size, buf.get(),
            static_cast<std::size_t>(bn * kh.block_size), ec);
        if(ec)
            return;
    }
    dw.flush(ec);
    if(ec)
        return;
    lf.close();
    File::erase(log_path, ec);
    if(ec)
        return;
}
} // nudb
#endif

View File

@@ -0,0 +1,630 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_VERIFY_IPP
#define NUDB_IMPL_VERIFY_IPP
#include <nudb/concepts.hpp>
#include <nudb/native_file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstddef>
#include <limits>
#include <string>
namespace nudb {
namespace detail {
// Normal verify that does not require a buffer
//
// Verify a database without a large buffer.
//
// Two phases:
//   1. Scan the data file. For every data record, read the bucket the
//      key hashes to (following spill records as needed) and check that
//      an entry with the same hash and the record's offset exists;
//      otherwise fail with error::orphaned_value. Spill records are
//      counted into the `*_tot` statistics.
//   2. Scan the key file. For every entry of every bucket (and of the
//      spill buckets reachable from it), read the referenced data
//      record and check its size and key hash.
//
// Sets info.algorithm to 0 and fills in the statistics fields of `info`.
//
// @param info     Receives counts and derived statistics.
// @param df       Open data file.
// @param kf       Open key file.
// @param dh       Data file header (not used by this function).
// @param kh       Key file header describing the layout being checked.
// @param progress Called as progress(amount, total) while working.
// @param ec       Set to the first error or inconsistency found.
template<
    class Hasher,
    class File,
    class Progress>
void
verify_normal(
    verify_info& info,
    File& df,
    File& kf,
    dat_file_header& dh,
    key_file_header& kh,
    Progress&& progress,
    error_code& ec)
{
    static_assert(is_File<File>::value,
        "File requirements not met");
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    info.algorithm = 0;
    auto const readSize = 1024 * kh.block_size;

    // This ratio balances the 2 work phases.
    // The number is determined empirically.
    auto const adjust = 1.75;

    // Calculate the work required
    auto const keys = static_cast<std::uint64_t>(
        double(kh.load_factor) / 65536.0 * kh.buckets * kh.capacity);
    std::uint64_t const nwork = static_cast<std::uint64_t>(
        info.dat_file_size + keys * kh.block_size +
        adjust * (info.key_file_size + keys * kh.block_size));
    std::uint64_t work = 0;
    progress(0, nwork);

    // Iterate Data File
    // Data Record
    auto const dh_len =
        field<uint48_t>::size + // Size
        kh.key_size;            // Key
    std::uint64_t fetches = 0;
    // One block for a bucket followed by room for a record header
    buffer buf{kh.block_size + dh_len};
    bucket b{kh.block_size, buf.get()};
    std::uint8_t* pd = buf.get() + kh.block_size;
    {
        bulk_reader<File> r{df, dat_file_header::size,
            info.dat_file_size, readSize};
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec)
                return;
            nsize_t size;
            read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    kh.key_size +           // Key
                    size, ec);              // Data
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(kh.key_size);
                std::uint8_t const* const data =
                    is.data(size);
                (void)data;
                auto const h = hash<Hasher>(
                    key, kh.key_size, kh.salt);
                // Check bucket and spills
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                b.read(kf,
                    static_cast<noff_t>(n + 1) * kh.block_size, ec);
                if(ec)
                    return;
                work += kh.block_size;
                ++fetches;
                for(;;)
                {
                    for(auto i = b.lower_bound(h);
                        i < b.size(); ++i)
                    {
                        auto const item = b[i];
                        if(item.hash != h)
                            break;
                        if(item.offset == offset)
                            goto found;
                        ++fetches;
                    }
                    auto const spill = b.spill();
                    if(! spill)
                    {
                        ec = error::orphaned_value;
                        return;
                    }
                    b.read(df, spill, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::short_spill;
                        return;
                    }
                    if(ec)
                        return;
                    ++fetches;
                }
            found:
                // Update
                ++info.value_count;
                info.value_bytes += size;
            }
            else
            {
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                read<std::uint16_t>(is, size);  // Size
                if(size != info.bucket_size)
                {
                    ec = error::invalid_spill_size;
                    return;
                }
                b.read(r, ec);                  // Bucket
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                ++info.spill_count_tot;
                info.spill_bytes_tot +=
                    field<uint48_t>::size +     // Zero
                    field<uint16_t>::size +     // Size
                    b.actual_size();            // Bucket
            }
            progress(work + offset, nwork);
        }
        work += info.dat_file_size;
    }

    // Iterate Key File
    {
        for(std::size_t n = 0; n < kh.buckets; ++n)
        {
            std::size_t nspill = 0;
            b.read(kf, static_cast<noff_t>(
                n + 1) * kh.block_size, ec);
            if(ec)
                return;
            work += static_cast<std::uint64_t>(
                adjust * kh.block_size);
            bool spill = false;
            for(;;)
            {
                info.key_count += b.size();
                for(nkey_t i = 0; i < b.size(); ++i)
                {
                    auto const e = b[i];
                    df.read(e.offset, pd, dh_len, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::missing_value;
                        return;
                    }
                    if(ec)
                        return;
                    if(! spill)
                        work += static_cast<std::uint64_t>(
                            adjust * kh.block_size);
                    // Data Record
                    istream is{pd, dh_len};
                    std::uint64_t size;
                    // VFALCO This should really be a 32-bit field
                    read<uint48_t>(is, size);               // Size
                    void const* key =
                        is.data(kh.key_size);               // Key
                    if(size != e.size)
                    {
                        ec = error::size_mismatch;
                        return;
                    }
                    auto const h = hash<Hasher>(key,
                        kh.key_size, kh.salt);
                    if(h != e.hash)
                    {
                        ec = error::hash_mismatch;
                        return;
                    }
                }
                if(! b.spill())
                    break;
                b.read(df, b.spill(), ec);
                if(ec)
                    return;
                spill = true;
                ++nspill;
                ++info.spill_count;
                info.spill_bytes +=
                    field<uint48_t>::size +     // Zero
                    field<uint16_t>::size +     // Size
                    b.actual_size();            // SpillBucket
            }
            // The last histogram slot collects everything beyond range
            if(nspill >= info.hist.size())
                nspill = info.hist.size() - 1;
            ++info.hist[nspill];
            progress(work, nwork);
        }
    }

    // Derived statistics
    if(info.value_count)
        info.avg_fetch =
            float(fetches) / info.value_count;
    else
        info.avg_fetch = 0;
    info.waste = (info.spill_bytes_tot - info.spill_bytes) /
        float(info.dat_file_size);
    if(info.value_count)
        info.overhead =
            float(info.key_file_size + info.dat_file_size) /
            (
                info.value_bytes +
                info.key_count *
                    (info.key_size +
                    // Data Record
                     field<uint48_t>::size) // Size
                        ) - 1;
    else
        info.overhead = 0;
    info.actual_load = info.key_count / float(
        info.capacity * info.buckets);
}
// Fast version of verify that uses a buffer
//
// Verify a database using a caller-sized buffer.
//
// The key file is processed in chunks of consecutive buckets that fit in
// the buffer. For each chunk:
//   1. The chunk is read into memory and the keys in each bucket,
//      including those in its spill records, are counted.
//   2. The whole data file is scanned. Every data record whose key maps
//      into the chunk must be found in its bucket or spill chain
//      (error::orphaned_value otherwise), and the bucket's counter is
//      decremented.
//   3. Every counter must have reached zero (error::missing_value
//      otherwise).
// Spill records met while scanning the data file are added to the
// `*_tot` statistics during the first chunk only.
//
// Sets info.algorithm to 1 and fills in the statistics fields of `info`.
//
// @param info       Receives counts and derived statistics.
// @param df         Open data file.
// @param kf         Open key file.
// @param dh         Data file header (not used by this function).
// @param kh         Key file header describing the layout being checked.
// @param bufferSize Memory budget in bytes.
// @param progress   Called as progress(amount, total) while working.
// @param ec         Set to the first error or inconsistency found.
// @throws std::logic_error if bufferSize is smaller than
//         2 * kh.block_size + sizeof(nkey_t).
template<class Hasher, class File, class Progress>
void
verify_fast(
    verify_info& info,
    File& df,
    File& kf,
    dat_file_header& dh,
    key_file_header& kh,
    std::size_t bufferSize,
    Progress&& progress,
    error_code& ec)
{
    info.algorithm = 1;
    auto const readSize = 1024 * kh.block_size;

    if(kh.buckets > std::numeric_limits<nbuck_t>::max())
    {
        ec = error::too_many_buckets;
        return;
    }

    // Verify contiguous sequential sections of the
    // key file using multiple passes over the data.
    //
    if(bufferSize < 2 * kh.block_size + sizeof(nkey_t))
        throw std::logic_error("invalid buffer size");
    auto chunkSize = std::min(kh.buckets,
        (bufferSize - kh.block_size) /
            (kh.block_size + sizeof(nkey_t)));
    auto const passes =
        (kh.buckets + chunkSize - 1) / chunkSize;

    // Counts unverified keys per bucket. Only the buckets of the
    // current chunk are ever indexed, so one chunk's worth of counters
    // is sufficient.
    std::unique_ptr<nkey_t[]> nkeys(
        new nkey_t[chunkSize]);

    // Calculate the work required
    std::uint64_t work = 0;
    std::uint64_t const nwork =
        passes * info.dat_file_size + info.key_file_size;
    progress(0, nwork);

    std::uint64_t fetches = 0;
    // The chunk, followed by one extra block used as scratch space
    // for spill buckets
    buffer buf{(chunkSize + 1) * kh.block_size};
    bucket tmp{kh.block_size,
        buf.get() + chunkSize * kh.block_size};
    for(nsize_t b0 = 0; b0 < kh.buckets; b0 += chunkSize)
    {
        // Load key file chunk to buffer
        auto const b1 = std::min(b0 + chunkSize, kh.buckets);
        // Buffered range is [b0, b1)
        auto const bn = b1 - b0;
        kf.read(
            static_cast<noff_t>(b0 + 1) * kh.block_size,
            buf.get(),
            static_cast<noff_t>(bn * kh.block_size),
            ec);
        if(ec)
            return;
        work += bn * kh.block_size;
        progress(work, nwork);
        // Count keys in buckets, including spills
        for(nbuck_t i = 0 ; i < bn; ++i)
        {
            bucket b{kh.block_size,
                buf.get() + i * kh.block_size};
            nkeys[i] = b.size();
            std::size_t nspill = 0;
            auto spill = b.spill();
            while(spill != 0)
            {
                tmp.read(df, spill, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                nkeys[i] += tmp.size();
                spill = tmp.spill();
                ++nspill;
                ++info.spill_count;
                info.spill_bytes +=
                    field<uint48_t>::size + // Zero
                    field<uint16_t>::size + // Size
                    tmp.actual_size();      // SpillBucket
            }
            // The last histogram slot collects everything beyond range
            if(nspill >= info.hist.size())
                nspill = info.hist.size() - 1;
            ++info.hist[nspill];
            info.key_count += nkeys[i];
        }
        // Iterate Data File
        bulk_reader<File> r(df, dat_file_header::size,
            info.dat_file_size, readSize);
        while(! r.eof())
        {
            auto const offset = r.offset();
            // Data Record or Spill Record
            auto is = r.prepare(
                field<uint48_t>::size, ec); // Size
            if(ec == error::short_read)
            {
                ec = error::short_data_record;
                return;
            }
            if(ec)
                return;
            nsize_t size;
            detail::read_size48(is, size);
            if(size > 0)
            {
                // Data Record
                is = r.prepare(
                    kh.key_size +           // Key
                    size, ec);              // Data
                if(ec == error::short_read)
                {
                    ec = error::short_value;
                    return;
                }
                if(ec)
                    return;
                std::uint8_t const* const key =
                    is.data(kh.key_size);
                std::uint8_t const* const data =
                    is.data(size);
                (void)data;
                auto const h = hash<Hasher>(
                    key, kh.key_size, kh.salt);
                auto const n = bucket_index(
                    h, kh.buckets, kh.modulus);
                // Keys belonging to another chunk are handled in
                // that chunk's pass
                if(n < b0 || n >= b1)
                    continue;
                // Check bucket and spills
                bucket b{kh.block_size, buf.get() +
                    (n - b0) * kh.block_size};
                ++fetches;
                for(;;)
                {
                    for(auto i = b.lower_bound(h);
                        i < b.size(); ++i)
                    {
                        auto const item = b[i];
                        if(item.hash != h)
                            break;
                        if(item.offset == offset)
                            goto found;
                        ++fetches;
                    }
                    auto const spill = b.spill();
                    if(! spill)
                    {
                        ec = error::orphaned_value;
                        return;
                    }
                    b = tmp;
                    b.read(df, spill, ec);
                    if(ec == error::short_read)
                    {
                        ec = error::short_spill;
                        return;
                    }
                    if(ec)
                        return;
                    ++fetches;
                }
            found:
                // Update
                ++info.value_count;
                info.value_bytes += size;
                if(nkeys[n - b0]-- == 0)
                {
                    ec = error::orphaned_value;
                    return;
                }
            }
            else
            {
                // Spill Record
                is = r.prepare(
                    field<std::uint16_t>::size, ec);
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                read<std::uint16_t>(is, size);  // Size
                if(bucket_size(
                    bucket_capacity(size)) != size)
                {
                    ec = error::invalid_spill_size;
                    return;
                }
                r.prepare(size, ec);            // Bucket
                if(ec == error::short_read)
                {
                    ec = error::short_spill;
                    return;
                }
                if(ec)
                    return;
                if(b0 == 0)
                {
                    // The bucket was skipped, not loaded, so its
                    // length is the size field just read. The scratch
                    // bucket `tmp` holds unrelated (possibly never
                    // initialised) contents at this point.
                    ++info.spill_count_tot;
                    info.spill_bytes_tot +=
                        field<uint48_t>::size + // Zero
                        field<uint16_t>::size + // Size
                        size;                   // Bucket
                }
            }
            progress(work + offset, nwork);
        }
        // Make sure every key in every bucket was visited
        for(std::size_t i = 0; i < bn; ++i)
        {
            if(nkeys[i] != 0)
            {
                ec = error::missing_value;
                return;
            }
        }
        work += info.dat_file_size;
    }

    // Derived statistics
    if(info.value_count)
        info.avg_fetch =
            float(fetches) / info.value_count;
    else
        info.avg_fetch = 0;
    info.waste = (info.spill_bytes_tot - info.spill_bytes) /
        float(info.dat_file_size);
    if(info.value_count)
        info.overhead =
            float(info.key_file_size + info.dat_file_size) /
            (
                info.value_bytes +
                info.key_count *
                    (info.key_size +
                    // Data Record
                     field<uint48_t>::size) // Size
                        ) - 1;
    else
        info.overhead = 0;
    info.actual_load = info.key_count / float(
        info.capacity * info.buckets);
}
} // detail
// Verify the consistency of a database's key and data files.
//
// Opens both files with native_file, validates each header and their
// mutual agreement, records the header-derived fields in `info`, then
// runs either detail::verify_normal or detail::verify_fast, whichever
// is estimated to need less file I/O for the given bufferSize.
//
// info        Reset to a default-constructed value, then filled in.
// dat_path    Path to the data file (opened in scan mode).
// key_path    Path to the key file (opened in read mode).
// bufferSize  Bytes available to the fast algorithm. When it cannot
//             hold at least one chunk, the normal algorithm is used.
// progress    Passed on to the chosen algorithm.
// ec          Set to the error, if any occurred. The function returns
//             as soon as any step reports an error.
template<class Hasher, class Progress>
void
verify(
    verify_info& info,
    path_type const& dat_path,
    path_type const& key_path,
    std::size_t bufferSize,
    Progress&& progress,
    error_code& ec)
{
    static_assert(is_Hasher<Hasher>::value,
        "Hasher requirements not met");
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    info = {};
    using namespace detail;
    using File = native_file;
    File df;
    df.open(file_mode::scan, dat_path, ec);
    if(ec)
        return;
    File kf;
    kf.open(file_mode::read, key_path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    key_file_header kh;
    read(kf, kh, ec);
    if(ec)
        return;
    verify<Hasher>(kh, ec);
    if(ec)
        return;
    verify<Hasher>(dh, kh, ec);
    if(ec)
        return;
    info.dat_path = dat_path;
    info.key_path = key_path;
    info.version = dh.version;
    info.uid = dh.uid;
    info.appnum = dh.appnum;
    info.key_size = dh.key_size;
    info.salt = kh.salt;
    info.pepper = kh.pepper;
    info.block_size = kh.block_size;
    // The header stores the load factor as a fixed-point value
    // scaled by 65536; convert it to a fraction for reporting.
    info.load_factor = kh.load_factor / 65536.f;
    info.capacity = kh.capacity;
    info.buckets = kh.buckets;
    info.bucket_size = bucket_size(kh.capacity);
    info.key_file_size = kf.size(ec);
    if(ec)
        return;
    info.dat_file_size = df.size(ec);
    if(ec)
        return;

    // Determine which algorithm requires the least amount
    // of file I/O given the available buffer size
    std::size_t chunkSize;
    if(bufferSize >= 2 * kh.block_size + sizeof(nkey_t))
        chunkSize = std::min(kh.buckets,
            (bufferSize - kh.block_size) /
                (kh.block_size + sizeof(nkey_t)));
    else
        chunkSize = 0;
    std::size_t passes;
    if(chunkSize > 0)
        passes = (kh.buckets + chunkSize - 1) / chunkSize;
    else
        passes = 0;

    // Estimated bytes of I/O for the normal algorithm: the data file,
    // plus (expected key count * block size), plus the key file. The
    // expected key count is buckets * load fraction * capacity, so the
    // fixed-point load factor must be scaled down by 65536 here just
    // as it is for info.load_factor. The estimate is computed in
    // double so that large databases cannot overflow the product.
    double const normalCost =
        static_cast<double>(info.dat_file_size) +
        static_cast<double>(kh.buckets) *
            (kh.load_factor / 65536.0) *
            static_cast<double>(kh.capacity) *
            static_cast<double>(kh.block_size) +
        static_cast<double>(info.key_file_size);
    // Estimated bytes of I/O for the fast algorithm: one pass over
    // the data file per chunk of buckets, plus the key file.
    double const fastCost =
        static_cast<double>(passes) *
            static_cast<double>(info.dat_file_size) +
        static_cast<double>(info.key_file_size);

    if(! chunkSize || normalCost < fastCost)
    {
        detail::verify_normal<Hasher>(info,
            df, kf, dh, kh, progress, ec);
    }
    else
    {
        detail::verify_fast<Hasher>(info,
            df, kf, dh, kh, bufferSize, progress, ec);
    }
}
} // nudb
#endif

View File

@@ -0,0 +1,96 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_VISIT_IPP
#define NUDB_IMPL_VISIT_IPP
#include <nudb/concepts.hpp>
#include <nudb/error.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/native_file.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <cstddef>
#include <string>
namespace nudb {
// Iterate every record in a data file.
//
// Opens the data file at `path` in scan mode, validates its header,
// then walks the records in file order. For each data record the
// callback is invoked with the key and value; spill records are read
// past without invoking the callback. `progress` is called once before
// iteration and again after every record with the current offset and
// the file size.
//
// Any error reported by a file operation, or set by the callback,
// is returned through `ec` and stops the iteration.
template<
    class Callback,
    class Progress>
void
visit(
    path_type const& path,
    Callback&& callback,
    Progress&& progress,
    error_code& ec)
{
    // VFALCO Need concept check for Callback
    static_assert(is_Progress<Progress>::value,
        "Progress requirements not met");
    using namespace detail;
    using File = native_file;
    auto const readSize = 1024 * block_size(path);
    File df;
    df.open(file_mode::scan, path, ec);
    if(ec)
        return;
    dat_file_header dh;
    read(df, dh, ec);
    if(ec)
        return;
    verify(dh, ec);
    if(ec)
        return;
    auto const fileSize = df.size(ec);
    if(ec)
        return;
    bulk_reader<File> r(df,
        dat_file_header::size, fileSize, readSize);
    progress(0, fileSize);
    while(! r.eof())
    {
        // Data Record or Spill Record
        nsize_t size;
        auto is = r.prepare(
            field<uint48_t>::size, ec); // Size
        if(ec)
            return;
        detail::read_size48(is, size);
        if(size > 0)
        {
            // Data Record
            is = r.prepare(
                dh.key_size +           // Key
                size, ec);              // Data
            // A failed prepare must not reach the callback: the
            // stream would not refer to a complete key/value pair.
            if(ec)
                return;
            std::uint8_t const* const key =
                is.data(dh.key_size);
            callback(key, dh.key_size,
                is.data(size), size, ec);
            if(ec)
                return;
        }
        else
        {
            // Spill Record
            is = r.prepare(
                field<std::uint16_t>::size, ec);
            if(ec)
                return;
            read<std::uint16_t>(is, size);  // Size
            r.prepare(size, ec); // skip bucket
            if(ec)
                return;
        }
        progress(r.offset(), fileSize);
    }
}
} // nudb
#endif

View File

@@ -0,0 +1,264 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_IMPL_WIN32_FILE_IPP
#define NUDB_IMPL_WIN32_FILE_IPP
#include <boost/assert.hpp>
namespace nudb {
// Destructor: releases the handle, if one is held, via close().
inline
win32_file::
~win32_file()
{
    this->close();
}
inline
win32_file::
win32_file(win32_file&& other)
: hf_(other.hf_)
{
other.hf_ = INVALID_HANDLE_VALUE;
}
// Move assignment: closes any handle currently held, then takes over
// the other object's handle. Self-assignment is a no-op.
inline
win32_file&
win32_file::
operator=(win32_file&& other)
{
    if(this != &other)
    {
        close();
        hf_ = other.hf_;
        other.hf_ = INVALID_HANDLE_VALUE;
    }
    return *this;
}
// Close the handle if one is held; otherwise do nothing.
// The result of CloseHandle is not inspected.
inline
void
win32_file::
close()
{
    if(hf_ == INVALID_HANDLE_VALUE)
        return;
    ::CloseHandle(hf_);
    hf_ = INVALID_HANDLE_VALUE;
}
// Create a new file and open it with the access implied by `mode`.
// CREATE_NEW is used, so the call fails if the file already exists.
// On failure `ec` is set from the last Win32 error.
inline
void
win32_file::
create(file_mode mode, path_type const& path, error_code& ec)
{
    BOOST_ASSERT(! is_open());
    auto const accessAndAttrs = flags(mode);
    DWORD const desiredAccess = accessAndAttrs.first;
    DWORD const flagsAndAttributes = accessAndAttrs.second;
    hf_ = ::CreateFileA(
        path.c_str(),
        desiredAccess,
        0,                  // dwShareMode: not shared
        NULL,               // lpSecurityAttributes
        CREATE_NEW,         // dwCreationDisposition
        flagsAndAttributes,
        NULL);              // hTemplateFile
    if(hf_ == INVALID_HANDLE_VALUE)
        last_err(ec);
}
// Open an existing file with the access implied by `mode`.
// OPEN_EXISTING is used, so the call fails if the file is missing.
// On failure `ec` is set from the last Win32 error.
inline
void
win32_file::
open(file_mode mode, path_type const& path, error_code& ec)
{
    BOOST_ASSERT(! is_open());
    auto const accessAndAttrs = flags(mode);
    DWORD const desiredAccess = accessAndAttrs.first;
    DWORD const flagsAndAttributes = accessAndAttrs.second;
    hf_ = ::CreateFileA(
        path.c_str(),
        desiredAccess,
        0,                  // dwShareMode: not shared
        NULL,               // lpSecurityAttributes
        OPEN_EXISTING,      // dwCreationDisposition
        flagsAndAttributes,
        NULL);              // hTemplateFile
    if(hf_ == INVALID_HANDLE_VALUE)
        last_err(ec);
}
// Delete the file at `path`. On failure `ec` is set from the last
// Win32 error; on success `ec` is left untouched.
inline
void
win32_file::
erase(path_type const& path, error_code& ec)
{
    if(::DeleteFileA(path.c_str()))
        return;
    last_err(ec);
}
// Return the size of the open file in bytes.
// On failure `ec` is set from the last Win32 error and 0 is returned.
inline
std::uint64_t
win32_file::
size(error_code& ec) const
{
    BOOST_ASSERT(is_open());
    LARGE_INTEGER li;
    if(::GetFileSizeEx(hf_, &li))
        return li.QuadPart;
    last_err(ec);
    return 0;
}
inline
void
win32_file::
read(std::uint64_t offset, void* buffer, std::size_t bytes, error_code& ec)
{
while(bytes > 0)
{
DWORD bytesRead;
LARGE_INTEGER li;
li.QuadPart = static_cast<LONGLONG>(offset);
OVERLAPPED ov;
ov.Offset = li.LowPart;
ov.OffsetHigh = li.HighPart;
ov.hEvent = NULL;
DWORD amount;
if(bytes > std::numeric_limits<DWORD>::max())
amount = std::numeric_limits<DWORD>::max();
else
amount = static_cast<DWORD>(bytes);
BOOL const bSuccess = ::ReadFile(
hf_, buffer, amount, &bytesRead, &ov);
if(! bSuccess)
{
DWORD const dwError = ::GetLastError();
if(dwError != ERROR_HANDLE_EOF)
return err(dwError, ec);
ec = make_error_code(error::short_read);
return;
}
if(bytesRead == 0)
{
ec = make_error_code(error::short_read);
return;
}
offset += bytesRead;
bytes -= bytesRead;
buffer = reinterpret_cast<char*>(
buffer) + bytesRead;
}
}
inline
void
win32_file::
write(std::uint64_t offset,
void const* buffer, std::size_t bytes, error_code& ec)
{
while(bytes > 0)
{
LARGE_INTEGER li;
li.QuadPart = static_cast<LONGLONG>(offset);
OVERLAPPED ov;
ov.Offset = li.LowPart;
ov.OffsetHigh = li.HighPart;
ov.hEvent = NULL;
DWORD amount;
if(bytes > std::numeric_limits<DWORD>::max())
amount = std::numeric_limits<DWORD>::max();
else
amount = static_cast<DWORD>(bytes);
DWORD bytesWritten;
BOOL const bSuccess = ::WriteFile(
hf_, buffer, amount, &bytesWritten, &ov);
if(! bSuccess)
return last_err(ec);
if(bytesWritten == 0)
{
ec = error_code{errc::no_space_on_device,
generic_category()};;
return;
}
offset += bytesWritten;
bytes -= bytesWritten;
buffer = reinterpret_cast<char const*>(
buffer) + bytesWritten;
}
}
// Flush the file's buffers with FlushFileBuffers.
// On failure `ec` is set from the last Win32 error.
inline
void
win32_file::
sync(error_code& ec)
{
    BOOL const flushed = ::FlushFileBuffers(hf_);
    if(! flushed)
        last_err(ec);
}
inline
void
win32_file::
trunc(std::uint64_t length, error_code& ec)
{
LARGE_INTEGER li;
li.QuadPart = length;
BOOL bSuccess;
bSuccess = ::SetFilePointerEx(
hf_, li, NULL, FILE_BEGIN);
if(bSuccess)
bSuccess = ::SetEndOfFile(hf_);
if(! bSuccess)
return last_err(ec);
}
// Translate a file_mode into the pair of values handed to CreateFile:
// `first` is the desired access, `second` the flags and attributes.
// A mode not listed below yields {0, 0}.
inline
std::pair<DWORD, DWORD>
win32_file::
flags(file_mode mode)
{
    DWORD access = 0;
    DWORD attributes = 0;
    switch(mode)
    {
    case file_mode::scan:
        access = GENERIC_READ;
        attributes = FILE_FLAG_SEQUENTIAL_SCAN;
        break;

    case file_mode::read:
        access = GENERIC_READ;
        attributes = FILE_FLAG_RANDOM_ACCESS;
        break;

    case file_mode::append:
        access = GENERIC_READ | GENERIC_WRITE;
        // FILE_FLAG_NO_BUFFERING and FILE_FLAG_WRITE_THROUGH were
        // left disabled for this mode in the original code.
        attributes = FILE_FLAG_RANDOM_ACCESS;
        break;

    case file_mode::write:
        access = GENERIC_READ | GENERIC_WRITE;
        attributes = FILE_FLAG_RANDOM_ACCESS;
        break;
    }
    return std::pair<DWORD, DWORD>{access, attributes};
}
} // nudb
#endif

View File

@@ -0,0 +1,76 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_NATIVE_FILE_HPP
#define NUDB_NATIVE_FILE_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <nudb/posix_file.hpp>
#include <nudb/win32_file.hpp>
#include <string>
namespace nudb {
/** A native file handle.
This type is set to the appropriate platform-specific
implementation to meet the file wrapper requirements.
The choice is made at compile time: @ref win32_file when
`_MSC_VER` is defined, and @ref posix_file otherwise.
*/
using native_file =
#ifdef _MSC_VER
win32_file;
#else
posix_file;
#endif
/** Erase a file if it exists.

    This function attempts to erase the specified file.
    No error is generated if the file does not already
    exist.

    @param path The path to the file to erase.

    @param ec Set to the error, if any occurred. An
    `errc::no_such_file_or_directory` result is cleared.

    @tparam File A type meeting the requirements of @b File.
    If this type is unspecified, @ref native_file is used.
*/
template<class File = native_file>
inline
void
erase_file(path_type const& path, error_code& ec)
{
    // Dispatch through the File template argument so that a
    // caller-supplied File type is honored, as documented above.
    File::erase(path, ec);
    if(ec == errc::no_such_file_or_directory)
        ec = {};
}
/** Erase a file without returning an error.

    This overload attempts to erase the specified file and
    discards the outcome: every error is ignored, including
    the case where the file does not exist.

    @param path The path to the file to erase.

    @tparam File A type meeting the requirements of @b File.
    If this type is unspecified, @ref native_file is used.
*/
template<class File = native_file>
inline
void
erase_file(path_type const& path)
{
    // The error code is received only so that it can be dropped.
    error_code ignored;
    File::erase(path, ignored);
}
} // nudb
#endif

27
include/nudb/nudb.hpp Normal file
View File

@@ -0,0 +1,27 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_HPP
#define NUDB_HPP
#include <nudb/concepts.hpp>
#include <nudb/create.hpp>
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <nudb/posix_file.hpp>
#include <nudb/progress.hpp>
#include <nudb/recover.hpp>
#include <nudb/rekey.hpp>
#include <nudb/store.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/verify.hpp>
#include <nudb/version.hpp>
#include <nudb/visit.hpp>
#include <nudb/win32_file.hpp>
#include <nudb/xxhasher.hpp>
#endif

228
include/nudb/posix_file.hpp Normal file
View File

@@ -0,0 +1,228 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_POSIX_FILE_HPP
#define NUDB_DETAIL_POSIX_FILE_HPP
#include <nudb/file.hpp>
#include <nudb/error.hpp>
#include <cerrno>
#include <cstring>
#include <string>
#include <utility>
#ifndef NUDB_POSIX_FILE
# ifdef _MSC_VER
# define NUDB_POSIX_FILE 0
# else
# define NUDB_POSIX_FILE 1
# endif
#endif
#if NUDB_POSIX_FILE
# include <fcntl.h>
# include <sys/types.h>
# include <sys/uio.h>
# include <sys/stat.h>
# include <unistd.h>
#endif
#if NUDB_POSIX_FILE
namespace nudb {
/** A descriptor to a POSIX file.
This class wraps an integer file descriptor and declares the same
set of operations as @ref win32_file.
*/
class posix_file
{
// The wrapped file descriptor; -1 means no file is open (see is_open).
int fd_ = -1;
public:
/// Constructor
posix_file() = default;
/// Copy constructor (disallowed)
posix_file(posix_file const&) = delete;
/// Copy assignment (disallowed)
posix_file& operator=(posix_file const&) = delete;
/** Destructor.
If open, the file is closed.
*/
~posix_file();
/** Move constructor.
@note The state of the moved-from object is as if default constructed.
*/
posix_file(posix_file&&);
/** Move assignment.
@note The state of the moved-from object is as if default constructed.
*/
posix_file&
operator=(posix_file&& other);
/// Returns `true` if the file is open.
bool
is_open() const
{
return fd_ != -1;
}
/// Close the file if it is open.
void
close();
/** Create a new file.
After the file is created, it is opened as if by `open(mode, path, ec)`.
@par Requirements
The file must not already exist, or else `errc::file_exists`
is returned.
@param mode The open mode, which must be a valid @ref file_mode.
@param path The path of the file to create.
@param ec Set to the error, if any occurred.
*/
void
create(file_mode mode, path_type const& path, error_code& ec);
/** Open a file.
@par Requirements
The file must not already be open.
@param mode The open mode, which must be a valid @ref file_mode.
@param path The path of the file to open.
@param ec Set to the error, if any occurred.
*/
void
open(file_mode mode, path_type const& path, error_code& ec);
/** Remove a file from the file system.
It is not an error to attempt to erase a file that does not exist.
NOTE(review): erase_file() clears `errc::no_such_file_or_directory`
after calling this function, which suggests the implementation may
in fact report that error. Confirm against the implementation.
@param path The path of the file to remove.
@param ec Set to the error, if any occurred.
*/
static
void
erase(path_type const& path, error_code& ec);
/** Return the size of the file.
@par Requirements
The file must be open.
@param ec Set to the error, if any occurred.
@return The size of the file, in bytes.
*/
std::uint64_t
size(error_code& ec) const;
/** Read data from a location in the file.
@par Requirements
The file must be open.
@param offset The position in the file to read from,
expressed as a byte offset from the beginning.
@param buffer The location to store the data.
@param bytes The number of bytes to read.
@param ec Set to the error, if any occurred.
*/
void
read(std::uint64_t offset,
void* buffer, std::size_t bytes, error_code& ec);
/** Write data to a location in the file.
@par Requirements
The file must be open with a mode allowing writes.
@param offset The position in the file to write to,
expressed as a byte offset from the beginning.
@param buffer The data to write.
@param bytes The number of bytes to write.
@param ec Set to the error, if any occurred.
*/
void
write(std::uint64_t offset,
void const* buffer, std::size_t bytes, error_code& ec);
/** Perform a low level file synchronization.
@par Requirements
The file must be open with a mode allowing writes.
@param ec Set to the error, if any occurred.
*/
void
sync(error_code& ec);
/** Truncate the file at a specific size.
@par Requirements
The file must be open with a mode allowing writes.
@param length The new file size.
@param ec Set to the error, if any occurred.
*/
void
trunc(std::uint64_t length, error_code& ec);
private:
// Store the error value `ev` in `ec` using the system category.
static
void
err(int ev, error_code& ec)
{
ec = error_code{ev, system_category()};
}
// Store the current value of `errno` in `ec`.
static
void
last_err(error_code& ec)
{
err(errno, ec);
}
// Map a file_mode to a pair of integers. The meaning of each element
// is defined by the implementation, which is not visible here;
// presumably they are arguments for opening the file. TODO confirm.
static
std::pair<int, int>
flags(file_mode mode);
};
} // nudb
#include <nudb/impl/posix_file.ipp>
#endif
#endif

32
include/nudb/progress.hpp Normal file
View File

@@ -0,0 +1,32 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_PROGRESS_HPP
#define NUDB_PROGRESS_HPP
namespace nudb {
/** Progress function that does nothing.

    This type meets the requirements of @b Progress,
    and does nothing when invoked.

    NOTE(review): this header uses std::uint64_t but shows no
    include of <cstdint>; it appears to rely on the including
    file having done so. Confirm.
*/
struct
no_progress
{
    no_progress() = default;

    /// Called to indicate progress. Both arguments are ignored.
    void
    operator()(std::uint64_t, std::uint64_t) const noexcept
    {
    }
};
} // nudb
#endif

73
include/nudb/recover.hpp Normal file
View File

@@ -0,0 +1,73 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_RECOVER_HPP
#define NUDB_RECOVER_HPP
#include <nudb/error.hpp>
#include <nudb/native_file.hpp>
namespace nudb {
/** Perform recovery on a database.
This implements the recovery algorithm by rolling back
any partially committed data. If no log file is present,
the function does nothing.
During the commit phase of a NuDB database, a log file
is generated with information that may be used to roll
back the results of a partial commit. This function
checks for the presence of a log file. If present, the
log file is replayed on the key and data files belonging
to the database, restoring the database to its state
before the partial commit. When @ref recover is
successful, it erases the log file.
It is normally not necessary to call this function
directly, it is called automatically when a database is
opened in a call to @ref basic_store::open. Callers may
use this function to implement auxiliary tools for
manipulating the database.
@par Template Parameters
@tparam Hasher The hash function to use. This type must
meet the requirements of @b Hasher. The hash function
must be the same as that used to create the database, or
else an error is returned.
@tparam File The type of file to use. Use the default of
@ref native_file unless customizing the file behavior.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param log_path The path to the log file.
@param args Optional parameters passed to File constructors.
@param ec Set to the error, if any occurred.
*/
template<
class Hasher,
class File = native_file, // defaults to the platform's native file type
class... Args>
void
recover(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
error_code& ec, // out-parameter; it precedes `args` because the parameter pack must come last
Args&&... args);
} // nudb
#include <nudb/impl/recover.ipp>
#endif

110
include/nudb/rekey.hpp Normal file
View File

@@ -0,0 +1,110 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_REKEY_HPP
#define NUDB_REKEY_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
#include <cstddef>
#include <cstdint>
namespace nudb {
/** Create a new key file from a data file.
This algorithm rebuilds a key file for the given data file.
It works efficiently by iterating the data file multiple times.
During the iteration, a contiguous block of the key file is
rendered in memory, then flushed to disk when the iteration is
complete. The size of this memory buffer is controlled by the
`bufferSize` parameter, larger is better. The algorithm works
the fastest when `bufferSize` is large enough to hold the entire
key file in memory; only a single iteration of the data file
is needed in this case.
During the rekey, spill records may be appended to the data
file. If the rekey operation is abnormally terminated, this
would normally result in a corrupted data file. To prevent this,
the function creates a log file using the specified path so
that the database can be fixed in a subsequent call to
@ref recover.
@note If a log file is already present, this function will
fail with @ref error::log_file_exists.
@par Template Parameters
@tparam Hasher The hash function to use. This type must
meet the requirements of @b Hasher. The hash function
must be the same as that used to create the database, or
else an error is returned.
@tparam File The type of file to use. This type must meet
the requirements of @b File.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param log_path The path to the log file.
@param blockSize The size of a key file block. Larger
blocks hold more keys but require more I/O cycles per
operation. The ideal block size is the largest size that
may be read in a single I/O cycle, and is device dependent.
The return value of @ref block_size returns a suitable
value for the volume of a given path.
@param loadFactor A number between zero and one
representing the average bucket occupancy (number of
items). A value of 0.5 is perfect. Lower numbers
waste space, and higher numbers produce negligible
savings at the cost of increased I/O cycles.
@param itemCount The number of items in the data file.
@param bufferSize The number of bytes to allocate for the buffer.
@param ec Set to the error if any occurred.
@param progress A function which will be called periodically
as the algorithm proceeds. The equivalent signature of the
progress function must be:
@code
void progress(
std::uint64_t amount, // Amount of work done so far
std::uint64_t total // Total amount of work to do
);
@endcode
@param args Optional arguments passed to @b File constructors.
*/
template<
class Hasher,
class File, // no default here, unlike recover(); callers must name the file type
class Progress,
class... Args
>
void
rekey(
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
std::size_t blockSize, // size of a key file block, in bytes
float loadFactor, // target average bucket occupancy, between zero and one
std::uint64_t itemCount, // number of items in the data file
std::size_t bufferSize, // bytes to allocate for the in-memory buffer
error_code& ec, // out-parameter; not last because the parameter pack must come last
Progress&& progress,
Args&&... args);
} // nudb
#include <nudb/impl/rekey.ipp>
#endif

27
include/nudb/store.hpp Normal file
View File

@@ -0,0 +1,27 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_STORE_HPP
#define NUDB_STORE_HPP
#include <nudb/basic_store.hpp>
#include <nudb/native_file.hpp>
#include <nudb/xxhasher.hpp>
namespace nudb {
/** A key/value database.
The @b Hasher used is @ref xxhasher, which works very
well for almost all cases. The @b File is @ref native_file which
works on Windows and POSIX platforms.
*/
using store = basic_store<xxhasher, native_file>;
} // nudb
#endif

View File

@@ -0,0 +1,63 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_TYPE_TRAITS_HPP
#define NUDB_TYPE_TRAITS_HPP
#include <cstddef>
#include <cstdint>
namespace nudb {
#if ! GENERATING_DOCS
namespace detail {
// Holds a full digest
using nhash_t = std::uint64_t;
} // detail
/** Holds a bucket index or bucket count.
The maximum number of buckets in a key file is 2^32-1.
*/
// NOTE(review): the 32-bit alias is commented out and std::size_t is
// used instead; the reason is not visible here. Wherever std::size_t
// is wider than 32 bits, the limit documented above is not enforced
// by this type.
//using nbuck_t = std::uint32_t;
using nbuck_t = std::size_t;
/** Holds a key index or count in bucket.
A bucket is limited to 2^16-1 items. The practical
limit is lower, since a bucket cannot be larger than
the block size.
*/
// NOTE(review): as with nbuck_t, the narrower 16-bit alias is
// commented out, so the documented limit is not enforced by this type.
//using nkey_t = std::uint16_t;
using nkey_t = std::size_t;
/** Holds a file size or offset.
Operating system support for large files is required.
Practically, data files cannot exceed 2^48 since offsets
are stored as 48 bit unsigned values.
*/
using noff_t = std::uint64_t;
/** Holds a block, key, or value size.
Block size is limited to 2^16
Key file blocks are limited to the block size.
Value sizes are limited to 2^31-1.
*/
// The limits listed above are documented constraints only;
// std::size_t itself does not enforce them.
using nsize_t = std::size_t;
#endif
} // nudb
#endif

200
include/nudb/verify.hpp Normal file
View File

@@ -0,0 +1,200 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VERIFY_HPP
#define NUDB_VERIFY_HPP
#include <nudb/file.hpp>
#include <nudb/type_traits.hpp>
#include <nudb/detail/bucket.hpp>
#include <nudb/detail/bulkio.hpp>
#include <nudb/detail/format.hpp>
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
namespace nudb {
/// Describes database statistics calculated by @ref verify.
struct verify_info
{
    /** Indicates the verify algorithm used.

        @li @b 0 Normal algorithm
        @li @b 1 Fast algorithm
    */
    // Explicitly initialized: this struct has a user-provided default
    // constructor, so `verify_info{}` does not zero members that lack
    // an initializer, and the value would otherwise be indeterminate.
    int algorithm = 0;

    /// The path to the data file
    path_type dat_path;

    /// The path to the key file
    path_type key_path;

    /// The API version used to create the database
    std::size_t version = 0;

    /// The unique identifier
    std::uint64_t uid = 0;

    /// The application-defined constant
    std::uint64_t appnum = 0;

    /// The size of each key, in bytes
    nsize_t key_size = 0;

    /// The salt used in the key file
    std::uint64_t salt = 0;

    /// The salt fingerprint
    std::uint64_t pepper = 0;

    /// The block size used in the key file
    nsize_t block_size = 0;

    /// The target load factor used in the key file
    float load_factor = 0;

    /// The maximum number of keys each bucket can hold
    nkey_t capacity = 0;

    /// The number of buckets in the key file
    nbuck_t buckets = 0;

    /// The size of a bucket in bytes
    nsize_t bucket_size = 0;

    /// The size of the key file
    noff_t key_file_size = 0;

    /// The size of the data file
    noff_t dat_file_size = 0;

    /// The number of keys found
    std::uint64_t key_count = 0;

    /// The number of values found
    std::uint64_t value_count = 0;

    /// The total number of bytes occupied by values
    std::uint64_t value_bytes = 0;

    /// The number of spill records in use
    std::uint64_t spill_count = 0;

    /// The total number of spill records
    std::uint64_t spill_count_tot = 0;

    /// The number of bytes occupied by spill records in use
    std::uint64_t spill_bytes = 0;

    /// The number of bytes occupied by all spill records
    std::uint64_t spill_bytes_tot = 0;

    /// Average number of key file reads per fetch
    float avg_fetch = 0;

    /// The fraction of the data file that is wasted
    float waste = 0;

    /// The data amplification ratio
    float overhead = 0;

    /// The measured bucket load fraction
    float actual_load = 0;

    /// A histogram of the number of buckets having N spill records
    std::array<nbuck_t, 10> hist;

    /// Default constructor. Clears the histogram.
    verify_info()
    {
        hist.fill(0);
    }
};
/** Verify consistency of the key and data files.
This function opens the key and data files, and
performs the following checks on the contents:
@li Data file header validity
@li Key file header validity
@li Data and key file header agreements
@li Check that each value is contained in a bucket
@li Check that each bucket item reflects a value
@li Ensure no values with duplicate keys
Undefined behavior results when verifying a database
that still has a log file. Use @ref recover on such
databases first.
This function selects one of two algorithms to use, the
normal version, and a faster version that can take advantage
of a buffer of sufficient size. Depending on the value of
the bufferSize argument, the appropriate algorithm is chosen.
A good value of bufferSize is one that is a large fraction
of the key file size. For example, 20% of the size of the
key file. Larger is better, with the highest usable value
depending on the size of the key file. If presented with
a buffer size that is too large to be of extra use, the
fast algorithm will simply allocate what it needs.
@par Template Parameters
@tparam Hasher The hash function to use. This type must
meet the requirements of @b Hasher. The hash function
must be the same as that used to create the database, or
else an error is returned.
@param info A structure which will be default constructed
inside this function, and filled in if the operation completes
successfully. If an error is indicated, the contents of this
variable are undefined.
@param dat_path The path to the data file.
@param key_path The path to the key file.
@param bufferSize The number of bytes to allocate for the buffer.
If this number is too small, or zero, a slower algorithm will be
used that does not require a buffer.
@param progress A function which will be called periodically
as the algorithm proceeds. The equivalent signature of the
progress function must be:
@code
void progress(
std::uint64_t amount, // Amount of work done so far
std::uint64_t total // Total amount of work to do
);
@endcode
@param ec Set to the error, if any occurred.
*/
template<class Hasher, class Progress>
void
verify(
verify_info& info, // reset inside the function, then filled in
path_type const& dat_path,
path_type const& key_path,
std::size_t bufferSize, // in bytes; too small or zero selects the slower algorithm
Progress&& progress,
error_code& ec);
} // nudb
#include <nudb/impl/verify.ipp>
#endif

21
include/nudb/version.hpp Normal file
View File

@@ -0,0 +1,21 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VERSION_HPP
#define NUDB_VERSION_HPP
// follows http://semver.org
// NUDB_VERSION % 100 is the patch level
// NUDB_VERSION / 100 % 1000 is the minor version
// NUDB_VERSION / 100000 is the major version
//
#define NUDB_VERSION 100000
#define NUDB_VERSION_STRING "1.0.0-b6"
#endif

63
include/nudb/visit.hpp Normal file
View File

@@ -0,0 +1,63 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_VISIT_HPP
#define NUDB_VISIT_HPP
#include <nudb/error.hpp>
#include <nudb/file.hpp>
namespace nudb {
/** Visit each key/data pair in a data file.
This function will open and iterate the contents of a
data file, invoking the callback for each key/value
pair found. Only a data file is necessary, the key
file may be omitted.
@param path The path to the data file.
@param callback A function which will be called with
each item found in the data file. The equivalent signature
of the callback must be:
@code
void callback(
void const* key, // A pointer to the item key
std::size_t key_size, // The size of the key (always the same)
void const* data, // A pointer to the item data
std::size_t data_size, // The size of the item data
error_code& ec // Indicates an error (out parameter)
);
@endcode
If the callback sets ec to an error, the visit is terminated.
@param progress A function which will be called periodically
as the algorithm proceeds. The equivalent signature of the
progress function must be:
@code
void progress(
std::uint64_t amount, // Amount of work done so far
std::uint64_t total // Total amount of work to do
);
@endcode
@param ec Set to the error, if any occurred.
*/
template<class Callback, class Progress>
void
visit(
path_type const& path, // the data file; no key file is needed
Callback&& callback, // invoked per key/value pair; may stop the visit by setting its ec
Progress&& progress,
error_code& ec);
} // nudb
#include <nudb/impl/visit.ipp>
#endif

246
include/nudb/win32_file.hpp Normal file
View File

@@ -0,0 +1,246 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_DETAIL_WIN32_FILE_HPP
#define NUDB_DETAIL_WIN32_FILE_HPP
#include <nudb/file.hpp>
#include <nudb/error.hpp>
#include <cstddef>
#include <cstdint>
#include <string>
// NUDB_WIN32_FILE selects whether the Win32 file implementation in this
// header is compiled. If it is not already defined, it defaults to 1 when
// building with MSVC (_MSC_VER defined) and to 0 otherwise. Define it
// before including this header to override the default.
#ifndef NUDB_WIN32_FILE
# ifdef _MSC_VER
# define NUDB_WIN32_FILE 1
# else
# define NUDB_WIN32_FILE 0
# endif
#endif
#if NUDB_WIN32_FILE
// Save the current state of the macros that are forced on below, so the
// matching pop_macro pragmas after the include can restore them.
#pragma push_macro("NOMINMAX")
#pragma push_macro("UNICODE")
#pragma push_macro("STRICT")
// Make sure each configuration macro is defined before <Windows.h> is
// included, without redefining any that the includer already set.
# ifndef NOMINMAX
# define NOMINMAX
# endif
# ifndef UNICODE
# define UNICODE
# endif
# ifndef STRICT
# define STRICT
# endif
// NOTE(review): unlike the three macros above, WIN32_LEAN_AND_MEAN is not
// pushed/popped, so it stays defined after this header - confirm intended.
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# include <Windows.h>
// Restore in the reverse order of the pushes above.
#pragma pop_macro("STRICT")
#pragma pop_macro("UNICODE")
#pragma pop_macro("NOMINMAX")
#endif
#if NUDB_WIN32_FILE
namespace nudb {
/** A descriptor to a Win32 file.
This class provides a Win32 implementation of the @b File
concept. The object is move-only: copying is disabled, and
the destructor closes the file if it is open.
Member functions declared but not defined here are
implemented in <nudb/impl/win32_file.ipp>.
*/
class win32_file
{
// Native file handle. Holds INVALID_HANDLE_VALUE when no file
// is open; this is the condition tested by is_open().
HANDLE hf_ = INVALID_HANDLE_VALUE;
public:
/// Constructor
win32_file() = default;
/// Copy constructor (disallowed)
win32_file(win32_file const&) = delete;
/// Copy assignment (disallowed)
win32_file& operator=(win32_file const&) = delete;
/** Destructor.
If open, the file is closed.
*/
~win32_file();
/** Move constructor.
@note The state of the moved-from object is as if default constructed.
*/
win32_file(win32_file&&);
/** Move assignment.
@note The state of the moved-from object is as if default constructed.
*/
win32_file&
operator=(win32_file&& other);
/// Returns `true` if the file is open.
bool
is_open() const
{
return hf_ != INVALID_HANDLE_VALUE;
}
/// Close the file if it is open.
void
close();
/** Create a new file.
After the file is created, it is opened as if by `open(mode, path, ec)`.
@par Requirements
The file must not already exist, or else `errc::file_exists`
is returned in `ec`.
@param mode The open mode, which must be a valid @ref file_mode.
@param path The path of the file to create.
@param ec Set to the error, if any occurred.
*/
void
create(file_mode mode, path_type const& path, error_code& ec);
/** Open a file.
@par Requirements
The file must not already be open.
@param mode The open mode, which must be a valid @ref file_mode.
@param path The path of the file to open.
@param ec Set to the error, if any occurred.
*/
void
open(file_mode mode, path_type const& path, error_code& ec);
/** Remove a file from the file system.
It is not an error to attempt to erase a file that does not exist.
@param path The path of the file to remove.
@param ec Set to the error, if any occurred.
*/
static
void
erase(path_type const& path, error_code& ec);
/** Return the size of the file.
@par Requirements
The file must be open.
@param ec Set to the error, if any occurred.
@return The size of the file, in bytes.
*/
std::uint64_t
size(error_code& ec) const;
/** Read data from a location in the file.
@par Requirements
The file must be open.
@param offset The position in the file to read from,
expressed as a byte offset from the beginning.
@param buffer The location to store the data.
@param bytes The number of bytes to read.
@param ec Set to the error, if any occurred.
*/
void
read(std::uint64_t offset,
void* buffer, std::size_t bytes, error_code& ec);
/** Write data to a location in the file.
@par Requirements
The file must be open with a mode allowing writes.
@param offset The position in the file to write to,
expressed as a byte offset from the beginning.
@param buffer The data to write.
@param bytes The number of bytes to write.
@param ec Set to the error, if any occurred.
*/
void
write(std::uint64_t offset,
void const* buffer, std::size_t bytes, error_code& ec);
/** Perform a low level file synchronization.
@par Requirements
The file must be open with a mode allowing writes.
@param ec Set to the error, if any occurred.
*/
void
sync(error_code& ec);
/** Truncate the file at a specific size.
@par Requirements
The file must be open with a mode allowing writes.
@param length The new file size.
@param ec Set to the error, if any occurred.
*/
void
trunc(std::uint64_t length, error_code& ec);
private:
// Store the Win32 error code `dwError` in `ec`, using system_category().
static
void
err(DWORD dwError, error_code& ec)
{
ec = error_code{static_cast<int>(dwError), system_category()};
}
// Store the calling thread's last Win32 error (from GetLastError) in `ec`.
static
void
last_err(error_code& ec)
{
err(::GetLastError(), ec);
}
// Translate a file_mode into a pair of DWORD values.
// NOTE(review): presumably the flag values passed to CreateFile when
// opening/creating the file - confirm against win32_file.ipp.
// NOTE(review): std::pair needs <utility>, which this header does not
// include directly; presumably it arrives transitively - verify.
static
std::pair<DWORD, DWORD>
flags(file_mode mode);
};
} // nudb
#include <nudb/impl/win32_file.ipp>
#endif
#endif

45
include/nudb/xxhasher.hpp Normal file
View File

@@ -0,0 +1,45 @@
//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef NUDB_XXHASHER_HPP
#define NUDB_XXHASHER_HPP
#include <nudb/detail/xxhash.hpp>
#include <cstddef>
#include <cstdint>
#include <type_traits>
namespace nudb {
/** Hasher implementation backed by xxHash.

    This type satisfies the @b Hasher requirements. It is the
    hash function used by default when no other is specified.
*/
class xxhasher
{
public:
    /// The type of the value produced by the function call operator.
    using result_type = std::uint64_t;

    /** Construct the hasher.

        @param seed The seed value passed to every hash computation
        performed by this object.
    */
    explicit
    xxhasher(std::uint64_t seed)
        : seed_{seed}
    {
    }

    /** Compute the hash of a buffer.

        @param data A pointer to the bytes to hash.
        @param bytes The number of bytes pointed to by `data`.
        @return The 64-bit xxHash of the buffer, computed with the
        seed supplied at construction.
    */
    result_type
    operator()(void const* data, std::size_t bytes) const noexcept
    {
        result_type const digest = detail::XXH64(data, bytes, seed_);
        return digest;
    }

private:
    // Seed forwarded to detail::XXH64 on every invocation.
    std::uint64_t seed_;
};
} // nudb
#endif