NuDB: Performance improvements (RIPD-793, RIPD-796):

This commit introduces changes to NuDB that improve speed, reduce
database size, and enhance correctness. The most significant change
is to store hashes rather than entire keys in the key file. The
output of the hash function is reduced to 48 bits and stored
directly in buckets.
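
For illustration, the 48-bit reduction works like the
make_hash<uint48_t> specialization added in detail/format.h: the low
16 bits of the 64-bit hash are discarded and the next 48 are kept. A
minimal sketch (the helper name is illustrative):

    #include <cstdint>

    // Reduce a 64-bit hash to the 48 bits stored in a bucket entry,
    // mirroring make_hash<uint48_t> in this commit: drop the low
    // 16 bits, keep the next 48.
    inline std::uint64_t
    to_hash48 (std::uint64_t h)
    {
        return (h >> 16) & 0xffffffffffffULL;
    }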

The API also gains a Codec template parameter, allowing compression
and decompression to be handled by the implementation itself rather
than by callers.
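
The shape of a Codec is easiest to see in the pass-through case;
below is a sketch modeled on the identity_codec.h added here. The
exact member signatures are an assumption; what is grounded in this
commit is the BufferFactory idea (detail::buffer exposes
operator()(std::size_t) for exactly this purpose), which lets a real
codec obtain scratch space for its output:

    #include <cstddef>
    #include <utility>

    // Sketch of a pass-through codec (assumed interface, modeled on
    // the identity_codec added in this commit). A BufferFactory is
    // any callable returning writable storage of the requested size;
    // an identity codec never needs one.
    struct identity_codec_sketch
    {
        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        compress (void const* data,
            std::size_t size, BufferFactory&&) const
        {
            return std::make_pair (data, size);
        }

        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        decompress (void const* data,
            std::size_t size, BufferFactory&&) const
        {
            return std::make_pair (data, size);
        }
    };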

The data file no longer contains a salt, as the salt is applicable
only to the key and log files. This allows a data file to have
multiple key files with different salt values. To distinguish
physical files belonging to the same logical database, a new field,
UID, is introduced. The UID is a 64-bit random value generated once
at creation and stored in all three files.
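
Generation is simple; this mirrors the detail::make_uid() added in
create.h (visible in the diff below, which draws a std::size_t where
this sketch draws a std::uint64_t). The same value is written into
the data, key, and log file headers, and verify() rejects files
whose UIDs disagree:

    #include <cstdint>
    #include <random>

    // 64-bit UID drawn once at database creation, as in
    // detail::make_uid() from create.h.
    inline std::uint64_t
    make_uid_sketch()
    {
        std::random_device rng;
        std::mt19937_64 gen {rng()};
        std::uniform_int_distribution<std::uint64_t> dist;
        return dist(gen);
    }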

Buckets are zero-filled to the end of each block; this is a security
measure that prevents unintended contents of memory from being
written to disk. NuDB also gains a varint integer type, identical to
the varint described by Google's Protocol Buffers.
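
For reference, the base128 scheme packs seven payload bits per byte
and uses the high bit as a continuation flag. A sketch of
encode/decode (not the exact code in detail/varint.h):

    #include <cstdint>
    #include <vector>

    // Encode v as a base128 varint: low 7 bits per byte,
    // high bit set while more bytes follow.
    inline void
    write_varint (std::vector<std::uint8_t>& out, std::uint64_t v)
    {
        while (v >= 0x80)
        {
            out.push_back (static_cast<std::uint8_t>(
                (v & 0x7f) | 0x80));    // more bytes follow
            v >>= 7;
        }
        out.push_back (static_cast<std::uint8_t>(v)); // final byte
    }

    // Decode a varint, advancing p past the consumed bytes.
    inline std::uint64_t
    read_varint (std::uint8_t const*& p)
    {
        std::uint64_t v = 0;
        int shift = 0;
        std::uint8_t b;
        do
        {
            b = *p++;
            v |= std::uint64_t (b & 0x7f) << shift;
            shift += 7;
        }
        while (b & 0x80);
        return v;
    }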

* Add varint
* Add Codec template argument
* Add "api" convenience traits
* Store hash in buckets
* istream can throw short read errors
* Support std::uint8_t format in streams
* Make file classes part of the public interface
* Remove buffers pessimization, replace with buffer
* Consolidate creation utility functions to the same header
* Zero fill unused areas of buckets on disk
* More coverage and improvements to the recover test
* Fix file read/write to loop until all bytes are processed (see the sketch after this list)
* Add verify_fast, faster verify for large databases
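
On the read/write fix: POSIX read() and write() may legally transfer
fewer bytes than requested, so a single call is not enough. A minimal
sketch of the retry pattern (not the posix_file code verbatim; real
code must also handle EINTR):

    #include <cstddef>
    #include <unistd.h>

    // Keep calling read(2) until every requested byte has arrived;
    // a single call may return a short count.
    inline bool
    read_all (int fd, void* buf, std::size_t n)
    {
        auto p = static_cast<char*> (buf);
        while (n > 0)
        {
            ssize_t const amount = ::read (fd, p, n);
            if (amount <= 0)    // 0 = end of file, -1 = error
                return false;
            p += amount;
            n -= static_cast<std::size_t> (amount);
        }
        return true;
    }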

The database version number is incremented to 2; older databases can
no longer be opened and should be deleted.
Author: Vinnie Falco
Date:   2015-02-03 07:46:24 -08:00
Parent: 62c5b5e570
Commit: e2a5535ed6
37 changed files with 2098 additions and 1300 deletions

View File

@@ -804,20 +804,22 @@
</ClCompile>
<ClInclude Include="..\..\src\beast\beast\nudb.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\api.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\common.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\create.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\arena.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\bucket.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffers.h">
<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffer.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\bulkio.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\cache.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\config.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\field.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\format.h">
@@ -826,21 +828,19 @@
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\pool.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\posix_file.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\stream.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\win32_file.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\error.h">
<ClInclude Include="..\..\src\beast\beast\nudb\detail\varint.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\file.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\mode.h">
<ClInclude Include="..\..\src\beast\beast\nudb\identity_codec.h">
</ClInclude>
<ClCompile Include="..\..\src\beast\beast\nudb\nudb.cpp">
<ExcludedFromBuild>True</ExcludedFromBuild>
</ClCompile>
<ClInclude Include="..\..\src\beast\beast\nudb\posix_file.h">
</ClInclude>
<None Include="..\..\src\beast\beast\nudb\README.md">
</None>
<ClInclude Include="..\..\src\beast\beast\nudb\recover.h">
@@ -860,6 +860,9 @@
<ClCompile Include="..\..\src\beast\beast\nudb\tests\store_test.cpp">
<ExcludedFromBuild>True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\beast\beast\nudb\tests\varint_test.cpp">
<ExcludedFromBuild>True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\beast\beast\nudb\tests\verify_test.cpp">
<ExcludedFromBuild>True</ExcludedFromBuild>
</ClCompile>
@@ -867,6 +870,8 @@
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\visit.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\win32_file.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\random\rngfill.h">
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\random\xor_shift_engine.h">

View File

@@ -1437,6 +1437,12 @@
<ClInclude Include="..\..\src\beast\beast\nudb.h">
<Filter>beast</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\api.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\common.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\create.h">
<Filter>beast\nudb</Filter>
</ClInclude>
@@ -1446,7 +1452,7 @@
<ClInclude Include="..\..\src\beast\beast\nudb\detail\bucket.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffers.h">
<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffer.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\bulkio.h">
@@ -1455,9 +1461,6 @@
<ClInclude Include="..\..\src\beast\beast\nudb\detail\cache.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\config.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\field.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
@@ -1470,27 +1473,24 @@
<ClInclude Include="..\..\src\beast\beast\nudb\detail\pool.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\posix_file.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\stream.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\detail\win32_file.h">
<ClInclude Include="..\..\src\beast\beast\nudb\detail\varint.h">
<Filter>beast\nudb\detail</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\error.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\file.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\mode.h">
<ClInclude Include="..\..\src\beast\beast\nudb\identity_codec.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClCompile Include="..\..\src\beast\beast\nudb\nudb.cpp">
<Filter>beast\nudb</Filter>
</ClCompile>
<ClInclude Include="..\..\src\beast\beast\nudb\posix_file.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<None Include="..\..\src\beast\beast\nudb\README.md">
<Filter>beast\nudb</Filter>
</None>
@@ -1515,6 +1515,9 @@
<ClCompile Include="..\..\src\beast\beast\nudb\tests\store_test.cpp">
<Filter>beast\nudb\tests</Filter>
</ClCompile>
<ClCompile Include="..\..\src\beast\beast\nudb\tests\varint_test.cpp">
<Filter>beast\nudb\tests</Filter>
</ClCompile>
<ClCompile Include="..\..\src\beast\beast\nudb\tests\verify_test.cpp">
<Filter>beast\nudb\tests</Filter>
</ClCompile>
@@ -1524,6 +1527,9 @@
<ClInclude Include="..\..\src\beast\beast\nudb\visit.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\nudb\win32_file.h">
<Filter>beast\nudb</Filter>
</ClInclude>
<ClInclude Include="..\..\src\beast\beast\random\rngfill.h">
<Filter>beast\random</Filter>
</ClInclude>

View File

@@ -20,10 +20,10 @@
#ifndef BEAST_NUDB_H_INCLUDED
#define BEAST_NUDB_H_INCLUDED
#include <beast/nudb/api.h>
#include <beast/nudb/create.h>
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/file.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/store.h>
#include <beast/nudb/verify.h>

View File

@@ -167,16 +167,23 @@ fixed-length Bucket Records.
char[8] Type The characters "nudb.key"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint16 KeySize Key size in bytes
uint64 Salt A random seed
uint64 Pepper The salt hashed
uint16 KeySize Key size in bytes
uint16 BlockSize Size of a file block in bytes
uint16 LoadFactor Target fraction in 65536ths
uint8[64] Reserved Zeroes
uint8[56] Reserved Zeroes
uint8[] Reserved Zero-pad to block size
The Type identifies the file as belonging to nudb. Salt is
The Type identifies the file as belonging to nudb. The UID is
generated randomly when the database is created, and this value
is stored in the data and log files as well. The UID is used
to determine if files belong to the same database. Salt is
generated when the database is created and helps prevent
complexity attacks; the salt is prepended to the key material
when computing a hash, or used to initialize the state of
@@ -197,7 +204,8 @@ bucket, and defines the size of a bucket record. The load factor
is the target fraction of bucket occupancy.
None of the information in the key file header or the data file
header may be changed after the database is created.
header may be changed after the database is created, including
the Appnum.
#### Bucket Record (fixed-length)
@@ -209,7 +217,7 @@ header may be changed after the database is created.
uint48 Offset Offset in data file of the data
uint48 Size The size of the value in bytes
uint8[KeySize] Key The key
uint48 Hash The hash of the key
### Data File
@@ -220,14 +228,15 @@ variable-length Value Records and Spill Records.
char[8] Type The characters "nudb.dat"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint64 Salt A random seed
uint16 KeySize Key size in bytes
uint8[64] Reserved Zeroes
Salt contains the same value as the salt in the corresponding
key file. This is placed in the data file so that key and value
files belonging to the same database can be identified.
UID contains the same value as the UID in the corresponding key
file. This is placed in the data file so that key and value files
belonging to the same database can be identified.
#### Data Record (variable-length)
@@ -244,15 +253,24 @@ files belonging to the same database can be identified.
### Log File
The Log file contains the Header followed by zero or more fixed size
log records. Each log record contains a snapshot of a bucket. When a
database is not closed cleanly, the recovery process applies the log
records to the key file, overwriting data that may be only partially
updated with known good information. After the log records are applied,
the data and key files are truncated to the last known good size.
#### Header (44 bytes)
#### Header (62 bytes)
char[8] Type The characters "nudb.log"
uint16 Version Holds the version number
uint64 UID Unique ID generated on creation
uint64 Appnum Application defined constant
uint16 KeySize Key size in bytes
uint64 Salt A random seed.
uint64 Pepper The salt hashed
uint16 KeySize Key size in bytes
uint16 BlockSize Size of a file block in bytes
uint64 KeyFileSize Size of key file.
uint64 DataFileSize Size of data file.

src/beast/beast/nudb/api.h (new file, 109 lines)
View File

@@ -0,0 +1,109 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_API_H_INCLUDED
#define BEAST_NUDB_API_H_INCLUDED
#include <beast/nudb/create.h>
#include <beast/nudb/store.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/verify.h>
#include <beast/nudb/visit.h>
#include <cstdint>
namespace beast {
namespace nudb {
// Convenience for consolidating template arguments
//
template <
class Hasher,
class Codec,
class File = native_file,
std::size_t BufferSize = 16 * 1024 * 1024
>
struct api
{
using hash_type = Hasher;
using codec_type = Codec;
using file_type = File;
using store = nudb::store<Hasher, Codec, File>;
static std::size_t const buffer_size = BufferSize;
template <class... Args>
static
bool
create (
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
std::uint64_t appnum,
std::uint64_t salt,
std::size_t key_size,
std::size_t block_size,
float load_factor,
Args&&... args)
{
return nudb::create<Hasher, Codec, File>(
dat_path, key_path, log_path,
appnum, salt, key_size, block_size,
load_factor, args...);
}
template <class... Args>
static
bool
recover (
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
Args&&... args)
{
return nudb::recover<Hasher, Codec, File>(
dat_path, key_path, log_path, BufferSize,
args...);
}
static
verify_info
verify (
path_type const& dat_path,
path_type const& key_path)
{
return nudb::verify<Hasher>(
dat_path, key_path, BufferSize);
}
template <class Function>
static
bool
visit(
path_type const& path,
Function&& f)
{
return nudb::visit<Codec>(
path, BufferSize, f);
}
};
} // nudb
} // beast
#endif
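
A usage sketch for the trait above (the identity_codec spelling and
the numeric parameters are illustrative; create()'s argument order
follows the signature shown):

    #include <beast/hash/xxhasher.h>
    #include <beast/nudb/api.h>
    #include <beast/nudb/identity_codec.h>

    // Bind the template arguments once, then reuse them.
    using db = beast::nudb::api<
        beast::xxhasher, beast::nudb::identity_codec>;

    bool make_db()
    {
        return db::create ("db.dat", "db.key", "db.log",
            1,                          // appnum
            beast::nudb::make_salt(),   // salt
            32,                         // key size in bytes
            4096,                       // block size
            0.5f);                      // load factor
    }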

View File

@@ -17,32 +17,48 @@
*/
//==============================================================================
#ifndef BEAST_NUDB_ERROR_H_INCLUDED
#define BEAST_NUDB_ERROR_H_INCLUDED
#ifndef BEAST_NUDB_COMMON_H_INCLUDED
#define BEAST_NUDB_COMMON_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <beast/utility/noexcept.h>
#include <stdexcept>
#include <string>
namespace beast {
namespace nudb {
// Commonly used types
enum class file_mode
{
scan, // read sequential
read, // read random
append, // read random, write append
write // read random, write random
};
using path_type = std::string;
// All exceptions thrown by nudb are derived
// from std::exception except for fail_error
// from std::runtime_error except for fail_error
/** Thrown when a codec fails, e.g. corrupt data. */
struct codec_error : std::runtime_error
{
template <class String>
explicit
codec_error (String const& s)
: runtime_error(s)
{
}
};
/** Base class for all errors thrown by file classes. */
struct file_error : std::runtime_error
{
template <class String>
explicit
file_error (char const* s)
: std::runtime_error(s)
{
}
explicit
file_error (std::string const& s)
: std::runtime_error(s)
file_error (String const& s)
: runtime_error(s)
{
}
};
@@ -67,21 +83,24 @@ struct file_short_write_error : file_error
}
};
/** Thrown when end of istream reached while reading. */
struct short_read_error : std::runtime_error
{
short_read_error()
: std::runtime_error(
"nudb: short read")
{
}
};
/** Base class for all exceptions thrown by store. */
class store_error : public std::runtime_error
{
public:
template <class String>
explicit
store_error (char const* m)
: std::runtime_error(
std::string("nudb: ") + m)
{
}
explicit
store_error (std::string const& m)
: std::runtime_error(
std::string("nudb: ") + m)
store_error (String const& s)
: runtime_error(s)
{
}
};
@@ -90,15 +109,10 @@ public:
class store_corrupt_error : public store_error
{
public:
template <class String>
explicit
store_corrupt_error (char const* m)
: store_error (m)
{
}
explicit
store_corrupt_error (std::string const& m)
: store_error (m)
store_corrupt_error (String const& s)
: store_error(s)
{
}
};

View File

@@ -22,16 +22,49 @@
#include <beast/nudb/file.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <algorithm>
#include <cstring>
#include <random>
#include <stdexcept>
#include <utility>
namespace beast {
namespace nudb {
namespace detail {
template <class = void>
std::uint64_t
make_uid()
{
std::random_device rng;
std::mt19937_64 gen {rng()};
std::uniform_int_distribution <std::size_t> dist;
return dist(gen);
}
}
/** Generate a random salt. */
template <class = void>
std::uint64_t
make_salt()
{
std::random_device rng;
std::mt19937_64 gen {rng()};
std::uniform_int_distribution <std::size_t> dist;
return dist(gen);
}
/** Returns the best guess at the volume's block size. */
inline
std::size_t
block_size (path_type const& /*path*/)
{
return 4096;
}
/** Create a new database.
Preconditions:
The files must not exist
@@ -40,7 +73,12 @@ namespace nudb {
@param args Arguments passed to File constructors
@return `false` if any file could not be created.
*/
template <class Hasher = default_hash>
template <
class Hasher,
class Codec,
class File,
class... Args
>
bool
create (
path_type const& dat_path,
@@ -50,10 +88,10 @@ create (
std::uint64_t salt,
std::size_t key_size,
std::size_t block_size,
float load_factor)
float load_factor,
Args&&... args)
{
using namespace detail;
using File = native_file;
if (key_size < 1)
throw std::domain_error(
"invalid key size");
@@ -67,43 +105,41 @@ create (
throw std::domain_error(
"nudb: load factor too large");
auto const capacity =
bucket_capacity(key_size, block_size);
bucket_capacity(block_size);
if (capacity < 1)
throw std::domain_error(
"nudb: block size too small");
File df;
File kf;
File lf;
for(;;)
File df(args...);
File kf(args...);
File lf(args...);
if (df.create(
file_mode::append, dat_path))
{
if (df.create(
file_mode::append, dat_path))
if (kf.create (
file_mode::append, key_path))
{
if (kf.create (
file_mode::append, key_path))
{
if (lf.create(
file_mode::append, log_path))
break;
File::erase (dat_path);
}
File::erase (key_path);
if (lf.create(
file_mode::append, log_path))
goto success;
File::erase (dat_path);
}
return false;
File::erase (key_path);
}
return false;
success:
dat_file_header dh;
dh.version = currentVersion;
dh.uid = make_uid();
dh.appnum = appnum;
dh.salt = salt;
dh.key_size = key_size;
key_file_header kh;
kh.version = currentVersion;
kh.uid = dh.uid;
kh.appnum = appnum;
kh.key_size = key_size;
kh.salt = salt;
kh.pepper = pepper<Hasher>(salt);
kh.key_size = key_size;
kh.block_size = block_size;
// VFALCO Should it be 65536?
// How do we set the min?
@@ -113,8 +149,7 @@ create (
write (kf, kh);
buffer buf(block_size);
std::memset(buf.get(), 0, block_size);
bucket b (key_size, block_size,
buf.get(), empty);
bucket b (block_size, buf.get(), empty);
b.write (kf, block_size);
// VFALCO Leave log file empty?
df.sync();

View File

@@ -20,7 +20,6 @@
#ifndef BEAST_NUDB_DETAIL_ARENA_H_INCLUDED
#define BEAST_NUDB_DETAIL_ARENA_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>

View File

@@ -20,12 +20,11 @@
#ifndef BEAST_NUDB_DETAIL_BUCKET_H_INCLUDED
#define BEAST_NUDB_DETAIL_BUCKET_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/detail/bulkio.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/field.h>
#include <beast/nudb/detail/format.h>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
@@ -33,21 +32,7 @@ namespace beast {
namespace nudb {
namespace detail {
// Key, hash, and bucket calculations:
// Returns the hash of a key given the salt
//
template <class Hasher>
inline
typename Hasher::result_type
hash (void const* key,
std::size_t key_size, std::size_t salt)
{
Hasher h (salt);
h.append (key, key_size);
return static_cast<
typename Hasher::result_type>(h);
}
// bucket calculations:
// Returns bucket index given hash, buckets, and modulus
//
@@ -62,30 +47,6 @@ bucket_index (std::size_t h,
return n;
}
// Returns the bucket index of a key
//
template <class Hasher>
inline
std::size_t
bucket_index (void const* key, std::size_t key_size,
std::size_t salt, std::size_t buckets,
std::size_t modulus)
{
return bucket_index (hash<Hasher>
(key, key_size, salt), buckets, modulus);
}
// Returns the bucket index of a key
// given the key file header
template <class Hasher>
inline
std::size_t
bucket_index (void const* key, key_file_header const& kh)
{
return bucket_index<Hasher>(key, kh.key_size,
kh.salt, kh.buckets, kh.modulus);
}
//------------------------------------------------------------------------------
// Tag for constructing empty buckets
@@ -97,9 +58,8 @@ template <class = void>
class bucket_t
{
private:
std::size_t key_size_; // Size of key in bytes
std::size_t block_size_; // Size of a key file block
std::size_t count_; // Current key count
std::size_t size_; // Current key count
std::size_t spill_; // Offset of next spill record or 0
std::uint8_t* p_; // Pointer to the bucket blob
@@ -108,23 +68,15 @@ public:
{
std::size_t offset;
std::size_t size;
void const* key;
std::size_t hash;
};
bucket_t (bucket_t const&) = default;
bucket_t& operator= (bucket_t const&) = default;
bucket_t (std::size_t key_size,
std::size_t block_size, void* p);
bucket_t (std::size_t block_size, void* p);
bucket_t (std::size_t key_size,
std::size_t block_size, void* p, empty_t);
std::size_t
key_size() const
{
return key_size_;
}
bucket_t (std::size_t block_size, void* p, empty_t);
std::size_t
block_size() const
@@ -135,44 +87,46 @@ public:
std::size_t
compact_size() const
{
return detail::compact_size(
key_size_, count_);
return detail::bucket_size(size_);
}
bool
empty() const
{
return count_ == 0;
return size_ == 0;
}
bool
full() const
{
return count_ >= detail::bucket_capacity(
key_size_, block_size_);
return size_ >=
detail::bucket_capacity(block_size_);
}
std::size_t
size() const
{
return count_;
return size_;
}
// Returns offset of next spill record or 0
//
std::size_t
spill() const
{
return spill_;
}
// Clear contents of the bucket
void
clear();
// Set offset of next spill record
//
void
spill (std::size_t offset);
// Clear contents of the bucket
//
void
clear();
// Returns the record for a key
// entry without bounds checking.
//
@@ -185,12 +139,15 @@ public:
return at(i);
}
std::pair<value_type, bool>
find (void const* key) const;
// Returns index of entry with prefix
// equal to or greater than the given prefix.
//
std::size_t
lower_bound (std::size_t h) const;
void
insert (std::size_t offset,
std::size_t size, void const* key);
std::size_t size, std::size_t h);
// Erase an element by index
//
@@ -227,45 +184,31 @@ private:
// Update size and spill in the blob
void
update();
std::pair<std::size_t, bool>
lower_bound (void const* key) const;
};
//------------------------------------------------------------------------------
template <class _>
bucket_t<_>::bucket_t (std::size_t key_size,
bucket_t<_>::bucket_t (
std::size_t block_size, void* p)
: key_size_ (key_size)
, block_size_ (block_size)
: block_size_ (block_size)
, p_ (reinterpret_cast<std::uint8_t*>(p))
{
// Bucket Record
istream is(p_, block_size);
detail::read<uint16_t>(is, count_); // Count
detail::read<uint16_t>(is, size_); // Count
detail::read<uint48_t>(is, spill_); // Spill
}
template <class _>
bucket_t<_>::bucket_t (std::size_t key_size,
bucket_t<_>::bucket_t (
std::size_t block_size, void* p, empty_t)
: key_size_ (key_size)
, block_size_ (block_size)
, count_ (0)
: block_size_ (block_size)
, size_ (0)
, spill_ (0)
, p_ (reinterpret_cast<std::uint8_t*>(p))
{
update();
}
template <class _>
void
bucket_t<_>::clear()
{
count_ = 0;
spill_ = 0;
update();
clear();
}
template <class _>
@@ -276,6 +219,15 @@ bucket_t<_>::spill (std::size_t offset)
update();
}
template <class _>
void
bucket_t<_>::clear()
{
size_ = 0;
spill_ = 0;
std::memset(p_, 0, block_size_);
}
template <class _>
auto
bucket_t<_>::at (std::size_t i) const ->
@@ -286,7 +238,7 @@ bucket_t<_>::at (std::size_t i) const ->
std::size_t const w =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size_; // Key
field<hash_t>::size; // Prefix
// Bucket Record
detail::istream is(p_ +
field<std::uint16_t>::size + // Count
@@ -297,54 +249,80 @@ bucket_t<_>::at (std::size_t i) const ->
is, result.offset); // Offset
detail::read<uint48_t>(
is, result.size); // Size
result.key = is.data(key_size_); // Key
detail::read<hash_t>(
is, result.hash); // Hash
return result;
}
template <class _>
auto
bucket_t<_>::find (void const* key) const ->
std::pair<value_type, bool>
std::size_t
bucket_t<_>::lower_bound (
std::size_t h) const
{
std::pair<value_type, bool> result;
std::size_t i;
std::tie(i, result.second) = lower_bound(key);
if (result.second)
result.first = at(i);
return result;
// Bucket Entry
auto const w =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
field<hash_t>::size; // Hash
// Bucket Record
auto const p = p_ +
field<std::uint16_t>::size + // Count
field<uint48_t>::size + // Spill
// Bucket Entry
field<uint48_t>::size + // Offset
field<uint48_t>::size; // Size
std::size_t step;
std::size_t first = 0;
std::size_t count = size_;
while (count > 0)
{
step = count / 2;
auto const i = first + step;
std::size_t h1;
readp<hash_t>(p + i * w, h1);
if (h1 < h)
{
first = i + 1;
count -= step + 1;
}
else
{
count = step;
}
}
return first;
}
template <class _>
void
bucket_t<_>::insert (std::size_t offset,
std::size_t size, void const* key)
std::size_t size, std::size_t h)
{
bool found;
std::size_t i;
std::tie(i, found) = lower_bound(key);
(void)found;
assert(! found);
std::size_t i = lower_bound(h);
// Bucket Record
auto const p = p_ +
field<std::uint16_t>::size + // Count
field<uint48_t>::size; // Spill
field<
std::uint16_t>::size + // Count
field<uint48_t>::size; // Spill
// Bucket Entry
std::size_t const w =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size_; // Key
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
field<hash_t>::size; // Hash
std::memmove (
p + (i + 1) * w,
p + i * w,
(count_ - i) * w);
count_++;
(size_ - i) * w);
size_++;
update();
// Bucket Entry
ostream os (p + i * w, w);
detail::write<uint48_t>(os, offset); // Offset
detail::write<uint48_t>(os, size); // Size
std::memcpy (os.data(key_size_),
key, key_size_); // Key
detail::write<uint48_t>(
os, offset); // Offset
detail::write<uint48_t>(
os, size); // Size
detail::write<hash_t>(
os, h); // Prefix
}
template <class _>
@@ -353,18 +331,20 @@ bucket_t<_>::erase (std::size_t i)
{
// Bucket Record
auto const p = p_ +
field<std::uint16_t>::size + // Count
field<uint48_t>::size; // Spill
field<
std::uint16_t>::size + // Count
field<uint48_t>::size; // Spill
auto const w =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size_; // Key
--count_;
if (i != count_)
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
field<hash_t>::size; // Hash
--size_;
if (i < size_)
std::memmove(
p + i * w,
p + (i + 1) * w,
(count_ - i) * w);
p + i * w,
p + (i + 1) * w,
(size_ - i) * w);
std::memset(p + size_ * w, 0, w);
update();
}
@@ -374,17 +354,15 @@ void
bucket_t<_>::read (File& f, std::size_t offset)
{
auto const cap = bucket_capacity (
key_size_, block_size_);
block_size_);
// Excludes padding to block size
f.read (offset, p_, bucket_size(
key_size_, bucket_capacity(
key_size_, block_size_)));
f.read (offset, p_, bucket_size(cap));
istream is(p_, block_size_);
detail::read<
std::uint16_t>(is, count_); // Count
std::uint16_t>(is, size_); // Count
detail::read<
uint48_t>(is, spill_); // Spill
if (count_ > cap)
if (size_ > cap)
throw store_corrupt_error(
"bad bucket size");
}
@@ -399,19 +377,21 @@ bucket_t<_>::read (bulk_reader<File>& r)
detail::field<std::uint16_t>::size +
detail::field<uint48_t>::size);
detail::read<
std::uint16_t>(is, count_); // Count
detail::read<uint48_t>(is, spill_); // Spill
std::uint16_t>(is, size_); // Count
detail::read<uint48_t>(
is, spill_); // Spill
update();
// Excludes empty bucket entries
auto const w = count_ * (
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size_); // Key
auto const w = size_ * (
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
field<hash_t>::size); // Hash
is = r.prepare (w);
std::memcpy(p_ +
field<std::uint16_t>::size + // Count
field<uint48_t>::size, // Spill
is.data(w), w); // Entries
field<
std::uint16_t>::size + // Count
field<uint48_t>::size, // Spill
is.data(w), w); // Entries
}
template <class _>
@@ -447,56 +427,40 @@ bucket_t<_>::update()
// Bucket Record
ostream os(p_, block_size_);
detail::write<
std::uint16_t>(os, count_); // Count
std::uint16_t>(os, size_); // Count
detail::write<
uint48_t>(os, spill_); // Spill
}
// bool is true if key matches index
template <class _>
std::pair<std::size_t, bool>
bucket_t<_>::lower_bound (
void const* key) const
{
// Bucket Entry
auto const w =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size_; // Key
// Bucket Record
auto const p = p_ +
field<std::uint16_t>::size + // Count
field<uint48_t>::size + // Spill
// Bucket Entry
field<uint48_t>::size + // Offset
field<uint48_t>::size; // Size
std::size_t step;
std::size_t first = 0;
std::size_t count = count_;
while (count > 0)
{
step = count / 2;
auto const i = first + step;
auto const c = std::memcmp (
p + i * w, key, key_size_);
if (c < 0)
{
first = i + 1;
count -= step + 1;
}
else if (c > 0)
{
count = step;
}
else
{
return std::make_pair (i, true);
}
}
return std::make_pair (first, false);
}
using bucket = bucket_t<>;
// Spill bucket if full.
// The bucket is cleared after it spills.
//
template <class File>
void
maybe_spill(bucket& b, bulk_writer<File>& w)
{
if (b.full())
{
// Spill Record
auto const offset = w.offset();
auto os = w.prepare(
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
b.compact_size());
write <uint48_t> (os, 0); // Zero
write <std::uint16_t> (
os, b.compact_size()); // Size
auto const spill =
offset + os.size();
b.write (os); // Bucket
// Update bucket
b.clear();
b.spill (spill);
}
}
} // detail
} // nudb
} // beast

View File

@@ -0,0 +1,99 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_BUFFER_H_INCLUDED
#define BEAST_NUDB_DETAIL_BUFFER_H_INCLUDED
#include <atomic>
#include <cstdint>
#include <memory>
namespace beast {
namespace nudb {
namespace detail {
// Simple growable memory buffer
class buffer
{
private:
std::size_t size_ = 0;
std::unique_ptr<std::uint8_t[]> buf_;
public:
~buffer() = default;
buffer() = default;
buffer (buffer const&) = delete;
buffer& operator= (buffer const&) = delete;
explicit
buffer (std::size_t n)
: size_ (n)
, buf_ (new std::uint8_t[n])
{
}
buffer (buffer&& other)
: size_ (other.size_)
, buf_ (std::move(other.buf_))
{
other.size_ = 0;
}
buffer& operator= (buffer&& other)
{
size_ = other.size_;
buf_ = std::move(other.buf_);
other.size_ = 0;
return *this;
}
std::size_t
size() const
{
return size_;
}
std::uint8_t*
get() const
{
return buf_.get();
}
void
reserve (std::size_t n)
{
if (size_ < n)
buf_.reset (new std::uint8_t[n]);
size_ = n;
}
// BufferFactory
void*
operator() (std::size_t n)
{
reserve(n);
return buf_.get();
}
};
} // detail
} // nudb
} // beast
#endif

View File

@@ -1,147 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_BUFFERS_H_INCLUDED
#define BEAST_NUDB_DETAIL_BUFFERS_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <atomic>
#include <mutex>
#include <new>
namespace beast {
namespace nudb {
namespace detail {
// Thread safe pool of temp buffers,
// to avoid needless calls to malloc.
template <class = void>
class buffers_t
{
private:
struct element
{
element* next;
};
std::size_t const block_size_;
std::mutex m_;
element* h_ = nullptr;
public:
class value_type
{
private:
buffers_t& b_;
element* e_;
public:
value_type (value_type const&) = delete;
value_type& operator= (value_type const&) = delete;
explicit
value_type (buffers_t& b)
: b_ (b)
, e_ (b.acquire())
{
}
~value_type()
{
b_.release(e_);
}
std::uint8_t*
get() const
{
return const_cast <std::uint8_t*>(
reinterpret_cast<
std::uint8_t const*>(e_ + 1));
}
};
explicit
buffers_t (std::size_t block_size);
~buffers_t();
private:
element*
acquire();
void
release (element* e);
};
template <class _>
buffers_t<_>::buffers_t (std::size_t block_size)
: block_size_ (block_size)
, h_ (nullptr)
{
}
template <class _>
buffers_t<_>::~buffers_t()
{
for (element* e = h_; e;)
{
element* const next = e->next;
e->~element();
delete[] reinterpret_cast<
std::uint8_t*>(e);
e = next;
}
}
template <class _>
auto
buffers_t<_>::acquire() ->
element*
{
{
std::lock_guard<std::mutex> m(m_);
element* e = h_;
if (e)
{
h_ = e->next;
return e;
}
}
return ::new(
new std::uint8_t[
sizeof(element) + block_size_]
) element;
}
template <class _>
void
buffers_t<_>::release (element* e)
{
std::lock_guard<std::mutex> m(m_);
e->next = h_;
h_ = e;
}
using buffers = buffers_t<>;
} // detail
} // nudb
} // beast
#endif

View File

@@ -20,7 +20,7 @@
#ifndef BEAST_NUDB_DETAIL_BULKIO_H_INCLUDED
#define BEAST_NUDB_DETAIL_BULKIO_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/buffer.h>
#include <beast/nudb/detail/stream.h>
#include <algorithm>
#include <cstddef>
@@ -45,10 +45,16 @@ public:
bulk_reader (File& f, std::size_t offset,
std::size_t last, std::size_t buffer_size);
std::size_t
offset() const
{
return offset_ - avail_;
}
bool
eof() const
{
return offset_ - avail_ == last_;
return offset() >= last_;
}
istream

View File

@@ -22,7 +22,6 @@
#include <beast/nudb/detail/arena.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/config.h>
#include <boost/iterator/transform_iterator.hpp>
#include <algorithm>
#include <cstdint>
@@ -77,8 +76,8 @@ private:
operator() (argument_type const& e) const
{
return std::make_pair(e.first,
bucket (cache_->key_size_,
cache_->block_size_, e.second));
bucket (cache_->block_size_,
e.second));
}
};
@@ -209,7 +208,7 @@ cache_t<_>::create (std::size_t n)
{
auto const p = arena_.alloc (block_size_);
map_.emplace (n, p);
return bucket (key_size_, block_size_,
return bucket (block_size_,
p, detail::empty);
}

View File

@@ -1,75 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_CONFIG_H_INCLUDED
#define BEAST_NUDB_DETAIL_CONFIG_H_INCLUDED
#include <beast/hash/xxhasher.h>
// Compiles out domain checks
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
# ifdef NDEBUG
# define BEAST_NUDB_NO_DOMAIN_CHECK 1
# else
# define BEAST_NUDB_NO_DOMAIN_CHECK 0
# endif
#endif
namespace beast {
namespace nudb {
// xxhasher is the fastest and the best choice
// when keys are already uniformly distributed
using default_hash = xxhasher;
namespace detail {
// Returns the closest power of 2 not less than x
template <class = void>
std::size_t
ceil_pow2 (unsigned long long x)
{
static const unsigned long long t[6] = {
0xFFFFFFFF00000000ull,
0x00000000FFFF0000ull,
0x000000000000FF00ull,
0x00000000000000F0ull,
0x000000000000000Cull,
0x0000000000000002ull
};
int y = (((x & (x - 1)) == 0) ? 0 : 1);
int j = 32;
int i;
for(i = 0; i < 6; i++) {
int k = (((x & t[i]) == 0) ? 0 : j);
y += k;
x >>= k;
j >>= 1;
}
return std::size_t(1)<<y;
}
} // detail
} // nudb
} // beast
#endif

View File

@@ -17,10 +17,9 @@
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_FIELD_H_INCLUDED
#define BEAST_NUDB_DETAIL_FIELD_H_INCLUDED
#ifndef BEAST_NUDB_FIELD_H_INCLUDED
#define BEAST_NUDB_FIELD_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/stream.h>
#include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
#include <cstddef>
@@ -85,16 +84,26 @@ struct field <std::uint64_t>
static std::size_t BEAST_CONSTEXPR max = 0xffffffffffffffff;
};
// read field from istream
// read field from memory
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint8_t>::value>* = nullptr>
void
readp (void const* v, U& u)
{
std::uint8_t const* p =
reinterpret_cast<std::uint8_t const*>(v);
u = *p;
}
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint16_t>::value>* = nullptr>
void
read (istream& is, U& u)
readp (void const* v, U& u)
{
T t;
std::uint8_t const* p =
is.data(field<T>::size);
reinterpret_cast<std::uint8_t const*>(v);
T t;
t = T(*p++)<< 8;
t = T(*p ) | t;
u = t;
@@ -103,25 +112,25 @@ read (istream& is, U& u)
template <class T, class U, std::enable_if_t<
std::is_same<T, uint24_t>::value>* = nullptr>
void
read (istream& is, U& u)
readp (void const* v, U& u)
{
T t;
std::uint8_t const* p =
is.data(field<T>::size);
t = (T(*p++)<<16) | t;
t = (T(*p++)<< 8) | t;
t = T(*p ) | t;
reinterpret_cast<std::uint8_t const*>(v);
std::uint32_t t;
t = std::uint32_t(*p++)<<16;
t = (std::uint32_t(*p++)<< 8) | t;
t = std::uint32_t(*p ) | t;
u = t;
}
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint32_t>::value>* = nullptr>
void
read (istream& is, U& u)
readp (void const* v, U& u)
{
T t;
std::uint8_t const* p =
is.data(field<T>::size);
reinterpret_cast<std::uint8_t const*>(v);
T t;
t = T(*p++)<<24;
t = (T(*p++)<<16) | t;
t = (T(*p++)<< 8) | t;
@@ -132,11 +141,11 @@ read (istream& is, U& u)
template <class T, class U, std::enable_if_t<
std::is_same<T, uint48_t>::value>* = nullptr>
void
read (istream& is, U& u)
readp (void const* v, U& u)
{
std::uint64_t t;
std::uint8_t const* p =
is.data(field<T>::size);
reinterpret_cast<std::uint8_t const*>(v);
std::uint64_t t;
t = (std::uint64_t(*p++)<<40);
t = (std::uint64_t(*p++)<<32) | t;
t = (std::uint64_t(*p++)<<24) | t;
@@ -149,11 +158,11 @@ read (istream& is, U& u)
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint64_t>::value>* = nullptr>
void
read (istream& is, U& u)
readp (void const* v, U& u)
{
T t;
std::uint8_t const* p =
is.data(field<T>::size);
reinterpret_cast<std::uint8_t const*>(v);
T t;
t = T(*p++)<<56;
t = (T(*p++)<<48) | t;
t = (T(*p++)<<40) | t;
@@ -165,18 +174,32 @@ read (istream& is, U& u)
u = t;
}
// read field from istream
template <class T, class U>
void
read (istream& is, U& u)
{
readp<T>(is.data(field<T>::size), u);
}
// write field to ostream
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint8_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
std::uint8_t* p =
os.data(field<T>::size);
*p = u;
}
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint16_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
if (u > field<T>::max)
throw std::logic_error(
"nudb: field max exceeded");
#endif
T t = u;
std::uint8_t* p =
os.data(field<T>::size);
@@ -184,16 +207,11 @@ write (ostream& os, U const& u)
*p = t &0xff;
}
template <class T, class U,std::enable_if_t<
template <class T, class U, std::enable_if_t<
std::is_same<T, uint24_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
if (u > field<T>::max)
throw std::logic_error(
"nudb: field max exceeded");
#endif
T t = u;
std::uint8_t* p =
os.data(field<T>::size);
@@ -202,16 +220,11 @@ write (ostream& os, U const& u)
*p = t &0xff;
}
template <class T, class U,std::enable_if_t<
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint32_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
if (u > field<T>::max)
throw std::logic_error(
"nudb: field max exceeded");
#endif
T t = u;
std::uint8_t* p =
os.data(field<T>::size);
@@ -221,16 +234,11 @@ write (ostream& os, U const& u)
*p = t &0xff;
}
template <class T, class U,std::enable_if_t<
template <class T, class U, std::enable_if_t<
std::is_same<T, uint48_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
if (u > field<T>::max)
throw std::logic_error(
"nudb: field max exceeded");
#endif
std::uint64_t const t = u;
std::uint8_t* p =
os.data(field<T>::size);
@@ -242,16 +250,11 @@ write (ostream& os, U const& u)
*p = t &0xff;
}
template <class T, class U,std::enable_if_t<
template <class T, class U, std::enable_if_t<
std::is_same<T, std::uint64_t>::value>* = nullptr>
void
write (ostream& os, U const& u)
{
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
if (u > field<T>::max)
throw std::logic_error(
"nudb: field max exceeded");
#endif
T t = u;
std::uint8_t* p =
os.data(field<T>::size);

View File

@@ -20,7 +20,7 @@
#ifndef BEAST_NUDB_DETAIL_FORMAT_H_INCLUDED
#define BEAST_NUDB_DETAIL_FORMAT_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/detail/field.h>
#include <beast/nudb/detail/stream.h>
#include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
@@ -38,22 +38,23 @@ namespace detail {
// Format of the nudb files:
static std::size_t BEAST_CONSTEXPR currentVersion = 1;
static std::size_t BEAST_CONSTEXPR currentVersion = 2;
struct dat_file_header
{
static std::size_t BEAST_CONSTEXPR size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
8 + // Salt
2 + // KeySize
64; // (Reserved)
char type[8];
std::size_t version;
std::uint64_t uid;
std::uint64_t appnum;
std::uint64_t salt;
std::size_t key_size;
};
@@ -62,20 +63,25 @@ struct key_file_header
static std::size_t BEAST_CONSTEXPR size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
2 + // KeySize
8 + // Salt
8 + // Pepper
2 + // KeySize
2 + // BlockSize
2 + // LoadFactor
64; // (Reserved)
56; // (Reserved)
char type[8];
std::size_t version;
std::uint64_t uid;
std::uint64_t appnum;
std::size_t key_size;
std::uint64_t salt;
std::uint64_t pepper;
std::size_t key_size;
std::size_t block_size;
std::size_t load_factor;
@@ -91,23 +97,65 @@ struct log_file_header
static std::size_t BEAST_CONSTEXPR size =
8 + // Type
2 + // Version
8 + // UID
8 + // Appnum
2 + // KeySize
8 + // Salt
8 + // Pepper
2 + // KeySize
2 + // BlockSize
8 + // KeyFileSize
8; // DataFileSize
char type[8];
std::size_t version;
std::uint64_t uid;
std::uint64_t appnum;
std::size_t key_size;
std::uint64_t salt;
std::uint64_t pepper;
std::size_t key_size;
std::size_t block_size;
std::size_t key_file_size;
std::size_t dat_file_size;
};
// Type used to store hashes in buckets.
// This can be smaller than the output
// of the hash function.
//
using hash_t = uint48_t;
static_assert(field<hash_t>::size <=
sizeof(std::size_t), "");
template <class T>
std::size_t
make_hash (std::size_t h);
template<>
inline
std::size_t
make_hash<uint48_t>(std::size_t h)
{
return (h>>16)&0xffffffffffff;
}
// Returns the hash of a key given the salt.
// Note: The hash is expressed in hash_t units
//
template <class Hasher>
inline
std::size_t
hash (void const* key,
std::size_t key_size, std::size_t salt)
{
Hasher h (salt);
h.append (key, key_size);
return make_hash<hash_t>(static_cast<
typename Hasher::result_type>(h));
}
// Computes pepper from salt
//
template <class Hasher>
@@ -124,8 +172,7 @@ pepper (std::size_t salt)
//
template <class = void>
std::size_t
bucket_size (std::size_t key_size,
std::size_t capacity)
bucket_size (std::size_t capacity)
{
// Bucket Record
return
@@ -134,33 +181,14 @@ bucket_size (std::size_t key_size,
capacity * (
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size); // Key
field<hash_t>::size); // Hash
}
// Returns the size of a bucket large enough to
// hold size keys of length key_size.
//
inline
std::size_t
compact_size(std::size_t key_size,
std::size_t size)
{
// Bucket Record
return
field<std::uint16_t>::size + // Size
field<uint48_t>::size + // Spill
size * (
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size); // Key
}
// Returns: number of keys that fit in a bucket
// Returns the number of entries that fit in a bucket
//
template <class = void>
std::size_t
bucket_capacity (std::size_t key_size,
std::size_t block_size)
bucket_capacity (std::size_t block_size)
{
// Bucket Record
auto const size =
@@ -169,17 +197,18 @@ bucket_capacity (std::size_t key_size,
auto const entry_size =
field<uint48_t>::size + // Offset
field<uint48_t>::size + // Size
key_size; // Key
field<hash_t>::size; // Hash
if (block_size < key_file_header::size ||
block_size < size)
return 0;
return (block_size - size) / entry_size;
}
// returns the number of bytes occupied by a value record
// Returns the number of bytes occupied by a value record
inline
std::size_t
data_size (std::size_t size, std::size_t key_size)
value_size (std::size_t size,
std::size_t key_size)
{
// Data Record
return
@@ -188,6 +217,34 @@ data_size (std::size_t size, std::size_t key_size)
size; // Data
}
// Returns the closest power of 2 not less than x
template <class = void>
std::size_t
ceil_pow2 (unsigned long long x)
{
static const unsigned long long t[6] = {
0xFFFFFFFF00000000ull,
0x00000000FFFF0000ull,
0x000000000000FF00ull,
0x00000000000000F0ull,
0x000000000000000Cull,
0x0000000000000002ull
};
int y = (((x & (x - 1)) == 0) ? 0 : 1);
int j = 32;
int i;
for(i = 0; i < 6; i++) {
int k = (((x & t[i]) == 0) ? 0 : j);
y += k;
x >>= k;
j >>= 1;
}
return std::size_t(1)<<y;
}
//------------------------------------------------------------------------------
// Read data file header from stream
@@ -197,11 +254,12 @@ read (istream& is, dat_file_header& dh)
{
read (is, dh.type, sizeof(dh.type));
read<std::uint16_t>(is, dh.version);
read<std::uint64_t>(is, dh.uid);
read<std::uint64_t>(is, dh.appnum);
read<std::uint64_t>(is, dh.salt);
read<std::uint16_t>(is, dh.key_size);
std::array <std::uint8_t, 64> zero;
read (is, zero.data(), zero.size());
std::array <std::uint8_t, 64> reserved;
read (is,
reserved.data(), reserved.size());
}
// Read data file header from file
@@ -231,12 +289,13 @@ write (ostream& os, dat_file_header const& dh)
{
write (os, "nudb.dat", 8);
write<std::uint16_t>(os, dh.version);
write<std::uint64_t>(os, dh.uid);
write<std::uint64_t>(os, dh.appnum);
write<std::uint64_t>(os, dh.salt);
write<std::uint16_t>(os, dh.key_size);
std::array <std::uint8_t, 64> zero;
zero.fill(0);
write (os, zero.data(), zero.size());
std::array <std::uint8_t, 64> reserved;
reserved.fill(0);
write (os,
reserved.data(), reserved.size());
}
// Write data file header to file
@@ -259,25 +318,26 @@ read (istream& is, std::size_t file_size,
{
read(is, kh.type, sizeof(kh.type));
read<std::uint16_t>(is, kh.version);
read<std::uint64_t>(is, kh.uid);
read<std::uint64_t>(is, kh.appnum);
read<std::uint16_t>(is, kh.key_size);
read<std::uint64_t>(is, kh.salt);
read<std::uint64_t>(is, kh.pepper);
read<std::uint16_t>(is, kh.key_size);
read<std::uint16_t>(is, kh.block_size);
read<std::uint16_t>(is, kh.load_factor);
std::array <std::uint8_t, 64> zero;
read (is, zero.data(), zero.size());
std::array <std::uint8_t, 56> reserved;
read (is,
reserved.data(), reserved.size());
// VFALCO These need to be checked to handle
// when the file size is too small
kh.capacity = bucket_capacity(
kh.key_size, kh.block_size);
kh.bucket_size = bucket_size(
kh.key_size, kh.capacity);
kh.capacity = bucket_capacity(kh.block_size);
kh.bucket_size = bucket_size(kh.capacity);
if (file_size > kh.block_size)
{
// VFALCO This should be handled elsewhere.
// we shouldn't put the computed fields in this header.
// we shouldn't put the computed fields
// in this header.
if (kh.block_size > 0)
kh.buckets = (file_size - kh.bucket_size)
/ kh.block_size;
@@ -319,15 +379,17 @@ write (ostream& os, key_file_header const& kh)
{
write (os, "nudb.key", 8);
write<std::uint16_t>(os, kh.version);
write<std::uint64_t>(os, kh.uid);
write<std::uint64_t>(os, kh.appnum);
write<std::uint16_t>(os, kh.key_size);
write<std::uint64_t>(os, kh.salt);
write<std::uint64_t>(os, kh.pepper);
write<std::uint16_t>(os, kh.key_size);
write<std::uint16_t>(os, kh.block_size);
write<std::uint16_t>(os, kh.load_factor);
std::array <std::uint8_t, 64> zero;
zero.fill (0);
write (os, zero.data(), zero.size());
std::array <std::uint8_t, 56> reserved;
reserved.fill (0);
write (os,
reserved.data(), reserved.size());
}
// Write key file header to file
@@ -353,10 +415,12 @@ read (istream& is, log_file_header& lh)
{
read (is, lh.type, sizeof(lh.type));
read<std::uint16_t>(is, lh.version);
read<std::uint64_t>(is, lh.uid);
read<std::uint64_t>(is, lh.appnum);
read<std::uint16_t>(is, lh.key_size);
read<std::uint64_t>(is, lh.salt);
read<std::uint64_t>(is, lh.pepper);
read<std::uint16_t>(is, lh.key_size);
read<std::uint16_t>(is, lh.block_size);
read<std::uint64_t>(is, lh.key_file_size);
read<std::uint64_t>(is, lh.dat_file_size);
}
@@ -381,10 +445,12 @@ write (ostream& os, log_file_header const& lh)
{
write (os, "nudb.log", 8);
write<std::uint16_t>(os, lh.version);
write<std::uint64_t>(os, lh.uid);
write<std::uint64_t>(os, lh.appnum);
write<std::uint16_t>(os, lh.key_size);
write<std::uint64_t>(os, lh.salt);
write<std::uint64_t>(os, lh.pepper);
write<std::uint16_t>(os, lh.key_size);
write<std::uint16_t>(os, lh.block_size);
write<std::uint64_t>(os, lh.key_file_size);
write<std::uint64_t>(os, lh.dat_file_size);
}
@@ -401,34 +467,6 @@ write (File& f, log_file_header const& lh)
f.write (0, buf.data(), buf.size());
}
template <class Hasher>
void
verify (key_file_header const& kh)
{
std::string const type (kh.type, 8);
if (type != "nudb.key")
throw store_corrupt_error (
"bad type in key file");
if (kh.version != currentVersion)
throw store_corrupt_error (
"bad version in key file");
if (kh.pepper != pepper<Hasher>(kh.salt))
throw store_corrupt_error(
"wrong hash function for key file");
if (kh.key_size < 1)
throw store_corrupt_error (
"bad key size in key file");
if (kh.load_factor < 1)
throw store_corrupt_error (
"bad load factor in key file");
if (kh.capacity < 1)
throw store_corrupt_error (
"bad capacity in key file");
if (kh.buckets < 1)
throw store_corrupt_error (
"bad key file size");
}
template <class = void>
void
verify (dat_file_header const& dh)
@@ -445,6 +483,34 @@ verify (dat_file_header const& dh)
"bad key size in data file");
}
template <class Hasher>
void
verify (key_file_header const& kh)
{
std::string const type (kh.type, 8);
if (type != "nudb.key")
throw store_corrupt_error (
"bad type in key file");
if (kh.version != currentVersion)
throw store_corrupt_error (
"bad version in key file");
if (kh.key_size < 1)
throw store_corrupt_error (
"bad key size in key file");
if (kh.pepper != pepper<Hasher>(kh.salt))
throw store_corrupt_error(
"wrong hash function for key file");
if (kh.load_factor < 1)
throw store_corrupt_error (
"bad load factor in key file");
if (kh.capacity < 1)
throw store_corrupt_error (
"bad capacity in key file");
if (kh.buckets < 1)
throw store_corrupt_error (
"bad key file size");
}
template <class Hasher>
void
verify (log_file_header const& lh)
@@ -470,17 +536,16 @@ void
verify (dat_file_header const& dh,
key_file_header const& kh)
{
verify (dh);
verify<Hasher> (kh);
if (kh.salt != dh.salt)
if (kh.uid != dh.uid)
throw store_corrupt_error(
"salt mismatch");
if (kh.key_size != dh.key_size)
throw store_corrupt_error(
"key size mismatch");
"uid mismatch");
if (kh.appnum != dh.appnum)
throw store_corrupt_error(
"appnum mismatch");
if (kh.key_size != dh.key_size)
throw store_corrupt_error(
"key size mismatch");
}
template <class Hasher>
@@ -489,15 +554,24 @@ verify (key_file_header const& kh,
log_file_header const& lh)
{
verify<Hasher>(lh);
if (kh.salt != lh.salt)
if (kh.uid != lh.uid)
throw store_corrupt_error (
"salt mismatch in log file");
"uid mismatch in log file");
if (kh.appnum != lh.appnum)
throw store_corrupt_error(
"appnum mismatch in log file");
if (kh.key_size != lh.key_size)
throw store_corrupt_error (
"key size mismatch in log file");
if (kh.appnum != lh.appnum)
throw store_corrupt_error(
"appnum mismatch");
if (kh.salt != lh.salt)
throw store_corrupt_error (
"salt mismatch in log file");
if (kh.pepper != lh.pepper)
throw store_corrupt_error (
"pepper mismatch in log file");
if (kh.block_size != lh.block_size)
throw store_corrupt_error (
"block size mismatch in log file");
}
} // detail

View File

@@ -20,7 +20,6 @@
#ifndef BEAST_NUDB_DETAIL_GENTEX_H_INCLUDED
#define BEAST_NUDB_DETAIL_GENTEX_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <beast/utility/noexcept.h>
#include <condition_variable>
#include <cstddef>

View File

@@ -22,7 +22,6 @@
#include <beast/nudb/detail/arena.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <cstdint>
#include <cstring>

View File

@@ -20,8 +20,7 @@
#ifndef BEAST_NUDB_DETAIL_STREAM_H_INCLUDED
#define BEAST_NUDB_DETAIL_STREAM_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/common.h>
#include <array>
#include <cstddef>
#include <cstdint>
@@ -32,114 +31,54 @@ namespace beast {
namespace nudb {
namespace detail {
// Simple growable memory buffer
class buffer
{
private:
std::size_t size_ = 0;
std::unique_ptr<std::uint8_t[]> buf_;
public:
buffer() = default;
buffer (buffer const&) = delete;
buffer& operator= (buffer const&) = delete;
explicit
buffer (std::size_t n)
: size_ (n)
, buf_ (new std::uint8_t[n])
{
}
buffer (buffer&& other)
: size_ (other.size_)
, buf_ (std::move(other.buf_))
{
other.size_ = 0;
}
buffer& operator= (buffer&& other)
{
size_ = other.size_;
buf_ = std::move(other.buf_);
other.size_ = 0;
return *this;
}
std::size_t
size() const
{
return size_;
}
std::uint8_t*
get() const
{
return buf_.get();
}
void
reserve (std::size_t n)
{
if (size_ < n)
buf_.reset (new std::uint8_t[n]);
size_ = n;
}
};
//------------------------------------------------------------------------------
// Input stream from bytes
template <class = void>
class istream_t
{
private:
std::uint8_t const* buf_;
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
std::size_t bytes_;
#endif
std::size_t size_ = 0;
public:
istream_t (istream_t const&) = default;
istream_t& operator= (istream_t const&) = default;
istream_t (void const* data, std::size_t
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
bytes
#endif
)
istream_t (void const* data, std::size_t size)
: buf_(reinterpret_cast<
std::uint8_t const*>(data))
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
, bytes_(bytes)
#endif
, size_(size)
{
}
template <std::size_t N>
istream_t (std::array<std::uint8_t, N> const& a)
: buf_ (a.data())
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
, bytes_ (a.size())
#endif
, size_ (a.size())
{
}
std::uint8_t const*
data (std::size_t bytes)
data (std::size_t bytes);
std::uint8_t const*
operator()(std::size_t bytes)
{
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
if (bytes > bytes_)
throw std::logic_error(
"nudb: istream");
bytes_ -= bytes;
#endif
auto const data = buf_;
buf_ = buf_ + bytes;
return data;
return data(bytes);
}
};
template <class _>
std::uint8_t const*
istream_t<_>::data (std::size_t bytes)
{
if (size_ < bytes)
throw short_read_error();
auto const data = buf_;
buf_ = buf_ + bytes;
size_ -= bytes;
return data;
}
using istream = istream_t<>;
//------------------------------------------------------------------------------
@@ -151,32 +90,19 @@ class ostream_t
private:
std::uint8_t* buf_;
std::size_t size_ = 0;
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
std::size_t bytes_;
#endif
public:
ostream_t (ostream_t const&) = default;
ostream_t& operator= (ostream_t const&) = default;
ostream_t (void* data, std::size_t
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
bytes
#endif
)
ostream_t (void* data, std::size_t)
: buf_ (reinterpret_cast<std::uint8_t*>(data))
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
, bytes_ (bytes)
#endif
{
}
template <std::size_t N>
ostream_t (std::array<std::uint8_t, N>& a)
: buf_ (a.data())
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
, bytes_ (a.size())
#endif
{
}
@@ -188,21 +114,25 @@ public:
}
std::uint8_t*
data (std::size_t bytes)
data (std::size_t bytes);
std::uint8_t*
operator()(std::size_t bytes)
{
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
if (bytes > bytes_)
throw std::logic_error(
"nudb: ostream");
bytes_ -= bytes;
#endif
auto const data = buf_;
buf_ = buf_ + bytes;
size_ += bytes;
return data;
return data(bytes);
}
};
template <class _>
std::uint8_t*
ostream_t<_>::data (std::size_t bytes)
{
auto const data = buf_;
buf_ = buf_ + bytes;
size_ += bytes;
return data;
}
using ostream = ostream_t<>;
//------------------------------------------------------------------------------
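
A usage sketch (illustrative, not part of the diff): ostream hands out raw space and counts what was claimed, while the reworked istream bounds-checks every read and throws short_read_error rather than relying on the old BEAST_NUDB_NO_DOMAIN_CHECK logic_error path:

    using namespace beast::nudb::detail;

    buffer buf(8);                      // growable backing store
    ostream os(buf.get(), buf.size());
    std::memcpy(os.data(4), "abcd", 4); // claim 4 bytes, then fill them
    istream is(buf.get(), os.size());   // wrap only the written bytes
    is.data(4);                         // ok: exactly 4 bytes remain
    // a further is.data(1) would throw short_read_error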

View File

@@ -0,0 +1,155 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_VARINT_H_INCLUDED
#define BEAST_NUDB_VARINT_H_INCLUDED
#include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
#include <beast/nudb/detail/stream.h>
#include <cstdint>
#include <beast/cxx14/type_traits.h> // <type_traits>
namespace beast {
namespace nudb {
namespace detail {
// The base-128 varint format is from
// Google Protocol Buffers:
// https://developers.google.com/protocol-buffers/docs/encoding#varints
// field tag
struct varint;
// Metafunction to return the largest
// possible size of T represented as varint.
// T must be unsigned
template <class T,
bool = std::is_unsigned<T>::value>
struct varint_traits;
template <class T>
struct varint_traits<T, true>
{
static std::size_t BEAST_CONSTEXPR max =
(8 * sizeof(T) + 6) / 7;
};
// Returns: Number of bytes consumed, or 0 on error
// (the buffer was too small or the value overflowed t).
//
template <class = void>
std::size_t
read_varint (void const* buf,
std::size_t buflen, std::size_t& t)
{
t = 0;
std::uint8_t const* p =
reinterpret_cast<
std::uint8_t const*>(buf);
std::size_t n = 0;
if (buflen == 0)
return 0;
while (p[n] & 0x80)
if (++n >= buflen)
return 0;
if (++n > buflen)
return 0;
// Special case for 0
if (n == 1 && *p == 0)
{
t = 0;
return 1;
}
auto const used = n;
while (n--)
{
auto const d = p[n];
auto const t0 = t;
t *= 128;
t += d & 0x7f;
if (t <= t0)
return 0; // overflow
}
return used;
}
template <class T,
std::enable_if_t<std::is_unsigned<
T>::value>* = nullptr>
std::size_t
size_varint (T v)
{
std::size_t n = 0;
do
{
v /= 128;
++n;
}
while (v != 0);
return n;
}
template <class = void>
std::size_t
write_varint (void* p0, std::size_t v)
{
std::uint8_t* p = reinterpret_cast<
std::uint8_t*>(p0);
do
{
std::uint8_t d =
v % 128;
v /= 128;
if (v != 0)
d |= 0x80;
*p++ = d;
}
while (v != 0);
return p - reinterpret_cast<
std::uint8_t*>(p0);
}
// input stream
template <class T, std::enable_if_t<
std::is_same<T, varint>::value>* = nullptr>
void
read (istream& is, std::size_t& u)
{
auto p0 = is(1);
auto p1 = p0;
while (*p1++ & 0x80)
is(1);
read_varint(p0, p1 - p0, u);
}
// output stream
template <class T, std::enable_if_t<
std::is_same<T, varint>::value>* = nullptr>
void
write (ostream& os, std::size_t t)
{
write_varint(os.data(
size_varint(t)), t);
}
} // detail
} // nudb
} // beast
#endif
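
A round-trip sketch (illustrative): with the base-128 arithmetic above, 300 (binary 100101100) encodes low seven bits first, with the continuation bit set on every byte but the last, giving 0xAC 0x02:

    using namespace beast::nudb::detail;

    std::uint8_t out[varint_traits<std::size_t>::max];
    auto const n = write_varint(out, 300);
    // n == 2, out[0] == 0xac, out[1] == 0x02
    std::size_t v;
    auto const used = read_varint(out, n, v);
    // used == 2, v == 300; used == 0 would signal a bad encoding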

View File

@@ -20,9 +20,8 @@
#ifndef BEAST_NUDB_FILE_H_INCLUDED
#define BEAST_NUDB_FILE_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/posix_file.h>
#include <beast/nudb/detail/win32_file.h>
#include <beast/nudb/posix_file.h>
#include <beast/nudb/win32_file.h>
#include <string>
namespace beast {

View File

@@ -17,25 +17,46 @@
*/
//==============================================================================
#ifndef BEAST_NUDB_MODE_H_INCLUDED
#define BEAST_NUDB_MODE_H_INCLUDED
#ifndef BEAST_NUDB_IDENTITY_CODEC_H_INCLUDED
#define BEAST_NUDB_IDENTITY_CODEC_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <string>
#include <utility>
namespace beast {
namespace nudb {
enum class file_mode
/** Codec which maps input directly to output. */
class identity_codec
{
scan, // read sequential
read, // read random
append, // read random, write append
write // read random, write random
};
public:
template <class... Args>
explicit
identity_codec(Args&&... args)
{
}
// This sort of doesn't belong here
using path_type = std::string;
char const*
name() const
{
return "none";
}
template <class BufferFactory>
std::pair<void const*, std::size_t>
compress (void const* in,
std::size_t in_size, BufferFactory&&) const
{
return std::make_pair(in, in_size);
}
template <class BufferFactory>
std::pair<void const*, std::size_t>
decompress (void const* in,
std::size_t in_size, BufferFactory&&) const
{
return std::make_pair(in, in_size);
}
};
} // nudb
} // beast
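
identity_codec doubles as documentation of the Codec concept: compress and decompress accept an input span plus a buffer-like factory and return the resulting span. Only the identity codec ships in this commit; as a sketch, a hypothetical codec built on snappy (which the NuDB backend below already pulls in) could look like this, assuming the factory argument exposes reserve()/get() the way detail::buffer does:

    #include <snappy.h>
    #include <cstddef>
    #include <stdexcept>
    #include <utility>

    class snappy_codec
    {
    public:
        char const*
        name() const
        {
            return "snappy";
        }

        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        compress (void const* in,
            std::size_t in_size, BufferFactory&& bf) const
        {
            bf.reserve (snappy::MaxCompressedLength(in_size));
            std::size_t out_size;
            snappy::RawCompress (
                reinterpret_cast<char const*>(in), in_size,
                reinterpret_cast<char*>(bf.get()), &out_size);
            return std::make_pair (bf.get(), out_size);
        }

        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        decompress (void const* in,
            std::size_t in_size, BufferFactory&& bf) const
        {
            std::size_t out_size;
            if (! snappy::GetUncompressedLength (
                    reinterpret_cast<char const*>(in),
                    in_size, &out_size))
                throw std::runtime_error("snappy: bad header");
            bf.reserve (out_size);
            if (! snappy::RawUncompress (
                    reinterpret_cast<char const*>(in),
                    in_size, reinterpret_cast<char*>(bf.get())))
                throw std::runtime_error("snappy: corrupt input");
            return std::make_pair (bf.get(), out_size);
        }
    };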

View File

@@ -17,9 +17,8 @@
*/
//==============================================================================
#include <beast/nudb/detail/config.h>
#include <beast/nudb/tests/callgrind_test.cpp>
#include <beast/nudb/tests/recover_test.cpp>
#include <beast/nudb/tests/store_test.cpp>
#include <beast/nudb/tests/varint_test.cpp>
#include <beast/nudb/tests/verify_test.cpp>

View File

@@ -20,9 +20,7 @@
#ifndef BEAST_NUDB_DETAIL_POSIX_FILE_H_INCLUDED
#define BEAST_NUDB_DETAIL_POSIX_FILE_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/common.h>
#include <cassert>
#include <cerrno>
#include <cstring>
@@ -266,14 +264,21 @@ void
posix_file<_>::read (std::size_t offset,
void* buffer, std::size_t bytes)
{
auto const n = ::pread (
fd_, buffer, bytes, offset);
// VFALCO end of file should throw short_read
if (n == -1)
throw file_posix_error(
"pread");
if (n < bytes)
throw file_short_read_error();
while(bytes > 0)
{
auto const n = ::pread (
fd_, buffer, bytes, offset);
// VFALCO end of file should throw short_read
if (n == -1)
throw file_posix_error(
"pread");
if (n == 0)
throw file_short_read_error();
offset += n;
bytes -= n;
buffer = reinterpret_cast<
char*>(buffer) + n;
}
}
template <class _>
@@ -281,13 +286,20 @@ void
posix_file<_>::write (std::size_t offset,
void const* buffer, std::size_t bytes)
{
auto const n = ::pwrite (
fd_, buffer, bytes, offset);
if (n == -1)
throw file_posix_error(
"pwrite");
if (n < bytes)
throw file_short_write_error();
while(bytes > 0)
{
auto const n = ::pwrite (
fd_, buffer, bytes, offset);
if (n == -1)
throw file_posix_error(
"pwrite");
if (n == 0)
throw file_short_write_error();
offset += n;
bytes -= n;
buffer = reinterpret_cast<
char const*>(buffer) + n;
}
}
template <class _>

View File

@@ -20,12 +20,10 @@
#ifndef BEAST_NUDB_RECOVER_H_INCLUDED
#define BEAST_NUDB_RECOVER_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/file.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/bulkio.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <algorithm>
#include <cstddef>
@@ -39,19 +37,22 @@ namespace nudb {
any partially committed data.
*/
template <
class Hasher = default_hash,
class File = native_file>
class Hasher,
class Codec,
class File = native_file,
class... Args>
bool
recover (
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
std::size_t read_size = 16 * 1024 * 1024)
std::size_t read_size,
Args&&... args)
{
using namespace detail;
File df;
File lf;
File kf;
File df(args...);
File lf(args...);
File kf(args...);
if (! df.open (file_mode::append, dat_path))
return false;
if (! kf.open (file_mode::write, key_path))
@@ -96,8 +97,7 @@ recover (
verify<Hasher>(kh, lh);
auto const df_size = df.actual_size();
buffer buf(kh.block_size);
bucket b (kh.key_size,
kh.block_size, buf.get());
bucket b (kh.block_size, buf.get());
bulk_reader<File> r(lf, log_file_header::size,
lf_size, read_size);
while(! r.eof())
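
With the Hasher and Codec defaults removed, callers now spell out every template parameter, and any extra constructor arguments are forwarded to the three File objects. A hypothetical invocation (paths and read size made up):

    beast::nudb::recover<
        beast::xxhasher,
        beast::nudb::identity_codec,
        beast::nudb::native_file>(
            "db.dat", "db.key", "db.log",
            16 * 1024 * 1024); // read_size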

View File

@@ -20,20 +20,15 @@
#ifndef BEAST_NUDB_STORE_H_INCLUDED
#define BEAST_NUDB_STORE_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/file.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/common.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/buffers.h>
#include <beast/nudb/detail/buffer.h>
#include <beast/nudb/detail/bulkio.h>
#include <beast/nudb/detail/cache.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <beast/nudb/detail/gentex.h>
#include <beast/nudb/detail/pool.h>
#include <beast/nudb/detail/posix_file.h>
#include <beast/nudb/detail/win32_file.h>
#include <boost/thread/lock_types.hpp>
#include <boost/thread/shared_mutex.hpp>
#include <algorithm>
@@ -52,7 +47,6 @@
#include <limits>
#include <beast/cxx14/memory.h> // <memory>
#include <mutex>
#include <random>
#include <stdexcept>
#include <string>
#include <thread>
@@ -80,15 +74,17 @@ namespace nudb {
*/
/** A simple key/value database
@tparam Hasher The hash function to use on key
@tparam Codec The codec to apply to value data
@tparam File The type of File object to use.
@tparam Hasher The hash function to use on keys
*/
template <class Hasher, class File>
class basic_store
template <class Hasher, class Codec, class File>
class store
{
public:
using file_type = File;
using hash_type = Hasher;
using codec_type = Codec;
using file_type = File;
private:
// requires 64-bit integers or better
@@ -112,9 +108,6 @@ private:
using unique_lock_type =
boost::unique_lock<boost::shared_mutex>;
using blockbuf =
typename detail::buffers::value_type;
struct state
{
File df;
@@ -123,11 +116,11 @@ private:
path_type dp;
path_type kp;
path_type lp;
detail::buffers b;
detail::pool p0;
detail::pool p1;
detail::cache c0;
detail::cache c1;
Codec const codec;
detail::key_file_header const kh;
// pool commit high water mark
@@ -144,8 +137,6 @@ private:
};
bool open_ = false;
// VFALCO Make consistency checks optional?
//bool safe_ = true; // Do consistency checks
// VFALCO Unfortunately boost::optional doesn't support
// move construction so we use unique_ptr instead.
@@ -173,9 +164,9 @@ private:
std::exception_ptr ep_;
public:
basic_store() = default;
basic_store (basic_store const&) = delete;
basic_store& operator= (basic_store const&) = delete;
store() = default;
store (store const&) = delete;
store& operator= (store const&) = delete;
/** Destroy the database.
@@ -191,7 +182,7 @@ public:
Throws:
None
*/
~basic_store();
~store();
/** Returns `true` if the database is open. */
bool
@@ -250,17 +241,17 @@ public:
/** Fetch a value.
If key is found, BufferFactory will be called as:
`(void*)()(std::size_t bytes)`
If key is found, Handler will be called as:
`(void)()(void const* data, std::size_t size)`
where bytes is the size of the value, and the returned pointer
points to a buffer of at least bytes size.
where data and size represent the value. If the
key is not found, the handler is not called.
@return `true` if the key exists.
@return `true` if a matching key was found.
*/
template <class BufferFactory>
template <class Handler>
bool
fetch (void const* key, BufferFactory&& bf);
fetch (void const* key, Handler&& handler);
/** Insert a value.
@@ -280,12 +271,19 @@ private:
std::rethrow_exception(ep_);
}
std::pair <detail::bucket::value_type, bool>
find (void const* key, detail::bucket& b);
// Fetch key in loaded bucket b or its spills.
//
template <class Handler>
bool
fetch (std::size_t h, void const* key,
detail::bucket b, Handler&& handler);
void
maybe_spill (detail::bucket& b,
detail::bulk_writer<File>& w);
// Returns `true` if the key exists.
// The lock is unlocked after the first bucket is processed.
//
bool
exists (std::size_t h, void const* key,
shared_lock_type* lock, detail::bucket b);
void
split (detail::bucket& b1, detail::bucket& b2,
@@ -306,8 +304,8 @@ private:
//------------------------------------------------------------------------------
template <class Hasher, class File>
basic_store<Hasher, File>::state::state (
template <class Hasher, class Codec, class File>
store<Hasher, Codec, File>::state::state (
File&& df_, File&& kf_, File&& lf_,
path_type const& dp_, path_type const& kp_,
path_type const& lp_,
@@ -319,7 +317,6 @@ basic_store<Hasher, File>::state::state (
, dp (dp_)
, kp (kp_)
, lp (lp_)
, b (kh_.block_size)
, p0 (kh_.key_size, arena_alloc_size)
, p1 (kh_.key_size, arena_alloc_size)
, c0 (kh_.key_size, kh_.block_size)
@@ -330,8 +327,8 @@ basic_store<Hasher, File>::state::state (
//------------------------------------------------------------------------------
template <class Hasher, class File>
basic_store<Hasher, File>::~basic_store()
template <class Hasher, class Codec, class File>
store<Hasher, Codec, File>::~store()
{
try
{
@@ -344,10 +341,10 @@ basic_store<Hasher, File>::~basic_store()
}
}
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
template <class... Args>
bool
basic_store<Hasher, File>::open (
store<Hasher, Codec, File>::open (
path_type const& dat_path,
path_type const& key_path,
path_type const& log_path,
@@ -358,11 +355,13 @@ basic_store<Hasher, File>::open (
if (is_open())
throw std::logic_error("nudb: already open");
epb_.store(false);
recover (dat_path, key_path, log_path,
recover_read_size);
File df(std::forward<Args>(args)...);
File kf(std::forward<Args>(args)...);
File lf(std::forward<Args>(args)...);
recover<Hasher, Codec, File>(
dat_path, key_path, log_path,
recover_read_size,
args...);
File df(args...);
File kf(args...);
File lf(args...);
if (! df.open (file_mode::append, dat_path))
return false;
if (! kf.open (file_mode::write, key_path))
@@ -373,7 +372,7 @@ basic_store<Hasher, File>::open (
key_file_header kh;
read (df, dh);
read (kf, kh);
verify (dh);
verify<Codec> (dh);
verify<Hasher> (kh);
verify<Hasher> (dh, kh);
auto s = std::make_unique<state>(
@@ -392,13 +391,13 @@ basic_store<Hasher, File>::open (
s_ = std::move(s);
open_ = true;
thread_ = std::thread(
&basic_store::run, this);
&store::run, this);
return true;
}
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
void
basic_store<Hasher, File>::close()
store<Hasher, Codec, File>::close()
{
if (open_)
{
@@ -414,229 +413,208 @@ basic_store<Hasher, File>::close()
}
}
template <class Hasher, class File>
template <class BufferFactory>
template <class Hasher, class Codec, class File>
template <class Handler>
bool
basic_store<Hasher, File>::fetch (
void const* key, BufferFactory&& bf)
store<Hasher, Codec, File>::fetch (
void const* key, Handler&& handler)
{
using namespace detail;
rethrow();
std::size_t offset;
std::size_t size;
blockbuf buf(s_->b);
bucket tmp (s_->kh.key_size,
s_->kh.block_size, buf.get());
auto const h = hash<Hasher>(
key, s_->kh.key_size, s_->kh.salt);
shared_lock_type m (m_);
{
auto const h = hash<Hasher>(
key, s_->kh.key_size, s_->kh.salt);
shared_lock_type m (m_,
boost::defer_lock);
m.lock();
auto iter = s_->p1.find(key);
if (iter == s_->p1.end())
{
typename pool::iterator iter;
iter = s_->p1.find(key);
if (iter != s_->p1.end())
{
void* const b = bf(
iter->first.size);
if (b == nullptr)
return false;
std::memcpy (b,
iter->first.data,
iter->first.size);
return true;
}
iter = s_->p0.find(key);
if (iter != s_->p0.end())
{
void* const b = bf(
iter->first.size);
if (b == nullptr)
return false;
std::memcpy (b,
iter->first.data,
iter->first.size);
return true;
}
if (iter == s_->p0.end())
goto next;
}
buffer buf;
auto const result =
s_->codec.decompress(
iter->first.data,
iter->first.size, buf);
handler(result.first, result.second);
return true;
}
next:
auto const n = bucket_index(
h, buckets_, modulus_);
auto const iter = s_->c1.find(n);
if (iter != s_->c1.end())
return fetch(h, key,
iter->second, handler);
// VFALCO Audit for concurrency
genlock <gentex> g (g_);
m.unlock();
buffer buf (s_->kh.block_size);
// VFALCO Constructs with garbage here
bucket b (s_->kh.block_size,
buf.get());
b.read (s_->kf,
(n + 1) * b.block_size());
return fetch(h, key, b, handler);
}
template <class Hasher, class Codec, class File>
bool
store<Hasher, Codec, File>::insert (
void const* key, void const* data,
std::size_t size)
{
using namespace detail;
rethrow();
buffer buf;
// Data Record
if (size > field<uint48_t>::max)
throw std::logic_error(
"nudb: size too large");
auto const h = hash<Hasher>(
key, s_->kh.key_size, s_->kh.salt);
std::lock_guard<std::mutex> u (u_);
{
shared_lock_type m (m_);
if (s_->p1.find(key) != s_->p1.end())
return false;
if (s_->p0.find(key) != s_->p0.end())
return false;
auto const n = bucket_index(
h, buckets_, modulus_);
auto const iter = s_->c1.find(n);
if (iter != s_->c1.end())
{
auto const result =
iter->second.find(key);
if (result.second)
{
offset = result.first.offset;
size = result.first.size;
goto found;
}
// VFALCO Audit for concurrency
auto spill = iter->second.spill();
m.unlock();
while (spill)
{
tmp.read(s_->df, spill);
auto const result = tmp.find(key);
if (result.second)
{
offset = result.first.offset;
size = result.first.size;
goto found;
}
spill = tmp.spill();
}
return false;
}
// VFALCO Audit for concurrency
genlock <gentex> g (g_);
m.unlock();
tmp.read (s_->kf,
(n + 1) * tmp.block_size());
auto const result = find(key, tmp);
if (! result.second)
return false;
offset = result.first.offset;
size = result.first.size;
}
found:
void* const b = bf(size);
if (b == nullptr)
return false;
// Data Record
s_->df.read (offset +
field<uint48_t>::size + // Size
s_->kh.key_size, // Key
b, size);
return true;
}
template <class Hasher, class File>
bool
basic_store<Hasher, File>::insert (void const* key,
void const* data, std::size_t size)
{
using namespace detail;
rethrow();
#if ! BEAST_NUDB_NO_DOMAIN_CHECK
if (size > field<uint48_t>::max)
throw std::logic_error(
"nudb: size too large");
#endif
blockbuf buf (s_->b);
bucket tmp (s_->kh.key_size,
s_->kh.block_size, buf.get());
auto const h = hash<Hasher>(
key, s_->kh.key_size, s_->kh.salt);
std::lock_guard<std::mutex> u (u_);
shared_lock_type m (m_, boost::defer_lock);
m.lock();
if (s_->p1.find(key) != s_->p1.end())
return false;
if (s_->p0.find(key) != s_->p0.end())
return false;
auto const n = bucket_index(
h, buckets_, modulus_);
auto const iter = s_->c1.find(n);
if (iter != s_->c1.end())
{
if (iter->second.find(key).second)
return false;
// VFALCO Audit for concurrency
auto spill = iter->second.spill();
m.unlock();
while (spill)
{
tmp.read (s_->df, spill);
if (tmp.find(key).second)
if (exists(h, key, &m,
iter->second))
return false;
spill = tmp.spill();
// m is now unlocked
}
}
else
{
genlock <gentex> g (g_);
m.unlock();
// VFALCO Audit for concurrency
tmp.read (s_->kf,
(n + 1) * s_->kh.block_size);
if (find(key, tmp).second)
return false;
}
{
unique_lock_type m (m_);
s_->p1.insert (h, key, data, size);
// Did we go over the commit limit?
if (commit_limit_ > 0 &&
s_->p1.data_size() >= commit_limit_)
else
{
// Yes, start a new commit
cond_.notify_all();
// Wait for pool to shrink
cond_limit_.wait(m,
[this]() { return
s_->p1.data_size() <
commit_limit_; });
// VFALCO Audit for concurrency
genlock <gentex> g (g_);
m.unlock();
buf.reserve(s_->kh.block_size);
bucket b (s_->kh.block_size,
buf.get());
b.read (s_->kf,
(n + 1) * s_->kh.block_size);
if (exists(h, key, nullptr, b))
return false;
}
bool const notify =
s_->p1.data_size() >= s_->pool_thresh;
m.unlock();
if (notify)
cond_.notify_all();
}
auto const result =
s_->codec.compress(data, size, buf);
// Perform insert
unique_lock_type m (m_);
s_->p1.insert (h, key,
result.first, result.second);
// Did we go over the commit limit?
if (commit_limit_ > 0 &&
s_->p1.data_size() >= commit_limit_)
{
// Yes, start a new commit
cond_.notify_all();
// Wait for pool to shrink
cond_limit_.wait(m,
[this]() { return
s_->p1.data_size() <
commit_limit_; });
}
bool const notify =
s_->p1.data_size() >= s_->pool_thresh;
m.unlock();
if (notify)
cond_.notify_all();
return true;
}
// Find key in loaded bucket b or its spills.
//
template <class Hasher, class File>
std::pair <detail::bucket::value_type, bool>
basic_store<Hasher, File>::find (
void const* key, detail::bucket& b)
{
auto result = b.find(key);
if (result.second)
return result;
auto spill = b.spill();
while (spill)
{
b.read (s_->df, spill);
result = b.find(key);
if (result.second)
return result;
spill = b.spill();
}
return result;
}
// Spill bucket if full
//
template <class Hasher, class File>
void
basic_store<Hasher, File>::maybe_spill(
detail::bucket& b, detail::bulk_writer<File>& w)
template <class Hasher, class Codec, class File>
template <class Handler>
bool
store<Hasher, Codec, File>::fetch (
std::size_t h, void const* key,
detail::bucket b, Handler&& handler)
{
using namespace detail;
if (b.full())
buffer buf0;
buffer buf1;
for(;;)
{
// Spill Record
auto const offset = w.offset();
auto os = w.prepare(
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
b.compact_size());
write <uint48_t> (os, 0); // Zero
write <std::uint16_t> (
os, b.compact_size()); // Size
auto const spill =
offset + os.size();
b.write (os); // Bucket
// Update bucket
b.clear();
b.spill (spill);
for (auto i = b.lower_bound(h);
i < b.size(); ++i)
{
auto const item = b[i];
if (item.hash != h)
break;
// Data Record
auto const len =
s_->kh.key_size + // Key
item.size; // Value
buf0.reserve(len);
s_->df.read(item.offset +
field<uint48_t>::size, // Size
buf0.get(), len);
if (std::memcmp(buf0.get(), key,
s_->kh.key_size) == 0)
{
auto const result =
s_->codec.decompress(
buf0.get() + s_->kh.key_size,
item.size, buf1);
handler(result.first, result.second);
return true;
}
}
auto const spill = b.spill();
if (! spill)
break;
buf1.reserve(s_->kh.block_size);
b = bucket(s_->kh.block_size,
buf1.get());
b.read(s_->df, spill);
}
return false;
}
template <class Hasher, class Codec, class File>
bool
store<Hasher, Codec, File>::exists (
std::size_t h, void const* key,
shared_lock_type* lock, detail::bucket b)
{
using namespace detail;
buffer buf(s_->kh.key_size +
s_->kh.block_size);
void* pk = buf.get();
void* pb = buf.get() + s_->kh.key_size;
for(;;)
{
for (auto i = b.lower_bound(h);
i < b.size(); ++i)
{
auto const item = b[i];
if (item.hash != h)
break;
// Data Record
s_->df.read(item.offset +
field<uint48_t>::size, // Size
pk, s_->kh.key_size); // Key
if (std::memcmp(pk, key,
s_->kh.key_size) == 0)
return true;
}
auto spill = b.spill();
if (lock && lock->owns_lock())
lock->unlock();
if (! spill)
break;
b = bucket(s_->kh.block_size, pb);
b.read(s_->df, spill);
}
return false;
}
// Split the bucket in b1, moving entries to b2
@@ -644,9 +622,9 @@ basic_store<Hasher, File>::maybe_spill(
// tmp is used as a temporary buffer
// splits are written but not the new buckets
//
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
void
basic_store<Hasher, File>::split (detail::bucket& b1,
store<Hasher, Codec, File>::split (detail::bucket& b1,
detail::bucket& b2, detail::bucket& tmp,
std::size_t n1, std::size_t n2,
std::size_t buckets, std::size_t modulus,
@@ -659,15 +637,13 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
// Split
for (std::size_t i = 0; i < b1.size();)
{
auto e = b1[i];
auto const h = hash<Hasher>(
e.key, s_->kh.key_size, s_->kh.salt);
auto const e = b1[i];
auto const n = bucket_index(
h, buckets, modulus);
e.hash, buckets, modulus);
assert(n==n1 || n==n2);
if (n == n2)
{
b2.insert (e.offset, e.size, e.key);
b2.insert (e.offset, e.size, e.hash);
b1.erase (i);
}
else
@@ -684,26 +660,27 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
// If any part of the spill record is
// in the write buffer then flush first
// VFALCO Needs audit
if (spill + bucket_size(s_->kh.key_size,
s_->kh.capacity) > w.offset() - w.size())
if (spill + bucket_size(s_->kh.capacity) >
w.offset() - w.size())
w.flush();
tmp.read (s_->df, spill);
for (std::size_t i = 0; i < tmp.size(); ++i)
{
auto e = tmp[i];
auto const n = bucket_index<Hasher>(
e.key, s_->kh.key_size, s_->kh.salt,
buckets, modulus);
auto const e = tmp[i];
auto const n = bucket_index(
e.hash, buckets, modulus);
assert(n==n1 || n==n2);
if (n == n2)
{
maybe_spill (b2, w);
b2.insert (e.offset, e.size, e.key);
maybe_spill(b2, w);
b2.insert(
e.offset, e.size, e.hash);
}
else
{
maybe_spill (b1, w);
b1.insert (e.offset, e.size, e.key);
maybe_spill(b1, w);
b1.insert(
e.offset, e.size, e.hash);
}
}
spill = tmp.spill();
@@ -732,9 +709,9 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
// Postconditions:
// c1, c0, and the memory pointed to by buf may be modified
//
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
detail::bucket
basic_store<Hasher, File>::load (
store<Hasher, Codec, File>::load (
std::size_t n, detail::cache& c1,
detail::cache& c0, void* buf)
{
@@ -746,8 +723,7 @@ basic_store<Hasher, File>::load (
if (iter != c0.end())
return c1.insert (n,
iter->second)->second;
bucket tmp (s_->kh.key_size,
s_->kh.block_size, buf);
bucket tmp (s_->kh.block_size, buf);
tmp.read (s_->kf, (n + 1) *
s_->kh.block_size);
c0.insert (n, tmp);
@@ -760,15 +736,14 @@ basic_store<Hasher, File>::load (
//
// Effects:
//
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
void
basic_store<Hasher, File>::commit()
store<Hasher, Codec, File>::commit()
{
using namespace detail;
blockbuf buf1 (s_->b);
blockbuf buf2 (s_->b);
bucket tmp (s_->kh.key_size,
s_->kh.block_size, buf1.get());
buffer buf1 (s_->kh.block_size);
buffer buf2 (s_->kh.block_size);
bucket tmp (s_->kh.block_size, buf1.get());
// Empty cache put in place temporarily
// so we can reuse the memory from s_->c1
cache c1;
@@ -788,11 +763,14 @@ basic_store<Hasher, File>::commit()
// Log File Header
log_file_header lh;
lh.version = currentVersion; // Version
lh.uid = s_->kh.uid; // UID
lh.appnum = s_->kh.appnum; // Appnum
lh.key_size = s_->kh.key_size; // Key Size
lh.salt = s_->kh.salt; // Salt
lh.pepper = pepper<Hasher>(
lh.salt); // Pepper
lh.key_size = s_->kh.key_size; // Key Size
lh.block_size =
s_->kh.block_size; // Block Size
lh.key_file_size =
s_->kf.actual_size(); // Key File Size
lh.dat_file_size =
@@ -813,7 +791,7 @@ basic_store<Hasher, File>::commit()
// threads are reading other data members
// of this object in memory
e.second = w.offset();
auto os = w.prepare (data_size(
auto os = w.prepare (value_size(
e.first.size, s_->kh.key_size));
// Data Record
write <uint48_t> (os,
@@ -848,8 +826,9 @@ basic_store<Hasher, File>::commit()
e.first.hash, buckets, modulus);
auto b = load (n, c1, s_->c0, buf2.get());
// This can amplify writes if it spills.
maybe_spill (b, w);
b.insert (e.second, e.first.size, e.first.key);
maybe_spill(b, w);
b.insert (e.second,
e.first.size, e.first.hash);
}
w.flush();
}
@@ -905,9 +884,9 @@ basic_store<Hasher, File>::commit()
}
}
template <class Hasher, class File>
template <class Hasher, class Codec, class File>
void
basic_store<Hasher, File>::run()
store<Hasher, Codec, File>::run()
{
auto const pred =
[this]()
@@ -957,29 +936,6 @@ basic_store<Hasher, File>::run()
}
}
//------------------------------------------------------------------------------
using store = basic_store <default_hash, native_file>;
/** Generate a random salt. */
template <class = void>
std::uint64_t
make_salt()
{
std::random_device rng;
std::mt19937_64 gen {rng()};
std::uniform_int_distribution <std::size_t> dist;
return dist(gen);
}
/** Returns the best guess at the volume's block size. */
inline
std::size_t
block_size(std::string const& /*path*/)
{
return 4096;
}
} // nudb
} // beast
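
To make the rename and the Handler-based fetch concrete, a usage sketch (paths and key type are illustrative; the files are assumed to have been created already, via create.h, with an 8-byte key):

    using db_type = beast::nudb::store<
        beast::xxhasher,
        beast::nudb::identity_codec,
        beast::nudb::native_file>;

    db_type db;
    if (db.open ("db.dat", "db.key", "db.log",
            16 * 1024 * 1024)) // arena_alloc_size
    {
        std::uint64_t key = 1;
        char const value[] = "hello";
        db.insert (&key, value, sizeof(value));
        db.fetch (&key,
            [](void const* data, std::size_t size)
            {
                // data/size are the decompressed value;
                // never invoked when the key is absent
            });
        db.close();
    }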

View File

@@ -17,10 +17,7 @@
*/
//==============================================================================
#include <beast/nudb/store.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/tests/common.h>
#include <beast/nudb/tests/fail_file.h>
#include <beast/module/core/diagnostic/UnitTestUtilities.h>
#include <beast/module/core/files/File.h>
#include <beast/random/xor_shift_engine.h>
@@ -44,18 +41,18 @@ public:
// with keys not present.
void
do_test (std::size_t count,
nudb::path_type const& path)
path_type const& path)
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
auto const lp = path + ".log";
nudb::create (dp, kp, lp,
test_api::create (dp, kp, lp,
appnum,
salt,
sizeof(nudb::test::key_type),
nudb::block_size(path),
0.50);
nudb::store db;
test_api::store db;
if (! expect (db.open(dp, kp, lp,
arena_alloc_size), "open"))
return;
@@ -67,7 +64,7 @@ public:
expect (db.insert(&v.key, v.data, v.size),
"insert");
}
storage s;
Storage s;
for (std::size_t i = 0; i < count * 2; ++i)
{
if (! (i%2))

View File

@@ -17,11 +17,13 @@
*/
//==============================================================================
#ifndef BEAST_NUDB_COMMON_H_INCLUDED
#define BEAST_NUDB_COMMON_H_INCLUDED
#ifndef BEAST_NUDB_TESTS_COMMON_H_INCLUDED
#define BEAST_NUDB_TESTS_COMMON_H_INCLUDED
#include <beast/nudb.h>
#include <beast/nudb/identity_codec.h>
#include <beast/nudb/tests/fail_file.h>
#include <beast/hash/xxhasher.h>
#include <beast/random/xor_shift_engine.h>
#include <cstdint>
#include <iomanip>
@@ -33,9 +35,17 @@ namespace test {
using key_type = std::size_t;
using fail_store = nudb::basic_store<
beast::nudb::default_hash, nudb::fail_file <
nudb::native_file>>;
// xxhasher is fast and produces good results
using test_api_base =
nudb::api<xxhasher, identity_codec, native_file>;
struct test_api : test_api_base
{
using fail_store = nudb::store<
typename test_api_base::hash_type,
typename test_api_base::codec_type,
nudb::fail_file <typename test_api_base::file_type>>;
};
static std::size_t BEAST_CONSTEXPR arena_alloc_size = 16 * 1024 * 1024;
@@ -45,8 +55,8 @@ static std::uint64_t BEAST_CONSTEXPR salt = 42;
//------------------------------------------------------------------------------
// Meets the requirements of BufferFactory
class storage
// Meets the requirements of Handler
class Storage
{
private:
std::size_t size_ = 0;
@@ -54,9 +64,9 @@ private:
std::unique_ptr<std::uint8_t[]> buf_;
public:
storage() = default;
storage (storage const&) = delete;
storage& operator= (storage const&) = delete;
Storage() = default;
Storage (Storage const&) = delete;
Storage& operator= (Storage const&) = delete;
std::size_t
size() const
@@ -71,15 +81,23 @@ public:
}
std::uint8_t*
operator()(std::size_t n)
reserve (std::size_t size)
{
if (capacity_ < n)
if (capacity_ < size)
{
capacity_ = detail::ceil_pow2(n);
capacity_ = detail::ceil_pow2(size);
buf_.reset (
new std::uint8_t[capacity_]);
}
size_ = n;
size_ = size;
return buf_.get();
}
std::uint8_t*
operator()(void const* data, std::size_t size)
{
reserve (size);
std::memcpy(buf_.get(), data, size);
return buf_.get();
}
};
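
Usage sketch: Storage models the new Handler concept, copying the value out so a test can inspect it after fetch returns (db and v as in the surrounding tests):

    Storage s;
    if (db.fetch (&v.key, s))
        expect (s.size() == v.size, "size");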
@@ -134,7 +152,7 @@ private:
maxSize = 1250
};
storage s_;
Storage s_;
beast::xor_shift_engine gen_;
std::uniform_int_distribution<std::uint32_t> d_size_;
@@ -162,7 +180,7 @@ public:
value_type v;
rngcpy (&v.key, sizeof(v.key), gen_);
v.size = d_size_(gen_);
v.data = s_(v.size);
v.data = s_.reserve(v.size);
rngcpy (v.data, v.size, gen_);
return v;
}
@@ -205,14 +223,18 @@ print (Log log,
log << "actual_load: " << std::fixed << std::setprecision(0) <<
info.actual_load * 100 << "%";
log << "version: " << num(info.version);
log << "salt: " << std::showbase << std::hex << info.salt;
log << "uid: " << std::showbase << std::hex << info.uid;
log << "appnum: " << info.appnum;
log << "key_size: " << num(info.key_size);
log << "salt: " << std::showbase << std::hex << info.salt;
log << "pepper: " << std::showbase << std::hex << info.pepper;
log << "block_size: " << num(info.block_size);
log << "bucket_size: " << num(info.bucket_size);
log << "load_factor: " << std::fixed << std::setprecision(0) <<
info.load_factor * 100 << "%";
log << "capacity: " << num(info.capacity);
log << "buckets: " << num(info.buckets);
log << "key_count: " << num(info.key_count);
log << "value_count: " << num(info.value_count);
log << "value_bytes: " << num(info.value_bytes);
log << "spill_count: " << num(info.spill_count);

View File

@@ -20,7 +20,7 @@
#ifndef BEAST_NUDB_FAIL_FILE_H_INCLUDED
#define BEAST_NUDB_FAIL_FILE_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <atomic>
#include <cstddef>
#include <string>

View File

@@ -17,10 +17,7 @@
*/
//==============================================================================
#include <beast/nudb/store.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/tests/common.h>
#include <beast/nudb/tests/fail_file.h>
#include <beast/module/core/files/File.h>
#include <beast/random/xor_shift_engine.h>
#include <beast/unit_test/suite.h>
@@ -42,17 +39,19 @@ public:
// they are there. Uses a fail_file that makes the n-th
// I/O operation fail by throwing an exception.
void
do_work (std::size_t n, std::size_t count,
float load_factor, nudb::path_type const& path)
do_work (std::size_t count, float load_factor,
nudb::path_type const& path, fail_counter& c)
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
auto const lp = path + ".log";
nudb::fail_counter c(0);
nudb::create (dp, kp, lp, appnum, salt,
sizeof(key_type), block_size(path),
load_factor);
fail_store db;
test_api::file_type::erase (dp);
test_api::file_type::erase (kp);
test_api::file_type::erase (lp);
expect(test_api::create (
dp, kp, lp, appnum, salt, sizeof(key_type),
block_size(path), load_factor), "create");
test_api::fail_store db;
if (! expect(db.open(dp, kp, lp,
arena_alloc_size, c), "open"))
{
@@ -60,14 +59,14 @@ public:
// to report this and terminate the test.
}
expect (db.appnum() == appnum, "appnum");
c.reset(n);
Sequence seq;
for (std::size_t i = 0; i < count; ++i)
{
auto const v = seq[i];
db.insert(&v.key, v.data, v.size);
expect(db.insert(&v.key, v.data, v.size),
"insert");
}
storage s;
Storage s;
for (std::size_t i = 0; i < count; ++i)
{
auto const v = seq[i];
@@ -81,26 +80,36 @@ public:
break;
}
db.close();
#ifndef NDEBUG
print(log, verify(dp, kp));
verify(dp, kp);
#endif
nudb::native_file::erase (dp);
nudb::native_file::erase (kp);
nudb::native_file::erase (lp);
verify_info info;
try
{
info = test_api::verify(dp, kp);
}
catch(...)
{
print(log, info);
throw;
}
test_api::file_type::erase (dp);
test_api::file_type::erase (kp);
test_api::file_type::erase (lp);
}
void
do_recover (path_type const& path)
do_recover (path_type const& path,
fail_counter& c)
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
auto const lp = path + ".log";
recover(dp, kp, lp);
verify(dp, kp);
nudb::native_file::erase (dp);
nudb::native_file::erase (kp);
nudb::native_file::erase (lp);
recover<test_api::hash_type,
test_api::codec_type, fail_file<
test_api::file_type>>(dp, kp, lp,
test_api::buffer_size, c);
test_api::verify(dp, kp);
test_api::file_type::erase (dp);
test_api::file_type::erase (kp);
test_api::file_type::erase (lp);
}
void
@@ -114,12 +123,24 @@ public:
{
try
{
do_work (n, count, load_factor, path);
fail_counter c(n);
do_work (count, load_factor, path, c);
break;
}
catch (nudb::fail_error const&)
{
do_recover (path);
}
for (std::size_t m = 1;;++m)
{
fail_counter c(m);
try
{
do_recover (path, c);
break;
}
catch (nudb::fail_error const&)
{
}
}
}
}
@@ -131,11 +152,10 @@ public:
void
run() override
{
float lf = 0.75f;
float lf = 0.55f;
test_recover (lf, 0);
test_recover (lf, 10);
test_recover (lf, 100);
test_recover (lf, 1000);
}
};
@@ -148,7 +168,8 @@ public:
run() override
{
float lf = 0.90f;
test_recover (lf, 100000);
test_recover (lf, 1000);
test_recover (lf, 10000);
}
};

View File

@@ -18,9 +18,7 @@
//==============================================================================
#include <BeastConfig.h>
#include <beast/nudb.h>
#include <beast/nudb/tests/common.h>
#include <beast/nudb/tests/fail_file.h>
#include <beast/module/core/diagnostic/UnitTestUtilities.h>
#include <beast/module/core/files/File.h>
#include <beast/random/xor_shift_engine.h>
@@ -55,15 +53,15 @@ public:
auto const kp = path + ".key";
auto const lp = path + ".log";
Sequence seq;
nudb::store db;
test_api::store db;
try
{
expect (nudb::create (dp, kp, lp, appnum,
expect (test_api::create (dp, kp, lp, appnum,
salt, sizeof(key_type), block_size,
load_factor), "create");
expect (db.open(dp, kp, lp,
arena_alloc_size), "open");
storage s;
Storage s;
// insert
for (std::size_t i = 0; i < N; ++i)
{
@@ -102,7 +100,9 @@ public:
"insert 2");
}
db.close();
auto const stats = nudb::verify (dp, kp);
//auto const stats = test_api::verify(dp, kp);
auto const stats = verify<test_api::hash_type>(
dp, kp, 1 * 1024 * 1024);
expect (stats.hist[1] > 0, "no splits");
print (log, stats);
}
@@ -114,9 +114,9 @@ public:
{
fail (e.what());
}
expect (native_file::erase(dp));
expect (native_file::erase(kp));
expect (! native_file::erase(lp));
expect (test_api::file_type::erase(dp));
expect (test_api::file_type::erase(kp));
expect (! test_api::file_type::erase(lp));
}
void

View File

@@ -0,0 +1,73 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <beast/nudb/detail/varint.h>
#include <beast/unit_test/suite.h>
#include <array>
namespace beast {
namespace nudb {
namespace tests {
class varint_test : public unit_test::suite
{
public:
void
test_varints (std::vector<std::size_t> vv)
{
testcase("encode, decode");
for (auto const v : vv)
{
std::array<std::uint8_t,
detail::varint_traits<
std::size_t>::max> vi;
auto const n0 =
detail::write_varint(
vi.data(), v);
expect (n0 > 0, "write error");
std::size_t v1;
auto const n1 =
detail::read_varint(
vi.data(), n0, v1);
expect(n1 == n0, "read error");
expect(v == v1, "wrong value");
}
}
void
run() override
{
test_varints({
0, 1, 2,
126, 127, 128,
253, 254, 255,
16127, 16128, 16129,
0xff,
0xffff,
0xffffffff,
0xffffffffffffUL,
0xffffffffffffffffUL});
}
};
BEAST_DEFINE_TESTSUITE(varint,nudb,beast);
} // tests
} // nudb
} // beast

View File

@@ -20,11 +20,213 @@
#include <beast/nudb/verify.h>
#include <beast/nudb/tests/common.h>
#include <beast/unit_test/suite.h>
#include <beast/chrono/basic_seconds_clock.h>
#include <chrono>
#include <iomanip>
#include <ostream>
namespace beast {
namespace nudb {
namespace test {
namespace detail {
class save_stream_state
{
std::ostream& os_;
std::streamsize precision_;
std::ios::fmtflags flags_;
std::ios::char_type fill_;
public:
~save_stream_state()
{
os_.precision(precision_);
os_.flags(flags_);
os_.fill(fill_);
}
save_stream_state(save_stream_state const&) = delete;
save_stream_state& operator=(save_stream_state const&) = delete;
explicit save_stream_state(std::ostream& os)
: os_(os)
, precision_(os.precision())
, flags_(os.flags())
, fill_(os.fill())
{
}
};
template <class Rep, class Period>
std::ostream&
pretty_time(std::ostream& os, std::chrono::duration<Rep, Period> d)
{
save_stream_state _(os);
using namespace std::chrono;
if (d < microseconds{1})
{
// use nanoseconds
if (d < nanoseconds{100})
{
// use floating
using ns = duration<float, std::nano>;
os << std::fixed << std::setprecision(1) << ns(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<nanoseconds>(d).count();
}
os << "ns";
}
else if (d < milliseconds{1})
{
// use microseconds
if (d < microseconds{100})
{
// use floating
using ms = duration<float, std::micro>;
os << std::fixed << std::setprecision(1) << ms(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<microseconds>(d).count();
}
os << "us";
}
else if (d < seconds{1})
{
// use milliseconds
if (d < milliseconds{100})
{
// use floating
using ms = duration<float, std::milli>;
os << std::fixed << std::setprecision(1) << ms(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<milliseconds>(d).count();
}
os << "ms";
}
else if (d < minutes{1})
{
// use seconds
if (d < seconds{100})
{
// use floating
using s = duration<float>;
os << std::fixed << std::setprecision(1) << s(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<seconds>(d).count();
}
os << "s";
}
else
{
// use minutes
if (d < minutes{100})
{
// use floating
using m = duration<float, std::ratio<60>>;
os << std::fixed << std::setprecision(1) << m(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<minutes>(d).count();
}
os << "min";
}
return os;
}
template <class Period, class Rep>
inline
std::string
fmtdur(std::chrono::duration<Period, Rep> const& d)
{
std::stringstream ss;
pretty_time(ss, d);
return ss.str();
}
} // detail
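
For example (illustrative values; each duration lands in its band's floating-point branch, so one decimal place is printed):

    detail::fmtdur(std::chrono::milliseconds(1500)); // "1.5s"
    detail::fmtdur(std::chrono::microseconds(50));   // "50.0us"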
//------------------------------------------------------------------------------
template <class Log>
class progress
{
private:
using clock_type =
beast::basic_seconds_clock<
std::chrono::steady_clock>;
Log& log_;
clock_type::time_point start_ = clock_type::now();
clock_type::time_point now_ = clock_type::now();
clock_type::time_point report_ = clock_type::now();
std::size_t prev_ = 0;
bool estimate_ = false;
public:
explicit
progress(Log& log)
: log_(log)
{
}
void
operator()(std::size_t w, std::size_t w1)
{
using namespace std::chrono;
auto const now = clock_type::now();
if (now == now_)
return;
now_ = now;
auto const elapsed = now - start_;
if (! estimate_)
{
if (elapsed < seconds(15))
return;
estimate_ = true;
}
else if (now - report_ <
std::chrono::seconds(60))
{
return;
}
auto const rate =
elapsed.count() / double(w);
clock_type::duration const remain(
static_cast<clock_type::duration::rep>(
(w1 - w) * rate));
log_ <<
"Remaining: " << detail::fmtdur(remain) <<
" (" << w << " of " << w1 <<
" in " << detail::fmtdur(elapsed) <<
", " << (w - prev_) <<
" in " << detail::fmtdur(now - report_) <<
")";
report_ = now;
prev_ = w;
}
void
finish()
{
log_ <<
"Total time: " << detail::fmtdur(
clock_type::now() - start_);
}
};
//------------------------------------------------------------------------------
class verify_test : public unit_test::suite
{
public:
@@ -34,12 +236,41 @@ public:
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
print(log, verify(dp, kp));
print(log, test_api::verify(dp, kp));
}
void
run() override
{
testcase(abort_on_fail) << "verify " << arg();
if (arg().empty())
return fail("missing unit test argument");
do_verify(arg());
pass();
}
};
class verify_fast_test : public unit_test::suite
{
public:
// Runs verify on the database and reports statistics
void
do_verify (nudb::path_type const& path)
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
progress<decltype(log)> p(log);
// VFALCO HACK 32gb hardcoded!
auto const info = verify_fast<
test_api::hash_type>(
dp, kp, 34359738368, p);
print(log, info);
}
void
run() override
{
testcase(abort_on_fail) << "verify_fast " << arg();
if (arg().empty())
return fail("missing unit test argument");
do_verify(arg());
@@ -48,8 +279,8 @@ public:
};
BEAST_DEFINE_TESTSUITE_MANUAL(verify,nudb,beast);
BEAST_DEFINE_TESTSUITE_MANUAL(verify_fast,nudb,beast);
} // test
} // nudb
} // beast

View File

@@ -20,12 +20,10 @@
#ifndef BEAST_NUDB_VERIFY_H_INCLUDED
#define BEAST_NUDB_VERIFY_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/file.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/detail/bucket.h>
#include <beast/nudb/detail/bulkio.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <algorithm>
#include <cstddef>
@@ -39,8 +37,11 @@ struct verify_info
{
// Configured
std::size_t version = 0; // API version
std::size_t salt = 0; // Salt or database ID
std::size_t uid = 0; // UID
std::size_t appnum = 0; // Appnum
std::size_t key_size = 0; // Size of a key in bytes
std::size_t salt = 0; // Salt
std::size_t pepper = 0; // Pepper
std::size_t block_size = 0; // Block size in bytes
float load_factor = 0; // Target bucket fill fraction
@@ -82,12 +83,12 @@ struct verify_info
Iterates the key and data files, throws store_corrupt_error
on broken invariants.
*/
template <class Hasher = default_hash>
template <class Hasher>
verify_info
verify (
path_type const& dat_path,
path_type const& key_path,
std::size_t read_size = 16 * 1024 * 1024)
std::size_t read_size)
{
using namespace detail;
using File = native_file;
@@ -103,12 +104,16 @@ verify (
dat_file_header dh;
read (df, dh);
read (kf, kh);
verify(dh);
verify<Hasher>(dh, kh);
verify_info info;
info.version = dh.version;
info.salt = dh.salt;
info.uid = dh.uid;
info.appnum = dh.appnum;
info.key_size = dh.key_size;
info.salt = kh.salt;
info.pepper = kh.pepper;
info.block_size = kh.block_size;
info.load_factor = kh.load_factor / 65536.f;
info.capacity = kh.capacity;
@@ -117,21 +122,28 @@ verify (
info.key_file_size = kf.actual_size();
info.dat_file_size = df.actual_size();
buffer buf (kh.block_size);
bucket b (kh.key_size,
kh.block_size, buf.get());
// Data Record
auto const dh_len =
field<uint48_t>::size + // Size
kh.key_size; // Key
std::size_t fetches = 0;
// Iterate Data File
buffer buf (kh.block_size + dh_len);
bucket b (kh.block_size, buf.get());
std::uint8_t* pd = buf.get() + kh.block_size;
{
bulk_reader<File> r(df,
dat_file_header::size,
df.actual_size(), read_size);
while (! r.eof())
{
auto const offset = r.offset();
// Data Record or Spill Record
std::size_t size;
auto is = r.prepare(
field<uint48_t>::size); // Size
std::size_t size;
read<uint48_t>(is, size);
if (size > 0)
{
@@ -144,39 +156,49 @@ verify (
std::uint8_t const* const data =
is.data(size);
(void)data;
auto const h = hash<Hasher>(
key, kh.key_size, kh.salt);
// Check bucket and spills
try
{
b.read (kf, (bucket_index<Hasher>(
key, kh) + 1) * kh.block_size);
auto const n = bucket_index(
h, kh.buckets, kh.modulus);
b.read (kf, (n + 1) * kh.block_size);
++fetches;
}
catch (file_short_read_error const&)
{
throw store_corrupt_error(
"short bucket");
}
for(;;)
for (;;)
{
if (b.find(key).second)
break;
if (b.spill() != 0)
for (auto i = b.lower_bound(h);
i < b.size(); ++i)
{
try
{
b.read (df, b.spill());
}
catch (file_short_read_error const&)
{
throw store_corrupt_error(
"short spill");
}
auto const item = b[i];
if (item.hash != h)
break;
if (item.offset == offset)
goto found;
++fetches;
}
else
{
auto const spill = b.spill();
if (! spill)
throw store_corrupt_error(
"orphaned value");
try
{
b.read (df, spill);
++fetches;
}
catch (file_short_read_error const&)
{
throw store_corrupt_error(
"short spill");
}
}
found:
// Update
++info.value_count;
info.value_bytes += size;
@@ -196,17 +218,12 @@ verify (
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
b.compact_size(); // Bucket
}
}
}
// Iterate Key File
{
// Data Record (header)
buffer buf (
field<uint48_t>::size + // Size
kh.key_size); // Key Size
for (std::size_t n = 0; n < kh.buckets; ++n)
{
std::size_t nspill = 0;
@@ -219,8 +236,7 @@ verify (
auto const e = b[i];
try
{
df.read (e.offset,
buf.get(), buf.size());
df.read (e.offset, pd, dh_len);
}
catch (file_short_read_error const&)
{
@@ -228,16 +244,19 @@ verify (
"missing value");
}
// Data Record
istream is(buf.get(), buf.size());
istream is(pd, dh_len);
std::size_t size;
read<uint48_t>(is, size); // Size
void const* key =
is.data(kh.key_size); // Key
if (size != e.size)
throw store_corrupt_error(
"wrong size");
if (std::memcmp(is.data(kh.key_size),
e.key, kh.key_size) != 0)
auto const h = hash<Hasher>(key,
kh.key_size, kh.salt);
if (h != e.hash)
throw store_corrupt_error(
"wrong key");
"wrong hash");
}
if (! b.spill())
break;
@@ -266,12 +285,242 @@ verify (
float sum = 0;
for (int i = 0; i < info.hist.size(); ++i)
sum += info.hist[i] * (i + 1);
info.avg_fetch = sum / info.buckets;
//info.avg_fetch = sum / info.buckets;
info.avg_fetch = float(fetches) / info.value_count;
info.waste = (info.spill_bytes_tot - info.spill_bytes) /
float(info.dat_file_size);
info.overhead =
float(info.key_file_size + info.dat_file_size) /
(info.value_bytes + info.key_count * info.key_size) - 1;
(
info.value_bytes +
info.key_count *
(info.key_size +
// Data Record
field<uint48_t>::size) // Size
) - 1;
info.actual_load = info.key_count / float(
info.capacity * info.buckets);
return info;
}
/** Verify consistency of the key and data files.
Effects:
Opens the key and data files in read-only mode.
Throws file_error if a file can't be opened.
Iterates the key and data files, throws store_corrupt_error
on broken invariants.
This uses a different algorithm that depends on allocating
a large buffer.
*/
template <class Hasher, class Progress>
verify_info
verify_fast (
path_type const& dat_path,
path_type const& key_path,
std::size_t buffer_size,
Progress&& progress)
{
using namespace detail;
using File = native_file;
File df;
File kf;
if (! df.open (file_mode::scan, dat_path))
throw store_corrupt_error(
"no data file");
if (! kf.open (file_mode::read, key_path))
throw store_corrupt_error(
"no key file");
key_file_header kh;
dat_file_header dh;
read (df, dh);
read (kf, kh);
verify(dh);
verify<Hasher>(dh, kh);
verify_info info;
info.version = dh.version;
info.uid = dh.uid;
info.appnum = dh.appnum;
info.key_size = dh.key_size;
info.salt = kh.salt;
info.pepper = kh.pepper;
info.block_size = kh.block_size;
info.load_factor = kh.load_factor / 65536.f;
info.capacity = kh.capacity;
info.buckets = kh.buckets;
info.bucket_size = kh.bucket_size;
info.key_file_size = kf.actual_size();
info.dat_file_size = df.actual_size();
std::size_t fetches = 0;
// Counts unverified keys per bucket
std::unique_ptr<std::uint32_t[]> nkeys(
new std::uint32_t[kh.buckets]);
// Verify contiguous sequential sections of the
// key file using multiple passes over the data.
//
auto const buckets = std::max<std::size_t>(1,
buffer_size / kh.block_size);
buffer buf((buckets + 1) * kh.block_size);
bucket tmp(kh.block_size, buf.get() +
buckets * kh.block_size);
std::size_t const passes =
(kh.buckets + buckets - 1) / buckets;
auto const df_size = df.actual_size();
std::size_t const work = passes * df_size;
std::size_t npass = 0;
for (std::size_t b0 = 0; b0 < kh.buckets;
b0 += buckets)
{
auto const b1 = std::min(
b0 + buckets, kh.buckets);
// Buffered range is [b0, b1)
auto const bn = b1 - b0;
kf.read((b0 + 1) * kh.block_size,
buf.get(), bn * kh.block_size);
// Count keys in buckets
for (std::size_t i = b0 ; i < b1; ++i)
{
bucket b(kh.block_size, buf.get() +
(i - b0) * kh.block_size);
nkeys[i] = b.size();
std::size_t nspill = 0;
auto spill = b.spill();
while (spill != 0)
{
tmp.read(df, spill);
nkeys[i] += tmp.size();
spill = tmp.spill();
++nspill;
++info.spill_count;
info.spill_bytes +=
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
tmp.compact_size(); // SpillBucket
}
if (nspill >= info.hist.size())
nspill = info.hist.size() - 1;
++info.hist[nspill];
info.key_count += nkeys[i];
}
// Iterate Data File
bulk_reader<File> r(df,
dat_file_header::size, df_size,
64 * 1024 * 1024);
while (! r.eof())
{
auto const offset = r.offset();
progress(npass * df_size + offset, work);
// Data Record or Spill Record
auto is = r.prepare(
field<uint48_t>::size); // Size
std::size_t size;
read<uint48_t>(is, size);
if (size > 0)
{
// Data Record
is = r.prepare(
kh.key_size + // Key
size); // Data
std::uint8_t const* const key =
is.data(kh.key_size);
std::uint8_t const* const data =
is.data(size);
(void)data;
auto const h = hash<Hasher>(
key, kh.key_size, kh.salt);
auto const n = bucket_index(
h, kh.buckets, kh.modulus);
if (n < b0 || n >= b1)
continue;
// Check bucket and spills
bucket b (kh.block_size, buf.get() +
(n - b0) * kh.block_size);
++fetches;
for (;;)
{
for (auto i = b.lower_bound(h);
i < b.size(); ++i)
{
auto const item = b[i];
if (item.hash != h)
break;
if (item.offset == offset)
goto found;
++fetches;
}
auto const spill = b.spill();
if (! spill)
throw store_corrupt_error(
"orphaned value");
b = tmp;
try
{
b.read (df, spill);
++fetches;
}
catch (file_short_read_error const&)
{
throw store_corrupt_error(
"short spill");
}
}
found:
// Update
++info.value_count;
info.value_bytes += size;
if (nkeys[n]-- == 0)
throw store_corrupt_error(
"duplicate value");
}
else
{
// Spill Record
is = r.prepare(
field<std::uint16_t>::size);
read<std::uint16_t>(is, size); // Size
if (size != kh.bucket_size)
throw store_corrupt_error(
"bad spill size");
tmp.read(r); // Bucket
if (b0 == 0)
{
++info.spill_count_tot;
info.spill_bytes_tot +=
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
tmp.compact_size(); // Bucket
}
}
}
++npass;
}
// Make sure every key in every bucket was visited
for (std::size_t i = 0;
i < kh.buckets; ++i)
if (nkeys[i] != 0)
throw store_corrupt_error(
"orphan value");
float sum = 0;
for (int i = 0; i < info.hist.size(); ++i)
sum += info.hist[i] * (i + 1);
//info.avg_fetch = sum / info.buckets;
info.avg_fetch = float(fetches) / info.value_count;
info.waste = (info.spill_bytes_tot - info.spill_bytes) /
float(info.dat_file_size);
info.overhead =
float(info.key_file_size + info.dat_file_size) /
(
info.value_bytes +
info.key_count *
(info.key_size +
// Data Record
field<uint48_t>::size) // Size
) - 1;
info.actual_load = info.key_count / float(
info.capacity * info.buckets);
return info;
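
A hypothetical call (paths and buffer size made up; a larger buffer covers more buckets per pass, so fewer passes over the data file):

    auto const info = beast::nudb::verify_fast<
        beast::xxhasher>(
            "db.dat", "db.key",
            512 * 1024 * 1024,
            [](std::size_t complete, std::size_t total)
            {
                // called periodically with progress so far
            });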

View File

@@ -20,11 +20,10 @@
#ifndef BEAST_NUDB_VISIT_H_INCLUDED
#define BEAST_NUDB_VISIT_H_INCLUDED
#include <beast/nudb/error.h>
#include <beast/nudb/common.h>
#include <beast/nudb/file.h>
#include <beast/nudb/mode.h>
#include <beast/nudb/detail/buffer.h>
#include <beast/nudb/detail/bulkio.h>
#include <beast/nudb/detail/config.h>
#include <beast/nudb/detail/format.h>
#include <algorithm>
#include <cstddef>
@@ -44,12 +43,12 @@ namespace nudb {
@return `true` if the visit completed
This only requires the data file.
*/
template <class Function>
template <class Codec, class Function>
bool
visit(
path_type const& path,
Function f,
std::size_t read_size = 16 * 1024 * 1024)
std::size_t read_size,
Function&& f)
{
using namespace detail;
using File = native_file;
@@ -57,11 +56,13 @@ visit(
df.open (file_mode::scan, path);
dat_file_header dh;
read (df, dh);
verify (dh);
verify<Codec> (dh);
Codec codec;
// Iterate Data File
bulk_reader<File> r(
df, dat_file_header::size,
df.actual_size(), read_size);
buffer buf;
try
{
while (! r.eof())
@@ -79,10 +80,10 @@ visit(
size); // Data
std::uint8_t const* const key =
is.data(dh.key_size);
std::uint8_t const* const data =
is.data(size);
auto const result = codec.decompress(
is.data(size), size, buf);
if (! f(key, dh.key_size,
data, size))
result.first, result.second))
return false;
}
else

View File

@@ -20,9 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_WIN32_FILE_H_INCLUDED
 #define BEAST_NUDB_DETAIL_WIN32_FILE_H_INCLUDED
-#include <beast/nudb/error.h>
-#include <beast/nudb/mode.h>
-#include <beast/nudb/detail/config.h>
+#include <beast/nudb/common.h>
 #include <cassert>
 #include <string>
@@ -321,25 +319,32 @@ void
 win32_file<_>::read (std::size_t offset,
     void* buffer, std::size_t bytes)
 {
-    DWORD bytesRead;
-    LARGE_INTEGER li;
-    li.QuadPart = static_cast<LONGLONG>(offset);
-    OVERLAPPED ov;
-    ov.Offset = li.LowPart;
-    ov.OffsetHigh = li.HighPart;
-    ov.hEvent = NULL;
-    BOOL const bSuccess = ::ReadFile(
-        hf_, buffer, bytes, &bytesRead, &ov);
-    if (! bSuccess)
+    while(bytes > 0)
     {
-        DWORD const dwError = ::GetLastError();
-        if (dwError != ERROR_HANDLE_EOF)
-            throw file_win32_error(
-                "read file", dwError);
-        throw file_short_read_error();
+        DWORD bytesRead;
+        LARGE_INTEGER li;
+        li.QuadPart = static_cast<LONGLONG>(offset);
+        OVERLAPPED ov;
+        ov.Offset = li.LowPart;
+        ov.OffsetHigh = li.HighPart;
+        ov.hEvent = NULL;
+        BOOL const bSuccess = ::ReadFile(
+            hf_, buffer, bytes, &bytesRead, &ov);
+        if (! bSuccess)
+        {
+            DWORD const dwError = ::GetLastError();
+            if (dwError != ERROR_HANDLE_EOF)
+                throw file_win32_error(
+                    "read file", dwError);
+            throw file_short_read_error();
+        }
+        if (bytesRead == 0)
+            throw file_short_read_error();
+        offset += bytesRead;
+        bytes -= bytesRead;
+        buffer = reinterpret_cast<char*>(
+            buffer) + bytesRead;
     }
-    if (bytesRead != bytes)
-        throw file_short_read_error();
 }
 template <class _>
@@ -347,20 +352,28 @@ void
 win32_file<_>::write (std::size_t offset,
     void const* buffer, std::size_t bytes)
 {
-    LARGE_INTEGER li;
-    li.QuadPart = static_cast<LONGLONG>(offset);
-    OVERLAPPED ov;
-    ov.Offset = li.LowPart;
-    ov.OffsetHigh = li.HighPart;
-    ov.hEvent = NULL;
-    DWORD bytesWritten;
-    BOOL const bSuccess = ::WriteFile(
-        hf_, buffer, bytes, &bytesWritten, &ov);
-    if (! bSuccess)
-        throw file_win32_error(
-            "write file");
-    if (bytesWritten != bytes)
-        throw file_short_write_error();
+    while(bytes > 0)
+    {
+        LARGE_INTEGER li;
+        li.QuadPart = static_cast<LONGLONG>(offset);
+        OVERLAPPED ov;
+        ov.Offset = li.LowPart;
+        ov.OffsetHigh = li.HighPart;
+        ov.hEvent = NULL;
+        DWORD bytesWritten;
+        BOOL const bSuccess = ::WriteFile(
+            hf_, buffer, bytes, &bytesWritten, &ov);
+        if (! bSuccess)
+            throw file_win32_error(
+                "write file");
+        if (bytesWritten == 0)
+            throw file_short_write_error();
+        offset += bytesWritten;
+        bytes -= bytesWritten;
+        buffer = reinterpret_cast<
+            char const*>(buffer) +
+            bytesWritten;
+    }
 }
 template <class _>
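The same partial-transfer fix applies on POSIX, where ::pread and ::pwrite may also return short counts. A hedged sketch of the equivalent loop (illustrative only, not the verbatim posix_file code; error handling simplified to a bool):

    // Sketch of the analogous POSIX retry loop.
    #include <cerrno>
    #include <cstddef>
    #include <unistd.h>

    bool
    pread_all(int fd, std::size_t offset, void* buffer, std::size_t bytes)
    {
        while (bytes > 0)
        {
            ssize_t const n = ::pread(
                fd, buffer, bytes, static_cast<off_t>(offset));
            if (n < 0)
            {
                if (errno == EINTR)
                    continue;   // interrupted, retry
                return false;   // real I/O error
            }
            if (n == 0)
                return false;   // unexpected end of file (short read)
            offset += n;
            bytes -= n;
            buffer = static_cast<char*>(buffer) + n;
        }
        return true;
    }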

View File

@@ -24,7 +24,10 @@
 #include <ripple/nodestore/impl/DecodedBlob.h>
 #include <ripple/nodestore/impl/EncodedBlob.h>
 #include <beast/nudb.h>
-#include <beast/nudb/detail/bucket.h> // remove asap
+#include <beast/nudb/identity_codec.h>
+#include <beast/nudb/visit.h>
+#include <beast/hash/xxhasher.h>
 #include <snappy.h>
 #include <boost/filesystem.hpp>
 #include <cassert>
@@ -61,10 +64,13 @@ public:
         currentType = typeTwo
     };
+    using api = beast::nudb::api<
+        beast::xxhasher, beast::nudb::identity_codec>;
     beast::Journal journal_;
     size_t const keyBytes_;
     std::string const name_;
-    beast::nudb::store db_;
+    api::store db_;
     std::atomic <bool> deletePath_;
     Scheduler& scheduler_;
@@ -85,7 +91,7 @@ public:
         auto const kp = (folder / "nudb.key").string ();
         auto const lp = (folder / "nudb.log").string ();
         using beast::nudb::make_salt;
-        beast::nudb::create (dp, kp, lp,
+        api::create (dp, kp, lp,
             currentType, make_salt(), keyBytes,
             beast::nudb::block_size(kp),
             0.50);
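Taken together, the api convenience traits fix the Hasher and Codec once, and every call site in this file then goes through the alias. A usage-level sketch of the pattern (arguments abbreviated; the alias and calls are those appearing in this diff):

    // Sketch: api binds a Hasher and Codec pair once, so
    // create/verify/visit and the store type stay consistent.
    using api = beast::nudb::api<
        beast::xxhasher, beast::nudb::identity_codec>;

    api::store db;                            // store bound to that pair
    // api::create (dp, kp, lp, appnum,       // as in the call above
    //     salt, key_size, block_size, load);
    // api::verify (dp, kp);                  // offline consistency check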
@@ -200,22 +206,24 @@
     fetch1 (void const* key,
         std::shared_ptr <NodeObject>* pno)
     {
+        Status status;
         pno->reset();
-        std::size_t bytes;
-        std::unique_ptr <std::uint8_t[]> data;
         if (! db_.fetch (key,
-            [&data, &bytes](std::size_t n)
+            [key, pno, &status](void const* data, std::size_t size)
             {
-                bytes = n;
-                data.reset(new std::uint8_t[bytes]);
-                return data.get();
+                DecodedBlob decoded (key, data, size);
+                if (! decoded.wasOk ())
+                {
+                    status = dataCorrupt;
+                    return;
+                }
+                *pno = decoded.createObject();
+                status = ok;
             }))
+        {
             return notFound;
-        DecodedBlob decoded (key, data.get(), bytes);
-        if (! decoded.wasOk ())
-            return dataCorrupt;
-        *pno = decoded.createObject();
-        return ok;
+        }
+        return status;
     }
 void
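The fetch1 rewrite above is the commit's API shift in miniature: instead of handing fetch an allocation callback that returns a buffer, the caller now receives the already-decompressed value in place. A hedged usage sketch (`db` and `key` are hypothetical names):

    // Sketch: callback-style fetch. The value is valid only for the
    // duration of the callback, so copy it if it must outlive the call.
    bool const found = db.fetch (key,
        [](void const* data, std::size_t size)
        {
            // ...decode or copy [data, data + size) here...
        });
    // fetch returns false when the key is not present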
@@ -236,31 +244,35 @@
     fetch2 (void const* key,
         std::shared_ptr <NodeObject>* pno)
     {
+        Status status;
         pno->reset();
-        std::size_t actual;
-        std::unique_ptr <char[]> compressed;
         if (! db_.fetch (key,
-            [&](std::size_t n)
+            [&](void const* data, std::size_t size)
             {
-                actual = n;
-                compressed.reset(
-                    new char[n]);
-                return compressed.get();
+                std::size_t actual;
+                if (! snappy::GetUncompressedLength(
+                    (char const*)data, size, &actual))
+                {
+                    status = dataCorrupt;
+                    return;
+                }
+                std::unique_ptr <char[]> buf (new char[actual]);
+                snappy::RawUncompress (
+                    (char const*)data, size, buf.get());
+                DecodedBlob decoded (key, buf.get(), actual);
+                if (! decoded.wasOk ())
+                {
+                    status = dataCorrupt;
+                    return;
+                }
+                *pno = decoded.createObject();
+                status = ok;
             }))
+        {
             return notFound;
-        std::size_t size;
-        if (! snappy::GetUncompressedLength(
-            (char const*)compressed.get(),
-            actual, &size))
-            return dataCorrupt;
-        std::unique_ptr <char[]> data (new char[size]);
-        snappy::RawUncompress (compressed.get(),
-            actual, data.get());
-        DecodedBlob decoded (key, data.get(), size);
-        if (! decoded.wasOk ())
-            return dataCorrupt;
-        *pno = decoded.createObject();
-        return ok;
+        }
+        return status;
     }
 void
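For reference, the snappy raw APIs used above pair up as compress/size-query/decompress. A self-contained round-trip sketch using the same calls:

    // Self-contained snappy round trip with the raw (non-STL) API.
    #include <snappy.h>
    #include <cstring>
    #include <memory>
    #include <string>

    bool
    round_trip(std::string const& in)
    {
        // Compress into a worst-case sized buffer
        std::unique_ptr<char[]> comp(
            new char[snappy::MaxCompressedLength(in.size())]);
        std::size_t comp_size;
        snappy::RawCompress(in.data(), in.size(), comp.get(), &comp_size);
        // Decompress: query the size first, then expand
        std::size_t actual;
        if (! snappy::GetUncompressedLength(comp.get(), comp_size, &actual))
            return false; // corrupt frame
        std::unique_ptr<char[]> out(new char[actual]);
        if (! snappy::RawUncompress(comp.get(), comp_size, out.get()))
            return false;
        return actual == in.size() &&
            std::memcmp(out.get(), in.data(), actual) == 0;
    }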
@@ -342,7 +354,7 @@
             auto const lp = db_.log_path();
             auto const appnum = db_.appnum();
             db_.close();
-            beast::nudb::visit (dp,
+            api::visit (dp,
                 [&](
                     void const* key, std::size_t key_bytes,
                     void const* data, std::size_t size)
@@ -399,7 +411,7 @@
             auto const kp = db_.key_path();
             auto const lp = db_.log_path();
             db_.close();
-            beast::nudb::verify (dp, kp);
+            api::verify (dp, kp);
             db_.open (dp, kp, lp,
                 arena_alloc_size);
         }
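Assuming verify returns the info structure populated in the verify code earlier in this commit (the exact return type name is an assumption; this caller simply discards it), the new statistics could be surfaced like so:

    // Sketch (assumption: api::verify returns the info populated above).
    #include <iostream>

    auto const info = api::verify (dp, kp);
    std::cerr
        << "values:    " << info.value_count << "\n"
        << "avg_fetch: " << info.avg_fetch << "\n"
        << "waste:     " << info.waste << "\n"
        << "overhead:  " << info.overhead << "\n"
        << "load:      " << info.actual_load << "\n";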