NuDB: Performance improvements (RIPD-793,796):

This introduces changes to NuDB that improve speed, reduce database
size, and enhance correctness. The most significant change is to store
hashes rather than entire keys in the key file. The output of the hash
function is reduced to 48 bits and stored directly in buckets.
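
For reference, the reduction keeps the upper 48 bits of the 64-bit
hash; detail/format.h in this commit implements it as:

    template<>
    inline
    std::size_t
    make_hash<uint48_t>(std::size_t h)
    {
        return (h>>16)&0xffffffffffff;
    }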

The API is also modified to introduce a Codec template parameter,
allowing compression and decompression to be supported by the
implementation itself rather than by callers.
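
The commit ships an identity_codec for databases that store values
uncompressed. As a rough sketch, a conforming codec is a type shaped
like the following (the member names are illustrative, inferred from
how the Codec parameter and the BufferFactory in detail/buffer.h are
used; they are not quoted from the commit):

    // Hypothetical codec shape: transforms a value on
    // insert/fetch, writing output through a BufferFactory
    // such as detail::buffer::operator().
    struct example_codec
    {
        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        compress (void const* data,
            std::size_t size, BufferFactory&& bf) const;

        template <class BufferFactory>
        std::pair<void const*, std::size_t>
        decompress (void const* data,
            std::size_t size, BufferFactory&& bf) const;
    };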

The data file no longer contains a salt, as the salt is applicable
only to the key and log files. This allows a data file to have
multiple key files with different salt values. To distinguish physical
files belonging to the same logical database, a new field, UID, is
introduced. The UID is a 64-bit random value generated once on
creation and stored in all three files.
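
The generator added to create.h in this commit reads:

    namespace detail {
    template <class = void>
    std::uint64_t
    make_uid()
    {
        std::random_device rng;
        std::mt19937_64 gen {rng()};
        std::uniform_int_distribution <std::size_t> dist;
        return dist(gen);
    }
    } // detail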

Buckets are zero-filled to the end of each block. This is a security
measure that prevents unintended contents of memory from being stored
to disk.

NuDB now offers the varint integer type, identical to the varint
described by Google.
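
As a sketch of that encoding (this is not the commit's code, which
lives in detail/varint.h): seven bits per byte, least-significant
group first, with the high bit set while more bytes follow.

    #include <cstddef>
    #include <cstdint>

    // Encodes v as a Google-style varint; at most
    // 10 bytes for a 64-bit value. Returns bytes written.
    inline std::size_t
    write_varint (std::uint8_t* p, std::uint64_t v)
    {
        std::size_t n = 0;
        while (v >= 0x80)
        {
            p[n++] = static_cast<std::uint8_t>(v) | 0x80;
            v >>= 7;
        }
        p[n++] = static_cast<std::uint8_t>(v);
        return n;
    }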

* Add varint
* Add Codec template argument
* Add "api" convenience traits
* Store hash in buckets
* istream can throw short read errors
* Support std::uint8_t format in streams
* Make file classes part of the public interface
* Remove buffers pessimization, replace with buffer
* Consolidate creation utility functions to the same header
* Zero fill unused areas of buckets on disk
* More coverage and improvements to the recover test
* Fix file read/write to loop until all bytes processed
* Add verify_fast, faster verify for large databases

The database version number is incremented to 2; older databases can
no longer be opened and should be deleted.
commit e2a5535ed6 (parent 62c5b5e570)
Author: Vinnie Falco
Date:   2015-02-03 07:46:24 -08:00

37 changed files with 2098 additions and 1300 deletions

File: Visual Studio project (.vcxproj)

@@ -804,20 +804,22 @@
 </ClCompile>
 <ClInclude Include="..\..\src\beast\beast\nudb.h">
 </ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\api.h">
+</ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\common.h">
+</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\create.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\arena.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\bucket.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffers.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffer.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\bulkio.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\cache.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\config.h">
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\field.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\format.h">
@@ -826,21 +828,19 @@
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\pool.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\posix_file.h">
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\stream.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\win32_file.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\detail\varint.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\error.h">
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\file.h">
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\mode.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\identity_codec.h">
 </ClInclude>
 <ClCompile Include="..\..\src\beast\beast\nudb\nudb.cpp">
 <ExcludedFromBuild>True</ExcludedFromBuild>
 </ClCompile>
+<ClInclude Include="..\..\src\beast\beast\nudb\posix_file.h">
+</ClInclude>
 <None Include="..\..\src\beast\beast\nudb\README.md">
 </None>
 <ClInclude Include="..\..\src\beast\beast\nudb\recover.h">
@@ -860,6 +860,9 @@
 <ClCompile Include="..\..\src\beast\beast\nudb\tests\store_test.cpp">
 <ExcludedFromBuild>True</ExcludedFromBuild>
 </ClCompile>
+<ClCompile Include="..\..\src\beast\beast\nudb\tests\varint_test.cpp">
+<ExcludedFromBuild>True</ExcludedFromBuild>
+</ClCompile>
 <ClCompile Include="..\..\src\beast\beast\nudb\tests\verify_test.cpp">
 <ExcludedFromBuild>True</ExcludedFromBuild>
 </ClCompile>
@@ -867,6 +870,8 @@
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\visit.h">
 </ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\win32_file.h">
+</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\random\rngfill.h">
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\random\xor_shift_engine.h">

File: Visual Studio project filters (.vcxproj.filters)

@@ -1437,6 +1437,12 @@
 <ClInclude Include="..\..\src\beast\beast\nudb.h">
 <Filter>beast</Filter>
 </ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\api.h">
+<Filter>beast\nudb</Filter>
+</ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\common.h">
+<Filter>beast\nudb</Filter>
+</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\create.h">
 <Filter>beast\nudb</Filter>
 </ClInclude>
@@ -1446,7 +1452,7 @@
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\bucket.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffers.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\detail\buffer.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\bulkio.h">
@@ -1455,9 +1461,6 @@
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\cache.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\config.h">
-<Filter>beast\nudb\detail</Filter>
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\field.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
@@ -1470,27 +1473,24 @@
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\pool.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\posix_file.h">
-<Filter>beast\nudb\detail</Filter>
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\detail\stream.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\detail\win32_file.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\detail\varint.h">
 <Filter>beast\nudb\detail</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\error.h">
-<Filter>beast\nudb</Filter>
-</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\nudb\file.h">
 <Filter>beast\nudb</Filter>
 </ClInclude>
-<ClInclude Include="..\..\src\beast\beast\nudb\mode.h">
+<ClInclude Include="..\..\src\beast\beast\nudb\identity_codec.h">
 <Filter>beast\nudb</Filter>
 </ClInclude>
 <ClCompile Include="..\..\src\beast\beast\nudb\nudb.cpp">
 <Filter>beast\nudb</Filter>
 </ClCompile>
+<ClInclude Include="..\..\src\beast\beast\nudb\posix_file.h">
+<Filter>beast\nudb</Filter>
+</ClInclude>
 <None Include="..\..\src\beast\beast\nudb\README.md">
 <Filter>beast\nudb</Filter>
 </None>
@@ -1515,6 +1515,9 @@
 <ClCompile Include="..\..\src\beast\beast\nudb\tests\store_test.cpp">
 <Filter>beast\nudb\tests</Filter>
 </ClCompile>
+<ClCompile Include="..\..\src\beast\beast\nudb\tests\varint_test.cpp">
+<Filter>beast\nudb\tests</Filter>
+</ClCompile>
 <ClCompile Include="..\..\src\beast\beast\nudb\tests\verify_test.cpp">
 <Filter>beast\nudb\tests</Filter>
 </ClCompile>
@@ -1524,6 +1527,9 @@
 <ClInclude Include="..\..\src\beast\beast\nudb\visit.h">
 <Filter>beast\nudb</Filter>
 </ClInclude>
+<ClInclude Include="..\..\src\beast\beast\nudb\win32_file.h">
+<Filter>beast\nudb</Filter>
+</ClInclude>
 <ClInclude Include="..\..\src\beast\beast\random\rngfill.h">
 <Filter>beast\random</Filter>
 </ClInclude>

File: src/beast/beast/nudb.h

@@ -20,10 +20,10 @@
 #ifndef BEAST_NUDB_H_INCLUDED
 #define BEAST_NUDB_H_INCLUDED

+#include <beast/nudb/api.h>
 #include <beast/nudb/create.h>
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/file.h>
-#include <beast/nudb/mode.h>
 #include <beast/nudb/recover.h>
 #include <beast/nudb/store.h>
 #include <beast/nudb/verify.h>

File: src/beast/beast/nudb/README.md

@@ -167,16 +167,23 @@ fixed-length Bucket Records.
     char[8]         Type            The characters "nudb.key"
     uint16          Version         Holds the version number
+    uint64          UID             Unique ID generated on creation
     uint64          Appnum          Application defined constant
+    uint16          KeySize         Key size in bytes
     uint64          Salt            A random seed
     uint64          Pepper          The salt hashed
-    uint16          KeySize         Key size in bytes
     uint16          BlockSize       Size of a file block in bytes
     uint16          LoadFactor      Target fraction in 65536ths
-    uint8[64]       Reserved        Zeroes
+    uint8[56]       Reserved        Zeroes
     uint8[]         Reserved        Zero-pad to block size

-The Type identifies the file as belonging to nudb. Salt is
+The Type identifies the file as belonging to nudb. The UID is
+generated randomly when the database is created, and this value
+is stored in the data and log files as well. The UID is used
+to determine if files belong to the same database. Salt is
 generated when the database is created and helps prevent
 complexity attacks; the salt is prepended to the key material
 when computing a hash, or used to initialize the state of
@@ -197,7 +204,8 @@ bucket, and defines the size of a bucket record. The load factor
 is the target fraction of bucket occupancy.

 None of the information in the key file header or the data file
-header may be changed after the database is created.
+header may be changed after the database is created, including
+the Appnum.

 #### Bucket Record (fixed-length)
@@ -209,7 +217,7 @@ header may be changed after the database is created.
     uint48          Offset          Offset in data file of the data
     uint48          Size            The size of the value in bytes
-    uint8[KeySize]  Key             The key
+    uint48          Hash            The hash of the key

 ### Data File
@@ -220,14 +228,15 @@ variable-length Value Records and Spill Records.
     char[8]         Type            The characters "nudb.dat"
     uint16          Version         Holds the version number
+    uint64          UID             Unique ID generated on creation
     uint64          Appnum          Application defined constant
-    uint64          Salt            A random seed
     uint16          KeySize         Key size in bytes
     uint8[64]       Reserved        Zeroes

-Salt contains the same value as the salt in the corresponding
-key file. This is placed in the data file so that key and value
-files belonging to the same database can be identified.
+UID contains the same value as the UID in the corresponding key
+file. This is placed in the data file so that key and value files
+belonging to the same database can be identified.

 #### Data Record (variable-length)
@@ -244,15 +253,24 @@ files belonging to the same database can be identified.
 ### Log File

 The Log file contains the Header followed by zero or more fixed size
-log records.
+log records. Each log record contains a snapshot of a bucket. When a
+database is not closed cleanly, the recovery process applies the log
+records to the key file, overwriting data that may be only partially
+updated with known good information. After the log records are applied,
+the data and key files are truncated to the last known good size.

-#### Header (44 bytes)
+#### Header (62 bytes)

     char[8]         Type            The characters "nudb.log"
     uint16          Version         Holds the version number
+    uint64          UID             Unique ID generated on creation
     uint64          Appnum          Application defined constant
+    uint16          KeySize         Key size in bytes
     uint64          Salt            A random seed.
     uint64          Pepper          The salt hashed
-    uint16          KeySize         Key size in bytes
+    uint16          BlockSize       Size of a file block in bytes
     uint64          KeyFileSize     Size of key file.
     uint64          DataFileSize    Size of data file.
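
(The revised size follows from the listed fields: 8 Type + 2 Version +
8 UID + 8 Appnum + 2 KeySize + 8 Salt + 8 Pepper + 2 BlockSize +
8 KeyFileSize + 8 DataFileSize = 62 bytes.)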

File: src/beast/beast/nudb/api.h (new file, 109 lines)

@@ -0,0 +1,109 @@
//------------------------------------------------------------------------------
/*
    This file is part of Beast: https://github.com/vinniefalco/Beast
    Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#ifndef BEAST_NUDB_API_H_INCLUDED
#define BEAST_NUDB_API_H_INCLUDED

#include <beast/nudb/create.h>
#include <beast/nudb/store.h>
#include <beast/nudb/recover.h>
#include <beast/nudb/verify.h>
#include <beast/nudb/visit.h>
#include <cstdint>

namespace beast {
namespace nudb {

// Convenience for consolidating template arguments
//
template <
    class Hasher,
    class Codec,
    class File = native_file,
    std::size_t BufferSize = 16 * 1024 * 1024
>
struct api
{
    using hash_type = Hasher;
    using codec_type = Codec;
    using file_type = File;
    using store = nudb::store<Hasher, Codec, File>;

    static std::size_t const buffer_size = BufferSize;

    template <class... Args>
    static
    bool
    create (
        path_type const& dat_path,
        path_type const& key_path,
        path_type const& log_path,
        std::uint64_t appnum,
        std::uint64_t salt,
        std::size_t key_size,
        std::size_t block_size,
        float load_factor,
        Args&&... args)
    {
        return nudb::create<Hasher, Codec, File>(
            dat_path, key_path, log_path,
            appnum, salt, key_size, block_size,
            load_factor, args...);
    }

    template <class... Args>
    static
    bool
    recover (
        path_type const& dat_path,
        path_type const& key_path,
        path_type const& log_path,
        Args&&... args)
    {
        return nudb::recover<Hasher, Codec, File>(
            dat_path, key_path, log_path, BufferSize,
            args...);
    }

    static
    verify_info
    verify (
        path_type const& dat_path,
        path_type const& key_path)
    {
        return nudb::verify<Hasher>(
            dat_path, key_path, BufferSize);
    }

    template <class Function>
    static
    bool
    visit(
        path_type const& path,
        Function&& f)
    {
        return nudb::visit<Codec>(
            path, BufferSize, f);
    }
};

} // nudb
} // beast

#endif
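
A usage sketch under stated assumptions (beast::xxhasher already
exists in the tree, identity_codec.h is added by this commit; the
paths and parameter values below are illustrative):

    // Hypothetical instantiation of the api traits.
    using db = beast::nudb::api<
        beast::xxhasher,                // Hasher
        beast::nudb::identity_codec>;   // Codec: no compression

    bool const created = db::create(
        "test.dat", "test.key", "test.log",
        1,                              // appnum
        beast::nudb::make_salt(),
        32,                             // key size in bytes
        beast::nudb::block_size("."),   // e.g. 4096
        0.5f);                          // load factor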

File: src/beast/beast/nudb/common.h (renamed from error.h)

@@ -17,32 +17,48 @@
 */
 //==============================================================================

-#ifndef BEAST_NUDB_ERROR_H_INCLUDED
-#define BEAST_NUDB_ERROR_H_INCLUDED
+#ifndef BEAST_NUDB_COMMON_H_INCLUDED
+#define BEAST_NUDB_COMMON_H_INCLUDED

-#include <beast/nudb/detail/config.h>
-#include <beast/utility/noexcept.h>
 #include <stdexcept>
 #include <string>

 namespace beast {
 namespace nudb {

+// Commonly used types
+
+enum class file_mode
+{
+    scan,         // read sequential
+    read,         // read random
+    append,       // read random, write append
+    write         // read random, write random
+};
+
+using path_type = std::string;
+
 // All exceptions thrown by nudb are derived
-// from std::exception except for fail_error
+// from std::runtime_error except for fail_error

+/** Thrown when a codec fails, e.g. corrupt data. */
+struct codec_error : std::runtime_error
+{
+    template <class String>
+    explicit
+    codec_error (String const& s)
+        : runtime_error(s)
+    {
+    }
+};
+
 /** Base class for all errors thrown by file classes. */
 struct file_error : std::runtime_error
 {
+    template <class String>
     explicit
-    file_error (char const* s)
-        : std::runtime_error(s)
-    {
-    }
-
-    explicit
-    file_error (std::string const& s)
-        : std::runtime_error(s)
+    file_error (String const& s)
+        : runtime_error(s)
     {
     }
 };
@@ -67,21 +83,24 @@ struct file_short_write_error : file_error
     }
 };

+/** Thrown when end of istream reached while reading. */
+struct short_read_error : std::runtime_error
+{
+    short_read_error()
+        : std::runtime_error(
+            "nudb: short read")
+    {
+    }
+};
+
 /** Base class for all exceptions thrown by store. */
 class store_error : public std::runtime_error
 {
 public:
+    template <class String>
     explicit
-    store_error (char const* m)
-        : std::runtime_error(
-            std::string("nudb: ") + m)
-    {
-    }
-
-    explicit
-    store_error (std::string const& m)
-        : std::runtime_error(
-            std::string("nudb: ") + m)
+    store_error (String const& s)
+        : runtime_error(s)
     {
     }
 };
@@ -90,15 +109,10 @@ public:
 class store_corrupt_error : public store_error
 {
 public:
+    template <class String>
     explicit
-    store_corrupt_error (char const* m)
-        : store_error (m)
-    {
-    }
-
-    explicit
-    store_corrupt_error (std::string const& m)
-        : store_error (m)
+    store_corrupt_error (String const& s)
+        : store_error(s)
     {
     }
 };

File: src/beast/beast/nudb/create.h

@@ -22,16 +22,49 @@
 #include <beast/nudb/file.h>
 #include <beast/nudb/detail/bucket.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <algorithm>
 #include <cstring>
+#include <random>
 #include <stdexcept>
 #include <utility>

 namespace beast {
 namespace nudb {

+namespace detail {
+
+template <class = void>
+std::uint64_t
+make_uid()
+{
+    std::random_device rng;
+    std::mt19937_64 gen {rng()};
+    std::uniform_int_distribution <std::size_t> dist;
+    return dist(gen);
+}
+
+}
+
+/** Generate a random salt. */
+template <class = void>
+std::uint64_t
+make_salt()
+{
+    std::random_device rng;
+    std::mt19937_64 gen {rng()};
+    std::uniform_int_distribution <std::size_t> dist;
+    return dist(gen);
+}
+
+/** Returns the best guess at the volume's block size. */
+inline
+std::size_t
+block_size (path_type const& /*path*/)
+{
+    return 4096;
+}
+
 /** Create a new database.
     Preconditions:
         The files must not exist
@@ -40,7 +73,12 @@ namespace nudb {
     @param args Arguments passed to File constructors
     @return `false` if any file could not be created.
 */
-template <class Hasher = default_hash>
+template <
+    class Hasher,
+    class Codec,
+    class File,
+    class... Args
+>
 bool
 create (
     path_type const& dat_path,
@@ -50,10 +88,10 @@ create (
     std::uint64_t salt,
     std::size_t key_size,
     std::size_t block_size,
-    float load_factor)
+    float load_factor,
+    Args&&... args)
 {
     using namespace detail;
-    using File = native_file;
     if (key_size < 1)
         throw std::domain_error(
             "invalid key size");
@@ -67,15 +105,13 @@ create (
         throw std::domain_error(
             "nudb: load factor too large");
     auto const capacity =
-        bucket_capacity(key_size, block_size);
+        bucket_capacity(block_size);
     if (capacity < 1)
         throw std::domain_error(
             "nudb: block size too small");
-    File df;
-    File kf;
-    File lf;
-    for(;;)
-    {
+    File df(args...);
+    File kf(args...);
+    File lf(args...);
     if (df.create(
         file_mode::append, dat_path))
     {
@@ -84,26 +120,26 @@ create (
         {
             if (lf.create(
                 file_mode::append, log_path))
-                break;
+                goto success;
             File::erase (dat_path);
         }
         File::erase (key_path);
     }
     return false;
-    }
+success:
     dat_file_header dh;
     dh.version = currentVersion;
+    dh.uid = make_uid();
     dh.appnum = appnum;
-    dh.salt = salt;
     dh.key_size = key_size;

     key_file_header kh;
     kh.version = currentVersion;
+    kh.uid = dh.uid;
     kh.appnum = appnum;
-    kh.key_size = key_size;
     kh.salt = salt;
     kh.pepper = pepper<Hasher>(salt);
+    kh.key_size = key_size;
     kh.block_size = block_size;
     // VFALCO Should it be 65536?
     //        How do we set the min?
@@ -113,8 +149,7 @@ create (
     write (kf, kh);
     buffer buf(block_size);
     std::memset(buf.get(), 0, block_size);
-    bucket b (key_size, block_size,
-        buf.get(), empty);
+    bucket b (block_size, buf.get(), empty);
     b.write (kf, block_size);
     // VFALCO Leave log file empty?
     df.sync();

File: src/beast/beast/nudb/detail/arena.h

@@ -20,7 +20,6 @@
 #ifndef BEAST_NUDB_DETAIL_ARENA_H_INCLUDED
 #define BEAST_NUDB_DETAIL_ARENA_H_INCLUDED

-#include <beast/nudb/detail/config.h>
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>

File: src/beast/beast/nudb/detail/bucket.h

@@ -20,12 +20,11 @@
 #ifndef BEAST_NUDB_DETAIL_BUCKET_H_INCLUDED
 #define BEAST_NUDB_DETAIL_BUCKET_H_INCLUDED

-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/detail/bulkio.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/field.h>
 #include <beast/nudb/detail/format.h>
-#include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <cstring>
@@ -33,21 +32,7 @@ namespace beast {
 namespace nudb {
 namespace detail {

-// Key, hash, and bucket calculations:
-
-// Returns the hash of a key given the salt
-//
-template <class Hasher>
-inline
-typename Hasher::result_type
-hash (void const* key,
-    std::size_t key_size, std::size_t salt)
-{
-    Hasher h (salt);
-    h.append (key, key_size);
-    return static_cast<
-        typename Hasher::result_type>(h);
-}
+// bucket calculations:

 // Returns bucket index given hash, buckets, and modulus
 //
@@ -62,30 +47,6 @@ bucket_index (std::size_t h,
     return n;
 }

-// Returns the bucket index of a key
-//
-template <class Hasher>
-inline
-std::size_t
-bucket_index (void const* key, std::size_t key_size,
-    std::size_t salt, std::size_t buckets,
-        std::size_t modulus)
-{
-    return bucket_index (hash<Hasher>
-        (key, key_size, salt), buckets, modulus);
-}
-
-// Returns the bucket index of a key
-// given the key file header
-template <class Hasher>
-inline
-std::size_t
-bucket_index (void const* key, key_file_header const& kh)
-{
-    return bucket_index<Hasher>(key, kh.key_size,
-        kh.salt, kh.buckets, kh.modulus);
-}
-
 //------------------------------------------------------------------------------

 // Tag for constructing empty buckets
@@ -97,9 +58,8 @@ template <class = void>
 class bucket_t
 {
 private:
-    std::size_t key_size_;      // Size of key in bytes
     std::size_t block_size_;    // Size of a key file block
-    std::size_t count_;         // Current key count
+    std::size_t size_;          // Current key count
     std::size_t spill_;         // Offset of next spill record or 0
     std::uint8_t* p_;           // Pointer to the bucket blob
@@ -108,23 +68,15 @@ public:
     {
         std::size_t offset;
         std::size_t size;
-        void const* key;
+        std::size_t hash;
     };

     bucket_t (bucket_t const&) = default;
     bucket_t& operator= (bucket_t const&) = default;

-    bucket_t (std::size_t key_size,
-        std::size_t block_size, void* p);
+    bucket_t (std::size_t block_size, void* p);

-    bucket_t (std::size_t key_size,
-        std::size_t block_size, void* p, empty_t);
-
-    std::size_t
-    key_size() const
-    {
-        return key_size_;
-    }
+    bucket_t (std::size_t block_size, void* p, empty_t);

     std::size_t
     block_size() const
@@ -135,44 +87,46 @@ public:
     std::size_t
     compact_size() const
     {
-        return detail::compact_size(
-            key_size_, count_);
+        return detail::bucket_size(size_);
     }

     bool
     empty() const
     {
-        return count_ == 0;
+        return size_ == 0;
     }

     bool
     full() const
     {
-        return count_ >= detail::bucket_capacity(
-            key_size_, block_size_);
+        return size_ >=
+            detail::bucket_capacity(block_size_);
     }

     std::size_t
     size() const
     {
-        return count_;
+        return size_;
     }

     // Returns offset of next spill record or 0
+    //
     std::size_t
     spill() const
     {
         return spill_;
     }

-    // Clear contents of the bucket
-    void
-    clear();
-
     // Set offset of next spill record
+    //
     void
     spill (std::size_t offset);

+    // Clear contents of the bucket
+    //
+    void
+    clear();
+
     // Returns the record for a key
     // entry without bounds checking.
     //
@@ -185,12 +139,15 @@ public:
         return at(i);
     }

-    std::pair<value_type, bool>
-    find (void const* key) const;
+    // Returns index of entry with prefix
+    // equal to or greater than the given prefix.
+    //
+    std::size_t
+    lower_bound (std::size_t h) const;

     void
     insert (std::size_t offset,
-        std::size_t size, void const* key);
+        std::size_t size, std::size_t h);

     // Erase an element by index
     //
@@ -227,45 +184,31 @@ private:
     // Update size and spill in the blob
     void
     update();
-
-    std::pair<std::size_t, bool>
-    lower_bound (void const* key) const;
 };

 //------------------------------------------------------------------------------

 template <class _>
-bucket_t<_>::bucket_t (std::size_t key_size,
-    std::size_t block_size, void* p)
-    : key_size_ (key_size)
-    , block_size_ (block_size)
+bucket_t<_>::bucket_t (
+    std::size_t block_size, void* p)
+    : block_size_ (block_size)
     , p_ (reinterpret_cast<std::uint8_t*>(p))
 {
     // Bucket Record
     istream is(p_, block_size);
-    detail::read<uint16_t>(is, count_);     // Count
+    detail::read<uint16_t>(is, size_);      // Count
     detail::read<uint48_t>(is, spill_);     // Spill
 }

 template <class _>
-bucket_t<_>::bucket_t (std::size_t key_size,
-    std::size_t block_size, void* p, empty_t)
-    : key_size_ (key_size)
-    , block_size_ (block_size)
-    , count_ (0)
+bucket_t<_>::bucket_t (
+    std::size_t block_size, void* p, empty_t)
+    : block_size_ (block_size)
+    , size_ (0)
     , spill_ (0)
     , p_ (reinterpret_cast<std::uint8_t*>(p))
 {
-    update();
-}
-
-template <class _>
-void
-bucket_t<_>::clear()
-{
-    count_ = 0;
-    spill_ = 0;
-    update();
+    clear();
 }

 template <class _>
@@ -276,6 +219,15 @@ bucket_t<_>::spill (std::size_t offset)
     update();
 }

+template <class _>
+void
+bucket_t<_>::clear()
+{
+    size_ = 0;
+    spill_ = 0;
+    std::memset(p_, 0, block_size_);
+}
+
 template <class _>
 auto
 bucket_t<_>::at (std::size_t i) const ->
@@ -286,7 +238,7 @@ bucket_t<_>::at (std::size_t i) const ->
     std::size_t const w =
         field<uint48_t>::size +         // Offset
         field<uint48_t>::size +         // Size
-        key_size_;                      // Key
+        field<hash_t>::size;            // Prefix
     // Bucket Record
     detail::istream is(p_ +
         field<std::uint16_t>::size +    // Count
@@ -297,54 +249,80 @@ bucket_t<_>::at (std::size_t i) const ->
         is, result.offset);             // Offset
     detail::read<uint48_t>(
         is, result.size);               // Size
-    result.key = is.data(key_size_);    // Key
+    detail::read<hash_t>(
+        is, result.hash);               // Hash
     return result;
 }

 template <class _>
-auto
-bucket_t<_>::find (void const* key) const ->
-    std::pair<value_type, bool>
-{
-    std::pair<value_type, bool> result;
-    std::size_t i;
-    std::tie(i, result.second) = lower_bound(key);
-    if (result.second)
-        result.first = at(i);
-    return result;
+std::size_t
+bucket_t<_>::lower_bound (
+    std::size_t h) const
+{
+    // Bucket Entry
+    auto const w =
+        field<uint48_t>::size +         // Offset
+        field<uint48_t>::size +         // Size
+        field<hash_t>::size;            // Hash
+    // Bucket Record
+    auto const p = p_ +
+        field<std::uint16_t>::size +    // Count
+        field<uint48_t>::size +         // Spill
+        // Bucket Entry
+        field<uint48_t>::size +         // Offset
+        field<uint48_t>::size;          // Size
+    std::size_t step;
+    std::size_t first = 0;
+    std::size_t count = size_;
+    while (count > 0)
+    {
+        step = count / 2;
+        auto const i = first + step;
+        std::size_t h1;
+        readp<hash_t>(p + i * w, h1);
+        if (h1 < h)
+        {
+            first = i + 1;
+            count -= step + 1;
+        }
+        else
+        {
+            count = step;
+        }
+    }
+    return first;
 }

 template <class _>
 void
 bucket_t<_>::insert (std::size_t offset,
-    std::size_t size, void const* key)
+    std::size_t size, std::size_t h)
 {
-    bool found;
-    std::size_t i;
-    std::tie(i, found) = lower_bound(key);
-    (void)found;
-    assert(! found);
+    std::size_t i = lower_bound(h);
     // Bucket Record
     auto const p = p_ +
-        field<std::uint16_t>::size +    // Count
+        field<
+            std::uint16_t>::size +      // Count
         field<uint48_t>::size;          // Spill
     // Bucket Entry
     std::size_t const w =
         field<uint48_t>::size +         // Offset
         field<uint48_t>::size +         // Size
-        key_size_;                      // Key
+        field<hash_t>::size;            // Hash
     std::memmove (
         p + (i + 1) * w,
         p + i * w,
-        (count_ - i) * w);
-    count_++;
+        (size_ - i) * w);
+    size_++;
     update();
     // Bucket Entry
     ostream os (p + i * w, w);
-    detail::write<uint48_t>(os, offset);    // Offset
-    detail::write<uint48_t>(os, size);      // Size
-    std::memcpy (os.data(key_size_),
-        key, key_size_);                    // Key
+    detail::write<uint48_t>(
+        os, offset);                    // Offset
+    detail::write<uint48_t>(
+        os, size);                      // Size
+    detail::write<hash_t>(
+        os, h);                         // Prefix
 }

 template <class _>
@@ -353,18 +331,20 @@ bucket_t<_>::erase (std::size_t i)
 {
     // Bucket Record
     auto const p = p_ +
-        field<std::uint16_t>::size +    // Count
+        field<
+            std::uint16_t>::size +      // Count
         field<uint48_t>::size;          // Spill
     auto const w =
         field<uint48_t>::size +         // Offset
         field<uint48_t>::size +         // Size
-        key_size_;                      // Key
-    --count_;
-    if (i != count_)
+        field<hash_t>::size;            // Hash
+    --size_;
+    if (i < size_)
         std::memmove(
             p + i * w,
             p + (i + 1) * w,
-            (count_ - i) * w);
+            (size_ - i) * w);
+    std::memset(p + size_ * w, 0, w);
     update();
 }
@@ -374,17 +354,15 @@ void
 bucket_t<_>::read (File& f, std::size_t offset)
 {
     auto const cap = bucket_capacity (
-        key_size_, block_size_);
+        block_size_);
     // Excludes padding to block size
-    f.read (offset, p_, bucket_size(
-        key_size_, bucket_capacity(
-            key_size_, block_size_)));
+    f.read (offset, p_, bucket_size(cap));
     istream is(p_, block_size_);
     detail::read<
-        std::uint16_t>(is, count_);     // Count
+        std::uint16_t>(is, size_);      // Count
     detail::read<
         uint48_t>(is, spill_);          // Spill
-    if (count_ > cap)
+    if (size_ > cap)
         throw store_corrupt_error(
             "bad bucket size");
 }
@@ -399,17 +377,19 @@ bucket_t<_>::read (bulk_reader<File>& r)
         detail::field<std::uint16_t>::size +
             detail::field<uint48_t>::size);
     detail::read<
-        std::uint16_t>(is, count_);     // Count
-    detail::read<uint48_t>(is, spill_); // Spill
+        std::uint16_t>(is, size_);      // Count
+    detail::read<uint48_t>(
+        is, spill_);                    // Spill
     update();
     // Excludes empty bucket entries
-    auto const w = count_ * (
+    auto const w = size_ * (
         field<uint48_t>::size +         // Offset
         field<uint48_t>::size +         // Size
-        key_size_);                     // Key
+        field<hash_t>::size);           // Hash
     is = r.prepare (w);
     std::memcpy(p_ +
-        field<std::uint16_t>::size +    // Count
+        field<
+            std::uint16_t>::size +      // Count
         field<uint48_t>::size,          // Spill
         is.data(w), w);                 // Entries
 }
@@ -447,56 +427,40 @@ bucket_t<_>::update()
     // Bucket Record
     ostream os(p_, block_size_);
     detail::write<
-        std::uint16_t>(os, count_);     // Count
+        std::uint16_t>(os, size_);      // Count
     detail::write<
         uint48_t>(os, spill_);          // Spill
 }

-// bool is true if key matches index
-template <class _>
-std::pair<std::size_t, bool>
-bucket_t<_>::lower_bound (
-    void const* key) const
-{
-    // Bucket Entry
-    auto const w =
-        field<uint48_t>::size +         // Offset
-        field<uint48_t>::size +         // Size
-        key_size_;                      // Key
-    // Bucket Record
-    auto const p = p_ +
-        field<std::uint16_t>::size +    // Count
-        field<uint48_t>::size +         // Spill
-        // Bucket Entry
-        field<uint48_t>::size +         // Offset
-        field<uint48_t>::size;          // Size
-    std::size_t step;
-    std::size_t first = 0;
-    std::size_t count = count_;
-    while (count > 0)
-    {
-        step = count / 2;
-        auto const i = first + step;
-        auto const c = std::memcmp (
-            p + i * w, key, key_size_);
-        if (c < 0)
-        {
-            first = i + 1;
-            count -= step + 1;
-        }
-        else if (c > 0)
-        {
-            count = step;
-        }
-        else
-        {
-            return std::make_pair (i, true);
-        }
-    }
-    return std::make_pair (first, false);
-}
-
 using bucket = bucket_t<>;

+// Spill bucket if full.
+// The bucket is cleared after it spills.
+//
+template <class File>
+void
+maybe_spill(bucket& b, bulk_writer<File>& w)
+{
+    if (b.full())
+    {
+        // Spill Record
+        auto const offset = w.offset();
+        auto os = w.prepare(
+            field<uint48_t>::size +     // Zero
+            field<uint16_t>::size +     // Size
+            b.compact_size());
+        write <uint48_t> (os, 0);       // Zero
+        write <std::uint16_t> (
+            os, b.compact_size());      // Size
+        auto const spill =
+            offset + os.size();
+        b.write (os);                   // Bucket
+        // Update bucket
+        b.clear();
+        b.spill (spill);
+    }
+}
+
 } // detail
 } // nudb
 } // beast

File: src/beast/beast/nudb/detail/buffer.h (new file)

@@ -0,0 +1,99 @@
//------------------------------------------------------------------------------
/*
    This file is part of Beast: https://github.com/vinniefalco/Beast
    Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#ifndef BEAST_NUDB_DETAIL_BUFFER_H_INCLUDED
#define BEAST_NUDB_DETAIL_BUFFER_H_INCLUDED

#include <atomic>
#include <cstdint>
#include <memory>

namespace beast {
namespace nudb {
namespace detail {

// Simple growable memory buffer
class buffer
{
private:
    std::size_t size_ = 0;
    std::unique_ptr<std::uint8_t[]> buf_;

public:
    ~buffer() = default;
    buffer() = default;
    buffer (buffer const&) = delete;
    buffer& operator= (buffer const&) = delete;

    explicit
    buffer (std::size_t n)
        : size_ (n)
        , buf_ (new std::uint8_t[n])
    {
    }

    buffer (buffer&& other)
        : size_ (other.size_)
        , buf_ (std::move(other.buf_))
    {
        other.size_ = 0;
    }

    buffer& operator= (buffer&& other)
    {
        size_ = other.size_;
        buf_ = std::move(other.buf_);
        other.size_ = 0;
        return *this;
    }

    std::size_t
    size() const
    {
        return size_;
    }

    std::uint8_t*
    get() const
    {
        return buf_.get();
    }

    void
    reserve (std::size_t n)
    {
        if (size_ < n)
            buf_.reset (new std::uint8_t[n]);
        size_ = n;
    }

    // BufferFactory
    void*
    operator() (std::size_t n)
    {
        reserve(n);
        return buf_.get();
    }
};

} // detail
} // nudb
} // beast

#endif
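
The trailing operator() is what lets a buffer act as the
BufferFactory handed to codecs; a minimal usage sketch:

    beast::nudb::detail::buffer buf;
    void* out = buf(4096);  // grows to 4096 bytes, returns storage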

File: src/beast/beast/nudb/detail/buffers.h (deleted)

@@ -1,147 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_BUFFERS_H_INCLUDED
#define BEAST_NUDB_DETAIL_BUFFERS_H_INCLUDED
#include <beast/nudb/detail/config.h>
#include <atomic>
#include <mutex>
#include <new>
namespace beast {
namespace nudb {
namespace detail {
// Thread safe pool of temp buffers,
// to avoid needless calls to malloc.
template <class = void>
class buffers_t
{
private:
struct element
{
element* next;
};
std::size_t const block_size_;
std::mutex m_;
element* h_ = nullptr;
public:
class value_type
{
private:
buffers_t& b_;
element* e_;
public:
value_type (value_type const&) = delete;
value_type& operator= (value_type const&) = delete;
explicit
value_type (buffers_t& b)
: b_ (b)
, e_ (b.acquire())
{
}
~value_type()
{
b_.release(e_);
}
std::uint8_t*
get() const
{
return const_cast <std::uint8_t*>(
reinterpret_cast<
std::uint8_t const*>(e_ + 1));
}
};
explicit
buffers_t (std::size_t block_size);
~buffers_t();
private:
element*
acquire();
void
release (element* e);
};
template <class _>
buffers_t<_>::buffers_t (std::size_t block_size)
: block_size_ (block_size)
, h_ (nullptr)
{
}
template <class _>
buffers_t<_>::~buffers_t()
{
for (element* e = h_; e;)
{
element* const next = e->next;
e->~element();
delete[] reinterpret_cast<
std::uint8_t*>(e);
e = next;
}
}
template <class _>
auto
buffers_t<_>::acquire() ->
element*
{
{
std::lock_guard<std::mutex> m(m_);
element* e = h_;
if (e)
{
h_ = e->next;
return e;
}
}
return ::new(
new std::uint8_t[
sizeof(element) + block_size_]
) element;
}
template <class _>
void
buffers_t<_>::release (element* e)
{
std::lock_guard<std::mutex> m(m_);
e->next = h_;
h_ = e;
}
using buffers = buffers_t<>;
} // detail
} // nudb
} // beast
#endif

File: src/beast/beast/nudb/detail/bulkio.h

@@ -20,7 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_BULKIO_H_INCLUDED
 #define BEAST_NUDB_DETAIL_BULKIO_H_INCLUDED

-#include <beast/nudb/detail/config.h>
+#include <beast/nudb/detail/buffer.h>
 #include <beast/nudb/detail/stream.h>
 #include <algorithm>
 #include <cstddef>
@@ -45,10 +45,16 @@ public:
     bulk_reader (File& f, std::size_t offset,
         std::size_t last, std::size_t buffer_size);

+    std::size_t
+    offset() const
+    {
+        return offset_ - avail_;
+    }
+
     bool
     eof() const
     {
-        return offset_ - avail_ == last_;
+        return offset() >= last_;
     }

     istream

File: src/beast/beast/nudb/detail/cache.h

@@ -22,7 +22,6 @@
 #include <beast/nudb/detail/arena.h>
 #include <beast/nudb/detail/bucket.h>
-#include <beast/nudb/detail/config.h>
 #include <boost/iterator/transform_iterator.hpp>
 #include <algorithm>
 #include <cstdint>
@@ -77,8 +76,8 @@ private:
     operator() (argument_type const& e) const
     {
         return std::make_pair(e.first,
-            bucket (cache_->key_size_,
-                cache_->block_size_, e.second));
+            bucket (cache_->block_size_,
+                e.second));
     }
 };
@@ -209,7 +208,7 @@ cache_t<_>::create (std::size_t n)
 {
     auto const p = arena_.alloc (block_size_);
     map_.emplace (n, p);
-    return bucket (key_size_, block_size_,
+    return bucket (block_size_,
         p, detail::empty);
 }

File: src/beast/beast/nudb/detail/config.h (deleted)

@@ -1,75 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_NUDB_DETAIL_CONFIG_H_INCLUDED
#define BEAST_NUDB_DETAIL_CONFIG_H_INCLUDED
#include <beast/hash/xxhasher.h>
// Compiles out domain checks
#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
# ifdef NDEBUG
# define BEAST_NUDB_NO_DOMAIN_CHECK 1
# else
# define BEAST_NUDB_NO_DOMAIN_CHECK 0
# endif
#endif
namespace beast {
namespace nudb {
// xxhasher is the fastest and the best choice
// when keys are already uniformly distributed
using default_hash = xxhasher;
namespace detail {
// Returns the closest power of 2 not less than x
template <class = void>
std::size_t
ceil_pow2 (unsigned long long x)
{
static const unsigned long long t[6] = {
0xFFFFFFFF00000000ull,
0x00000000FFFF0000ull,
0x000000000000FF00ull,
0x00000000000000F0ull,
0x000000000000000Cull,
0x0000000000000002ull
};
int y = (((x & (x - 1)) == 0) ? 0 : 1);
int j = 32;
int i;
for(i = 0; i < 6; i++) {
int k = (((x & t[i]) == 0) ? 0 : j);
y += k;
x >>= k;
j >>= 1;
}
return std::size_t(1)<<y;
}
} // detail
} // nudb
} // beast
#endif

File: src/beast/beast/nudb/detail/field.h

@@ -17,10 +17,9 @@
 */
 //==============================================================================

-#ifndef BEAST_NUDB_DETAIL_FIELD_H_INCLUDED
-#define BEAST_NUDB_DETAIL_FIELD_H_INCLUDED
+#ifndef BEAST_NUDB_FIELD_H_INCLUDED
+#define BEAST_NUDB_FIELD_H_INCLUDED

-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/stream.h>
 #include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
 #include <cstddef>
@@ -85,16 +84,26 @@ struct field <std::uint64_t>
     static std::size_t BEAST_CONSTEXPR max = 0xffffffffffffffff;
 };

-// read field from istream
+// read field from memory
+
+template <class T, class U, std::enable_if_t<
+    std::is_same<T, std::uint8_t>::value>* = nullptr>
+void
+readp (void const* v, U& u)
+{
+    std::uint8_t const* p =
+        reinterpret_cast<std::uint8_t const*>(v);
+    u = *p;
+}

 template <class T, class U, std::enable_if_t<
     std::is_same<T, std::uint16_t>::value>* = nullptr>
 void
-read (istream& is, U& u)
+readp (void const* v, U& u)
 {
-    T t;
     std::uint8_t const* p =
-        is.data(field<T>::size);
+        reinterpret_cast<std::uint8_t const*>(v);
+    T t;
     t = T(*p++)<< 8;
     t = T(*p  ) | t;
     u = t;
@@ -103,25 +112,25 @@ read (istream& is, U& u)
 template <class T, class U, std::enable_if_t<
     std::is_same<T, uint24_t>::value>* = nullptr>
 void
-read (istream& is, U& u)
+readp (void const* v, U& u)
 {
-    T t;
     std::uint8_t const* p =
-        is.data(field<T>::size);
-    t = (T(*p++)<<16) | t;
-    t = (T(*p++)<< 8) | t;
-    t =  T(*p  )      | t;
+        reinterpret_cast<std::uint8_t const*>(v);
+    std::uint32_t t;
+    t =  std::uint32_t(*p++)<<16;
+    t = (std::uint32_t(*p++)<< 8) | t;
+    t =  std::uint32_t(*p  )      | t;
     u = t;
 }

 template <class T, class U, std::enable_if_t<
     std::is_same<T, std::uint32_t>::value>* = nullptr>
 void
-read (istream& is, U& u)
+readp (void const* v, U& u)
 {
-    T t;
     std::uint8_t const* p =
-        is.data(field<T>::size);
+        reinterpret_cast<std::uint8_t const*>(v);
+    T t;
     t =  T(*p++)<<24;
     t = (T(*p++)<<16) | t;
     t = (T(*p++)<< 8) | t;
@@ -132,11 +141,11 @@ read (istream& is, U& u)
 template <class T, class U, std::enable_if_t<
     std::is_same<T, uint48_t>::value>* = nullptr>
 void
-read (istream& is, U& u)
+readp (void const* v, U& u)
 {
-    std::uint64_t t;
     std::uint8_t const* p =
-        is.data(field<T>::size);
+        reinterpret_cast<std::uint8_t const*>(v);
+    std::uint64_t t;
     t = (std::uint64_t(*p++)<<40);
     t = (std::uint64_t(*p++)<<32) | t;
     t = (std::uint64_t(*p++)<<24) | t;
@@ -149,11 +158,11 @@ read (istream& is, U& u)
 template <class T, class U, std::enable_if_t<
     std::is_same<T, std::uint64_t>::value>* = nullptr>
 void
-read (istream& is, U& u)
+readp (void const* v, U& u)
 {
-    T t;
     std::uint8_t const* p =
-        is.data(field<T>::size);
+        reinterpret_cast<std::uint8_t const*>(v);
+    T t;
     t =  T(*p++)<<56;
     t = (T(*p++)<<48) | t;
     t = (T(*p++)<<40) | t;
@@ -165,18 +174,32 @@ read (istream& is, U& u)
     u = t;
 }

+// read field from istream
+
+template <class T, class U>
+void
+read (istream& is, U& u)
+{
+    readp<T>(is.data(field<T>::size), u);
+}
+
 // write field to ostream

+template <class T, class U, std::enable_if_t<
+    std::is_same<T, std::uint8_t>::value>* = nullptr>
+void
+write (ostream& os, U const& u)
+{
+    std::uint8_t* p =
+        os.data(field<T>::size);
+    *p = u;
+}
+
 template <class T, class U, std::enable_if_t<
     std::is_same<T, std::uint16_t>::value>* = nullptr>
 void
 write (ostream& os, U const& u)
 {
-#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
-    if (u > field<T>::max)
-        throw std::logic_error(
-            "nudb: field max exceeded");
-#endif
     T t = u;
     std::uint8_t* p =
         os.data(field<T>::size);
@@ -189,11 +212,6 @@ template <class T, class U,std::enable_if_t<
 void
 write (ostream& os, U const& u)
 {
-#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
-    if (u > field<T>::max)
-        throw std::logic_error(
-            "nudb: field max exceeded");
-#endif
     T t = u;
     std::uint8_t* p =
         os.data(field<T>::size);
@@ -207,11 +225,6 @@ template <class T, class U,std::enable_if_t<
 void
 write (ostream& os, U const& u)
 {
-#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
-    if (u > field<T>::max)
-        throw std::logic_error(
-            "nudb: field max exceeded");
-#endif
     T t = u;
     std::uint8_t* p =
         os.data(field<T>::size);
@@ -226,11 +239,6 @@ template <class T, class U,std::enable_if_t<
 void
 write (ostream& os, U const& u)
 {
-#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
-    if (u > field<T>::max)
-        throw std::logic_error(
-            "nudb: field max exceeded");
-#endif
     std::uint64_t const t = u;
     std::uint8_t* p =
         os.data(field<T>::size);
@@ -247,11 +255,6 @@ template <class T, class U,std::enable_if_t<
 void
 write (ostream& os, U const& u)
 {
-#ifndef BEAST_NUDB_NO_DOMAIN_CHECK
-    if (u > field<T>::max)
-        throw std::logic_error(
-            "nudb: field max exceeded");
-#endif
     T t = u;
     std::uint8_t* p =
         os.data(field<T>::size);

File: src/beast/beast/nudb/detail/format.h

@@ -20,7 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_FORMAT_H_INCLUDED
 #define BEAST_NUDB_DETAIL_FORMAT_H_INCLUDED
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/detail/field.h>
 #include <beast/nudb/detail/stream.h>
 #include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
@@ -38,22 +38,23 @@ namespace detail {
 // Format of the nudb files:
-static std::size_t BEAST_CONSTEXPR currentVersion = 1;
+static std::size_t BEAST_CONSTEXPR currentVersion = 2;
 struct dat_file_header
 {
     static std::size_t BEAST_CONSTEXPR size =
         8 +     // Type
         2 +     // Version
+        8 +     // UID
         8 +     // Appnum
-        8 +     // Salt
         2 +     // KeySize
         64;     // (Reserved)
     char type[8];
     std::size_t version;
+    std::uint64_t uid;
     std::uint64_t appnum;
-    std::uint64_t salt;
     std::size_t key_size;
 };
@@ -62,20 +63,25 @@ struct key_file_header
     static std::size_t BEAST_CONSTEXPR size =
         8 +     // Type
         2 +     // Version
+        8 +     // UID
         8 +     // Appnum
+        2 +     // KeySize
         8 +     // Salt
         8 +     // Pepper
-        2 +     // KeySize
         2 +     // BlockSize
         2 +     // LoadFactor
-        64;     // (Reserved)
+        56;     // (Reserved)
     char type[8];
     std::size_t version;
+    std::uint64_t uid;
     std::uint64_t appnum;
+    std::size_t key_size;
     std::uint64_t salt;
     std::uint64_t pepper;
-    std::size_t key_size;
     std::size_t block_size;
     std::size_t load_factor;
@@ -91,23 +97,65 @@ struct log_file_header
     static std::size_t BEAST_CONSTEXPR size =
         8 +     // Type
         2 +     // Version
+        8 +     // UID
         8 +     // Appnum
+        2 +     // KeySize
         8 +     // Salt
         8 +     // Pepper
-        2 +     // KeySize
+        2 +     // BlockSize
         8 +     // KeyFileSize
         8;      // DataFileSize
     char type[8];
     std::size_t version;
+    std::uint64_t uid;
     std::uint64_t appnum;
+    std::size_t key_size;
     std::uint64_t salt;
     std::uint64_t pepper;
-    std::size_t key_size;
+    std::size_t block_size;
     std::size_t key_file_size;
     std::size_t dat_file_size;
 };
+// Type used to store hashes in buckets.
+// This can be smaller than the output
+// of the hash function.
+//
+using hash_t = uint48_t;
+static_assert(field<hash_t>::size <=
+    sizeof(std::size_t), "");
+template <class T>
+std::size_t
+make_hash (std::size_t h);
+template<>
+inline
+std::size_t
+make_hash<uint48_t>(std::size_t h)
+{
+    return (h>>16)&0xffffffffffff;
+}
+// Returns the hash of a key given the salt.
+// Note: The hash is expressed in hash_t units
+//
+template <class Hasher>
+inline
+std::size_t
+hash (void const* key,
+    std::size_t key_size, std::size_t salt)
+{
+    Hasher h (salt);
+    h.append (key, key_size);
+    return make_hash<hash_t>(static_cast<
+        typename Hasher::result_type>(h));
+}
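
For illustration, a minimal sketch (value invented, not from the commit) of what the reduction above does: make_hash<uint48_t> keeps bits 16..63 of the 64-bit hash, so the 48-bit value stored in a bucket is a contiguous slice of the Hasher output:

    // Hypothetical check, not part of the commit:
    static_assert(((0x0123456789abcdefULL >> 16) &
        0xffffffffffffULL) == 0x0123456789abULL,
        "48-bit bucket hash keeps bits 16..63");
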
 // Computes pepper from salt
 //
 template <class Hasher>
@@ -124,8 +172,7 @@ pepper (std::size_t salt)
 //
 template <class = void>
 std::size_t
-bucket_size (std::size_t key_size,
-    std::size_t capacity)
+bucket_size (std::size_t capacity)
 {
     // Bucket Record
     return
@@ -134,33 +181,14 @@ bucket_size (std::size_t key_size,
         capacity * (
             field<uint48_t>::size + // Offset
             field<uint48_t>::size + // Size
-            key_size);              // Key
+            field<hash_t>::size);   // Hash
 }
-// Returns the size of a bucket large enough to
-// hold size keys of length key_size.
-//
-inline
-std::size_t
-compact_size(std::size_t key_size,
-    std::size_t size)
-{
-    // Bucket Record
-    return
-        field<std::uint16_t>::size +    // Size
-        field<uint48_t>::size +         // Spill
-        size * (
-            field<uint48_t>::size +     // Offset
-            field<uint48_t>::size +     // Size
-            key_size);                  // Key
-}
-// Returns: number of keys that fit in a bucket
+// Returns the number of entries that fit in a bucket
 //
 template <class = void>
 std::size_t
-bucket_capacity (std::size_t key_size,
-    std::size_t block_size)
+bucket_capacity (std::size_t block_size)
 {
     // Bucket Record
     auto const size =
@@ -169,17 +197,18 @@ bucket_capacity (std::size_t key_size,
     auto const entry_size =
         field<uint48_t>::size + // Offset
         field<uint48_t>::size + // Size
-        key_size;               // Key
+        field<hash_t>::size;    // Hash
     if (block_size < key_file_header::size ||
         block_size < size)
         return 0;
     return (block_size - size) / entry_size;
 }
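
A worked example of the new layout (block size assumed, not taken from the commit): the bucket header costs 2 bytes (Size) + 6 bytes (Spill) = 8 bytes, and each entry now costs 6 (Offset) + 6 (Size) + 6 (Hash) = 18 bytes, so with a 4096-byte block bucket_capacity returns (4096 - 8) / 18 = 227 entries per bucket, independent of the key size, which previously appeared in the per-entry cost.
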
-// returns the number of bytes occupied by a value record
+// Returns the number of bytes occupied by a value record
 inline
 std::size_t
-data_size (std::size_t size, std::size_t key_size)
+value_size (std::size_t size,
+    std::size_t key_size)
 {
     // Data Record
     return
@@ -188,6 +217,34 @@ data_size (std::size_t size, std::size_t key_size)
         size; // Data
 }
+// Returns the closest power of 2 not less than x
+template <class = void>
+std::size_t
+ceil_pow2 (unsigned long long x)
+{
+    static const unsigned long long t[6] = {
+        0xFFFFFFFF00000000ull,
+        0x00000000FFFF0000ull,
+        0x000000000000FF00ull,
+        0x00000000000000F0ull,
+        0x000000000000000Cull,
+        0x0000000000000002ull
+    };
+    int y = (((x & (x - 1)) == 0) ? 0 : 1);
+    int j = 32;
+    int i;
+    for(i = 0; i < 6; i++) {
+        int k = (((x & t[i]) == 0) ? 0 : j);
+        y += k;
+        x >>= k;
+        j >>= 1;
+    }
+    return std::size_t(1)<<y;
+}
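
The helper's behavior on a few assumed inputs — it rounds up to the nearest power of two and leaves exact powers unchanged, which is what lets buffers grow geometrically under repeated reserve() calls:

    // Hypothetical spot checks, not part of the commit:
    assert(detail::ceil_pow2(1) == 1);
    assert(detail::ceil_pow2(1000) == 1024);  // rounds up
    assert(detail::ceil_pow2(1024) == 1024);  // exact power unchanged
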
 //------------------------------------------------------------------------------
 // Read data file header from stream
@@ -197,11 +254,12 @@ read (istream& is, dat_file_header& dh)
 {
     read (is, dh.type, sizeof(dh.type));
     read<std::uint16_t>(is, dh.version);
+    read<std::uint64_t>(is, dh.uid);
     read<std::uint64_t>(is, dh.appnum);
-    read<std::uint64_t>(is, dh.salt);
     read<std::uint16_t>(is, dh.key_size);
-    std::array <std::uint8_t, 64> zero;
-    read (is, zero.data(), zero.size());
+    std::array <std::uint8_t, 64> reserved;
+    read (is,
+        reserved.data(), reserved.size());
 }
 // Read data file header from file
@@ -231,12 +289,13 @@ write (ostream& os, dat_file_header const& dh)
 {
     write (os, "nudb.dat", 8);
     write<std::uint16_t>(os, dh.version);
+    write<std::uint64_t>(os, dh.uid);
     write<std::uint64_t>(os, dh.appnum);
-    write<std::uint64_t>(os, dh.salt);
     write<std::uint16_t>(os, dh.key_size);
-    std::array <std::uint8_t, 64> zero;
-    zero.fill(0);
-    write (os, zero.data(), zero.size());
+    std::array <std::uint8_t, 64> reserved;
+    reserved.fill(0);
+    write (os,
+        reserved.data(), reserved.size());
 }
 // Write data file header to file
@@ -259,25 +318,26 @@ read (istream& is, std::size_t file_size,
 {
     read(is, kh.type, sizeof(kh.type));
     read<std::uint16_t>(is, kh.version);
+    read<std::uint64_t>(is, kh.uid);
     read<std::uint64_t>(is, kh.appnum);
+    read<std::uint16_t>(is, kh.key_size);
     read<std::uint64_t>(is, kh.salt);
     read<std::uint64_t>(is, kh.pepper);
-    read<std::uint16_t>(is, kh.key_size);
     read<std::uint16_t>(is, kh.block_size);
     read<std::uint16_t>(is, kh.load_factor);
-    std::array <std::uint8_t, 64> zero;
-    read (is, zero.data(), zero.size());
+    std::array <std::uint8_t, 56> reserved;
+    read (is,
+        reserved.data(), reserved.size());
     // VFALCO These need to be checked to handle
     // when the file size is too small
-    kh.capacity = bucket_capacity(
-        kh.key_size, kh.block_size);
-    kh.bucket_size = bucket_size(
-        kh.key_size, kh.capacity);
+    kh.capacity = bucket_capacity(kh.block_size);
+    kh.bucket_size = bucket_size(kh.capacity);
     if (file_size > kh.block_size)
     {
         // VFALCO This should be handled elsewhere.
-        // we shouldn't put the computed fields in this header.
+        // we shouldn't put the computed fields
+        // in this header.
         if (kh.block_size > 0)
             kh.buckets = (file_size - kh.bucket_size)
                 / kh.block_size;
@@ -319,15 +379,17 @@ write (ostream& os, key_file_header const& kh)
 {
     write (os, "nudb.key", 8);
     write<std::uint16_t>(os, kh.version);
+    write<std::uint64_t>(os, kh.uid);
     write<std::uint64_t>(os, kh.appnum);
+    write<std::uint16_t>(os, kh.key_size);
     write<std::uint64_t>(os, kh.salt);
     write<std::uint64_t>(os, kh.pepper);
-    write<std::uint16_t>(os, kh.key_size);
     write<std::uint16_t>(os, kh.block_size);
     write<std::uint16_t>(os, kh.load_factor);
-    std::array <std::uint8_t, 64> zero;
-    zero.fill (0);
-    write (os, zero.data(), zero.size());
+    std::array <std::uint8_t, 56> reserved;
+    reserved.fill (0);
+    write (os,
+        reserved.data(), reserved.size());
 }
 // Write key file header to file
@@ -353,10 +415,12 @@ read (istream& is, log_file_header& lh)
 {
     read (is, lh.type, sizeof(lh.type));
     read<std::uint16_t>(is, lh.version);
+    read<std::uint64_t>(is, lh.uid);
     read<std::uint64_t>(is, lh.appnum);
+    read<std::uint16_t>(is, lh.key_size);
     read<std::uint64_t>(is, lh.salt);
     read<std::uint64_t>(is, lh.pepper);
-    read<std::uint16_t>(is, lh.key_size);
+    read<std::uint16_t>(is, lh.block_size);
     read<std::uint64_t>(is, lh.key_file_size);
     read<std::uint64_t>(is, lh.dat_file_size);
 }
@@ -381,10 +445,12 @@ write (ostream& os, log_file_header const& lh)
 {
     write (os, "nudb.log", 8);
     write<std::uint16_t>(os, lh.version);
+    write<std::uint64_t>(os, lh.uid);
     write<std::uint64_t>(os, lh.appnum);
+    write<std::uint16_t>(os, lh.key_size);
     write<std::uint64_t>(os, lh.salt);
     write<std::uint64_t>(os, lh.pepper);
-    write<std::uint16_t>(os, lh.key_size);
+    write<std::uint16_t>(os, lh.block_size);
     write<std::uint64_t>(os, lh.key_file_size);
     write<std::uint64_t>(os, lh.dat_file_size);
 }
@@ -401,34 +467,6 @@ write (File& f, log_file_header const& lh)
     f.write (0, buf.data(), buf.size());
 }
-template <class Hasher>
-void
-verify (key_file_header const& kh)
-{
-    std::string const type (kh.type, 8);
-    if (type != "nudb.key")
-        throw store_corrupt_error (
-            "bad type in key file");
-    if (kh.version != currentVersion)
-        throw store_corrupt_error (
-            "bad version in key file");
-    if (kh.pepper != pepper<Hasher>(kh.salt))
-        throw store_corrupt_error(
-            "wrong hash function for key file");
-    if (kh.key_size < 1)
-        throw store_corrupt_error (
-            "bad key size in key file");
-    if (kh.load_factor < 1)
-        throw store_corrupt_error (
-            "bad load factor in key file");
-    if (kh.capacity < 1)
-        throw store_corrupt_error (
-            "bad capacity in key file");
-    if (kh.buckets < 1)
-        throw store_corrupt_error (
-            "bad key file size");
-}
 template <class = void>
 void
 verify (dat_file_header const& dh)
@@ -445,6 +483,34 @@ verify (dat_file_header const& dh)
         "bad key size in data file");
 }
+template <class Hasher>
+void
+verify (key_file_header const& kh)
+{
+    std::string const type (kh.type, 8);
+    if (type != "nudb.key")
+        throw store_corrupt_error (
+            "bad type in key file");
+    if (kh.version != currentVersion)
+        throw store_corrupt_error (
+            "bad version in key file");
+    if (kh.key_size < 1)
+        throw store_corrupt_error (
+            "bad key size in key file");
+    if (kh.pepper != pepper<Hasher>(kh.salt))
+        throw store_corrupt_error(
+            "wrong hash function for key file");
+    if (kh.load_factor < 1)
+        throw store_corrupt_error (
+            "bad load factor in key file");
+    if (kh.capacity < 1)
+        throw store_corrupt_error (
+            "bad capacity in key file");
+    if (kh.buckets < 1)
+        throw store_corrupt_error (
+            "bad key file size");
+}
 template <class Hasher>
 void
 verify (log_file_header const& lh)
@@ -470,17 +536,16 @@ void
 verify (dat_file_header const& dh,
     key_file_header const& kh)
 {
-    verify (dh);
     verify<Hasher> (kh);
-    if (kh.salt != dh.salt)
+    if (kh.uid != dh.uid)
         throw store_corrupt_error(
-            "salt mismatch");
-    if (kh.key_size != dh.key_size)
-        throw store_corrupt_error(
-            "key size mismatch");
+            "uid mismatch");
     if (kh.appnum != dh.appnum)
         throw store_corrupt_error(
             "appnum mismatch");
+    if (kh.key_size != dh.key_size)
+        throw store_corrupt_error(
+            "key size mismatch");
 }
 template <class Hasher>
@@ -489,15 +554,24 @@ verify (key_file_header const& kh,
     log_file_header const& lh)
 {
     verify<Hasher>(lh);
-    if (kh.salt != lh.salt)
+    if (kh.uid != lh.uid)
         throw store_corrupt_error (
-            "salt mismatch in log file");
+            "uid mismatch in log file");
+    if (kh.appnum != lh.appnum)
+        throw store_corrupt_error(
+            "appnum mismatch in log file");
     if (kh.key_size != lh.key_size)
         throw store_corrupt_error (
             "key size mismatch in log file");
-    if (kh.appnum != lh.appnum)
+    if (kh.salt != lh.salt)
         throw store_corrupt_error (
-            "appnum mismatch");
+            "salt mismatch in log file");
+    if (kh.pepper != lh.pepper)
+        throw store_corrupt_error (
+            "pepper mismatch in log file");
+    if (kh.block_size != lh.block_size)
+        throw store_corrupt_error (
+            "block size mismatch in log file");
 }
 } // detail
@@ -20,7 +20,6 @@
 #ifndef BEAST_NUDB_DETAIL_GENTEX_H_INCLUDED
 #define BEAST_NUDB_DETAIL_GENTEX_H_INCLUDED
-#include <beast/nudb/detail/config.h>
 #include <beast/utility/noexcept.h>
 #include <condition_variable>
 #include <cstddef>
@@ -22,7 +22,6 @@
 #include <beast/nudb/detail/arena.h>
 #include <beast/nudb/detail/bucket.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <cstdint>
 #include <cstring>
@@ -20,8 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_STREAM_H_INCLUDED
 #define BEAST_NUDB_DETAIL_STREAM_H_INCLUDED
-#include <beast/nudb/error.h>
-#include <beast/nudb/detail/config.h>
+#include <beast/nudb/common.h>
 #include <array>
 #include <cstddef>
 #include <cstdint>
@@ -32,114 +31,54 @@ namespace beast {
 namespace nudb {
 namespace detail {
-// Simple growable memory buffer
-class buffer
-{
-private:
-    std::size_t size_ = 0;
-    std::unique_ptr<std::uint8_t[]> buf_;
-public:
-    buffer() = default;
-    buffer (buffer const&) = delete;
-    buffer& operator= (buffer const&) = delete;
-    explicit
-    buffer (std::size_t n)
-        : size_ (n)
-        , buf_ (new std::uint8_t[n])
-    {
-    }
-    buffer (buffer&& other)
-        : size_ (other.size_)
-        , buf_ (std::move(other.buf_))
-    {
-        other.size_ = 0;
-    }
-    buffer& operator= (buffer&& other)
-    {
-        size_ = other.size_;
-        buf_ = std::move(other.buf_);
-        other.size_ = 0;
-        return *this;
-    }
-    std::size_t
-    size() const
-    {
-        return size_;
-    }
-    std::uint8_t*
-    get() const
-    {
-        return buf_.get();
-    }
-    void
-    reserve (std::size_t n)
-    {
-        if (size_ < n)
-            buf_.reset (new std::uint8_t[n]);
-        size_ = n;
-    }
-};
-//------------------------------------------------------------------------------
 // Input stream from bytes
 template <class = void>
 class istream_t
 {
 private:
     std::uint8_t const* buf_;
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-    std::size_t bytes_;
-#endif
+    std::size_t size_ = 0;
 public:
     istream_t (istream_t const&) = default;
     istream_t& operator= (istream_t const&) = default;
-    istream_t (void const* data, std::size_t
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        bytes
-#endif
-    )
+    istream_t (void const* data, std::size_t size)
         : buf_(reinterpret_cast<
            std::uint8_t const*>(data))
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        , bytes_(bytes)
-#endif
+        , size_(size)
     {
     }
     template <std::size_t N>
     istream_t (std::array<std::uint8_t, N> const& a)
         : buf_ (a.data())
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        , bytes_ (a.size())
-#endif
+        , size_ (a.size())
     {
     }
     std::uint8_t const*
-    data (std::size_t bytes)
+    data (std::size_t bytes);
+    std::uint8_t const*
+    operator()(std::size_t bytes)
     {
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        if (bytes > bytes_)
-            throw std::logic_error(
-                "nudb: istream");
-        bytes_ -= bytes;
-#endif
-        auto const data = buf_;
-        buf_ = buf_ + bytes;
-        return data;
+        return data(bytes);
     }
 };
+template <class _>
+std::uint8_t const*
+istream_t<_>::data (std::size_t bytes)
+{
+    if (size_ < bytes)
+        throw short_read_error();
+    auto const data = buf_;
+    buf_ = buf_ + bytes;
+    size_ -= bytes;
+    return data;
+}
 using istream = istream_t<>;
 //------------------------------------------------------------------------------
@@ -151,32 +90,19 @@ class ostream_t
 private:
     std::uint8_t* buf_;
     std::size_t size_ = 0;
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-    std::size_t bytes_;
-#endif
 public:
     ostream_t (ostream_t const&) = default;
     ostream_t& operator= (ostream_t const&) = default;
-    ostream_t (void* data, std::size_t
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        bytes
-#endif
-    )
+    ostream_t (void* data, std::size_t)
         : buf_ (reinterpret_cast<std::uint8_t*>(data))
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        , bytes_ (bytes)
-#endif
     {
     }
     template <std::size_t N>
     ostream_t (std::array<std::uint8_t, N>& a)
         : buf_ (a.data())
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-        , bytes_ (a.size())
-#endif
     {
     }
@@ -188,20 +114,24 @@ public:
     }
     std::uint8_t*
-    data (std::size_t bytes)
+    data (std::size_t bytes);
+    std::uint8_t*
+    operator()(std::size_t bytes)
+    {
+        return data(bytes);
+    }
+};
+template <class _>
+std::uint8_t*
+ostream_t<_>::data (std::size_t bytes)
 {
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
-    if (bytes > bytes_)
-        throw std::logic_error(
-            "nudb: ostream");
-    bytes_ -= bytes;
-#endif
     auto const data = buf_;
     buf_ = buf_ + bytes;
     size_ += bytes;
     return data;
 }
-};
 using ostream = ostream_t<>;
@@ -0,0 +1,155 @@
+//------------------------------------------------------------------------------
+/*
+    This file is part of Beast: https://github.com/vinniefalco/Beast
+    Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
+    Permission to use, copy, modify, and/or distribute this software for any
+    purpose with or without fee is hereby granted, provided that the above
+    copyright notice and this permission notice appear in all copies.
+    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+//==============================================================================
+#ifndef BEAST_NUDB_VARINT_H_INCLUDED
+#define BEAST_NUDB_VARINT_H_INCLUDED
+#include <beast/config/CompilerConfig.h> // for BEAST_CONSTEXPR
+#include <beast/nudb/detail/stream.h>
+#include <cstdint>
+#include <beast/cxx14/type_traits.h> // <type_traits>
+namespace beast {
+namespace nudb {
+namespace detail {
+// base128 varint format is from
+// google protocol buffers:
+// https://developers.google.com/protocol-buffers/docs/encoding#varints
+// field tag
+struct varint;
+// Metafunction to return the largest
+// possible size of T represented as varint.
+// T must be unsigned.
+template <class T,
+    bool = std::is_unsigned<T>::value>
+struct varint_traits;
+template <class T>
+struct varint_traits<T, true>
+{
+    static std::size_t BEAST_CONSTEXPR max =
+        (8 * sizeof(T) + 6) / 7;
+};
+// Returns: Number of bytes consumed or 0 on error,
+//          if the buffer was too small or t overflowed.
+//
+template <class = void>
+std::size_t
+read_varint (void const* buf,
+    std::size_t buflen, std::size_t& t)
+{
+    t = 0;
+    std::uint8_t const* p =
+        reinterpret_cast<
+            std::uint8_t const*>(buf);
+    std::size_t n = 0;
+    while (p[n] & 0x80)
+        if (++n >= buflen)
+            return 0;
+    if (++n > buflen)
+        return 0;
+    // Special case for 0
+    if (n == 1 && *p == 0)
+    {
+        t = 0;
+        return 1;
+    }
+    auto const used = n;
+    while (n--)
+    {
+        auto const d = p[n];
+        auto const t0 = t;
+        t *= 127;
+        t += d & 0x7f;
+        if (t <= t0)
+            return 0; // overflow
+    }
+    return used;
+}
+template <class T,
+    std::enable_if_t<std::is_unsigned<
+        T>::value>* = nullptr>
+std::size_t
+size_varint (T v)
+{
+    std::size_t n = 0;
+    do
+    {
+        v /= 127;
+        ++n;
+    }
+    while (v != 0);
+    return n;
+}
+template <class = void>
+std::size_t
+write_varint (void* p0, std::size_t v)
+{
+    std::uint8_t* p = reinterpret_cast<
+        std::uint8_t*>(p0);
+    do
+    {
+        std::uint8_t d =
+            v % 127;
+        v /= 127;
+        if (v != 0)
+            d |= 0x80;
+        *p++ = d;
+    }
+    while (v != 0);
+    return p - reinterpret_cast<
+        std::uint8_t*>(p0);
+}
+// input stream
+template <class T, std::enable_if_t<
+    std::is_same<T, varint>::value>* = nullptr>
+void
+read (istream& is, std::size_t& u)
+{
+    auto p0 = is(1);
+    auto p1 = p0;
+    while (*p1++ & 0x80)
+        is(1);
+    read_varint(p0, p1 - p0, u);
+}
+// output stream
+template <class T, std::enable_if_t<
+    std::is_same<T, varint>::value>* = nullptr>
+void
+write (ostream& os, std::size_t t)
+{
+    write_varint(os.data(
+        size_varint(t)), t);
+}
+} // detail
+} // nudb
+} // beast
+#endif
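
A hedged round-trip sketch (buffer size and value invented; include path inferred from the header guard). One observation from the code itself: the coder multiplies and divides by 127 rather than 128, so read_varint and write_varint agree with each other even though the byte stream is not byte-for-byte identical to a protocol-buffers varint:

    #include <beast/nudb/varint.h>
    #include <cassert>
    #include <cstdint>

    void varint_round_trip()
    {
        using namespace beast::nudb::detail;
        std::uint8_t buf[varint_traits<std::size_t>::max];
        std::size_t const v = 300;
        auto const written = write_varint(buf, v);   // bytes produced
        std::size_t u;
        auto const consumed = read_varint(buf, written, u);
        assert(consumed == written && u == v);       // 300 survives the trip
    }
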
@@ -20,9 +20,8 @@
 #ifndef BEAST_NUDB_FILE_H_INCLUDED
 #define BEAST_NUDB_FILE_H_INCLUDED
-#include <beast/nudb/detail/config.h>
-#include <beast/nudb/detail/posix_file.h>
-#include <beast/nudb/detail/win32_file.h>
+#include <beast/nudb/posix_file.h>
+#include <beast/nudb/win32_file.h>
 #include <string>
 namespace beast {
@@ -17,25 +17,46 @@
 */
 //==============================================================================
-#ifndef BEAST_NUDB_MODE_H_INCLUDED
-#define BEAST_NUDB_MODE_H_INCLUDED
-#include <beast/nudb/detail/config.h>
-#include <string>
+#ifndef BEAST_NUDB_IDENTITY_CODEC_H_INCLUDED
+#define BEAST_NUDB_IDENTITY_CODEC_H_INCLUDED
+#include <utility>
 namespace beast {
 namespace nudb {
-enum class file_mode
-{
-    scan,         // read sequential
-    read,         // read random
-    append,       // read random, write append
-    write         // read random, write random
-};
-// This sort of doesn't belong here
-using path_type = std::string;
+/** Codec which maps input directly to output. */
+class identity_codec
+{
+public:
+    template <class... Args>
+    explicit
+    identity_codec(Args&&... args)
+    {
+    }
+    char const*
+    name() const
+    {
+        return "none";
+    }
+    template <class BufferFactory>
+    std::pair<void const*, std::size_t>
+    compress (void const* in,
+        std::size_t in_size, BufferFactory&&) const
+    {
+        return std::make_pair(in, in_size);
+    }
+    template <class BufferFactory>
+    std::pair<void const*, std::size_t>
+    decompress (void const* in,
+        std::size_t in_size, BufferFactory&&) const
+    {
+        return std::make_pair(in, in_size);
+    }
+};
 } // nudb
 } // beast
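
A minimal sketch of the codec contract, assuming only what the class above shows: compress and decompress return a (pointer, size) pair and may use the third argument to obtain scratch space, which identity_codec never needs (include path inferred from the header guard):

    #include <beast/nudb/identity_codec.h>
    #include <cstddef>
    #include <utility>

    std::pair<void const*, std::size_t>
    pass_through(void const* in, std::size_t in_size)
    {
        beast::nudb::identity_codec codec;
        int ignored = 0;  // any object works; identity ignores it
        return codec.decompress(in, in_size, ignored);
    }
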
@@ -17,9 +17,8 @@
 */
 //==============================================================================
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/tests/callgrind_test.cpp>
 #include <beast/nudb/tests/recover_test.cpp>
 #include <beast/nudb/tests/store_test.cpp>
+#include <beast/nudb/tests/varint_test.cpp>
 #include <beast/nudb/tests/verify_test.cpp>
@@ -20,9 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_POSIX_FILE_H_INCLUDED
 #define BEAST_NUDB_DETAIL_POSIX_FILE_H_INCLUDED
-#include <beast/nudb/error.h>
-#include <beast/nudb/mode.h>
-#include <beast/nudb/detail/config.h>
+#include <beast/nudb/common.h>
 #include <cassert>
 #include <cerrno>
 #include <cstring>
@@ -265,6 +263,8 @@ template <class _>
 void
 posix_file<_>::read (std::size_t offset,
     void* buffer, std::size_t bytes)
+{
+    while(bytes > 0)
     {
         auto const n = ::pread (
             fd_, buffer, bytes, offset);
@@ -272,22 +272,34 @@ posix_file<_>::read (std::size_t offset,
         if (n == -1)
             throw file_posix_error(
                 "pread");
-        if (n < bytes)
+        if (n == 0)
             throw file_short_read_error();
+        offset += n;
+        bytes -= n;
+        buffer = reinterpret_cast<
+            char*>(buffer) + n;
+    }
 }
 template <class _>
 void
 posix_file<_>::write (std::size_t offset,
     void const* buffer, std::size_t bytes)
+{
+    while(bytes > 0)
     {
         auto const n = ::pwrite (
             fd_, buffer, bytes, offset);
         if (n == -1)
             throw file_posix_error(
                 "pwrite");
-        if (n < bytes)
+        if (n == 0)
            throw file_short_write_error();
+        offset += n;
+        bytes -= n;
+        buffer = reinterpret_cast<
+            char const*>(buffer) + n;
+    }
 }
 template <class _>
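
The loop matters because POSIX permits pread and pwrite to transfer fewer bytes than requested; after this change only -1 (error) and 0 (unexpected end of file) are treated as failures. The same pattern in isolation, as a hedged sketch:

    #include <unistd.h>
    #include <cstddef>
    #include <stdexcept>

    void read_fully(int fd, std::size_t offset,
        void* buffer, std::size_t bytes)
    {
        while (bytes > 0)
        {
            auto const n = ::pread(fd, buffer, bytes, offset);
            if (n == -1)
                throw std::runtime_error("pread");
            if (n == 0)
                throw std::runtime_error("short read");
            offset += n;  // advance past the partial transfer
            bytes -= n;
            buffer = static_cast<char*>(buffer) + n;
        }
    }
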
@@ -20,12 +20,10 @@
 #ifndef BEAST_NUDB_RECOVER_H_INCLUDED
 #define BEAST_NUDB_RECOVER_H_INCLUDED
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/file.h>
-#include <beast/nudb/mode.h>
 #include <beast/nudb/detail/bucket.h>
 #include <beast/nudb/detail/bulkio.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <algorithm>
 #include <cstddef>
@@ -39,19 +37,22 @@ namespace nudb {
     any partially committed data.
 */
 template <
-    class Hasher = default_hash,
-    class File = native_file>
+    class Hasher,
+    class Codec,
+    class File = native_file,
+    class... Args>
 bool
 recover (
     path_type const& dat_path,
     path_type const& key_path,
     path_type const& log_path,
-    std::size_t read_size = 16 * 1024 * 1024)
+    std::size_t read_size,
+    Args&&... args)
 {
     using namespace detail;
-    File df;
-    File lf;
-    File kf;
+    File df(args...);
+    File lf(args...);
+    File kf(args...);
     if (! df.open (file_mode::append, dat_path))
         return false;
     if (! kf.open (file_mode::write, key_path))
@@ -96,8 +97,7 @@ recover (
     verify<Hasher>(kh, lh);
     auto const df_size = df.actual_size();
     buffer buf(kh.block_size);
-    bucket b (kh.key_size,
-        kh.block_size, buf.get());
+    bucket b (kh.block_size, buf.get());
     bulk_reader<File> r(lf, log_file_header::size,
         lf_size, read_size);
     while(! r.eof())
@@ -20,20 +20,15 @@
 #ifndef BEAST_NUDB_STORE_H_INCLUDED
 #define BEAST_NUDB_STORE_H_INCLUDED
-#include <beast/nudb/error.h>
-#include <beast/nudb/file.h>
-#include <beast/nudb/mode.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/recover.h>
 #include <beast/nudb/detail/bucket.h>
-#include <beast/nudb/detail/buffers.h>
+#include <beast/nudb/detail/buffer.h>
 #include <beast/nudb/detail/bulkio.h>
 #include <beast/nudb/detail/cache.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <beast/nudb/detail/gentex.h>
 #include <beast/nudb/detail/pool.h>
-#include <beast/nudb/detail/posix_file.h>
-#include <beast/nudb/detail/win32_file.h>
 #include <boost/thread/lock_types.hpp>
 #include <boost/thread/shared_mutex.hpp>
 #include <algorithm>
@@ -52,7 +47,6 @@
 #include <limits>
 #include <beast/cxx14/memory.h> // <memory>
 #include <mutex>
-#include <random>
 #include <stdexcept>
 #include <string>
 #include <thread>
@@ -80,15 +74,17 @@ namespace nudb {
 */
 /** A simple key/value database
+    @tparam Hasher The hash function to use on key
+    @tparam Codec The codec to apply to value data
     @tparam File The type of File object to use.
-    @tparam Hash The hash function to use on key
 */
-template <class Hasher, class File>
-class basic_store
+template <class Hasher, class Codec, class File>
+class store
 {
 public:
-    using file_type = File;
     using hash_type = Hasher;
+    using codec_type = Codec;
+    using file_type = File;
 private:
     // requires 64-bit integers or better
@@ -112,9 +108,6 @@ private:
     using unique_lock_type =
         boost::unique_lock<boost::shared_mutex>;
-    using blockbuf =
-        typename detail::buffers::value_type;
     struct state
     {
         File df;
@@ -123,11 +116,11 @@ private:
         path_type dp;
         path_type kp;
         path_type lp;
-        detail::buffers b;
         detail::pool p0;
         detail::pool p1;
         detail::cache c0;
         detail::cache c1;
+        Codec const codec;
         detail::key_file_header const kh;
         // pool commit high water mark
@@ -144,8 +137,6 @@ private:
     };
     bool open_ = false;
-    // VFALCO Make consistency checks optional?
-    //bool safe_ = true; // Do consistency checks
     // VFALCO Unfortunately boost::optional doesn't support
     // move construction so we use unique_ptr instead.
@@ -173,9 +164,9 @@ private:
     std::exception_ptr ep_;
 public:
-    basic_store() = default;
-    basic_store (basic_store const&) = delete;
-    basic_store& operator= (basic_store const&) = delete;
+    store() = default;
+    store (store const&) = delete;
+    store& operator= (store const&) = delete;
     /** Destroy the database.
@@ -191,7 +182,7 @@ public:
         Throws:
             None
     */
-    ~basic_store();
+    ~store();
     /** Returns `true` if the database is open. */
     bool
@@ -250,17 +241,17 @@ public:
     /** Fetch a value.
-        If key is found, BufferFactory will be called as:
-            `(void*)()(std::size_t bytes)`
-        where bytes is the size of the value, and the returned pointer
-        points to a buffer of at least bytes size.
-        @return `true` if the key exists.
+        If key is found, Handler will be called as:
+            `(void)()(void const* data, std::size_t size)`
+        where data and size represent the value. If the
+        key is not found, the handler is not called.
+        @return `true` if a matching key was found.
     */
-    template <class BufferFactory>
+    template <class Handler>
     bool
-    fetch (void const* key, BufferFactory&& bf);
+    fetch (void const* key, Handler&& handler);
     /** Insert a value.
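
Callers now receive the value inside the callback instead of supplying a buffer up front; a hedged usage sketch (db and key_type assumed to exist):

    std::string out;
    key_type const key = 42;
    bool const found = db.fetch (&key,
        [&](void const* data, std::size_t size)
        {
            // data/size are the decompressed value bytes
            out.assign (static_cast<char const*>(data), size);
        });
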
@@ -280,12 +271,19 @@ private:
         std::rethrow_exception(ep_);
     }
-    std::pair <detail::bucket::value_type, bool>
-    find (void const* key, detail::bucket& b);
-    void
-    maybe_spill (detail::bucket& b,
-        detail::bulk_writer<File>& w);
+    // Fetch key in loaded bucket b or its spills.
+    //
+    template <class Handler>
+    bool
+    fetch (std::size_t h, void const* key,
+        detail::bucket b, Handler&& handler);
+    // Returns `true` if the key exists
+    // lock is unlocked after the first bucket processed
+    //
+    bool
+    exists (std::size_t h, void const* key,
+        shared_lock_type* lock, detail::bucket b);
     void
     split (detail::bucket& b1, detail::bucket& b2,
@@ -306,8 +304,8 @@ private:
 //------------------------------------------------------------------------------
-template <class Hasher, class File>
-basic_store<Hasher, File>::state::state (
+template <class Hasher, class Codec, class File>
+store<Hasher, Codec, File>::state::state (
     File&& df_, File&& kf_, File&& lf_,
     path_type const& dp_, path_type const& kp_,
     path_type const& lp_,
@@ -319,7 +317,6 @@ basic_store<Hasher, File>::state::state (
     , dp (dp_)
     , kp (kp_)
     , lp (lp_)
-    , b (kh_.block_size)
     , p0 (kh_.key_size, arena_alloc_size)
     , p1 (kh_.key_size, arena_alloc_size)
     , c0 (kh_.key_size, kh_.block_size)
@@ -330,8 +327,8 @@ basic_store<Hasher, File>::state::state (
 //------------------------------------------------------------------------------
-template <class Hasher, class File>
-basic_store<Hasher, File>::~basic_store()
+template <class Hasher, class Codec, class File>
+store<Hasher, Codec, File>::~store()
 {
     try
     {
@@ -344,10 +341,10 @@ basic_store<Hasher, File>::~basic_store()
     }
 }
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 template <class... Args>
 bool
-basic_store<Hasher, File>::open (
+store<Hasher, Codec, File>::open (
     path_type const& dat_path,
     path_type const& key_path,
     path_type const& log_path,
@@ -358,11 +355,13 @@ basic_store<Hasher, File>::open (
     if (is_open())
         throw std::logic_error("nudb: already open");
     epb_.store(false);
-    recover (dat_path, key_path, log_path,
-        recover_read_size);
-    File df(std::forward<Args>(args)...);
-    File kf(std::forward<Args>(args)...);
-    File lf(std::forward<Args>(args)...);
+    recover<Hasher, Codec, File>(
+        dat_path, key_path, log_path,
+        recover_read_size,
+        args...);
+    File df(args...);
+    File kf(args...);
+    File lf(args...);
     if (! df.open (file_mode::append, dat_path))
         return false;
     if (! kf.open (file_mode::write, key_path))
@@ -373,7 +372,7 @@ basic_store<Hasher, File>::open (
     key_file_header kh;
     read (df, dh);
     read (kf, kh);
-    verify (dh);
+    verify<Codec> (dh);
     verify<Hasher> (kh);
     verify<Hasher> (dh, kh);
     auto s = std::make_unique<state>(
@@ -392,13 +391,13 @@ basic_store<Hasher, File>::open (
     s_ = std::move(s);
     open_ = true;
     thread_ = std::thread(
-        &basic_store::run, this);
+        &store::run, this);
     return true;
 }
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 void
-basic_store<Hasher, File>::close()
+store<Hasher, Codec, File>::close()
 {
     if (open_)
     {
@@ -414,125 +413,70 @@ basic_store<Hasher, File>::close()
     }
 }
-template <class Hasher, class File>
-template <class BufferFactory>
+template <class Hasher, class Codec, class File>
+template <class Handler>
 bool
-basic_store<Hasher, File>::fetch (
-    void const* key, BufferFactory&& bf)
+store<Hasher, Codec, File>::fetch (
+    void const* key, Handler&& handler)
 {
     using namespace detail;
     rethrow();
-    std::size_t offset;
-    std::size_t size;
-    blockbuf buf(s_->b);
-    bucket tmp (s_->kh.key_size,
-        s_->kh.block_size, buf.get());
-    {
     auto const h = hash<Hasher>(
         key, s_->kh.key_size, s_->kh.salt);
-    shared_lock_type m (m_,
-        boost::defer_lock);
-    m.lock();
+    shared_lock_type m (m_);
     {
-        typename pool::iterator iter;
-        iter = s_->p1.find(key);
-        if (iter != s_->p1.end())
+        auto iter = s_->p1.find(key);
+        if (iter == s_->p1.end())
         {
-            void* const b = bf(
-                iter->first.size);
-            if (b == nullptr)
-                return false;
-            std::memcpy (b,
-                iter->first.data,
-                iter->first.size);
-            return true;
-        }
-        iter = s_->p0.find(key);
-        if (iter != s_->p0.end())
-        {
-            void* const b = bf(
-                iter->first.size);
-            if (b == nullptr)
-                return false;
-            std::memcpy (b,
-                iter->first.data,
-                iter->first.size);
-            return true;
+            iter = s_->p0.find(key);
+            if (iter == s_->p0.end())
+                goto next;
         }
+        buffer buf;
+        auto const result =
+            s_->codec.decompress(
+                iter->first.data,
+                iter->first.size, buf);
+        handler(result.first, result.second);
+        return true;
     }
+next:
     auto const n = bucket_index(
         h, buckets_, modulus_);
     auto const iter = s_->c1.find(n);
     if (iter != s_->c1.end())
-    {
-        auto const result =
-            iter->second.find(key);
-        if (result.second)
-        {
-            offset = result.first.offset;
-            size = result.first.size;
-            goto found;
-        }
-        // VFALCO Audit for concurrency
-        auto spill = iter->second.spill();
-        m.unlock();
-        while (spill)
-        {
-            tmp.read(s_->df, spill);
-            auto const result = tmp.find(key);
-            if (result.second)
-            {
-                offset = result.first.offset;
-                size = result.first.size;
-                goto found;
-            }
-            spill = tmp.spill();
-        }
-        return false;
-    }
+        return fetch(h, key,
+            iter->second, handler);
     // VFALCO Audit for concurrency
     genlock <gentex> g (g_);
     m.unlock();
-    tmp.read (s_->kf,
-        (n + 1) * tmp.block_size());
-    auto const result = find(key, tmp);
-    if (! result.second)
-        return false;
-    offset = result.first.offset;
-    size = result.first.size;
-    }
-found:
-    void* const b = bf(size);
-    if (b == nullptr)
-        return false;
-    // Data Record
-    s_->df.read (offset +
-        field<uint48_t>::size + // Size
-        s_->kh.key_size,        // Key
-        b, size);
-    return true;
+    buffer buf (s_->kh.block_size);
+    // VFALCO Constructs with garbage here
+    bucket b (s_->kh.block_size,
+        buf.get());
+    b.read (s_->kf,
+        (n + 1) * b.block_size());
+    return fetch(h, key, b, handler);
 }
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 bool
-basic_store<Hasher, File>::insert (void const* key,
-    void const* data, std::size_t size)
+store<Hasher, Codec, File>::insert (
+    void const* key, void const* data,
+    std::size_t size)
 {
     using namespace detail;
     rethrow();
-#if ! BEAST_NUDB_NO_DOMAIN_CHECK
+    buffer buf;
+    // Data Record
     if (size > field<uint48_t>::max)
         throw std::logic_error(
             "nudb: size too large");
-#endif
-    blockbuf buf (s_->b);
-    bucket tmp (s_->kh.key_size,
-        s_->kh.block_size, buf.get());
     auto const h = hash<Hasher>(
         key, s_->kh.key_size, s_->kh.salt);
     std::lock_guard<std::mutex> u (u_);
-    shared_lock_type m (m_, boost::defer_lock);
-    m.lock();
+    {
+    shared_lock_type m (m_);
     if (s_->p1.find(key) != s_->p1.end())
         return false;
     if (s_->p0.find(key) != s_->p0.end())
@@ -542,32 +486,31 @@ basic_store<Hasher, File>::insert (void const* key,
     auto const iter = s_->c1.find(n);
     if (iter != s_->c1.end())
     {
-        if (iter->second.find(key).second)
+        if (exists(h, key, &m,
+                iter->second))
             return false;
-        // VFALCO Audit for concurrency
-        auto spill = iter->second.spill();
-        m.unlock();
-        while (spill)
-        {
-            tmp.read (s_->df, spill);
-            if (tmp.find(key).second)
-                return false;
-            spill = tmp.spill();
-        }
+        // m is now unlocked
     }
     else
     {
-        // VFALCO Audit for concurrency
         genlock <gentex> g (g_);
         m.unlock();
-        // VFALCO Audit for concurrency
-        tmp.read (s_->kf,
+        buf.reserve(s_->kh.block_size);
+        bucket b (s_->kh.block_size,
+            buf.get());
+        b.read (s_->kf,
            (n + 1) * s_->kh.block_size);
-        if (find(key, tmp).second)
+        if (exists(h, key, nullptr, b))
            return false;
    }
-    {
+    }
+    auto const result =
+        s_->codec.compress(data, size, buf);
+    // Perform insert
     unique_lock_type m (m_);
-    s_->p1.insert (h, key, data, size);
+    s_->p1.insert (h, key,
+        result.first, result.second);
     // Did we go over the commit limit?
     if (commit_limit_ > 0 &&
         s_->p1.data_size() >= commit_limit_)
@@ -585,68 +528,103 @@ basic_store<Hasher, File>::insert (void const* key,
     m.unlock();
     if (notify)
         cond_.notify_all();
-    }
     return true;
 }
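
Compression now happens once here, before the value enters the write pool, so both fetch and the commit path see codec output rather than raw bytes. A hedged round-trip sketch (identifiers assumed):

    key_type const key = 7;        // exactly kh.key_size bytes
    char const value[] = "hello";  // at most field<uint48_t>::max bytes
    if (db.insert (&key, value, sizeof(value)))
        db.fetch (&key,
            [](void const* data, std::size_t size)
            {
                // data/size are a decompressed copy of `value`
            });
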
-// Find key in loaded bucket b or its spills.
-//
-template <class Hasher, class File>
-std::pair <detail::bucket::value_type, bool>
-basic_store<Hasher, File>::find (
-    void const* key, detail::bucket& b)
-{
-    auto result = b.find(key);
-    if (result.second)
-        return result;
-    auto spill = b.spill();
-    while (spill)
-    {
-        b.read (s_->df, spill);
-        result = b.find(key);
-        if (result.second)
-            return result;
-        spill = b.spill();
-    }
-    return result;
-}
-// Spill bucket if full
-//
-template <class Hasher, class File>
-void
-basic_store<Hasher, File>::maybe_spill(
-    detail::bucket& b, detail::bulk_writer<File>& w)
-{
-    using namespace detail;
-    if (b.full())
-    {
-        // Spill Record
-        auto const offset = w.offset();
-        auto os = w.prepare(
-            field<uint48_t>::size + // Zero
-            field<uint16_t>::size + // Size
-            b.compact_size());
-        write <uint48_t> (os, 0);  // Zero
-        write <std::uint16_t> (
-            os, b.compact_size()); // Size
-        auto const spill =
-            offset + os.size();
-        b.write (os);              // Bucket
-        // Update bucket
-        b.clear();
-        b.spill (spill);
-    }
-}
+template <class Hasher, class Codec, class File>
+template <class Handler>
+bool
+store<Hasher, Codec, File>::fetch (
+    std::size_t h, void const* key,
+    detail::bucket b, Handler&& handler)
+{
+    using namespace detail;
+    buffer buf0;
+    buffer buf1;
+    for(;;)
+    {
+        for (auto i = b.lower_bound(h);
+            i < b.size(); ++i)
+        {
+            auto const item = b[i];
+            if (item.hash != h)
+                break;
+            // Data Record
+            auto const len =
+                s_->kh.key_size +       // Key
+                item.size;              // Value
+            buf0.reserve(len);
+            s_->df.read(item.offset +
+                field<uint48_t>::size,  // Size
+                buf0.get(), len);
+            if (std::memcmp(buf0.get(), key,
+                s_->kh.key_size) == 0)
+            {
+                auto const result =
+                    s_->codec.decompress(
+                        buf0.get() + s_->kh.key_size,
+                        item.size, buf1);
+                handler(result.first, result.second);
+                return true;
+            }
+        }
+        auto const spill = b.spill();
+        if (! spill)
+            break;
+        buf1.reserve(s_->kh.block_size);
+        b = bucket(s_->kh.block_size,
+            buf1.get());
+        b.read(s_->df, spill);
+    }
+    return false;
+}
+template <class Hasher, class Codec, class File>
+bool
+store<Hasher, Codec, File>::exists (
+    std::size_t h, void const* key,
+    shared_lock_type* lock, detail::bucket b)
+{
+    using namespace detail;
+    buffer buf(s_->kh.key_size +
+        s_->kh.block_size);
+    void* pk = buf.get();
+    void* pb = buf.get() + s_->kh.key_size;
+    for(;;)
+    {
+        for (auto i = b.lower_bound(h);
+            i < b.size(); ++i)
+        {
+            auto const item = b[i];
+            if (item.hash != h)
+                break;
+            // Data Record
+            s_->df.read(item.offset +
+                field<uint48_t>::size,  // Size
+                pk, s_->kh.key_size);   // Key
+            if (std::memcmp(pk, key,
+                s_->kh.key_size) == 0)
+                return true;
+        }
+        auto spill = b.spill();
+        if (lock && lock->owns_lock())
+            lock->unlock();
+        if (! spill)
+            break;
+        b = bucket(s_->kh.block_size, pb);
+        b.read(s_->df, spill);
+    }
+    return false;
+}
 // Split the bucket in b1 to b2
 // b1 must be loaded
 // tmp is used as a temporary buffer
 // splits are written but not the new buckets
 //
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 void
-basic_store<Hasher, File>::split (detail::bucket& b1,
+store<Hasher, Codec, File>::split (detail::bucket& b1,
     detail::bucket& b2, detail::bucket& tmp,
     std::size_t n1, std::size_t n2,
     std::size_t buckets, std::size_t modulus,
@@ -659,15 +637,13 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
     // Split
     for (std::size_t i = 0; i < b1.size();)
     {
-        auto e = b1[i];
-        auto const h = hash<Hasher>(
-            e.key, s_->kh.key_size, s_->kh.salt);
+        auto const e = b1[i];
         auto const n = bucket_index(
-            h, buckets, modulus);
+            e.hash, buckets, modulus);
         assert(n==n1 || n==n2);
         if (n == n2)
         {
-            b2.insert (e.offset, e.size, e.key);
+            b2.insert (e.offset, e.size, e.hash);
             b1.erase (i);
         }
         else
@@ -684,26 +660,27 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
     // If any part of the spill record is
     // in the write buffer then flush first
     // VFALCO Needs audit
-    if (spill + bucket_size(s_->kh.key_size,
-        s_->kh.capacity) > w.offset() - w.size())
+    if (spill + bucket_size(s_->kh.capacity) >
+            w.offset() - w.size())
         w.flush();
     tmp.read (s_->df, spill);
     for (std::size_t i = 0; i < tmp.size(); ++i)
     {
-        auto e = tmp[i];
-        auto const n = bucket_index<Hasher>(
-            e.key, s_->kh.key_size, s_->kh.salt,
-            buckets, modulus);
+        auto const e = tmp[i];
+        auto const n = bucket_index(
+            e.hash, buckets, modulus);
         assert(n==n1 || n==n2);
         if (n == n2)
        {
            maybe_spill(b2, w);
-            b2.insert (e.offset, e.size, e.key);
+            b2.insert(
+                e.offset, e.size, e.hash);
        }
        else
        {
            maybe_spill(b1, w);
-            b1.insert (e.offset, e.size, e.key);
+            b1.insert(
+                e.offset, e.size, e.hash);
        }
    }
    spill = tmp.spill();
@@ -732,9 +709,9 @@ basic_store<Hasher, File>::split (detail::bucket& b1,
 // Postconditions:
 //   c1, and c0, and the memory pointed to by buf may be modified
 //
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 detail::bucket
-basic_store<Hasher, File>::load (
+store<Hasher, Codec, File>::load (
     std::size_t n, detail::cache& c1,
     detail::cache& c0, void* buf)
 {
@@ -746,8 +723,7 @@ basic_store<Hasher, File>::load (
     if (iter != c0.end())
         return c1.insert (n,
             iter->second)->second;
-    bucket tmp (s_->kh.key_size,
-        s_->kh.block_size, buf);
+    bucket tmp (s_->kh.block_size, buf);
     tmp.read (s_->kf, (n + 1) *
         s_->kh.block_size);
     c0.insert (n, tmp);
@@ -760,15 +736,14 @@ basic_store<Hasher, File>::load (
 //
 // Effects:
 //
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 void
-basic_store<Hasher, File>::commit()
+store<Hasher, Codec, File>::commit()
 {
     using namespace detail;
-    blockbuf buf1 (s_->b);
-    blockbuf buf2 (s_->b);
-    bucket tmp (s_->kh.key_size,
-        s_->kh.block_size, buf1.get());
+    buffer buf1 (s_->kh.block_size);
+    buffer buf2 (s_->kh.block_size);
+    bucket tmp (s_->kh.block_size, buf1.get());
     // Empty cache put in place temporarily
     // so we can reuse the memory from s_->c1
     cache c1;
@@ -788,11 +763,14 @@ basic_store<Hasher, File>::commit()
     // Log File Header
     log_file_header lh;
     lh.version = currentVersion;    // Version
+    lh.uid = s_->kh.uid;            // UID
     lh.appnum = s_->kh.appnum;      // Appnum
+    lh.key_size = s_->kh.key_size;  // Key Size
     lh.salt = s_->kh.salt;          // Salt
     lh.pepper = pepper<Hasher>(
         lh.salt);                   // Pepper
-    lh.key_size = s_->kh.key_size;  // Key Size
+    lh.block_size =
+        s_->kh.block_size;          // Block Size
     lh.key_file_size =
         s_->kf.actual_size();       // Key File Size
     lh.dat_file_size =
@@ -813,7 +791,7 @@ basic_store<Hasher, File>::commit()
     // threads are reading other data members
     // of this object in memory
     e.second = w.offset();
-    auto os = w.prepare (data_size(
+    auto os = w.prepare (value_size(
         e.first.size, s_->kh.key_size));
     // Data Record
     write <uint48_t> (os,
@@ -849,7 +827,8 @@ basic_store<Hasher, File>::commit()
     auto b = load (n, c1, s_->c0, buf2.get());
     // This can amplify writes if it spills.
     maybe_spill(b, w);
-    b.insert (e.second, e.first.size, e.first.key);
+    b.insert (e.second,
+        e.first.size, e.first.hash);
 }
 w.flush();
 }
@@ -905,9 +884,9 @@ basic_store<Hasher, File>::commit()
     }
 }
-template <class Hasher, class File>
+template <class Hasher, class Codec, class File>
 void
-basic_store<Hasher, File>::run()
+store<Hasher, Codec, File>::run()
 {
     auto const pred =
         [this]()
@@ -957,29 +936,6 @@ basic_store<Hasher, File>::run()
     }
 }
-//------------------------------------------------------------------------------
-using store = basic_store <default_hash, native_file>;
-/** Generate a random salt. */
-template <class = void>
-std::uint64_t
-make_salt()
-{
-    std::random_device rng;
-    std::mt19937_64 gen {rng()};
-    std::uniform_int_distribution <std::size_t> dist;
-    return dist(gen);
-}
-/** Returns the best guess at the volume's block size. */
-inline
-std::size_t
-block_size(std::string const& /*path*/)
-{
-    return 4096;
-}
 } // nudb
 } // beast
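
With the basic_store alias and the salt/block-size helpers gone from this header, callers now bind all three template parameters themselves (the tests do it through the new api<> traits); a hedged sketch, namespaces assumed:

    using db_type = beast::nudb::store<
        beast::xxhasher,                // Hasher
        beast::nudb::identity_codec,    // Codec
        beast::nudb::native_file>;      // File
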
@@ -17,10 +17,7 @@
 */
 //==============================================================================
-#include <beast/nudb/store.h>
-#include <beast/nudb/recover.h>
 #include <beast/nudb/tests/common.h>
-#include <beast/nudb/tests/fail_file.h>
 #include <beast/module/core/diagnostic/UnitTestUtilities.h>
 #include <beast/module/core/files/File.h>
 #include <beast/random/xor_shift_engine.h>
@@ -44,18 +41,18 @@ public:
 // with keys not present.
 void
 do_test (std::size_t count,
-    nudb::path_type const& path)
+    path_type const& path)
 {
     auto const dp = path + ".dat";
     auto const kp = path + ".key";
     auto const lp = path + ".log";
-    nudb::create (dp, kp, lp,
+    test_api::create (dp, kp, lp,
         appnum,
         salt,
         sizeof(nudb::test::key_type),
         nudb::block_size(path),
         0.50);
-    nudb::store db;
+    test_api::store db;
     if (! expect (db.open(dp, kp, lp,
         arena_alloc_size), "open"))
         return;
@@ -67,7 +64,7 @@ public:
     expect (db.insert(&v.key, v.data, v.size),
         "insert");
 }
-storage s;
+Storage s;
 for (std::size_t i = 0; i < count * 2; ++i)
 {
     if (! (i%2))
@@ -17,11 +17,13 @@
 */
 //==============================================================================
-#ifndef BEAST_NUDB_COMMON_H_INCLUDED
-#define BEAST_NUDB_COMMON_H_INCLUDED
+#ifndef BEAST_NUDB_TESTS_COMMON_H_INCLUDED
+#define BEAST_NUDB_TESTS_COMMON_H_INCLUDED
 #include <beast/nudb.h>
+#include <beast/nudb/identity_codec.h>
 #include <beast/nudb/tests/fail_file.h>
+#include <beast/hash/xxhasher.h>
 #include <beast/random/xor_shift_engine.h>
 #include <cstdint>
 #include <iomanip>
@@ -33,9 +35,17 @@ namespace test {
 using key_type = std::size_t;
-using fail_store = nudb::basic_store<
-    beast::nudb::default_hash, nudb::fail_file <
-        nudb::native_file>>;
+// xxhasher is fast and produces good results
+using test_api_base =
+    nudb::api<xxhasher, identity_codec, native_file>;
+struct test_api : test_api_base
+{
+    using fail_store = nudb::store<
+        typename test_api_base::hash_type,
+        typename test_api_base::codec_type,
+        nudb::fail_file <typename test_api_base::file_type>>;
+};
 static std::size_t BEAST_CONSTEXPR arena_alloc_size = 16 * 1024 * 1024;
@@ -45,8 +55,8 @@ static std::uint64_t BEAST_CONSTEXPR salt = 42;
 //------------------------------------------------------------------------------
-// Meets the requirements of BufferFactory
-class storage
+// Meets the requirements of Handler
+class Storage
 {
 private:
     std::size_t size_ = 0;
@@ -54,9 +64,9 @@ private:
     std::unique_ptr<std::uint8_t[]> buf_;
 public:
-    storage() = default;
-    storage (storage const&) = delete;
-    storage& operator= (storage const&) = delete;
+    Storage() = default;
+    Storage (Storage const&) = delete;
+    Storage& operator= (Storage const&) = delete;
     std::size_t
     size() const
@@ -71,15 +81,23 @@ public:
     }
     std::uint8_t*
-    operator()(std::size_t n)
+    reserve (std::size_t size)
     {
-        if (capacity_ < n)
+        if (capacity_ < size)
         {
-            capacity_ = detail::ceil_pow2(n);
+            capacity_ = detail::ceil_pow2(size);
             buf_.reset (
                 new std::uint8_t[capacity_]);
         }
-        size_ = n;
+        size_ = size;
+        return buf_.get();
+    }
+    std::uint8_t*
+    operator()(void const* data, std::size_t size)
+    {
+        reserve (size);
+        std::memcpy(buf_.get(), data, size);
         return buf_.get();
     }
 };
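
Because operator() now matches the Handler signature, a Storage object can be passed straight to fetch and will keep a copy of the value; a hedged sketch (db and key assumed to exist):

    Storage s;
    if (db.fetch (&key, s))
    {
        // s now owns s.size() bytes copied from the value
    }
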
@@ -134,7 +152,7 @@ private:
maxSize = 1250 maxSize = 1250
}; };
storage s_; Storage s_;
beast::xor_shift_engine gen_; beast::xor_shift_engine gen_;
std::uniform_int_distribution<std::uint32_t> d_size_; std::uniform_int_distribution<std::uint32_t> d_size_;
@@ -162,7 +180,7 @@ public:
value_type v; value_type v;
rngcpy (&v.key, sizeof(v.key), gen_); rngcpy (&v.key, sizeof(v.key), gen_);
v.size = d_size_(gen_); v.size = d_size_(gen_);
v.data = s_(v.size); v.data = s_.reserve(v.size);
rngcpy (v.data, v.size, gen_); rngcpy (v.data, v.size, gen_);
return v; return v;
} }
@@ -205,14 +223,18 @@ print (Log log,
log << "actual_load: " << std::fixed << std::setprecision(0) << log << "actual_load: " << std::fixed << std::setprecision(0) <<
info.actual_load * 100 << "%"; info.actual_load * 100 << "%";
log << "version: " << num(info.version); log << "version: " << num(info.version);
log << "salt: " << std::showbase << std::hex << info.salt; log << "uid: " << std::showbase << std::hex << info.uid;
log << "appnum: " << info.appnum;
log << "key_size: " << num(info.key_size); log << "key_size: " << num(info.key_size);
log << "salt: " << std::showbase << std::hex << info.salt;
log << "pepper: " << std::showbase << std::hex << info.pepper;
log << "block_size: " << num(info.block_size); log << "block_size: " << num(info.block_size);
log << "bucket_size: " << num(info.bucket_size); log << "bucket_size: " << num(info.bucket_size);
log << "load_factor: " << std::fixed << std::setprecision(0) << log << "load_factor: " << std::fixed << std::setprecision(0) <<
info.load_factor * 100 << "%"; info.load_factor * 100 << "%";
log << "capacity: " << num(info.capacity); log << "capacity: " << num(info.capacity);
log << "buckets: " << num(info.buckets); log << "buckets: " << num(info.buckets);
log << "key_count: " << num(info.key_count);
log << "value_count: " << num(info.value_count); log << "value_count: " << num(info.value_count);
log << "value_bytes: " << num(info.value_bytes); log << "value_bytes: " << num(info.value_bytes);
log << "spill_count: " << num(info.spill_count); log << "spill_count: " << num(info.spill_count);


@@ -20,7 +20,7 @@
 #ifndef BEAST_NUDB_FAIL_FILE_H_INCLUDED
 #define BEAST_NUDB_FAIL_FILE_H_INCLUDED
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <atomic>
 #include <cstddef>
 #include <string>


@@ -17,10 +17,7 @@
 */
 //==============================================================================
-#include <beast/nudb/store.h>
-#include <beast/nudb/recover.h>
 #include <beast/nudb/tests/common.h>
-#include <beast/nudb/tests/fail_file.h>
 #include <beast/module/core/files/File.h>
 #include <beast/random/xor_shift_engine.h>
 #include <beast/unit_test/suite.h>
@@ -42,17 +39,19 @@ public:
 // they are there. Uses a fail_file that causes the n-th
 // I/O to fail, causing an exception.
 void
-do_work (std::size_t n, std::size_t count,
-    float load_factor, nudb::path_type const& path)
+do_work (std::size_t count, float load_factor,
+    nudb::path_type const& path, fail_counter& c)
 {
     auto const dp = path + ".dat";
     auto const kp = path + ".key";
     auto const lp = path + ".log";
-    nudb::fail_counter c(0);
-    nudb::create (dp, kp, lp, appnum, salt,
-        sizeof(key_type), block_size(path),
-        load_factor);
-    fail_store db;
+    test_api::file_type::erase (dp);
+    test_api::file_type::erase (kp);
+    test_api::file_type::erase (lp);
+    expect(test_api::create (
+        dp, kp, lp, appnum, salt, sizeof(key_type),
+        block_size(path), load_factor), "create");
+    test_api::fail_store db;
     if (! expect(db.open(dp, kp, lp,
         arena_alloc_size, c), "open"))
     {
@@ -60,14 +59,14 @@ public:
     // to report this and terminate the test.
     }
     expect (db.appnum() == appnum, "appnum");
-    c.reset(n);
     Sequence seq;
     for (std::size_t i = 0; i < count; ++i)
     {
         auto const v = seq[i];
-        db.insert(&v.key, v.data, v.size);
+        expect(db.insert(&v.key, v.data, v.size),
+            "insert");
     }
-    storage s;
+    Storage s;
     for (std::size_t i = 0; i < count; ++i)
     {
         auto const v = seq[i];
@@ -81,26 +80,36 @@ public:
         break;
     }
     db.close();
-#ifndef NDEBUG
-    print(log, verify(dp, kp));
-    verify(dp, kp);
-#endif
-    nudb::native_file::erase (dp);
-    nudb::native_file::erase (kp);
-    nudb::native_file::erase (lp);
+    verify_info info;
+    try
+    {
+        info = test_api::verify(dp, kp);
+    }
+    catch(...)
+    {
+        print(log, info);
+        throw;
+    }
+    test_api::file_type::erase (dp);
+    test_api::file_type::erase (kp);
+    test_api::file_type::erase (lp);
 }
 void
-do_recover (path_type const& path)
+do_recover (path_type const& path,
+    fail_counter& c)
 {
     auto const dp = path + ".dat";
     auto const kp = path + ".key";
     auto const lp = path + ".log";
-    recover(dp, kp, lp);
-    verify(dp, kp);
-    nudb::native_file::erase (dp);
-    nudb::native_file::erase (kp);
-    nudb::native_file::erase (lp);
+    recover<test_api::hash_type,
+        test_api::codec_type, fail_file<
            test_api::file_type>>(dp, kp, lp,
                test_api::buffer_size, c);
+    test_api::verify(dp, kp);
+    test_api::file_type::erase (dp);
+    test_api::file_type::erase (kp);
+    test_api::file_type::erase (lp);
 }
 void
@@ -114,12 +123,24 @@ public:
 {
     try
     {
-        do_work (n, count, load_factor, path);
+        fail_counter c(n);
+        do_work (count, load_factor, path, c);
         break;
     }
     catch (nudb::fail_error const&)
     {
-        do_recover (path);
+    }
+    for (std::size_t m = 1;;++m)
+    {
+        fail_counter c(m);
+        try
+        {
+            do_recover (path, c);
+            break;
+        }
+        catch (nudb::fail_error const&)
+        {
+        }
     }
 }
 }
 }
@@ -131,11 +152,10 @@ public:
 void
 run() override
 {
-    float lf = 0.75f;
+    float lf = 0.55f;
     test_recover (lf, 0);
     test_recover (lf, 10);
     test_recover (lf, 100);
-    test_recover (lf, 1000);
 }
 };
@@ -148,7 +168,8 @@ public:
 run() override
 {
     float lf = 0.90f;
-    test_recover (lf, 100000);
+    test_recover (lf, 1000);
+    test_recover (lf, 10000);
 }
 };


@@ -18,9 +18,7 @@
 //==============================================================================
 #include <BeastConfig.h>
-#include <beast/nudb.h>
 #include <beast/nudb/tests/common.h>
-#include <beast/nudb/tests/fail_file.h>
 #include <beast/module/core/diagnostic/UnitTestUtilities.h>
 #include <beast/module/core/files/File.h>
 #include <beast/random/xor_shift_engine.h>
@@ -55,15 +53,15 @@ public:
 auto const kp = path + ".key";
 auto const lp = path + ".log";
 Sequence seq;
-nudb::store db;
+test_api::store db;
 try
 {
-    expect (nudb::create (dp, kp, lp, appnum,
+    expect (test_api::create (dp, kp, lp, appnum,
         salt, sizeof(key_type), block_size,
         load_factor), "create");
     expect (db.open(dp, kp, lp,
         arena_alloc_size), "open");
-    storage s;
+    Storage s;
     // insert
     for (std::size_t i = 0; i < N; ++i)
     {
@@ -102,7 +100,9 @@ public:
         "insert 2");
 }
 db.close();
-auto const stats = nudb::verify (dp, kp);
+//auto const stats = test_api::verify(dp, kp);
+auto const stats = verify<test_api::hash_type>(
+    dp, kp, 1 * 1024 * 1024);
 expect (stats.hist[1] > 0, "no splits");
 print (log, stats);
 }
@@ -114,9 +114,9 @@ public:
 {
     fail (e.what());
 }
-expect (native_file::erase(dp));
-expect (native_file::erase(kp));
-expect (! native_file::erase(lp));
+expect (test_api::file_type::erase(dp));
+expect (test_api::file_type::erase(kp));
+expect (! test_api::file_type::erase(lp));
 }
 void


@@ -0,0 +1,73 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2014, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <beast/nudb/detail/varint.h>
#include <beast/unit_test/suite.h>
#include <array>
namespace beast {
namespace nudb {
namespace tests {
class varint_test : public unit_test::suite
{
public:
void
test_varints (std::vector<std::size_t> vv)
{
testcase("encode, decode");
for (auto const v : vv)
{
std::array<std::uint8_t,
detail::varint_traits<
std::size_t>::max> vi;
auto const n0 =
detail::write_varint(
vi.data(), v);
expect (n0 > 0, "write error");
std::size_t v1;
auto const n1 =
detail::read_varint(
vi.data(), n0, v1);
expect(n1 == n0, "read error");
expect(v == v1, "wrong value");
}
}
void
run() override
{
test_varints({
0, 1, 2,
126, 127, 128,
253, 254, 255,
16127, 16128, 16129,
0xff,
0xffff,
0xffffffff,
0xffffffffffffUL,
0xffffffffffffffffUL});
}
};
BEAST_DEFINE_TESTSUITE(varint,nudb,beast);
} // tests
} // nudb
} // beast
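The encoding itself never appears in this diff, so for reference here is a minimal sketch of the Google-style base-128 varint these tests exercise. The helper names are hypothetical; NuDB's own implementation lives in beast/nudb/detail/varint.h:

#include <cstddef>
#include <cstdint>

// Writes v seven bits at a time, least-significant group first; the
// high bit of a byte is set when more bytes follow. Returns the count
// of bytes written (out must hold up to 10 bytes for 64-bit values).
inline std::size_t
write_varint_sketch (std::uint8_t* out, std::uint64_t v)
{
    std::size_t n = 0;
    for (;;)
    {
        std::uint8_t b = v & 0x7f;
        v >>= 7;
        if (v != 0)
            b |= 0x80;              // continuation bit
        out[n++] = b;
        if (v == 0)
            return n;
    }
}

// Returns the number of bytes consumed, or 0 on truncated input.
inline std::size_t
read_varint_sketch (std::uint8_t const* in,
    std::size_t size, std::uint64_t& v)
{
    v = 0;
    unsigned shift = 0;
    for (std::size_t i = 0; i < size; ++i)
    {
        v |= std::uint64_t(in[i] & 0x7f) << shift;
        if ((in[i] & 0x80) == 0)
            return i + 1;           // high bit clear: last byte
        shift += 7;
    }
    return 0;                       // ran out of input
}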


@@ -20,11 +20,213 @@
 #include <beast/nudb/verify.h>
 #include <beast/nudb/tests/common.h>
 #include <beast/unit_test/suite.h>
+#include <beast/chrono/basic_seconds_clock.h>
+#include <chrono>
+#include <iomanip>
+#include <ostream>
 namespace beast {
 namespace nudb {
 namespace test {
namespace detail {
class save_stream_state
{
std::ostream& os_;
std::streamsize precision_;
std::ios::fmtflags flags_;
std::ios::char_type fill_;
public:
~save_stream_state()
{
os_.precision(precision_);
os_.flags(flags_);
os_.fill(fill_);
}
save_stream_state(save_stream_state const&) = delete;
save_stream_state& operator=(save_stream_state const&) = delete;
explicit save_stream_state(std::ostream& os)
: os_(os)
, precision_(os.precision())
, flags_(os.flags())
, fill_(os.fill())
{
}
};
template <class Rep, class Period>
std::ostream&
pretty_time(std::ostream& os, std::chrono::duration<Rep, Period> d)
{
save_stream_state _(os);
using namespace std::chrono;
if (d < microseconds{1})
{
// use nanoseconds
if (d < nanoseconds{100})
{
// use floating
using ns = duration<float, std::nano>;
os << std::fixed << std::setprecision(1) << ns(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<nanoseconds>(d).count();
}
os << "ns";
}
else if (d < milliseconds{1})
{
// use microseconds
if (d < microseconds{100})
{
// use floating
using ms = duration<float, std::micro>;
os << std::fixed << std::setprecision(1) << ms(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<microseconds>(d).count();
}
os << "us";
}
else if (d < seconds{1})
{
// use milliseconds
if (d < milliseconds{100})
{
// use floating
using ms = duration<float, std::milli>;
os << std::fixed << std::setprecision(1) << ms(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<milliseconds>(d).count();
}
os << "ms";
}
else if (d < minutes{1})
{
// use seconds
if (d < seconds{100})
{
// use floating
using s = duration<float>;
os << std::fixed << std::setprecision(1) << s(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<seconds>(d).count();
}
os << "s";
}
else
{
// use minutes
if (d < minutes{100})
{
// use floating
using m = duration<float, std::ratio<60>>;
os << std::fixed << std::setprecision(1) << m(d).count();
}
else
{
// use integral
os << std::chrono::duration_cast<minutes>(d).count();
}
os << "min";
}
return os;
}
template <class Rep, class Period>
inline
std::string
fmtdur(std::chrono::duration<Rep, Period> const& d)
{
std::stringstream ss;
pretty_time(ss, d);
return ss.str();
}
} // detail
//------------------------------------------------------------------------------
template <class Log>
class progress
{
private:
using clock_type =
beast::basic_seconds_clock<
std::chrono::steady_clock>;
Log& log_;
clock_type::time_point start_ = clock_type::now();
clock_type::time_point now_ = clock_type::now();
clock_type::time_point report_ = clock_type::now();
std::size_t prev_ = 0;
bool estimate_ = false;
public:
explicit
progress(Log& log)
: log_(log)
{
}
void
operator()(std::size_t w, std::size_t w1)
{
using namespace std::chrono;
auto const now = clock_type::now();
if (now == now_)
return;
now_ = now;
auto const elapsed = now - start_;
if (! estimate_)
{
if (elapsed < seconds(15))
return;
estimate_ = true;
}
else if (now - report_ <
std::chrono::seconds(60))
{
return;
}
auto const rate =
elapsed.count() / double(w);
clock_type::duration const remain(
static_cast<clock_type::duration::rep>(
(w1 - w) * rate));
log_ <<
"Remaining: " << detail::fmtdur(remain) <<
" (" << w << " of " << w1 <<
" in " << detail::fmtdur(elapsed) <<
", " << (w - prev_) <<
" in " << detail::fmtdur(now - report_) <<
")";
report_ = now;
prev_ = w;
}
void
finish()
{
log_ <<
"Total time: " << detail::fmtdur(
clock_type::now() - start_);
}
};
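A hypothetical caller, to show the intended cadence: the functor is invoked with (work done, total work) on every iteration and throttles its own output, printing a first estimate only after about 15 seconds and then one line per minute:

// Illustration only; Log is any type supporting operator<<.
template <class Log>
void
scan_with_progress (Log& log, std::size_t total)
{
    progress<Log> p(log);
    for (std::size_t i = 0; i < total; ++i)
    {
        // ... one unit of work ...
        p(i + 1, total);    // cheap to call every time
    }
    p.finish();             // prints the total elapsed time
}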
//------------------------------------------------------------------------------
class verify_test : public unit_test::suite class verify_test : public unit_test::suite
{ {
public: public:
@@ -34,12 +236,41 @@ public:
 {
     auto const dp = path + ".dat";
     auto const kp = path + ".key";
-    print(log, verify(dp, kp));
+    print(log, test_api::verify(dp, kp));
 }
 void
 run() override
 {
testcase(abort_on_fail) << "verify " << arg();
if (arg().empty())
return fail("missing unit test argument");
do_verify(arg());
pass();
}
};
class verify_fast_test : public unit_test::suite
{
public:
// Runs verify on the database and reports statistics
void
do_verify (nudb::path_type const& path)
{
auto const dp = path + ".dat";
auto const kp = path + ".key";
progress<decltype(log)> p(log);
// VFALCO HACK 32gb hardcoded!
auto const info = verify_fast<
test_api::hash_type>(
dp, kp, 34359738368, p);
print(log, info);
}
void
run() override
{
testcase(abort_on_fail) << "verify_fast " << arg();
     if (arg().empty())
         return fail("missing unit test argument");
     do_verify(arg());
@@ -48,8 +279,8 @@ public:
 };
 BEAST_DEFINE_TESTSUITE_MANUAL(verify,nudb,beast);
+BEAST_DEFINE_TESTSUITE_MANUAL(verify_fast,nudb,beast);
 } // test
 } // nudb
 } // beast


@@ -20,12 +20,10 @@
 #ifndef BEAST_NUDB_VERIFY_H_INCLUDED
 #define BEAST_NUDB_VERIFY_H_INCLUDED
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/file.h>
-#include <beast/nudb/mode.h>
 #include <beast/nudb/detail/bucket.h>
 #include <beast/nudb/detail/bulkio.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <algorithm>
 #include <cstddef>
@@ -39,8 +37,11 @@ struct verify_info
 {
     // Configured
     std::size_t version = 0;        // API version
-    std::size_t salt = 0;           // Salt or database ID
+    std::size_t uid = 0;            // UID
+    std::size_t appnum = 0;         // Appnum
     std::size_t key_size = 0;       // Size of a key in bytes
+    std::size_t salt = 0;           // Salt
+    std::size_t pepper = 0;         // Pepper
     std::size_t block_size = 0;     // Block size in bytes
     float load_factor = 0;          // Target bucket fill fraction
@@ -82,12 +83,12 @@ struct verify_info
     Iterates the key and data files, throws store_corrupt_error
     on broken invariants.
 */
-template <class Hasher = default_hash>
+template <class Hasher>
 verify_info
 verify (
     path_type const& dat_path,
     path_type const& key_path,
-    std::size_t read_size = 16 * 1024 * 1024)
+    std::size_t read_size)
 {
     using namespace detail;
     using File = native_file;
@@ -103,12 +104,16 @@ verify (
     dat_file_header dh;
     read (df, dh);
     read (kf, kh);
+    verify(dh);
     verify<Hasher>(dh, kh);
     verify_info info;
     info.version = dh.version;
-    info.salt = dh.salt;
+    info.uid = dh.uid;
+    info.appnum = dh.appnum;
     info.key_size = dh.key_size;
+    info.salt = kh.salt;
+    info.pepper = kh.pepper;
     info.block_size = kh.block_size;
     info.load_factor = kh.load_factor / 65536.f;
     info.capacity = kh.capacity;
@@ -117,21 +122,28 @@ verify (
     info.key_file_size = kf.actual_size();
     info.dat_file_size = df.actual_size();
-    buffer buf (kh.block_size);
-    bucket b (kh.key_size,
-        kh.block_size, buf.get());
+    // Data Record
+    auto const dh_len =
+        field<uint48_t>::size + // Size
+        kh.key_size;            // Key
+    std::size_t fetches = 0;
     // Iterate Data File
+    buffer buf (kh.block_size + dh_len);
+    bucket b (kh.block_size, buf.get());
+    std::uint8_t* pd = buf.get() + kh.block_size;
     {
         bulk_reader<File> r(df,
             dat_file_header::size,
             df.actual_size(), read_size);
         while (! r.eof())
         {
+            auto const offset = r.offset();
             // Data Record or Spill Record
-            std::size_t size;
             auto is = r.prepare(
                 field<uint48_t>::size); // Size
+            std::size_t size;
             read<uint48_t>(is, size);
             if (size > 0)
             {
@@ -144,11 +156,15 @@ verify (
                 std::uint8_t const* const data =
                     is.data(size);
                 (void)data;
+                auto const h = hash<Hasher>(
+                    key, kh.key_size, kh.salt);
                 // Check bucket and spills
                 try
                 {
-                    b.read (kf, (bucket_index<Hasher>(
-                        key, kh) + 1) * kh.block_size);
+                    auto const n = bucket_index(
+                        h, kh.buckets, kh.modulus);
+                    b.read (kf, (n + 1) * kh.block_size);
+                    ++fetches;
                 }
                 catch (file_short_read_error const&)
                 {
@@ -157,13 +173,24 @@ verify (
                 }
                 for (;;)
                 {
-                    if (b.find(key).second)
-                        break;
-                    if (b.spill() != 0)
+                    for (auto i = b.lower_bound(h);
+                        i < b.size(); ++i)
                     {
+                        auto const item = b[i];
+                        if (item.hash != h)
+                            break;
+                        if (item.offset == offset)
+                            goto found;
+                        ++fetches;
+                    }
+                    auto const spill = b.spill();
+                    if (! spill)
+                        throw store_corrupt_error(
+                            "orphaned value");
                     try
                     {
-                        b.read (df, b.spill());
+                        b.read (df, spill);
+                        ++fetches;
                     }
                     catch (file_short_read_error const&)
                     {
@@ -171,12 +198,7 @@ verify (
                             "short spill");
                     }
                 }
-                else
-                {
-                    throw store_corrupt_error(
-                        "orphaned value");
-                }
-            }
+            found:
                 // Update
                 ++info.value_count;
                 info.value_bytes += size;
@@ -196,17 +218,12 @@ verify (
                     field<uint48_t>::size + // Zero
                     field<uint16_t>::size + // Size
                     b.compact_size();       // Bucket
             }
         }
     }
     // Iterate Key File
     {
-        // Data Record (header)
-        buffer buf (
-            field<uint48_t>::size + // Size
-            kh.key_size);           // Key Size
         for (std::size_t n = 0; n < kh.buckets; ++n)
         {
             std::size_t nspill = 0;
@@ -219,8 +236,7 @@ verify (
                 auto const e = b[i];
                 try
                 {
-                    df.read (e.offset,
-                        buf.get(), buf.size());
+                    df.read (e.offset, pd, dh_len);
                 }
                 catch (file_short_read_error const&)
                 {
@@ -228,16 +244,19 @@ verify (
                         "missing value");
                 }
                 // Data Record
-                istream is(buf.get(), buf.size());
+                istream is(pd, dh_len);
                 std::size_t size;
                 read<uint48_t>(is, size);   // Size
+                void const* key =
+                    is.data(kh.key_size);   // Key
                 if (size != e.size)
                     throw store_corrupt_error(
                         "wrong size");
-                if (std::memcmp(is.data(kh.key_size),
-                    e.key, kh.key_size) != 0)
+                auto const h = hash<Hasher>(key,
+                    kh.key_size, kh.salt);
+                if (h != e.hash)
                     throw store_corrupt_error(
-                        "wrong key");
+                        "wrong hash");
             }
             if (! b.spill())
                 break;
@@ -266,12 +285,242 @@ verify (
     float sum = 0;
     for (int i = 0; i < info.hist.size(); ++i)
         sum += info.hist[i] * (i + 1);
-    info.avg_fetch = sum / info.buckets;
+    //info.avg_fetch = sum / info.buckets;
+    info.avg_fetch = float(fetches) / info.value_count;
     info.waste = (info.spill_bytes_tot - info.spill_bytes) /
         float(info.dat_file_size);
     info.overhead =
         float(info.key_file_size + info.dat_file_size) /
-        (info.value_bytes + info.key_count * info.key_size) - 1;
+        (
+        info.value_bytes +
+        info.key_count *
+            (info.key_size +
+            // Data Record
+            field<uint48_t>::size) // Size
+        ) - 1;
+    info.actual_load = info.key_count / float(
+        info.capacity * info.buckets);
+    return info;
+}
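Read together, the updated statistics are: avg_fetch = (bucket reads performed) / (values found), a direct estimate of the average I/O cost per lookup, and overhead = (key file size + data file size) / (value bytes + key_count * (key_size + 6)) - 1, where the 6 bytes account for the uint48 size field now counted as part of each data record.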
/** Verify consistency of the key and data files.
Effects:
Opens the key and data files in read-only mode.
Throws file_error if a file can't be opened.
Iterates the key and data files, throws store_corrupt_error
on broken invariants.
This uses a different algorithm that depends on allocating
a large buffer.
*/
template <class Hasher, class Progress>
verify_info
verify_fast (
path_type const& dat_path,
path_type const& key_path,
std::size_t buffer_size,
Progress&& progress)
{
using namespace detail;
using File = native_file;
File df;
File kf;
if (! df.open (file_mode::scan, dat_path))
throw store_corrupt_error(
"no data file");
if (! kf.open (file_mode::read, key_path))
throw store_corrupt_error(
"no key file");
key_file_header kh;
dat_file_header dh;
read (df, dh);
read (kf, kh);
verify(dh);
verify<Hasher>(dh, kh);
verify_info info;
info.version = dh.version;
info.uid = dh.uid;
info.appnum = dh.appnum;
info.key_size = dh.key_size;
info.salt = kh.salt;
info.pepper = kh.pepper;
info.block_size = kh.block_size;
info.load_factor = kh.load_factor / 65536.f;
info.capacity = kh.capacity;
info.buckets = kh.buckets;
info.bucket_size = kh.bucket_size;
info.key_file_size = kf.actual_size();
info.dat_file_size = df.actual_size();
std::size_t fetches = 0;
// Counts unverified keys per bucket
std::unique_ptr<std::uint32_t[]> nkeys(
new std::uint32_t[kh.buckets]);
// Verify contiguous sequential sections of the
// key file using multiple passes over the data.
//
auto const buckets = std::max<std::size_t>(1,
buffer_size / kh.block_size);
buffer buf((buckets + 1) * kh.block_size);
bucket tmp(kh.block_size, buf.get() +
buckets * kh.block_size);
std::size_t const passes =
(kh.buckets + buckets - 1) / buckets;
auto const df_size = df.actual_size();
std::size_t const work = passes * df_size;
std::size_t npass = 0;
for (std::size_t b0 = 0; b0 < kh.buckets;
b0 += buckets)
{
auto const b1 = std::min(
b0 + buckets, kh.buckets);
// Buffered range is [b0, b1)
auto const bn = b1 - b0;
kf.read((b0 + 1) * kh.block_size,
buf.get(), bn * kh.block_size);
// Count keys in buckets
for (std::size_t i = b0 ; i < b1; ++i)
{
bucket b(kh.block_size, buf.get() +
(i - b0) * kh.block_size);
nkeys[i] = b.size();
std::size_t nspill = 0;
auto spill = b.spill();
while (spill != 0)
{
tmp.read(df, spill);
nkeys[i] += tmp.size();
spill = tmp.spill();
++nspill;
++info.spill_count;
info.spill_bytes +=
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
tmp.compact_size(); // SpillBucket
}
if (nspill >= info.hist.size())
nspill = info.hist.size() - 1;
++info.hist[nspill];
info.key_count += nkeys[i];
}
// Iterate Data File
bulk_reader<File> r(df,
dat_file_header::size, df_size,
64 * 1024 * 1024);
while (! r.eof())
{
auto const offset = r.offset();
progress(npass * df_size + offset, work);
// Data Record or Spill Record
auto is = r.prepare(
field<uint48_t>::size); // Size
std::size_t size;
read<uint48_t>(is, size);
if (size > 0)
{
// Data Record
is = r.prepare(
kh.key_size + // Key
size); // Data
std::uint8_t const* const key =
is.data(kh.key_size);
std::uint8_t const* const data =
is.data(size);
(void)data;
auto const h = hash<Hasher>(
key, kh.key_size, kh.salt);
auto const n = bucket_index(
h, kh.buckets, kh.modulus);
if (n < b0 || n >= b1)
continue;
// Check bucket and spills
bucket b (kh.block_size, buf.get() +
(n - b0) * kh.block_size);
++fetches;
for (;;)
{
for (auto i = b.lower_bound(h);
i < b.size(); ++i)
{
auto const item = b[i];
if (item.hash != h)
break;
if (item.offset == offset)
goto found;
++fetches;
}
auto const spill = b.spill();
if (! spill)
throw store_corrupt_error(
"orphaned value");
b = tmp;
try
{
b.read (df, spill);
++fetches;
}
catch (file_short_read_error const&)
{
throw store_corrupt_error(
"short spill");
}
}
found:
// Update
++info.value_count;
info.value_bytes += size;
if (nkeys[n]-- == 0)
throw store_corrupt_error(
"duplicate value");
}
else
{
// Spill Record
is = r.prepare(
field<std::uint16_t>::size);
read<std::uint16_t>(is, size); // Size
if (size != kh.bucket_size)
throw store_corrupt_error(
"bad spill size");
tmp.read(r); // Bucket
if (b0 == 0)
{
++info.spill_count_tot;
info.spill_bytes_tot +=
field<uint48_t>::size + // Zero
field<uint16_t>::size + // Size
tmp.compact_size(); // Bucket
}
}
}
++npass;
}
// Make sure every key in every bucket was visited
for (std::size_t i = 0;
i < kh.buckets; ++i)
if (nkeys[i] != 0)
throw store_corrupt_error(
"orphan value");
float sum = 0;
for (int i = 0; i < info.hist.size(); ++i)
sum += info.hist[i] * (i + 1);
//info.avg_fetch = sum / info.buckets;
info.avg_fetch = float(fetches) / info.value_count;
info.waste = (info.spill_bytes_tot - info.spill_bytes) /
float(info.dat_file_size);
info.overhead =
float(info.key_file_size + info.dat_file_size) /
(
info.value_bytes +
info.key_count *
(info.key_size +
// Data Record
field<uint48_t>::size) // Size
) - 1;
     info.actual_load = info.key_count / float(
         info.capacity * info.buckets);
     return info;
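For illustration, a hypothetical caller of the two verification entry points; the paths, the xxhasher choice, and the 1 GB buffer are placeholders, and path_type is assumed to be constructible from a string literal:

#include <beast/nudb/verify.h>
#include <beast/hash/xxhasher.h>
#include <iostream>

void
check_db()
{
    using namespace beast::nudb;
    // Full check: reads one key-file bucket per value.
    auto const info1 = verify<beast::xxhasher>(
        "db.dat", "db.key", 16 * 1024 * 1024);
    // Fast check: several sequential passes over the data file,
    // holding as many key-file buckets in memory as the buffer allows.
    auto const progress =
        [](std::size_t work, std::size_t total)
        {
            std::cerr << work << " of " << total << "\n";
        };
    auto const info2 = verify_fast<beast::xxhasher>(
        "db.dat", "db.key", 1024 * 1024 * 1024, progress);
    (void)info1; (void)info2;
}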


@@ -20,11 +20,10 @@
 #ifndef BEAST_NUDB_VISIT_H_INCLUDED
 #define BEAST_NUDB_VISIT_H_INCLUDED
-#include <beast/nudb/error.h>
+#include <beast/nudb/common.h>
 #include <beast/nudb/file.h>
-#include <beast/nudb/mode.h>
+#include <beast/nudb/detail/buffer.h>
 #include <beast/nudb/detail/bulkio.h>
-#include <beast/nudb/detail/config.h>
 #include <beast/nudb/detail/format.h>
 #include <algorithm>
 #include <cstddef>
@@ -44,12 +43,12 @@ namespace nudb {
     @return `true` if the visit completed
     This only requires the data file.
 */
-template <class Function>
+template <class Codec, class Function>
 bool
 visit(
     path_type const& path,
-    Function f,
-    std::size_t read_size = 16 * 1024 * 1024)
+    std::size_t read_size,
+    Function&& f)
 {
     using namespace detail;
     using File = native_file;
@@ -57,11 +56,13 @@ visit(
     df.open (file_mode::scan, path);
     dat_file_header dh;
     read (df, dh);
-    verify (dh);
+    verify<Codec> (dh);
+    Codec codec;
     // Iterate Data File
     bulk_reader<File> r(
         df, dat_file_header::size,
         df.actual_size(), read_size);
+    buffer buf;
     try
     {
         while (! r.eof())
@@ -79,10 +80,10 @@ visit(
                     size);       // Data
                 std::uint8_t const* const key =
                     is.data(dh.key_size);
-                std::uint8_t const* const data =
-                    is.data(size);
+                auto const result = codec.decompress(
+                    is.data(size), size, buf);
                 if (! f(key, dh.key_size,
-                    data, size))
+                    result.first, result.second))
                     return false;
             }
             else
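A hypothetical use of the new signature: the Codec template argument means values reach the callback already decompressed, so callers no longer see the on-disk representation. Identifiers below other than visit and identity_codec are placeholders:

#include <beast/nudb/identity_codec.h>
#include <beast/nudb/visit.h>
#include <cstddef>

std::size_t
count_values (beast::nudb::path_type const& dat_path)
{
    std::size_t count = 0;
    beast::nudb::visit<beast::nudb::identity_codec>(
        dat_path, 16 * 1024 * 1024,
        [&count](void const* key, std::size_t key_size,
                 void const* data, std::size_t size)
        {
            (void)key; (void)key_size;
            (void)data; (void)size;
            ++count;            // inspect the value here
            return true;        // returning false stops the scan
        });
    return count;
}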


@@ -20,9 +20,7 @@
 #ifndef BEAST_NUDB_DETAIL_WIN32_FILE_H_INCLUDED
 #define BEAST_NUDB_DETAIL_WIN32_FILE_H_INCLUDED
-#include <beast/nudb/error.h>
-#include <beast/nudb/mode.h>
-#include <beast/nudb/detail/config.h>
+#include <beast/nudb/common.h>
 #include <cassert>
 #include <string>
@@ -320,6 +318,8 @@ template <class _>
 void
 win32_file<_>::read (std::size_t offset,
     void* buffer, std::size_t bytes)
+{
+    while(bytes > 0)
     {
         DWORD bytesRead;
         LARGE_INTEGER li;
@@ -338,14 +338,21 @@ win32_file<_>::read (std::size_t offset,
                 "read file", dwError);
             throw file_short_read_error();
         }
-        if (bytesRead != bytes)
+        if (bytesRead == 0)
             throw file_short_read_error();
+        offset += bytesRead;
+        bytes -= bytesRead;
+        buffer = reinterpret_cast<char*>(
+            buffer) + bytesRead;
+    }
 }
 template <class _>
 void
 win32_file<_>::write (std::size_t offset,
     void const* buffer, std::size_t bytes)
+{
+    while(bytes > 0)
     {
         LARGE_INTEGER li;
         li.QuadPart = static_cast<LONGLONG>(offset);
@@ -359,8 +366,14 @@ win32_file<_>::write (std::size_t offset,
         if (! bSuccess)
             throw file_win32_error(
                 "write file");
-        if (bytesWritten != bytes)
+        if (bytesWritten == 0)
             throw file_short_write_error();
+        offset += bytesWritten;
+        bytes -= bytesWritten;
+        buffer = reinterpret_cast<
+            char const*>(buffer) +
+            bytesWritten;
+    }
 }
 template <class _>
 void

@@ -24,7 +24,10 @@
 #include <ripple/nodestore/impl/DecodedBlob.h>
 #include <ripple/nodestore/impl/EncodedBlob.h>
 #include <beast/nudb.h>
+#include <beast/nudb/detail/bucket.h> // remove asap
+#include <beast/nudb/identity_codec.h>
 #include <beast/nudb/visit.h>
+#include <beast/hash/xxhasher.h>
 #include <snappy.h>
 #include <boost/filesystem.hpp>
 #include <cassert>
@@ -61,10 +64,13 @@ public:
     currentType = typeTwo
 };
+using api = beast::nudb::api<
+    beast::xxhasher, beast::nudb::identity_codec>;
 beast::Journal journal_;
 size_t const keyBytes_;
 std::string const name_;
-beast::nudb::store db_;
+api::store db_;
 std::atomic <bool> deletePath_;
 Scheduler& scheduler_;
@@ -85,7 +91,7 @@ public:
 auto const kp = (folder / "nudb.key").string ();
 auto const lp = (folder / "nudb.log").string ();
 using beast::nudb::make_salt;
-beast::nudb::create (dp, kp, lp,
+api::create (dp, kp, lp,
     currentType, make_salt(), keyBytes,
     beast::nudb::block_size(kp),
     0.50);
@@ -200,22 +206,24 @@ public:
 fetch1 (void const* key,
     std::shared_ptr <NodeObject>* pno)
 {
+    Status status;
     pno->reset();
-    std::size_t bytes;
-    std::unique_ptr <std::uint8_t[]> data;
     if (! db_.fetch (key,
-        [&data, &bytes](std::size_t n)
+        [key, pno, &status](void const* data, std::size_t size)
         {
-            bytes = n;
-            data.reset(new std::uint8_t[bytes]);
-            return data.get();
-        }))
-        return notFound;
-    DecodedBlob decoded (key, data.get(), bytes);
+            DecodedBlob decoded (key, data, size);
             if (! decoded.wasOk ())
-                return dataCorrupt;
+            {
+                status = dataCorrupt;
+                return;
+            }
             *pno = decoded.createObject();
-            return ok;
+            status = ok;
+        }))
+    {
+        return notFound;
+    }
+    return status;
 }
 void
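The rewrite above reflects the new fetch signature: instead of requesting a buffer from the caller (the old BufferFactory), the store invokes a handler with a pointer to the value, already decompressed by the codec. A minimal hedged sketch of a caller; the value is only valid for the duration of the callback, so anything kept must be copied:

// Illustration only: db is a store instantiated via the api<> alias.
bool
lookup_size (api::store& db, void const* key, std::size_t& size_out)
{
    return db.fetch (key,
        [&size_out](void const* data, std::size_t size)
        {
            (void)data;
            size_out = size;    // copy out what is needed
        });
}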
@@ -236,31 +244,35 @@ public:
 fetch2 (void const* key,
     std::shared_ptr <NodeObject>* pno)
 {
+    Status status;
     pno->reset();
-    std::size_t actual;
-    std::unique_ptr <char[]> compressed;
     if (! db_.fetch (key,
-        [&](std::size_t n)
+        [&](void const* data, std::size_t size)
         {
-            actual = n;
-            compressed.reset(
-                new char[n]);
-            return compressed.get();
-        }))
-        return notFound;
-    std::size_t size;
+            std::size_t actual;
             if (! snappy::GetUncompressedLength(
-                (char const*)compressed.get(),
-                actual, &size))
-                return dataCorrupt;
-    std::unique_ptr <char[]> data (new char[size]);
-    snappy::RawUncompress (compressed.get(),
-        actual, data.get());
-    DecodedBlob decoded (key, data.get(), size);
+                (char const*)data, size, &actual))
+            {
+                status = dataCorrupt;
+                return;
+            }
+            std::unique_ptr <char[]> buf (new char[actual]);
+            snappy::RawUncompress (
+                (char const*)data, size, buf.get());
+            DecodedBlob decoded (key, buf.get(), actual);
             if (! decoded.wasOk ())
-                return dataCorrupt;
+            {
+                status = dataCorrupt;
+                return;
+            }
             *pno = decoded.createObject();
-            return ok;
+            status = ok;
+        }))
+    {
+        return notFound;
+    }
+    return status;
 }
 void
@@ -342,7 +354,7 @@ public:
 auto const lp = db_.log_path();
 auto const appnum = db_.appnum();
 db_.close();
-beast::nudb::visit (dp,
+api::visit (dp,
     [&](
         void const* key, std::size_t key_bytes,
         void const* data, std::size_t size)
@@ -399,7 +411,7 @@ public:
 auto const kp = db_.key_path();
 auto const lp = db_.log_path();
 db_.close();
-beast::nudb::verify (dp, kp);
+api::verify (dp, kp);
 db_.open (dp, kp, lp,
     arena_alloc_size);
 }