mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-19 10:35:50 +00:00
201 lines
5.3 KiB
C++
201 lines
5.3 KiB
C++
//
|
|
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
//
|
|
|
|
#ifndef NUDB_VERIFY_HPP
|
|
#define NUDB_VERIFY_HPP
|
|
|
|
#include <nudb/file.hpp>
|
|
#include <nudb/type_traits.hpp>
|
|
#include <nudb/detail/bucket.hpp>
|
|
#include <nudb/detail/bulkio.hpp>
|
|
#include <nudb/detail/format.hpp>
|
|
#include <algorithm>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <string>
|
|
|
|
namespace nudb {
|
|
|
|
/// Describes database statistics calculated by @ref verify.
|
|
struct verify_info
|
|
{
|
|
/** Indicates the verify algorithm used.
|
|
|
|
@li @b 0 Normal algorithm
|
|
@li @b 1 Fast algorith
|
|
*/
|
|
int algorithm; // 0 = normal, 1 = fast
|
|
|
|
/// The path to the data file
|
|
path_type dat_path;
|
|
|
|
/// The path to the key file
|
|
path_type key_path;
|
|
|
|
/// The API version used to create the database
|
|
std::size_t version = 0;
|
|
|
|
/// The unique identifier
|
|
std::uint64_t uid = 0;
|
|
|
|
/// The application-defined constant
|
|
std::uint64_t appnum = 0;
|
|
|
|
/// The size of each key, in bytes
|
|
nsize_t key_size = 0;
|
|
|
|
/// The salt used in the key file
|
|
std::uint64_t salt = 0;
|
|
|
|
/// The salt fingerprint
|
|
std::uint64_t pepper = 0;
|
|
|
|
/// The block size used in the key file
|
|
nsize_t block_size = 0;
|
|
|
|
/// The target load factor used in the key file
|
|
float load_factor = 0;
|
|
|
|
/// The maximum number of keys each bucket can hold
|
|
nkey_t capacity = 0;
|
|
|
|
/// The number of buckets in the key file
|
|
nbuck_t buckets = 0;
|
|
|
|
/// The size of a bucket in bytes
|
|
nsize_t bucket_size = 0;
|
|
|
|
/// The size of the key file
|
|
noff_t key_file_size = 0;
|
|
|
|
/// The size of the data file
|
|
noff_t dat_file_size = 0;
|
|
|
|
/// The number of keys found
|
|
std::uint64_t key_count = 0;
|
|
|
|
/// The number of values found
|
|
std::uint64_t value_count = 0;
|
|
|
|
/// The total number of bytes occupied by values
|
|
std::uint64_t value_bytes = 0;
|
|
|
|
/// The number of spill records in use
|
|
std::uint64_t spill_count = 0;
|
|
|
|
/// The total number of spill records
|
|
std::uint64_t spill_count_tot = 0;
|
|
|
|
/// The number of bytes occupied by spill records in use
|
|
std::uint64_t spill_bytes = 0;
|
|
|
|
/// The number of bytes occupied by all spill records
|
|
std::uint64_t spill_bytes_tot = 0;
|
|
|
|
/// Average number of key file reads per fetch
|
|
float avg_fetch = 0;
|
|
|
|
/// The fraction of the data file that is wasted
|
|
float waste = 0;
|
|
|
|
/// The data amplification ratio
|
|
float overhead = 0;
|
|
|
|
/// The measured bucket load fraction
|
|
float actual_load = 0;
|
|
|
|
/// A histogram of the number of buckets having N spill records
|
|
std::array<nbuck_t, 10> hist;
|
|
|
|
/// Default constructor
|
|
verify_info()
|
|
{
|
|
hist.fill(0);
|
|
}
|
|
};
|
|
|
|
/** Verify consistency of the key and data files.
|
|
|
|
This function opens the key and data files, and
|
|
performs the following checks on the contents:
|
|
|
|
@li Data file header validity
|
|
|
|
@li Key file header validity
|
|
|
|
@li Data and key file header agreements
|
|
|
|
@li Check that each value is contained in a bucket
|
|
|
|
@li Check that each bucket item reflects a value
|
|
|
|
@li Ensure no values with duplicate keys
|
|
|
|
Undefined behavior results when verifying a database
|
|
that still has a log file. Use @ref recover on such
|
|
databases first.
|
|
|
|
This function selects one of two algorithms to use, the
|
|
normal version, and a faster version that can take advantage
|
|
of a buffer of sufficient size. Depending on the value of
|
|
the bufferSize argument, the appropriate algorithm is chosen.
|
|
|
|
A good value of bufferSize is one that is a large fraction
|
|
of the key file size. For example, 20% of the size of the
|
|
key file. Larger is better, with the highest usable value
|
|
depending on the size of the key file. If presented with
|
|
a buffer size that is too large to be of extra use, the
|
|
fast algorithm will simply allocate what it needs.
|
|
|
|
@par Template Parameters
|
|
|
|
@tparam Hasher The hash function to use. This type must
|
|
meet the requirements of @b HashFunction. The hash function
|
|
must be the same as that used to create the database, or
|
|
else an error is returned.
|
|
|
|
@param info A structure which will be default constructed
|
|
inside this function, and filled in if the operation completes
|
|
successfully. If an error is indicated, the contents of this
|
|
variable are undefined.
|
|
|
|
@param dat_path The path to the data file.
|
|
|
|
@param key_path The path to the key file.
|
|
|
|
@param bufferSize The number of bytes to allocate for the buffer.
|
|
If this number is too small, or zero, a slower algorithm will be
|
|
used that does not require a buffer.
|
|
|
|
@param progress A function which will be called periodically
|
|
as the algorithm proceeds. The equivalent signature of the
|
|
progress function must be:
|
|
@code
|
|
void progress(
|
|
std::uint64_t amount, // Amount of work done so far
|
|
std::uint64_t total // Total amount of work to do
|
|
);
|
|
@endcode
|
|
|
|
@param ec Set to the error, if any occurred.
|
|
*/
|
|
template<class Hasher, class Progress>
|
|
void
|
|
verify(
|
|
verify_info& info,
|
|
path_type const& dat_path,
|
|
path_type const& key_path,
|
|
std::size_t bufferSize,
|
|
Progress&& progress,
|
|
error_code& ec);
|
|
|
|
} // nudb
|
|
|
|
#include <nudb/impl/verify.ipp>
|
|
|
|
#endif
|