// // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #ifndef NUDB_VERIFY_HPP #define NUDB_VERIFY_HPP #include #include #include #include #include #include #include #include #include namespace nudb { /// Describes database statistics calculated by @ref verify. struct verify_info { /** Indicates the verify algorithm used. @li @b 0 Normal algorithm @li @b 1 Fast algorith */ int algorithm; // 0 = normal, 1 = fast /// The path to the data file path_type dat_path; /// The path to the key file path_type key_path; /// The API version used to create the database std::size_t version = 0; /// The unique identifier std::uint64_t uid = 0; /// The application-defined constant std::uint64_t appnum = 0; /// The size of each key, in bytes nsize_t key_size = 0; /// The salt used in the key file std::uint64_t salt = 0; /// The salt fingerprint std::uint64_t pepper = 0; /// The block size used in the key file nsize_t block_size = 0; /// The target load factor used in the key file float load_factor = 0; /// The maximum number of keys each bucket can hold nkey_t capacity = 0; /// The number of buckets in the key file nbuck_t buckets = 0; /// The size of a bucket in bytes nsize_t bucket_size = 0; /// The size of the key file noff_t key_file_size = 0; /// The size of the data file noff_t dat_file_size = 0; /// The number of keys found std::uint64_t key_count = 0; /// The number of values found std::uint64_t value_count = 0; /// The total number of bytes occupied by values std::uint64_t value_bytes = 0; /// The number of spill records in use std::uint64_t spill_count = 0; /// The total number of spill records std::uint64_t spill_count_tot = 0; /// The number of bytes occupied by spill records in use std::uint64_t spill_bytes = 0; /// The number of bytes occupied by all spill records std::uint64_t spill_bytes_tot = 0; /// Average number of key file reads per fetch float avg_fetch = 0; /// The fraction of the data file that is wasted float waste = 0; /// The data amplification ratio float overhead = 0; /// The measured bucket load fraction float actual_load = 0; /// A histogram of the number of buckets having N spill records std::array hist; /// Default constructor verify_info() { hist.fill(0); } }; /** Verify consistency of the key and data files. This function opens the key and data files, and performs the following checks on the contents: @li Data file header validity @li Key file header validity @li Data and key file header agreements @li Check that each value is contained in a bucket @li Check that each bucket item reflects a value @li Ensure no values with duplicate keys Undefined behavior results when verifying a database that still has a log file. Use @ref recover on such databases first. This function selects one of two algorithms to use, the normal version, and a faster version that can take advantage of a buffer of sufficient size. Depending on the value of the bufferSize argument, the appropriate algorithm is chosen. A good value of bufferSize is one that is a large fraction of the key file size. For example, 20% of the size of the key file. Larger is better, with the highest usable value depending on the size of the key file. If presented with a buffer size that is too large to be of extra use, the fast algorithm will simply allocate what it needs. @par Template Parameters @tparam Hasher The hash function to use. This type must meet the requirements of @b HashFunction. The hash function must be the same as that used to create the database, or else an error is returned. @param info A structure which will be default constructed inside this function, and filled in if the operation completes successfully. If an error is indicated, the contents of this variable are undefined. @param dat_path The path to the data file. @param key_path The path to the key file. @param bufferSize The number of bytes to allocate for the buffer. If this number is too small, or zero, a slower algorithm will be used that does not require a buffer. @param progress A function which will be called periodically as the algorithm proceeds. The equivalent signature of the progress function must be: @code void progress( std::uint64_t amount, // Amount of work done so far std::uint64_t total // Total amount of work to do ); @endcode @param ec Set to the error, if any occurred. */ template void verify( verify_info& info, path_type const& dat_path, path_type const& key_path, std::size_t bufferSize, Progress&& progress, error_code& ec); } // nudb #include #endif