mirror of
https://github.com/Xahau/xahaud.git
synced 2025-12-06 17:27:52 +00:00
Merge commit '79159ffd87bf86e92ab5af6fffd5cc93c205a630' as 'src/nudb'
This commit is contained in:
17
src/nudb/tools/CMakeLists.txt
Normal file
17
src/nudb/tools/CMakeLists.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
# Part of nudb
|
||||
|
||||
GroupSources (include/nudb nudb)
|
||||
GroupSources (extras/nudb extras)
|
||||
GroupSources (tools "/")
|
||||
|
||||
add_executable (nudb
|
||||
${NUDB_INCLUDES}
|
||||
${EXTRAS_INCLUDES}
|
||||
nudb.cpp
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
target_link_libraries (nudb ${Boost_LIBRARIES})
|
||||
else ()
|
||||
target_link_libraries (nudb ${Boost_LIBRARIES} rt Threads::Threads)
|
||||
endif ()
|
||||
12
src/nudb/tools/Jamfile
Normal file
12
src/nudb/tools/Jamfile
Normal file
@@ -0,0 +1,12 @@
|
||||
#
|
||||
# Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
|
||||
#
|
||||
# Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
#
|
||||
|
||||
import os ;
|
||||
|
||||
exe nudb :
|
||||
nudb.cpp
|
||||
;
|
||||
514
src/nudb/tools/nudb.cpp
Normal file
514
src/nudb/tools/nudb.cpp
Normal file
@@ -0,0 +1,514 @@
|
||||
//
|
||||
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
//
|
||||
|
||||
#include <nudb/nudb.hpp>
|
||||
#include <nudb/util.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
namespace nudb {
|
||||
|
||||
namespace detail {
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, dat_file_header const h)
|
||||
{
|
||||
os <<
|
||||
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
|
||||
"version: " << h.version << "\n"
|
||||
"uid: " << fhex(h.uid) << "\n"
|
||||
"appnum: " << fhex(h.appnum) << "\n"
|
||||
"key_size: " << h.key_size << "\n"
|
||||
;
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, key_file_header const h)
|
||||
{
|
||||
os <<
|
||||
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
|
||||
"version: " << h.version << "\n"
|
||||
"uid: " << fhex(h.uid) << "\n"
|
||||
"appnum: " << fhex(h.appnum) << "\n"
|
||||
"key_size: " << h.key_size << "\n"
|
||||
"salt: " << fhex(h.salt) << "\n"
|
||||
"pepper: " << fhex(h.pepper) << "\n"
|
||||
"block_size: " << fdec(h.block_size) << "\n"
|
||||
;
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, log_file_header const h)
|
||||
{
|
||||
os << std::setfill('0') << std::internal << std::showbase <<
|
||||
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
|
||||
"version: " << h.version << "\n"
|
||||
"uid: " << fhex(h.uid) << "\n"
|
||||
"appnum: " << fhex(h.appnum) << "\n"
|
||||
"key_size: " << h.key_size << "\n"
|
||||
"salt: " << fhex(h.salt) << "\n"
|
||||
"pepper: " << fhex(h.pepper) << "\n"
|
||||
"block_size: " << fdec(h.block_size) << "\n"
|
||||
"key_file_size: " << fdec(h.key_file_size) << "\n"
|
||||
"dat_file_size: " << fdec(h.dat_file_size) << "\n"
|
||||
;
|
||||
return os;
|
||||
}
|
||||
|
||||
} // detail
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& os, verify_info const& info)
|
||||
{
|
||||
os <<
|
||||
"dat_path " << info.dat_path << "\n"
|
||||
"key_path " << info.key_path << "\n"
|
||||
"algorithm " <<(info.algorithm ? "fast" : "normal") << "\n"
|
||||
"avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" <<
|
||||
"waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" <<
|
||||
"overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" <<
|
||||
"actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" <<
|
||||
"version: " << fdec(info.version) << "\n" <<
|
||||
"uid: " << fhex(info.uid) << "\n" <<
|
||||
"appnum: " << fhex(info.appnum) << "\n" <<
|
||||
"key_size: " << fdec(info.key_size) << "\n" <<
|
||||
"salt: " << fhex(info.salt) << "\n" <<
|
||||
"pepper: " << fhex(info.pepper) << "\n" <<
|
||||
"block_size: " << fdec(info.block_size) << "\n" <<
|
||||
"bucket_size: " << fdec(info.bucket_size) << "\n" <<
|
||||
"load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" <<
|
||||
"capacity: " << fdec(info.capacity) << "\n" <<
|
||||
"buckets: " << fdec(info.buckets) << "\n" <<
|
||||
"key_count: " << fdec(info.key_count) << "\n" <<
|
||||
"value_count: " << fdec(info.value_count) << "\n" <<
|
||||
"value_bytes: " << fdec(info.value_bytes) << "\n" <<
|
||||
"spill_count: " << fdec(info.spill_count) << "\n" <<
|
||||
"spill_count_tot: " << fdec(info.spill_count_tot) << "\n" <<
|
||||
"spill_bytes: " << fdec(info.spill_bytes) << "\n" <<
|
||||
"spill_bytes_tot: " << fdec(info.spill_bytes_tot) << "\n" <<
|
||||
"key_file_size: " << fdec(info.key_file_size) << "\n" <<
|
||||
"dat_file_size: " << fdec(info.dat_file_size) << "\n" <<
|
||||
"hist: " << fhist(info.hist) << "\n"
|
||||
;
|
||||
return os;
|
||||
}
|
||||
|
||||
template<class Hasher>
|
||||
class admin_tool
|
||||
{
|
||||
int ac_ = 0;
|
||||
char const* const* av_ = nullptr;
|
||||
boost::program_options::options_description desc_;
|
||||
|
||||
public:
|
||||
admin_tool()
|
||||
: desc_("Options")
|
||||
{
|
||||
namespace po = boost::program_options;
|
||||
desc_.add_options()
|
||||
("buffer,b", po::value<std::size_t>(),
|
||||
"Set the buffer size in bytes (larger is faster).")
|
||||
("dat,d", po::value<std::string>(),
|
||||
"Path to data file.")
|
||||
("key,k", po::value<std::string>(),
|
||||
"Path to key file.")
|
||||
("log,l", po::value<std::string>(),
|
||||
"Path to log file.")
|
||||
("count,n", po::value<std::uint64_t>(),
|
||||
"The number of items in the data file.")
|
||||
("command", "Command to run.")
|
||||
;
|
||||
}
|
||||
|
||||
std::string
|
||||
progname() const
|
||||
{
|
||||
using namespace boost::filesystem;
|
||||
return path{av_[0]}.stem().string();
|
||||
}
|
||||
|
||||
std::string
|
||||
filename(std::string const& s)
|
||||
{
|
||||
using namespace boost::filesystem;
|
||||
return path{s}.filename().string();
|
||||
}
|
||||
|
||||
void
|
||||
help()
|
||||
{
|
||||
std::cout <<
|
||||
"usage: " << progname() << " <command> [file...] <options>\n";
|
||||
std::cout <<
|
||||
"\n"
|
||||
"Commands:\n"
|
||||
"\n"
|
||||
" help\n"
|
||||
"\n"
|
||||
" Print this help information.\n"
|
||||
"\n"
|
||||
" info <dat-path> [<key-path> [<log-path>]]\n"
|
||||
"\n"
|
||||
" Show metadata and header information for database files.\n"
|
||||
"\n"
|
||||
" recover <dat-path> <key-path> <log-path>\n"
|
||||
"\n"
|
||||
" Perform a database recovery. A recovery is necessary if a log\n"
|
||||
" file is present. Running commands on an unrecovered database\n"
|
||||
" may result in lost or corrupted data.\n"
|
||||
"\n"
|
||||
" rekey <dat-path] <key-path> <log-path> --count=<items> --buffer=<bytes>\n"
|
||||
"\n"
|
||||
" Generate the key file for a data file. The buffer option is\n"
|
||||
" required, larger buffers process faster. A buffer equal to\n"
|
||||
" the size of the key file processes the fastest. This command\n"
|
||||
" must be passed the count of items in the data file, which\n"
|
||||
" can be calculated with the 'visit' command.\n"
|
||||
"\n"
|
||||
" If the rekey is aborted before completion, the database must\n"
|
||||
" be subsequently restored by running the 'recover' command.\n"
|
||||
"\n"
|
||||
" verify <dat-path> <key-path> [--buffer=<bytes>]\n"
|
||||
"\n"
|
||||
" Verify the integrity of a database. The buffer option is\n"
|
||||
" optional, if omitted a slow algorithm is used. When a buffer\n"
|
||||
" size is provided, a fast algorithm is used with larger\n"
|
||||
" buffers resulting in bigger speedups. A buffer equal to the\n"
|
||||
" size of the key file provides the fastest speedup.\n"
|
||||
"\n"
|
||||
" visit <dat-path>\n"
|
||||
"\n"
|
||||
" Iterate a data file and show information, including the count\n"
|
||||
" of items in the file and a histogram of their log base2 size.\n"
|
||||
"\n"
|
||||
"Notes:\n"
|
||||
"\n"
|
||||
" Paths may be full or relative, and should include the extension.\n"
|
||||
" The recover algorithm should be invoked before running any\n"
|
||||
" operation which can modify the database.\n"
|
||||
"\n"
|
||||
;
|
||||
desc_.print(std::cout);
|
||||
};
|
||||
|
||||
int
|
||||
error(std::string const& why)
|
||||
{
|
||||
std::cerr <<
|
||||
progname() << ": " << why << ".\n"
|
||||
"Use '" << progname() << " help' for usage.\n";
|
||||
return EXIT_FAILURE;
|
||||
};
|
||||
|
||||
int
|
||||
operator()(int ac, char const* const* av)
|
||||
{
|
||||
namespace po = boost::program_options;
|
||||
|
||||
ac_ = ac;
|
||||
av_ = av;
|
||||
|
||||
try
|
||||
{
|
||||
po::positional_options_description pod;
|
||||
pod.add("command", 1);
|
||||
pod.add("dat", 1);
|
||||
pod.add("key", 1);
|
||||
pod.add("log", 1);
|
||||
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(ac, av)
|
||||
.options(desc_)
|
||||
.positional(pod)
|
||||
.run()
|
||||
,vm);
|
||||
po::notify(vm);
|
||||
|
||||
std::string cmd;
|
||||
|
||||
if(vm.count("command"))
|
||||
cmd = vm["command"].as<std::string>();
|
||||
|
||||
if(cmd == "help")
|
||||
{
|
||||
help();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
if(cmd == "info")
|
||||
return do_info(vm);
|
||||
|
||||
if(cmd == "recover")
|
||||
return do_recover(vm);
|
||||
|
||||
if(cmd == "rekey")
|
||||
return do_rekey(vm);
|
||||
|
||||
if(cmd == "verify")
|
||||
return do_verify(vm);
|
||||
|
||||
if(cmd == "visit")
|
||||
return do_visit(vm);
|
||||
|
||||
return error("Unknown command '" + cmd + "'");
|
||||
}
|
||||
catch(std::exception const& e)
|
||||
{
|
||||
return error(e.what());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int
|
||||
do_info(boost::program_options::variables_map const& vm)
|
||||
{
|
||||
if(! vm.count("dat") && ! vm.count("key") && ! vm.count("log"))
|
||||
return error("No files specified");
|
||||
if(vm.count("dat"))
|
||||
do_info(vm["dat"].as<std::string>());
|
||||
if(vm.count("key"))
|
||||
do_info(vm["key"].as<std::string>());
|
||||
if(vm.count("log"))
|
||||
do_info(vm["log"].as<std::string>());
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
do_info(path_type const& path)
|
||||
{
|
||||
error_code ec;
|
||||
auto const err =
|
||||
[&]
|
||||
{
|
||||
std::cout << path << ": " << ec.message() << "\n";
|
||||
};
|
||||
native_file f;
|
||||
f.open(file_mode::read, path, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
auto const size = f.size(ec);
|
||||
if(ec)
|
||||
return err();
|
||||
if(size < 8)
|
||||
{
|
||||
std::cout << "File " << path << " is too small to be a database file.\n";
|
||||
return;
|
||||
}
|
||||
std::array<char, 8> ta;
|
||||
f.read(0, ta.data(), ta.size(), ec);
|
||||
if(ec)
|
||||
return err();
|
||||
std::string ts{ta.data(), ta.size()};
|
||||
|
||||
if(ts == "nudb.dat")
|
||||
{
|
||||
detail::dat_file_header h;
|
||||
detail::read(f, h, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
f.close();
|
||||
std::cout <<
|
||||
"data file: " << path << "\n"
|
||||
"file size: " << fdec(size) << "\n" <<
|
||||
h << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
if(ts == "nudb.key")
|
||||
{
|
||||
detail::key_file_header h;
|
||||
detail::read(f, h, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
f.close();
|
||||
std::cout <<
|
||||
"key file: " << path << "\n"
|
||||
"file size: " << fdec(size) << "\n" <<
|
||||
h << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
if(ts == "nudb.log")
|
||||
{
|
||||
detail::log_file_header h;
|
||||
detail::read(f, h, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
f.close();
|
||||
std::cout <<
|
||||
"log file: " << path << "\n"
|
||||
"file size: " << fdec(size) << "\n" <<
|
||||
h << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << "File " << path << " has unknown type '" << ts << "'.\n";
|
||||
}
|
||||
|
||||
int
|
||||
do_recover(boost::program_options::variables_map const& vm)
|
||||
{
|
||||
if(! vm.count("dat") || ! vm.count("key") || ! vm.count("log"))
|
||||
return error("Missing file specifications");
|
||||
error_code ec;
|
||||
recover<xxhasher>(
|
||||
vm["dat"].as<std::string>(),
|
||||
vm["key"].as<std::string>(),
|
||||
vm["log"].as<std::string>(),
|
||||
ec);
|
||||
if(ec)
|
||||
{
|
||||
std::cerr << "recover: " << ec.message() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
do_rekey(boost::program_options::variables_map const& vm)
|
||||
{
|
||||
if(! vm.count("dat"))
|
||||
return error("Missing data file path");
|
||||
if(! vm.count("key"))
|
||||
return error("Missing key file path");
|
||||
if(! vm.count("log"))
|
||||
return error("Missing log file path");
|
||||
if(! vm.count("count"))
|
||||
return error("Missing item count");
|
||||
if(! vm.count("buffer"))
|
||||
return error("Missing buffer size");
|
||||
auto const dp = vm["dat"].as<std::string>();
|
||||
auto const kp = vm["key"].as<std::string>();
|
||||
auto const lp = vm["log"].as<std::string>();
|
||||
auto const itemCount = vm["count"].as<std::size_t>();
|
||||
auto const bufferSize = vm["buffer"].as<std::size_t>();
|
||||
error_code ec;
|
||||
progress p{std::cout};
|
||||
rekey<Hasher, native_file>(dp, kp, lp,
|
||||
block_size(kp), 0.5f, itemCount,
|
||||
bufferSize, ec, p);
|
||||
if(ec)
|
||||
{
|
||||
std::cerr << "rekey: " << ec.message() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
do_verify(boost::program_options::variables_map const& vm)
|
||||
{
|
||||
if(! vm.count("dat"))
|
||||
return error("Missing data file path");
|
||||
if(! vm.count("key"))
|
||||
return error("Missing key file path");
|
||||
|
||||
auto const bufferSize = vm.count("buffer") ?
|
||||
vm["buffer"].as<std::size_t>() : 0;
|
||||
auto const dp = vm["dat"].as<std::string>();
|
||||
auto const kp = vm.count("key") ?
|
||||
vm["key"].as<std::string>() : std::string{};
|
||||
|
||||
if(! vm.count("key"))
|
||||
{
|
||||
// todo
|
||||
std::cerr << "unimplemented: dat-only verify\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
error_code ec;
|
||||
progress p(std::cout);
|
||||
{
|
||||
verify_info info;
|
||||
verify<Hasher>(info, dp, kp, bufferSize, p, ec);
|
||||
if(! ec)
|
||||
std::cout << info;
|
||||
}
|
||||
if(ec)
|
||||
{
|
||||
std::cerr << "verify: " << ec.message() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
do_visit(boost::program_options::variables_map const& vm)
|
||||
{
|
||||
if(! vm.count("dat"))
|
||||
return error("Missing dat path");
|
||||
auto const path = vm["dat"].as<std::string>();
|
||||
error_code ec;
|
||||
auto const err =
|
||||
[&]
|
||||
{
|
||||
std::cout << path << ": " << ec.message() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
};
|
||||
{
|
||||
native_file f;
|
||||
f.open(file_mode::read, path, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
auto const fileSize = f.size(ec);
|
||||
if(ec)
|
||||
return err();
|
||||
detail::dat_file_header h;
|
||||
detail::read(f, h, ec);
|
||||
if(ec)
|
||||
return err();
|
||||
f.close();
|
||||
std::cout <<
|
||||
"data file: " << path << "\n"
|
||||
"file size: " << fdec(fileSize) << "\n" <<
|
||||
h;
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
std::uint64_t n = 0;
|
||||
std::array<std::uint64_t, 64> hist;
|
||||
hist.fill(0);
|
||||
progress p{std::cout};
|
||||
visit(path,
|
||||
[&](void const*, std::size_t,
|
||||
void const*, std::size_t data_size,
|
||||
error_code& ec)
|
||||
{
|
||||
++n;
|
||||
++hist[log2(data_size)];
|
||||
//std::this_thread::sleep_for(std::chrono::milliseconds{1});
|
||||
}, p, ec);
|
||||
if(! ec)
|
||||
std::cout <<
|
||||
"value_count " << fdec(n) << "\n" <<
|
||||
"sizes: " << fhist(hist) << "\n";
|
||||
if(ec)
|
||||
{
|
||||
std::cerr << "visit: " << ec.message() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
} // nudb
|
||||
|
||||
int
|
||||
main(int ac, char const* const* av)
|
||||
{
|
||||
using namespace nudb;
|
||||
admin_tool<xxhasher> t;
|
||||
auto const rv = t(ac, av);
|
||||
std::cout.flush();
|
||||
basic_seconds_clock_main_hook();
|
||||
return rv;
|
||||
}
|
||||
Reference in New Issue
Block a user