Files
rippled/tools/nudb.cpp
Vinnie Falco 79159ffd87 Squashed 'src/nudb/' content from commit 00adc6a
git-subtree-dir: src/nudb
git-subtree-split: 00adc6a4f16679a376f40c967f77dfa544c179c1
2016-09-29 19:24:12 -04:00

515 lines
16 KiB
C++

//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <nudb/nudb.hpp>
#include <nudb/util.hpp>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include <chrono>
#include <iomanip>
#include <iostream>
namespace nudb {
namespace detail {
std::ostream&
operator<<(std::ostream& os, dat_file_header const h)
{
os <<
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
"version: " << h.version << "\n"
"uid: " << fhex(h.uid) << "\n"
"appnum: " << fhex(h.appnum) << "\n"
"key_size: " << h.key_size << "\n"
;
return os;
}
std::ostream&
operator<<(std::ostream& os, key_file_header const h)
{
os <<
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
"version: " << h.version << "\n"
"uid: " << fhex(h.uid) << "\n"
"appnum: " << fhex(h.appnum) << "\n"
"key_size: " << h.key_size << "\n"
"salt: " << fhex(h.salt) << "\n"
"pepper: " << fhex(h.pepper) << "\n"
"block_size: " << fdec(h.block_size) << "\n"
;
return os;
}
std::ostream&
operator<<(std::ostream& os, log_file_header const h)
{
os << std::setfill('0') << std::internal << std::showbase <<
"type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
"version: " << h.version << "\n"
"uid: " << fhex(h.uid) << "\n"
"appnum: " << fhex(h.appnum) << "\n"
"key_size: " << h.key_size << "\n"
"salt: " << fhex(h.salt) << "\n"
"pepper: " << fhex(h.pepper) << "\n"
"block_size: " << fdec(h.block_size) << "\n"
"key_file_size: " << fdec(h.key_file_size) << "\n"
"dat_file_size: " << fdec(h.dat_file_size) << "\n"
;
return os;
}
} // detail
std::ostream&
operator<<(std::ostream& os, verify_info const& info)
{
os <<
"dat_path " << info.dat_path << "\n"
"key_path " << info.key_path << "\n"
"algorithm " <<(info.algorithm ? "fast" : "normal") << "\n"
"avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" <<
"waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" <<
"overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" <<
"actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" <<
"version: " << fdec(info.version) << "\n" <<
"uid: " << fhex(info.uid) << "\n" <<
"appnum: " << fhex(info.appnum) << "\n" <<
"key_size: " << fdec(info.key_size) << "\n" <<
"salt: " << fhex(info.salt) << "\n" <<
"pepper: " << fhex(info.pepper) << "\n" <<
"block_size: " << fdec(info.block_size) << "\n" <<
"bucket_size: " << fdec(info.bucket_size) << "\n" <<
"load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" <<
"capacity: " << fdec(info.capacity) << "\n" <<
"buckets: " << fdec(info.buckets) << "\n" <<
"key_count: " << fdec(info.key_count) << "\n" <<
"value_count: " << fdec(info.value_count) << "\n" <<
"value_bytes: " << fdec(info.value_bytes) << "\n" <<
"spill_count: " << fdec(info.spill_count) << "\n" <<
"spill_count_tot: " << fdec(info.spill_count_tot) << "\n" <<
"spill_bytes: " << fdec(info.spill_bytes) << "\n" <<
"spill_bytes_tot: " << fdec(info.spill_bytes_tot) << "\n" <<
"key_file_size: " << fdec(info.key_file_size) << "\n" <<
"dat_file_size: " << fdec(info.dat_file_size) << "\n" <<
"hist: " << fhist(info.hist) << "\n"
;
return os;
}
template<class Hasher>
class admin_tool
{
int ac_ = 0;
char const* const* av_ = nullptr;
boost::program_options::options_description desc_;
public:
admin_tool()
: desc_("Options")
{
namespace po = boost::program_options;
desc_.add_options()
("buffer,b", po::value<std::size_t>(),
"Set the buffer size in bytes (larger is faster).")
("dat,d", po::value<std::string>(),
"Path to data file.")
("key,k", po::value<std::string>(),
"Path to key file.")
("log,l", po::value<std::string>(),
"Path to log file.")
("count,n", po::value<std::uint64_t>(),
"The number of items in the data file.")
("command", "Command to run.")
;
}
std::string
progname() const
{
using namespace boost::filesystem;
return path{av_[0]}.stem().string();
}
std::string
filename(std::string const& s)
{
using namespace boost::filesystem;
return path{s}.filename().string();
}
void
help()
{
std::cout <<
"usage: " << progname() << " <command> [file...] <options>\n";
std::cout <<
"\n"
"Commands:\n"
"\n"
" help\n"
"\n"
" Print this help information.\n"
"\n"
" info <dat-path> [<key-path> [<log-path>]]\n"
"\n"
" Show metadata and header information for database files.\n"
"\n"
" recover <dat-path> <key-path> <log-path>\n"
"\n"
" Perform a database recovery. A recovery is necessary if a log\n"
" file is present. Running commands on an unrecovered database\n"
" may result in lost or corrupted data.\n"
"\n"
" rekey <dat-path] <key-path> <log-path> --count=<items> --buffer=<bytes>\n"
"\n"
" Generate the key file for a data file. The buffer option is\n"
" required, larger buffers process faster. A buffer equal to\n"
" the size of the key file processes the fastest. This command\n"
" must be passed the count of items in the data file, which\n"
" can be calculated with the 'visit' command.\n"
"\n"
" If the rekey is aborted before completion, the database must\n"
" be subsequently restored by running the 'recover' command.\n"
"\n"
" verify <dat-path> <key-path> [--buffer=<bytes>]\n"
"\n"
" Verify the integrity of a database. The buffer option is\n"
" optional, if omitted a slow algorithm is used. When a buffer\n"
" size is provided, a fast algorithm is used with larger\n"
" buffers resulting in bigger speedups. A buffer equal to the\n"
" size of the key file provides the fastest speedup.\n"
"\n"
" visit <dat-path>\n"
"\n"
" Iterate a data file and show information, including the count\n"
" of items in the file and a histogram of their log base2 size.\n"
"\n"
"Notes:\n"
"\n"
" Paths may be full or relative, and should include the extension.\n"
" The recover algorithm should be invoked before running any\n"
" operation which can modify the database.\n"
"\n"
;
desc_.print(std::cout);
};
int
error(std::string const& why)
{
std::cerr <<
progname() << ": " << why << ".\n"
"Use '" << progname() << " help' for usage.\n";
return EXIT_FAILURE;
};
int
operator()(int ac, char const* const* av)
{
namespace po = boost::program_options;
ac_ = ac;
av_ = av;
try
{
po::positional_options_description pod;
pod.add("command", 1);
pod.add("dat", 1);
pod.add("key", 1);
pod.add("log", 1);
po::variables_map vm;
po::store(po::command_line_parser(ac, av)
.options(desc_)
.positional(pod)
.run()
,vm);
po::notify(vm);
std::string cmd;
if(vm.count("command"))
cmd = vm["command"].as<std::string>();
if(cmd == "help")
{
help();
return EXIT_SUCCESS;
}
if(cmd == "info")
return do_info(vm);
if(cmd == "recover")
return do_recover(vm);
if(cmd == "rekey")
return do_rekey(vm);
if(cmd == "verify")
return do_verify(vm);
if(cmd == "visit")
return do_visit(vm);
return error("Unknown command '" + cmd + "'");
}
catch(std::exception const& e)
{
return error(e.what());
}
}
private:
int
do_info(boost::program_options::variables_map const& vm)
{
if(! vm.count("dat") && ! vm.count("key") && ! vm.count("log"))
return error("No files specified");
if(vm.count("dat"))
do_info(vm["dat"].as<std::string>());
if(vm.count("key"))
do_info(vm["key"].as<std::string>());
if(vm.count("log"))
do_info(vm["log"].as<std::string>());
return EXIT_SUCCESS;
}
void
do_info(path_type const& path)
{
error_code ec;
auto const err =
[&]
{
std::cout << path << ": " << ec.message() << "\n";
};
native_file f;
f.open(file_mode::read, path, ec);
if(ec)
return err();
auto const size = f.size(ec);
if(ec)
return err();
if(size < 8)
{
std::cout << "File " << path << " is too small to be a database file.\n";
return;
}
std::array<char, 8> ta;
f.read(0, ta.data(), ta.size(), ec);
if(ec)
return err();
std::string ts{ta.data(), ta.size()};
if(ts == "nudb.dat")
{
detail::dat_file_header h;
detail::read(f, h, ec);
if(ec)
return err();
f.close();
std::cout <<
"data file: " << path << "\n"
"file size: " << fdec(size) << "\n" <<
h << "\n";
return;
}
if(ts == "nudb.key")
{
detail::key_file_header h;
detail::read(f, h, ec);
if(ec)
return err();
f.close();
std::cout <<
"key file: " << path << "\n"
"file size: " << fdec(size) << "\n" <<
h << "\n";
return;
}
if(ts == "nudb.log")
{
detail::log_file_header h;
detail::read(f, h, ec);
if(ec)
return err();
f.close();
std::cout <<
"log file: " << path << "\n"
"file size: " << fdec(size) << "\n" <<
h << "\n";
return;
}
std::cout << "File " << path << " has unknown type '" << ts << "'.\n";
}
int
do_recover(boost::program_options::variables_map const& vm)
{
if(! vm.count("dat") || ! vm.count("key") || ! vm.count("log"))
return error("Missing file specifications");
error_code ec;
recover<xxhasher>(
vm["dat"].as<std::string>(),
vm["key"].as<std::string>(),
vm["log"].as<std::string>(),
ec);
if(ec)
{
std::cerr << "recover: " << ec.message() << "\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int
do_rekey(boost::program_options::variables_map const& vm)
{
if(! vm.count("dat"))
return error("Missing data file path");
if(! vm.count("key"))
return error("Missing key file path");
if(! vm.count("log"))
return error("Missing log file path");
if(! vm.count("count"))
return error("Missing item count");
if(! vm.count("buffer"))
return error("Missing buffer size");
auto const dp = vm["dat"].as<std::string>();
auto const kp = vm["key"].as<std::string>();
auto const lp = vm["log"].as<std::string>();
auto const itemCount = vm["count"].as<std::size_t>();
auto const bufferSize = vm["buffer"].as<std::size_t>();
error_code ec;
progress p{std::cout};
rekey<Hasher, native_file>(dp, kp, lp,
block_size(kp), 0.5f, itemCount,
bufferSize, ec, p);
if(ec)
{
std::cerr << "rekey: " << ec.message() << "\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int
do_verify(boost::program_options::variables_map const& vm)
{
if(! vm.count("dat"))
return error("Missing data file path");
if(! vm.count("key"))
return error("Missing key file path");
auto const bufferSize = vm.count("buffer") ?
vm["buffer"].as<std::size_t>() : 0;
auto const dp = vm["dat"].as<std::string>();
auto const kp = vm.count("key") ?
vm["key"].as<std::string>() : std::string{};
if(! vm.count("key"))
{
// todo
std::cerr << "unimplemented: dat-only verify\n";
return EXIT_FAILURE;
}
error_code ec;
progress p(std::cout);
{
verify_info info;
verify<Hasher>(info, dp, kp, bufferSize, p, ec);
if(! ec)
std::cout << info;
}
if(ec)
{
std::cerr << "verify: " << ec.message() << "\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int
do_visit(boost::program_options::variables_map const& vm)
{
if(! vm.count("dat"))
return error("Missing dat path");
auto const path = vm["dat"].as<std::string>();
error_code ec;
auto const err =
[&]
{
std::cout << path << ": " << ec.message() << "\n";
return EXIT_FAILURE;
};
{
native_file f;
f.open(file_mode::read, path, ec);
if(ec)
return err();
auto const fileSize = f.size(ec);
if(ec)
return err();
detail::dat_file_header h;
detail::read(f, h, ec);
if(ec)
return err();
f.close();
std::cout <<
"data file: " << path << "\n"
"file size: " << fdec(fileSize) << "\n" <<
h;
std::cout.flush();
}
std::uint64_t n = 0;
std::array<std::uint64_t, 64> hist;
hist.fill(0);
progress p{std::cout};
visit(path,
[&](void const*, std::size_t,
void const*, std::size_t data_size,
error_code& ec)
{
++n;
++hist[log2(data_size)];
//std::this_thread::sleep_for(std::chrono::milliseconds{1});
}, p, ec);
if(! ec)
std::cout <<
"value_count " << fdec(n) << "\n" <<
"sizes: " << fhist(hist) << "\n";
if(ec)
{
std::cerr << "visit: " << ec.message() << "\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
};
} // nudb
int
main(int ac, char const* const* av)
{
using namespace nudb;
admin_tool<xxhasher> t;
auto const rv = t(ac, av);
std::cout.flush();
basic_seconds_clock_main_hook();
return rv;
}