//
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//

// Command-line administration tool for database files. It parses a command
// (help, info, recover, rekey, verify, visit) plus file paths and options,
// and dispatches to the matching admin_tool::do_* member below.
//
// NOTE(review): the header names of the seven #include directives below are
// absent in this copy of the file; restore them before building.
#include
#include
#include
#include
#include
#include
#include

namespace nudb {

namespace detail {

// Writes the fields of a dat file header to `os`, one "name: value" line per
// field, and returns `os`. The header is taken by value (copied). The `type`
// member is printed as the sizeof(h.type) characters it holds.
std::ostream&
operator<<(std::ostream& os, dat_file_header const h)
{
    os <<
        "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
        "version: " << h.version << "\n"
        "uid: " << fhex(h.uid) << "\n"
        "appnum: " << fhex(h.appnum) << "\n"
        "key_size: " << h.key_size << "\n"
        ;
    return os;
}

// Writes the fields of a key file header to `os`, one "name: value" line per
// field, and returns `os`. Prints the same leading fields as the dat header
// inserter above, followed by salt, pepper and block_size.
std::ostream&
operator<<(std::ostream& os, key_file_header const h)
{
    os <<
        "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
        "version: " << h.version << "\n"
        "uid: " << fhex(h.uid) << "\n"
        "appnum: " << fhex(h.appnum) << "\n"
        "key_size: " << h.key_size << "\n"
        "salt: " << fhex(h.salt) << "\n"
        "pepper: " << fhex(h.pepper) << "\n"
        "block_size: " << fdec(h.block_size) << "\n"
        ;
    return os;
}

// Writes the fields of a log file header to `os`, one "name: value" line per
// field, and returns `os`.
//
// NOTE(review): unlike the two inserters above, this one first applies
// std::setfill('0'), std::internal and std::showbase to `os`, and nothing
// here restores the previous settings afterwards. Confirm whether the
// difference and the lasting change to the caller's stream are intended.
std::ostream&
operator<<(std::ostream& os, log_file_header const h)
{
    os << std::setfill('0') << std::internal << std::showbase <<
        "type: '" << std::string{h.type, h.type + sizeof(h.type)} << "'\n"
        "version: " << h.version << "\n"
        "uid: " << fhex(h.uid) << "\n"
        "appnum: " << fhex(h.appnum) << "\n"
        "key_size: " << h.key_size << "\n"
        "salt: " << fhex(h.salt) << "\n"
        "pepper: " << fhex(h.pepper) << "\n"
        "block_size: " << fdec(h.block_size) << "\n"
        "key_file_size: " << fdec(h.key_file_size) << "\n"
        "dat_file_size: " << fdec(h.dat_file_size) << "\n"
        ;
    return os;
}

} // detail

// Writes a verify_info report to `os`, one line per field, and returns `os`.
// The algorithm is shown as "fast" when info.algorithm is non-zero, "normal"
// otherwise. waste, overhead, actual_load and load_factor are multiplied by
// 100 and followed by "%".
//
// NOTE(review): std::fixed and the last std::setprecision(0) are not reset
// before returning, so they remain in effect on the caller's stream.
std::ostream&
operator<<(std::ostream& os, verify_info const& info)
{
    os <<
        "dat_path " << info.dat_path << "\n"
        "key_path " << info.key_path << "\n"
        "algorithm " <<(info.algorithm ? "fast" : "normal") << "\n"
        "avg_fetch: " << std::fixed << std::setprecision(3) << info.avg_fetch << "\n" <<
        "waste: " << std::fixed << std::setprecision(3) << info.waste * 100 << "%" << "\n" <<
        "overhead: " << std::fixed << std::setprecision(1) << info.overhead * 100 << "%" << "\n" <<
        "actual_load: " << std::fixed << std::setprecision(0) << info.actual_load * 100 << "%" << "\n" <<
        "version: " << fdec(info.version) << "\n" <<
        "uid: " << fhex(info.uid) << "\n" <<
        "appnum: " << fhex(info.appnum) << "\n" <<
        "key_size: " << fdec(info.key_size) << "\n" <<
        "salt: " << fhex(info.salt) << "\n" <<
        "pepper: " << fhex(info.pepper) << "\n" <<
        "block_size: " << fdec(info.block_size) << "\n" <<
        "bucket_size: " << fdec(info.bucket_size) << "\n" <<
        "load_factor: " << std::fixed << std::setprecision(0) << info.load_factor * 100 << "%" << "\n" <<
        "capacity: " << fdec(info.capacity) << "\n" <<
        "buckets: " << fdec(info.buckets) << "\n" <<
        "key_count: " << fdec(info.key_count) << "\n" <<
        "value_count: " << fdec(info.value_count) << "\n" <<
        "value_bytes: " << fdec(info.value_bytes) << "\n" <<
        "spill_count: " << fdec(info.spill_count) << "\n" <<
        "spill_count_tot: " << fdec(info.spill_count_tot) << "\n" <<
        "spill_bytes: " << fdec(info.spill_bytes) << "\n" <<
        "spill_bytes_tot: " << fdec(info.spill_bytes_tot) << "\n" <<
        "key_file_size: " << fdec(info.key_file_size) << "\n" <<
        "dat_file_size: " << fdec(info.dat_file_size) << "\n" <<
        "hist: " << fhist(info.hist) << "\n"
        ;
    return os;
}

// Command-line front end. An instance is invoked through operator()(ac, av),
// which parses the arguments and runs the requested command.
//
// NOTE(review): the template parameter list after `template` is absent in
// this copy of the file, as are the template arguments of po::value(),
// the .as() calls and the std::array declarations below. Confirm the
// intended types before building.
template
class admin_tool
{
    // Argument count and vector as passed to operator(). av_ stays null
    // until operator() has been called.
    int ac_ = 0;
    char const* const* av_ = nullptr;

    // Descriptions of the supported options; also printed by help().
    boost::program_options::options_description desc_;

public:
    // Registers the supported options: buffer/-b, dat/-d, key/-k, log/-l,
    // count/-n and command. "command" is registered without a value
    // semantic; the others use po::value().
    admin_tool()
        : desc_("Options")
    {
        namespace po = boost::program_options;
        desc_.add_options()
           ("buffer,b", po::value(),
                "Set the buffer size in bytes (larger is faster).")
           ("dat,d", po::value(),
                "Path to data file.")
           ("key,k", po::value(),
                "Path to key file.")
           ("log,l", po::value(),
                "Path to log file.")
           ("count,n", po::value(),
                "The number of items in the data file.")
           ("command", "Command to run.")
            ;
    }

    // Returns the stem (filename without its extension) of av_[0].
    // Reads av_[0], so it must only be called after operator() has stored
    // the argument vector.
    std::string
    progname() const
    {
        using namespace boost::filesystem;
        return path{av_[0]}.stem().string();
    }

    // Returns the filename component of the path `s`.
    // No caller of this member is visible in this file.
    std::string
    filename(std::string const& s)
    {
        using namespace boost::filesystem;
        return path{s}.filename().string();
    }

    // Prints the usage line, the per-command help text and the option
    // descriptions to std::cout.
    void
    help()
    {
        std::cout <<
            "usage: " << progname() << " [file...] \n";
        std::cout <<
            "\n"
            "Commands:\n"
            "\n"
            " help\n"
            "\n"
            " Print this help information.\n"
            "\n"
            " info [ []]\n"
            "\n"
            " Show metadata and header information for database files.\n"
            "\n"
            " recover \n"
            "\n"
            " Perform a database recovery. A recovery is necessary if a log\n"
            " file is present. Running commands on an unrecovered database\n"
            " may result in lost or corrupted data.\n"
            "\n"
            " rekey --count= --buffer=\n"
            "\n"
            " Generate the key file for a data file. The buffer option is\n"
            " required, larger buffers process faster. A buffer equal to\n"
            " the size of the key file processes the fastest. This command\n"
            " must be passed the count of items in the data file, which\n"
            " can be calculated with the 'visit' command.\n"
            "\n"
            " If the rekey is aborted before completion, the database must\n"
            " be subsequently restored by running the 'recover' command.\n"
            "\n"
            " verify [--buffer=]\n"
            "\n"
            " Verify the integrity of a database. The buffer option is\n"
            " optional, if omitted a slow algorithm is used. When a buffer\n"
            " size is provided, a fast algorithm is used with larger\n"
            " buffers resulting in bigger speedups. A buffer equal to the\n"
            " size of the key file provides the fastest speedup.\n"
            "\n"
            " visit \n"
            "\n"
            " Iterate a data file and show information, including the count\n"
            " of items in the file and a histogram of their log base2 size.\n"
            "\n"
            "Notes:\n"
            "\n"
            " Paths may be full or relative, and should include the extension.\n"
            " The recover algorithm should be invoked before running any\n"
            " operation which can modify the database.\n"
            "\n"
            ;
        desc_.print(std::cout);
    };

    // Reports `why` on std::cerr, prefixed with the program name and
    // followed by a hint to run the help command. Always returns
    // EXIT_FAILURE so callers can write `return error(...)`.
    int
    error(std::string const& why)
    {
        std::cerr <<
            progname() << ": " << why << ".\n"
            "Use '" << progname() << " help' for usage.\n";
        return EXIT_FAILURE;
    };

    // Entry point: stores the argument vector, parses it and runs the
    // requested command.
    //
    // ac, av  Argument count and vector, as received by main().
    // Returns the command's exit status. An unknown (or missing) command,
    // or any std::exception thrown while parsing or running, is reported
    // through error() and yields EXIT_FAILURE.
    int
    operator()(int ac, char const* const* av)
    {
        namespace po = boost::program_options;

        ac_ = ac;
        av_ = av;

        try
        {
            // Positional arguments are assigned, in order, to
            // command, dat, key and log (one token each).
            po::positional_options_description pod;
            pod.add("command", 1);
            pod.add("dat", 1);
            pod.add("key", 1);
            pod.add("log", 1);

            po::variables_map vm;
            po::store(po::command_line_parser(ac, av)
                .options(desc_)
                .positional(pod)
                .run()
                ,vm);
            po::notify(vm);

            // cmd stays empty when no command was given, which then falls
            // through to the "Unknown command" error below.
            std::string cmd;

            if(vm.count("command"))
                cmd = vm["command"].as();

            if(cmd == "help")
            {
                help();
                return EXIT_SUCCESS;
            }

            if(cmd == "info")
                return do_info(vm);

            if(cmd == "recover")
                return do_recover(vm);

            if(cmd == "rekey")
                return do_rekey(vm);

            if(cmd == "verify")
                return do_verify(vm);

            if(cmd == "visit")
                return do_visit(vm);

            return error("Unknown command '" + cmd + "'");
        }
        catch(std::exception const& e)
        {
            return error(e.what());
        }
    }

private:
    // "info" command: prints header information for each of the dat, key
    // and log paths that was supplied. Fails only when no path was given.
    // The per-file overload returns void, so a failure on an individual
    // file does not change the EXIT_SUCCESS result.
    int
    do_info(boost::program_options::variables_map const& vm)
    {
        if(! vm.count("dat") && ! vm.count("key") && ! vm.count("log"))
            return error("No files specified");
        if(vm.count("dat"))
            do_info(vm["dat"].as());
        if(vm.count("key"))
            do_info(vm["key"].as());
        if(vm.count("log"))
            do_info(vm["log"].as());
        return EXIT_SUCCESS;
    }

    // Prints the type, size and header of the single file at `path`.
    // The file type is decided by comparing the bytes read from offset 0
    // against "nudb.dat", "nudb.key" and "nudb.log". Problems (open, size
    // or read errors, a file shorter than 8 bytes, an unrecognised type)
    // are reported on std::cout and the function simply returns.
    void
    do_info(path_type const& path)
    {
        error_code ec;
        // Prints the current error for this path. Note it writes to
        // std::cout, not std::cerr.
        auto const err =
            [&]
            {
                std::cout << path << ": " << ec.message() << "\n";
            };
        native_file f;
        f.open(file_mode::read, path, ec);
        if(ec)
            return err();
        auto const size = f.size(ec);
        if(ec)
            return err();
        if(size < 8)
        {
            std::cout << "File " << path << " is too small to be a database file.\n";
            return;
        }
        // Read the leading type tag (ta.size() bytes at offset 0).
        std::array ta;
        f.read(0, ta.data(), ta.size(), ec);
        if(ec)
            return err();
        std::string ts{ta.data(), ta.size()};

        if(ts == "nudb.dat")
        {
            detail::dat_file_header h;
            detail::read(f, h, ec);
            if(ec)
                return err();
            f.close();
            std::cout <<
                "data file: " << path << "\n"
                "file size: " << fdec(size) << "\n" <<
                h << "\n";
            return;
        }

        if(ts == "nudb.key")
        {
            detail::key_file_header h;
            detail::read(f, h, ec);
            if(ec)
                return err();
            f.close();
            std::cout <<
                "key file: " << path << "\n"
                "file size: " << fdec(size) << "\n" <<
                h << "\n";
            return;
        }

        if(ts == "nudb.log")
        {
            detail::log_file_header h;
            detail::read(f, h, ec);
            if(ec)
                return err();
            f.close();
            std::cout <<
                "log file: " << path << "\n"
                "file size: " << fdec(size) << "\n" <<
                h << "\n";
            return;
        }

        std::cout << "File " << path << " has unknown type '" << ts << "'.\n";
    }

    // "recover" command: requires the dat, key and log paths and passes
    // them to recover(). Returns EXIT_FAILURE, after printing the error
    // message to std::cerr, when recover() sets the error code.
    int
    do_recover(boost::program_options::variables_map const& vm)
    {
        if(! vm.count("dat") || ! vm.count("key") || ! vm.count("log"))
            return error("Missing file specifications");
        error_code ec;
        recover(
            vm["dat"].as(),
            vm["key"].as(),
            vm["log"].as(),
            ec);
        if(ec)
        {
            std::cerr << "recover: " << ec.message() << "\n";
            return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
    }

    // "rekey" command: requires the dat, key and log paths plus the
    // --count and --buffer options, then calls rekey() with progress
    // reported to std::cout. Returns EXIT_FAILURE, after printing the
    // error message to std::cerr, when rekey() sets the error code.
    int
    do_rekey(boost::program_options::variables_map const& vm)
    {
        if(! vm.count("dat"))
            return error("Missing data file path");
        if(! vm.count("key"))
            return error("Missing key file path");
        if(! vm.count("log"))
            return error("Missing log file path");
        if(! vm.count("count"))
            return error("Missing item count");
        if(! vm.count("buffer"))
            return error("Missing buffer size");
        auto const dp = vm["dat"].as();
        auto const kp = vm["key"].as();
        auto const lp = vm["log"].as();
        auto const itemCount = vm["count"].as();
        auto const bufferSize = vm["buffer"].as();
        error_code ec;
        progress p{std::cout};
        // NOTE(review): 0.5f is presumably the load factor for the new key
        // file, and block_size(kp) is evaluated on the key path before
        // rekey() runs — confirm both against rekey()'s declaration.
        rekey(dp, kp, lp, block_size(kp), 0.5f,
            itemCount, bufferSize, ec, p);
        if(ec)
        {
            std::cerr << "rekey: " << ec.message() << "\n";
            return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
    }

    // "verify" command: requires the dat and key paths; --buffer is
    // optional and defaults to 0 when absent. On success the verify_info
    // report is printed to std::cout; on failure the error message goes to
    // std::cerr and EXIT_FAILURE is returned.
    int
    do_verify(boost::program_options::variables_map const& vm)
    {
        if(! vm.count("dat"))
            return error("Missing data file path");
        if(! vm.count("key"))
            return error("Missing key file path");

        auto const bufferSize = vm.count("buffer") ?
            vm["buffer"].as() : 0;
        auto const dp = vm["dat"].as();
        auto const kp = vm.count("key") ?
            vm["key"].as() : std::string{};

        // NOTE(review): this branch cannot be reached, because the guard
        // above already returns when "key" is missing. For the same reason
        // the empty-string alternative for kp is never selected.
        if(! vm.count("key"))
        {
            // todo
            std::cerr << "unimplemented: dat-only verify\n";
            return EXIT_FAILURE;
        }

        error_code ec;
        progress p(std::cout);
        {
            verify_info info;
            verify(info, dp, kp, bufferSize, p, ec);
            if(! ec)
                std::cout << info;
        }
        if(ec)
        {
            std::cerr << "verify: " << ec.message() << "\n";
            return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
    }

    // "visit" command: prints the data file's header, then iterates the
    // file with visit(), counting the items and building a histogram that
    // is indexed by log2 of each item's data size. Prints the count and
    // the histogram on success.
    int
    do_visit(boost::program_options::variables_map const& vm)
    {
        if(! vm.count("dat"))
            return error("Missing dat path");
        auto const path = vm["dat"].as();
        error_code ec;
        // Prints the current error for this path and yields EXIT_FAILURE.
        // Note it writes to std::cout, whereas the visit() failure further
        // down is reported on std::cerr.
        auto const err =
            [&]
            {
                std::cout << path << ": " << ec.message() << "\n";
                return EXIT_FAILURE;
            };
        {
            // Read and print the header first; the file is closed again
            // before visit() is given the same path.
            native_file f;
            f.open(file_mode::read, path, ec);
            if(ec)
                return err();
            auto const fileSize = f.size(ec);
            if(ec)
                return err();
            detail::dat_file_header h;
            detail::read(f, h, ec);
            if(ec)
                return err();
            f.close();
            std::cout <<
                "data file: " << path << "\n"
                "file size: " << fdec(fileSize) << "\n" <<
                h;
            std::cout.flush();
        }

        std::uint64_t n = 0;
        std::array hist;
        hist.fill(0);
        progress p{std::cout};
        // The callback's own `ec` parameter shadows the outer `ec` and is
        // never assigned or read inside the callback.
        // NOTE(review): hist is indexed with log2(data_size) without a
        // bounds check; the array's size and log2's behavior for a
        // data_size of 0 are not visible here — confirm the index is
        // always in range.
        visit(path,
            [&](void const*, std::size_t,
                void const*, std::size_t data_size,
                error_code& ec)
            {
                ++n;
                ++hist[log2(data_size)];
                //std::this_thread::sleep_for(std::chrono::milliseconds{1});
            }, p, ec);
        if(! ec)
            std::cout <<
                "value_count " << fdec(n) << "\n" <<
                "sizes: " << fhist(hist) << "\n";
        if(ec)
        {
            std::cerr << "visit: " << ec.message() << "\n";
            return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
    }
};

} // nudb

// Program entry point: runs the admin tool on the command line, flushes
// std::cout, calls basic_seconds_clock_main_hook() (defined elsewhere; its
// purpose is not visible in this file) and returns the tool's exit status.
int
main(int ac, char const* const* av)
{
    using namespace nudb;
    admin_tool t;
    auto const rv = t(ac, av);
    std::cout.flush();
    basic_seconds_clock_main_hook();
    return rv;
}