Add RocksDB to NuDB import tool (RIPD-781,785):

This custom tool is specifically designed for very fast import of RocksDB nodestore databases into NuDB.
2025-11-20 19:15:54 +00:00 · 2015-02-07 07:01:13 -08:00
parent b7ba509618
commit 2a201f9525
5 changed files with 1007 additions and 2 deletions
--- a/Builds/VisualStudio2013/RippleD.vcxproj
+++ b/Builds/VisualStudio2013/RippleD.vcxproj
@@ -2168,6 +2168,9 @@
    <ClCompile Include="..\..\src\ripple\nodestore\tests\Database.test.cpp">
      <ExcludedFromBuild>True</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="..\..\src\ripple\nodestore\tests\import_test.cpp">
      <ExcludedFromBuild>True</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="..\..\src\ripple\nodestore\tests\Timing.test.cpp">
      <ExcludedFromBuild>True</ExcludedFromBuild>
    </ClCompile>
--- a/Builds/VisualStudio2013/RippleD.vcxproj.filters
+++ b/Builds/VisualStudio2013/RippleD.vcxproj.filters
@@ -3141,6 +3141,9 @@
    <ClCompile Include="..\..\src\ripple\nodestore\tests\Database.test.cpp">
      <Filter>ripple\nodestore\tests</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\ripple\nodestore\tests\import_test.cpp">
      <Filter>ripple\nodestore\tests</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\ripple\nodestore\tests\Timing.test.cpp">
      <Filter>ripple\nodestore\tests</Filter>
    </ClCompile>
--- a/doc/rippled-example.cfg
+++ b/doc/rippled-example.cfg
@@ -659,9 +659,8 @@
 #       ...
 #
 #   Examples:
-#       type=RocksDB
+#       type=rocksdb
 #       path=db/rocksdb
 #       compression=0
 #
 #   Choices for 'type' (not case-sensitive)
 #       RocksDB             Use Facebook's RocksDB database (preferred)
--- a/src/ripple/nodestore/tests/import_test.cpp
+++ b/src/ripple/nodestore/tests/import_test.cpp
@@ -0,0 +1,998 @@
 //------------------------------------------------------------------------------
 /*
    This file is part of rippled: https://github.com/ripple/rippled
    Copyright (c) 2012, 2013 Ripple Labs Inc.
    Permission to use, copy, modify, and/or distribute this software for any
    purpose  with  or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.
    THE  SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH  REGARD  TO  THIS  SOFTWARE  INCLUDING  ALL  IMPLIED  WARRANTIES  OF
    MERCHANTABILITY  AND  FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY  SPECIAL ,  DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER  RESULTING  FROM  LOSS  OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION  OF  CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 //==============================================================================
 #include <BeastConfig.h>
 #include <beast/hash/xxhasher.h>
 #include <ripple/nodestore/impl/codec.h>
 #include <beast/chrono/basic_seconds_clock.h>
 #include <beast/chrono/chrono_io.h>
 #include <beast/http/rfc2616.h>
 #include <beast/unit_test/suite.h>
 #include <beast/utility/ci_char_traits.h>
 #include <boost/regex.hpp>
 #include <algorithm>
 #include <chrono>
 #include <map>
 #include <sstream>
 #include <ripple/unity/rocksdb.h>
 /*
 Math:
 1000 gb dat file
 170 gb key file
 capacity 113 keys/bucket
 normal:
 1,000gb data file read
 19,210gb key file read (113 * 170)
 19,210gb key file write
 multi(32gb):
 6 passes (170/32)
 6,000gb data file read
 170gb key file write
 */
 namespace ripple {
 namespace NodeStore {
 namespace detail {
 // Scope guard for stream formatting. Remembers the precision, format
 // flags and fill character of a stream when constructed and puts them
 // back when destroyed.
 class save_stream_state
 {
    std::ostream& os_;
    std::streamsize precision_;
    std::ios::fmtflags flags_;
    std::ios::char_type fill_;
 public:
    explicit save_stream_state(std::ostream& os)
        : os_(os)
        , precision_(os.precision())
        , flags_(os.flags())
        , fill_(os.fill())
    {
    }
    save_stream_state(save_stream_state const&) = delete;
    save_stream_state& operator=(save_stream_state const&) = delete;
    ~save_stream_state()
    {
        os_.precision(precision_);
        os_.flags(flags_);
        os_.fill(fill_);
    }
 };
 // Writes `d` expressed in `Unit`, followed by `suffix`.
 // Fewer than 100 units are printed with one fixed decimal place,
 // anything larger is rounded to a whole number of units.
 template <class Unit, class Rep, class Period>
 void
 write_in_unit(std::ostream& os,
    std::chrono::duration<Rep, Period> d, char const* suffix)
 {
    if (d < Unit{100})
    {
        using fractional =
            std::chrono::duration<float, typename Unit::period>;
        os << std::fixed << std::setprecision(1) <<
            fractional(d).count();
    }
    else
    {
        os << std::chrono::round<Unit>(d).count();
    }
    os << suffix;
 }
 // Writes a duration in a compact human readable form, picking the
 // largest of ns, us, ms, s, min in which the value is at least one
 // unit (ns is the fallback for anything below one microsecond).
 // The formatting state of `os` is left as it was found.
 template <class Rep, class Period>
 std::ostream&
 pretty_time(std::ostream& os, std::chrono::duration<Rep, Period> d)
 {
    using namespace std::chrono;
    save_stream_state guard(os);
    if (d < microseconds{1})
        write_in_unit<nanoseconds>(os, d, "ns");
    else if (d < milliseconds{1})
        write_in_unit<microseconds>(os, d, "us");
    else if (d < seconds{1})
        write_in_unit<milliseconds>(os, d, "ms");
    else if (d < minutes{1})
        write_in_unit<seconds>(os, d, "s");
    else
        write_in_unit<minutes>(os, d, "min");
    return os;
 }
 // Returns the pretty_time() rendering of `d` as a string.
 template <class Rep, class Period>
 inline
 std::string
 fmtdur(std::chrono::duration<Rep, Period> const& d)
 {
    std::ostringstream out;
    pretty_time(out, d);
    return out.str();
 }
 } // detail
 //------------------------------------------------------------------------------
 // Logs periodic estimates of the time remaining for a job whose total
 // amount of work is known when the job starts.
 //
 // Nothing is logged during the first 15 seconds; after the first
 // report, further reports are at least 60 seconds apart.
 class progress
 {
 private:
    // NOTE(review): presumably a clock that only advances once per
    // second, which is what makes the `now == now_` test below a
    // cheap early-out - confirm against beast::basic_seconds_clock.
    using clock_type =
        beast::basic_seconds_clock<
            std::chrono::steady_clock>;
    std::size_t const work_;                            // total work expected
    clock_type::time_point start_ = clock_type::now();  // construction time
    clock_type::time_point now_ = clock_type::now();    // last clock reading seen
    clock_type::time_point report_ = clock_type::now(); // time of last report
    std::size_t prev_ = 0;                              // work done at last report
    bool estimate_ = false;                             // true after first report is due
 public:
    // work: total amount of work the job will perform, in the same
    //       units later passed to operator().
    explicit
    progress(std::size_t work)
        : work_(work)
    {
    }
    // Reports progress if a report is due.
    //
    // log:  anything that accepts operator<< of strings and integers.
    // work: amount of work completed so far.
    template <class Log>
    void
    operator()(Log& log, std::size_t work)
    {
        using namespace std::chrono;
        // With nothing completed there is no rate to extrapolate
        // from; dividing by zero below would yield a value that
        // cannot be converted to the clock's integer representation.
        if (work == 0)
            return;
        auto const now = clock_type::now();
        if (now == now_)
            return;
        now_ = now;
        auto const elapsed = now - start_;
        if (! estimate_)
        {
            if (elapsed < seconds(15))
                return;
            estimate_ = true;
        }
        else if (now - report_ < seconds(60))
        {
            return;
        }
        // Average time per unit of work so far, in clock ticks
        auto const rate =
            elapsed.count() / double(work);
        // Clamp so that overshooting the expected total does not
        // wrap the unsigned subtraction.
        std::size_t const left =
            work < work_ ? work_ - work : 0;
        clock_type::duration const remain(
            static_cast<clock_type::duration::rep>(
                left * rate));
        log <<
            "Remaining: " << detail::fmtdur(remain) <<
                " (" << work << " of " << work_ <<
                    " in " << detail::fmtdur(elapsed) <<
                ", " << (work - prev_) <<
                    " in " << detail::fmtdur(now - report_) <<
                ")";
        report_ = now;
        prev_ = work;
    }
    // Logs the total time elapsed since construction.
    template <class Log>
    void
    finish(Log& log)
    {
        log <<
            "Total time: " << detail::fmtdur(
                clock_type::now() - start_);
    }
 };
 std::map <std::string, std::string, beast::ci_less>
 parse_args(std::string const& s)
 {
    // <key> '=' <value>
    static boost::regex const re1 (
        "^"                         // start of line
        "(?:\\s*)"                  // whitespace (optonal)
        "([a-zA-Z][_a-zA-Z0-9]*)"   // <key>
        "(?:\\s*)"                  // whitespace (optional)
        "(?:=)"                     // '='
        "(?:\\s*)"                  // whitespace (optional)
        "(.*\\S+)"                  // <value>
        "(?:\\s*)"                  // whitespace (optional)
        , boost::regex_constants::optimize
    );
    std::map <std::string,
        std::string, beast::ci_less> map;
    auto const v = beast::rfc2616::split(
        s.begin(), s.end(), ',');
    for (auto const& kv : v)
    {
        boost::smatch m;
        if (! boost::regex_match (kv, m, re1))
            throw std::runtime_error(
                "invalid parameter " + kv);
        auto const result =
            map.emplace(m[1], m[2]);
        if (! result.second)
            throw std::runtime_error(
                "duplicate parameter " + m[1]);
    }
    return map;
 }
 //------------------------------------------------------------------------------
 #if RIPPLE_ROCKSDB_AVAILABLE
 // Manual test suite that is really a tool: it copies every record of
 // a RocksDB database into a newly created NuDB data file and then
 // builds the matching NuDB key file.
 //
 // Parameters come from the unit test argument string as comma
 // separated <key>=<value> pairs; see the usage text in run().
 class import_test : public beast::unit_test::suite
 {
 public:
    // Performs the import. Returns early after printing the usage
    // text when a required parameter is missing; throws
    // std::runtime_error when the source database cannot be opened
    // or contains a key that is not 32 bytes long.
    void
    run() override
    {
        testcase(abort_on_fail) << arg();
        using namespace beast::nudb;
        using namespace beast::nudb::detail;
        pass();
        // Check that all three required parameters are present.
        // Only the first missing one is reported.
        auto const args = parse_args(arg());
        bool usage = args.empty();
        if (! usage &&
            args.find("from") == args.end())
        {
            log <<
                "Missing parameter: from";
            usage = true;
        }
        if (! usage &&
            args.find("to") == args.end())
        {
            log <<
                "Missing parameter: to";
            usage = true;
        }
        if (! usage &&
            args.find("buffer") == args.end())
        {
            log <<
                "Missing parameter: buffer";
            usage = true;
        }
        if (usage)
        {
            log <<
                "Usage:\n" <<
                "--unittest-arg=from=<from>,to=<to>,buffer=<buffer>\n" <<
                "from:   RocksDB database to import from\n" <<
                "to:     NuDB database to import to\n" <<
                "buffer: Buffer size (bigger is faster)\n" <<
                "NuDB database must not already exist.";
            return;
        }
        // This controls the size of the bucket buffer.
        // For a 1TB data file, a 32GB bucket buffer is suggested.
        // The larger the buffer, the faster the import.
        //
        std::size_t const buffer_size =
            std::stoull(args.at("buffer"));
        auto const from_path = args.at("from");
        auto const to_path = args.at("to");
        using hash_type = beast::xxhasher;
        using codec_type = nodeobject_codec;
        // Buffer size handed to the bulk reader and writer (64MB)
        auto const bulk_size = 64 * 1024 * 1024;
        float const load_factor = 0.5;
        auto const dp = to_path + ".dat";
        auto const kp = to_path + ".key";
        auto const start =
            std::chrono::steady_clock::now();
        log <<
            "from:    " << from_path << "\n"
            "to:      " << to_path << "\n"
            "buffer:  " << buffer_size;
        // Open the source database read-only; it must already exist.
        std::unique_ptr<rocksdb::DB> db;
        {
            rocksdb::Options options;
            options.create_if_missing = false;
            options.max_open_files = 2000; // 5000?
            rocksdb::DB* pdb = nullptr;
            rocksdb::Status status =
                rocksdb::DB::OpenForReadOnly(
                    options, from_path, &pdb);
            if (! status.ok () || ! pdb)
                throw std::runtime_error (
                    "Can't open '" + from_path + "': " +
                        status.ToString());
            db.reset(pdb);
        }
        // Create data file with values
        std::size_t nitems = 0;     // records written
        std::size_t nbytes = 0;     // uncompressed bytes seen (not reported)
        dat_file_header dh;
        dh.version = currentVersion;
        dh.uid = make_uid();
        dh.appnum = 1;
        dh.key_size = 32;
        native_file df;
        df.create(file_mode::append, dp);
        // Writer starts at offset 0 of the new data file
        bulk_writer<native_file> dw(
            df, 0, bulk_size);
        {
            // The header goes first
            {
                auto os = dw.prepare(dat_file_header::size);
                write(os, dh);
            }
            rocksdb::ReadOptions options;
            options.verify_checksums = false;
            options.fill_cache = false;
            std::unique_ptr<rocksdb::Iterator> it(
                db->NewIterator(options));
            buffer buf;
            codec_type codec;
            // Visit every key/value pair in the source database
            for (it->SeekToFirst (); it->Valid (); it->Next())
            {
                if (it->key().size() != 32)
                    throw std::runtime_error(
                        "Unexpected key size " +
                            std::to_string(it->key().size()));
                void const* const key = it->key().data();
                void const* const data = it->value().data();
                auto const size = it->value().size();
                // Work on a private copy because filter_inner
                // receives a mutable pointer to the value.
                std::unique_ptr<char[]> clean(
                    new char[size]);
                std::memcpy(clean.get(), data, size);
                filter_inner(clean.get(), size);
                auto const out = codec.compress(
                    clean.get(), size, buf);
                // Verify codec correctness
                // (decompress and compare against the filtered copy)
                {
                    buffer buf2;
                    auto const check = codec.decompress(
                        out.first, out.second, buf2);
                    expect(check.second == size,
                        "codec size error");
                    expect(std::memcmp(
                        check.first, clean.get(), size) == 0,
                            "codec data error");
                }
                // Data Record
                // Layout: 48-bit size, 32 byte key, compressed value
                auto os = dw.prepare(
                    field<uint48_t>::size + // Size
                    32 +                    // Key
                    out.second);
                write<uint48_t>(os, out.second);
                std::memcpy(os.data(32), key, 32);
                std::memcpy(os.data(out.second),
                    out.first, out.second);
                ++nitems;
                nbytes += size;
            }
            dw.flush();
        }
        // Done with the source database
        db.reset();
        log <<
            "Import data: " << detail::fmtdur(
                std::chrono::steady_clock::now() - start);
        // Size of the data file once every record is written; the
        // key file passes below read the data file up to this point.
        auto const df_size =
            df.actual_size();
        // Create key file
        key_file_header kh;
        kh.version = currentVersion;
        kh.uid = dh.uid;
        kh.appnum = dh.appnum;
        kh.key_size = 32;
        kh.salt = make_salt();
        kh.pepper = pepper<hash_type>(kh.salt);
        kh.block_size = block_size(kp);
        // Load factor scaled by 65536 and capped at 65535
        kh.load_factor = std::min<std::size_t>(
            65536.0 * load_factor, 65535);
        // Enough buckets to hold nitems when each bucket is filled
        // to load_factor of its capacity
        kh.buckets = std::ceil(nitems / (bucket_capacity(
            kh.block_size) * load_factor));
        kh.modulus = ceil_pow2(kh.buckets);
        native_file kf;
        kf.create(file_mode::append, kp);
        buffer buf(kh.block_size);
        // The header occupies the whole first block of the key
        // file, zero padded.
        {
            std::memset(buf.get(), 0, kh.block_size);
            ostream os(buf.get(), kh.block_size);
            write(os, kh);
            kf.write(0, buf.get(), kh.block_size);
        }
        // Build contiguous sequential sections of the
        // key file using multiple passes over the data.
        //
        // Number of buckets that fit in the buffer (at least one)
        auto const buckets = std::max<std::size_t>(1,
            buffer_size / kh.block_size);
        buf.reserve(buckets * kh.block_size);
        auto const passes =
            (kh.buckets + buckets - 1) / buckets;
        log <<
            "items:   " << nitems << "\n"
            "buckets: " << kh.buckets << "\n"
            "data:    " << df_size << "\n"
            "passes:  " << passes;
        // Total work is one full read of the data file per pass
        progress p(df_size * passes);
        std::size_t npass = 0;
        for (std::size_t b0 = 0; b0 < kh.buckets;
                b0 += buckets)
        {
            auto const b1 = std::min(
                b0 + buckets, kh.buckets);
            // Buffered range is [b0, b1)
            auto const bn = b1 - b0;
            // Create empty buckets
            for (std::size_t i = 0; i < bn; ++i)
            {
                bucket b(kh.block_size,
                    buf.get() + i * kh.block_size,
                        empty);
            }
            // Insert all keys into buckets
            // Iterate Data File
            // (starting just past the data file header)
            bulk_reader<native_file> r(
                df, dat_file_header::size,
                    df_size, bulk_size);
            while (! r.eof())
            {
                // Offset of this record; stored in the bucket entry
                auto const offset = r.offset();
                // Data Record or Spill Record
                std::size_t size;
                auto is = r.prepare(
                    field<uint48_t>::size); // Size
                read<uint48_t>(is, size);
                if (size > 0)
                {
                    // Data Record
                    is = r.prepare(
                        dh.key_size +           // Key
                        size);                  // Data
                    std::uint8_t const* const key =
                        is.data(dh.key_size);
                    auto const h = hash<hash_type>(
                        key, kh.key_size, kh.salt);
                    auto const n = bucket_index(
                        h, kh.buckets, kh.modulus);
                    p(log,
                        npass * df_size + r.offset());
                    // Keys belonging to buckets outside this
                    // pass are handled by another pass.
                    if (n < b0 || n >= b1)
                        continue;
                    bucket b(kh.block_size, buf.get() +
                        (n - b0) * kh.block_size);
                    // NOTE(review): presumably writes the bucket
                    // to the data file as a spill record when it
                    // is full - confirm against maybe_spill.
                    maybe_spill(b, dw);
                    b.insert(offset, size, h);
                }
                else
                {
                    // VFALCO Should never get here
                    // Spill Record
                    // A zero size is followed by a 16-bit length;
                    // skip that many bytes.
                    is = r.prepare(
                        field<std::uint16_t>::size);
                    read<std::uint16_t>(is, size);  // Size
                    r.prepare(size); // skip
                }
            }
            // Write this pass's buckets; the +1 skips the first
            // block, which holds the header.
            kf.write((b0 + 1) * kh.block_size,
                buf.get(), bn * kh.block_size);
            ++npass;
        }
        // Flush anything written through dw during the passes
        dw.flush();
        p.finish(log);
    }
 };
 BEAST_DEFINE_TESTSUITE_MANUAL(import,NodeStore,ripple);
 #endif
 //------------------------------------------------------------------------------
 // Manual test suite that is really a tool: it builds a fresh NuDB key
 // file for an existing NuDB data file.
 //
 // Parameters come from the unit test argument string as comma
 // separated <key>=<value> pairs; see the usage text in run().
 class rekey_test : public beast::unit_test::suite
 {
 public:
    // Builds the key file. Returns early after printing the usage
    // text when a required parameter is missing.
    void
    run() override
    {
        testcase(abort_on_fail) << arg();
        using namespace beast::nudb;
        using namespace beast::nudb::detail;
        pass();
        // Check that all three required parameters are present.
        // Only the first missing one is reported.
        auto const args = parse_args(arg());
        bool usage = args.empty();
        if (! usage &&
            args.find("path") == args.end())
        {
            log <<
                "Missing parameter: path";
            usage = true;
        }
        if (! usage &&
            args.find("items") == args.end())
        {
            log <<
                "Missing parameter: items";
            usage = true;
        }
        if (! usage &&
            args.find("buffer") == args.end())
        {
            log <<
                "Missing parameter: buffer";
            usage = true;
        }
        if (usage)
        {
            log <<
                "Usage:\n" <<
                "--unittest-arg=path=<path>,items=<items>,buffer=<buffer>\n" <<
                "path:   NuDB path to rekey (without the .dat)\n" <<
                "items:  Number of items in the .dat file\n" <<
                "buffer: Buffer size (bigger is faster)\n" <<
                "NuDB key file must not already exist.";
            return;
        }
        std::size_t const buffer_size =
            std::stoull(args.at("buffer"));
        auto const path = args.at("path");
        std::size_t const items =
            std::stoull(args.at("items"));
        using hash_type = beast::xxhasher;
        // Buffer size handed to the bulk reader and writer (64MB)
        auto const bulk_size = 64 * 1024 * 1024;
        float const load_factor = 0.5;
        auto const dp = path + ".dat";
        auto const kp = path + ".key";
        log <<
            "path:   " << path << "\n"
            "items:  " << items << "\n"
            "buffer: " << buffer_size;
        // Open the existing data file and read its header
        native_file df;
        df.open(file_mode::append, dp);
        dat_file_header dh;
        read(df, dh);
        auto const df_size = df.actual_size();
        // Writer positioned at the current end of the data file
        bulk_writer<native_file> dw(
            df, df_size, bulk_size);
        // Create key file
        key_file_header kh;
        kh.version = currentVersion;
        kh.uid = dh.uid;
        kh.appnum = dh.appnum;
        // Take the key size from the data file so the two headers
        // agree; keys are read and hashed with dh.key_size below.
        kh.key_size = dh.key_size;
        kh.salt = make_salt();
        kh.pepper = pepper<hash_type>(kh.salt);
        kh.block_size = block_size(kp);
        // Load factor scaled by 65536 and capped at 65535
        kh.load_factor = std::min<std::size_t>(
            65536.0 * load_factor, 65535);
        // Enough buckets to hold `items` when each bucket is filled
        // to load_factor of its capacity
        kh.buckets = std::ceil(items / (bucket_capacity(
            kh.block_size) * load_factor));
        kh.modulus = ceil_pow2(kh.buckets);
        native_file kf;
        kf.create(file_mode::append, kp);
        buffer buf(kh.block_size);
        // The header occupies the whole first block of the key
        // file, zero padded.
        {
            std::memset(buf.get(), 0, kh.block_size);
            ostream os(buf.get(), kh.block_size);
            write(os, kh);
            kf.write(0, buf.get(), kh.block_size);
        }
        // Build contiguous sequential sections of the
        // key file using multiple passes over the data.
        //
        // Number of buckets that fit in the buffer (at least one)
        auto const buckets = std::max<std::size_t>(1,
            buffer_size / kh.block_size);
        buf.reserve(buckets * kh.block_size);
        auto const passes =
            (kh.buckets + buckets - 1) / buckets;
        log <<
            "buckets: " << kh.buckets << "\n"
            "data:    " << df_size << "\n"
            "passes:  " << passes;
        // Total work is one full read of the data file per pass
        progress p(df_size * passes);
        std::size_t npass = 0;
        for (std::size_t b0 = 0; b0 < kh.buckets;
                b0 += buckets)
        {
            auto const b1 = std::min(
                b0 + buckets, kh.buckets);
            // Buffered range is [b0, b1)
            auto const bn = b1 - b0;
            // Create empty buckets
            for (std::size_t i = 0; i < bn; ++i)
            {
                bucket b(kh.block_size,
                    buf.get() + i * kh.block_size,
                        empty);
            }
            // Insert all keys into buckets
            // Iterate Data File
            // (starting just past the data file header)
            bulk_reader<native_file> r(
                df, dat_file_header::size,
                    df_size, bulk_size);
            while (! r.eof())
            {
                // Offset of this record; stored in the bucket entry
                auto const offset = r.offset();
                // Data Record or Spill Record
                std::size_t size;
                auto is = r.prepare(
                    field<uint48_t>::size); // Size
                read<uint48_t>(is, size);
                if (size > 0)
                {
                    // Data Record
                    is = r.prepare(
                        dh.key_size +           // Key
                        size);                  // Data
                    std::uint8_t const* const key =
                        is.data(dh.key_size);
                    auto const h = hash<hash_type>(
                        key, dh.key_size, kh.salt);
                    auto const n = bucket_index(
                        h, kh.buckets, kh.modulus);
                    p(log,
                        npass * df_size + r.offset());
                    // Keys belonging to buckets outside this
                    // pass are handled by another pass.
                    if (n < b0 || n >= b1)
                        continue;
                    bucket b(kh.block_size, buf.get() +
                        (n - b0) * kh.block_size);
                    // NOTE(review): presumably writes the bucket
                    // to the data file as a spill record when it
                    // is full - confirm against maybe_spill.
                    maybe_spill(b, dw);
                    b.insert(offset, size, h);
                }
                else
                {
                    // VFALCO Should never get here
                    // Spill Record
                    // A zero size is followed by a 16-bit length;
                    // skip that many bytes.
                    is = r.prepare(
                        field<std::uint16_t>::size);
                    read<std::uint16_t>(is, size);  // Size
                    r.prepare(size); // skip
                }
            }
            // Write this pass's buckets; the +1 skips the first
            // block, which holds the header.
            kf.write((b0 + 1) * kh.block_size,
                buf.get(), bn * kh.block_size);
            ++npass;
        }
        // Flush anything written through dw during the passes
        dw.flush();
        p.finish(log);
    }
 };
 BEAST_DEFINE_TESTSUITE_MANUAL(rekey,NodeStore,ripple);
 //------------------------------------------------------------------------------
 namespace legacy {
 using namespace beast::nudb;
 using namespace beast::nudb::detail;
 // In-memory form of a data file header in the legacy layout that
 // update_test converts from. This layout carries a salt and has no
 // uid member.
 struct dat_file_header
 {
    // Number of bytes the header occupies in the file: the sum of
    // the field widths listed below.
    static std::size_t BEAST_CONSTEXPR size =
        8 +     // Type
        2 +     // Version
        8 +     // Appnum
        8 +     // Salt
        2 +     // KeySize
        64;     // (Reserved)
    char type[8];               // 8 raw bytes
    std::size_t version;        // read as a 16-bit field
    std::uint64_t appnum;       // read as a 64-bit field
    std::uint64_t salt;         // read as a 64-bit field
    std::size_t key_size;       // read as a 16-bit field
 };
 // In-memory form of a key file header in the legacy layout that
 // update_test converts from, followed by values that are derived
 // when the header is read rather than stored in the file.
 struct key_file_header
 {
    // Number of bytes the header occupies in the file: the sum of
    // the field widths listed below.
    static std::size_t BEAST_CONSTEXPR size =
        8 +     // Type
        2 +     // Version
        8 +     // Appnum
        8 +     // Salt
        8 +     // Pepper
        2 +     // KeySize
        2 +     // BlockSize
        2 +     // LoadFactor
        64;     // (Reserved)
    char type[8];               // 8 raw bytes
    std::size_t version;        // read as a 16-bit field
    std::uint64_t appnum;       // read as a 64-bit field
    std::uint64_t salt;         // read as a 64-bit field
    std::uint64_t pepper;       // read as a 64-bit field
    std::size_t key_size;       // read as a 16-bit field
    std::size_t block_size;     // read as a 16-bit field
    std::size_t load_factor;    // read as a 16-bit field
    // Computed values
    std::size_t capacity;       // bucket_capacity(block_size)
    std::size_t bucket_size;    // bucket_size(capacity)
    std::size_t buckets;        // derived from the key file size
    std::size_t modulus;        // ceil_pow2(buckets)
 };
 // Deserialize a legacy data file header from a stream.
 // Fields are consumed in file order: type, version, appnum, salt,
 // key size, then the 64 reserved bytes.
 template <class = void>
 void
 read (istream& is, dat_file_header& dh)
 {
    read (is, dh.type, sizeof(dh.type));
    read<std::uint16_t>(is, dh.version);
    read<std::uint64_t>(is, dh.appnum);
    read<std::uint64_t>(is, dh.salt);
    read<std::uint16_t>(is, dh.key_size);
    // The reserved area is consumed and then discarded
    std::array <std::uint8_t, 64> reserved;
    read (is, reserved.data(), reserved.size());
 }
 // Read data file header from file
 template <class File>
 void
 read (File& f, dat_file_header& dh)
 {
    std::array<std::uint8_t,
        dat_file_header::size> buf;
    try
    {
        f.read(0, buf.data(), buf.size());
    }
    catch (file_short_read_error const&)
    {
        throw store_corrupt_error(
            "short data file header");
    }
    istream is(buf);
    read (is, dh);
 }
 // Deserialize a legacy key file header from a stream and fill in
 // the computed members. `file_size` is the size of the key file
 // and is used to work out how many buckets it holds.
 template <class = void>
 void
 read (istream& is, std::size_t file_size,
    key_file_header& kh)
 {
    // Stored fields, in file order
    read(is, kh.type, sizeof(kh.type));
    read<std::uint16_t>(is, kh.version);
    read<std::uint64_t>(is, kh.appnum);
    read<std::uint64_t>(is, kh.salt);
    read<std::uint64_t>(is, kh.pepper);
    read<std::uint16_t>(is, kh.key_size);
    read<std::uint16_t>(is, kh.block_size);
    read<std::uint16_t>(is, kh.load_factor);
    // The reserved area is consumed and then discarded
    std::array <std::uint8_t, 64> reserved;
    read (is, reserved.data(), reserved.size());
    // VFALCO These need to be checked to handle
    //        when the file size is too small
    kh.capacity = bucket_capacity(kh.block_size);
    kh.bucket_size = bucket_size(kh.capacity);
    // VFALCO This should be handled elsewhere.
    //        we shouldn't put the computed fields in this header.
    //
    // A zero block size (corruption or logic error) or a file no
    // larger than one block both mean there are no buckets.
    bool const has_buckets =
        kh.block_size > 0 && file_size > kh.block_size;
    kh.buckets = has_buckets
        ? (file_size - kh.bucket_size) / kh.block_size
        : 0;
    kh.modulus = ceil_pow2(kh.buckets);
 }
 // Read key file header from file
 template <class File>
 void
 read (File& f, key_file_header& kh)
 {
    std::array <std::uint8_t,
        key_file_header::size> buf;
    try
    {
        f.read(0, buf.data(), buf.size());
    }
    catch (file_short_read_error const&)
    {
        throw store_corrupt_error(
            "short key file header");
    }
    istream is(buf);
    read (is, f.actual_size(), kh);
 }
 } // legacy
 // Manual test suite that is really a tool: it reads the headers of
 // an existing NuDB data/key file pair in the legacy layout and
 // writes them back in the current layout.
 //
 // The single parameter `path` comes from the unit test argument
 // string; see the usage text in run().
 class update_test : public beast::unit_test::suite
 {
 public:
    void
    run() override
    {
        testcase(abort_on_fail) << arg();
        using namespace beast::nudb;
        using namespace beast::nudb::detail;
        pass();
        auto const args = parse_args(arg());
        bool const no_args = args.empty();
        bool const no_path =
            args.find("path") == args.end();
        if (! no_args && no_path)
        {
            log <<
                "Missing parameter: path";
        }
        if (no_args || no_path)
        {
            log <<
                "Usage:\n" <<
                "--unittest-arg=path=<dat>\n" <<
                "path:   NuDB path to update (without extensions)";
            return;
        }
        using hash_type = beast::xxhasher;
        auto const path = args.at("path");
        auto const dp = path + ".dat";
        auto const kp = path + ".key";
        log <<
            "path:   " << path;
        // Open both existing files
        native_file df;
        df.open(file_mode::write, dp);
        native_file kf;
        kf.open(file_mode::write, kp);
        // Load the headers in their legacy layout
        legacy::dat_file_header dh0;
        read(df, dh0);
        legacy::key_file_header kh0;
        read(kf, kh0);
        // New data file header: a fresh uid is generated, the
        // remaining fields carry over.
        dat_file_header dh;
        std::memcpy(dh.type, "nudb.dat", 8);
        dh.version = dh0.version;
        dh.uid = make_uid();
        dh.appnum = dh0.appnum;
        dh.key_size = dh0.key_size;
        // New key file header: shares the uid with the data file
        // and keeps the hashing parameters of the old key file.
        key_file_header kh;
        std::memcpy(kh.type, "nudb.key", 8);
        kh.version = dh.version;
        kh.uid = dh.uid;
        kh.appnum = dh.appnum;
        kh.key_size = dh.key_size;
        kh.salt = kh0.salt;
        kh.pepper = kh0.pepper;
        kh.block_size = kh0.block_size;
        kh.load_factor = kh0.load_factor;
        // VFALCO These need to be checked to handle
        //        when the file size is too small
        kh.capacity = bucket_capacity(kh.block_size);
        kh.bucket_size = bucket_size(kh.capacity);
        auto const kf_size = kf.actual_size();
        // VFALCO This should be handled elsewhere.
        //        we shouldn't put the computed fields
        //        in this header.
        //
        // A zero block size (corruption or logic error) or a file
        // no larger than one block both mean there are no buckets.
        bool const has_buckets =
            kh.block_size > 0 && kf_size > kh.block_size;
        kh.buckets = has_buckets
            ? (kf_size - kh.bucket_size) / kh.block_size
            : 0;
        kh.modulus = ceil_pow2(kh.buckets);
        // Check the new headers before anything is written
        verify(dh);
        verify<hash_type>(dh, kh);
        write(df, dh);
        write(kf, kh);
    }
 };
 BEAST_DEFINE_TESTSUITE_MANUAL(update,NodeStore,ripple);
 }
 }
--- a/src/ripple/unity/nodestore.cpp
+++ b/src/ripple/unity/nodestore.cpp
@@ -40,4 +40,6 @@
 #include <ripple/nodestore/tests/Backend.test.cpp>
 #include <ripple/nodestore/tests/Basics.test.cpp>
 #include <ripple/nodestore/tests/Database.test.cpp>
 #include <ripple/nodestore/tests/import_test.cpp>
 #include <ripple/nodestore/tests/Timing.test.cpp>