Add repeatable NodeStore timing benchmark:

The timing test is changed to overcome possible file buffer cache effects by creating different read access patterns. The unittest-arg command-line argument allows running the benchmarks against any of the available backends, with parameters passed in the same format as rippled.cfg. The num_objects parameter permits varying the number of key/value pairs inserted. The data is random but reasonably matches the values that rippled might generate.
This commit is contained in:
Donovan Hide
2014-10-31 18:47:29 +00:00
committed by Vinnie Falco
parent ffe6707595
commit 6540804571
2 changed files with 255 additions and 55 deletions

View File

@@ -22,6 +22,9 @@
#include <beast/unit_test/suite.h>
#include <beast/module/core/maths/Random.h>
#include <ripple/basics/StringUtilities.h>
#include <boost/algorithm/string.hpp>
#include <iomanip>
namespace ripple {
namespace NodeStore {

View File

@@ -23,10 +23,110 @@ namespace NodeStore {
class NodeStoreTiming_test : public TestBase
{
public:
enum
class NodeFactory
{
numObjectsToTest = 1000000
};
enum
{
minLedger = 1,
maxLedger = 10000000,
minValueLength = 128, // Must be a multiple of 8
maxValueLength = 256 // Will be multiplied by 8
};
// Constructs a repeatable source of pseudo-random NodeObjects.
//
// seed       - seeds the key-selection engine; the same seed always
//              yields the same sequence of objects (see next()).
// numObjects - how many objects next() produces before returning nullptr.
// minKey     - low end (inclusive) of the integer key space.
// maxKey     - high end (inclusive) of the integer key space.  Objects
//              are derived deterministically from the drawn key, so
//              overlapping key ranges reproduce the same objects.
public : NodeFactory(std::int64_t seed,
std::int64_t numObjects,
std::int64_t minKey,
std::int64_t maxKey)
: seed_(seed),
numObjects_(numObjects),
count_(0),
rng_(seed),                             // key-selection engine; rewound by reset()
key_(minKey, maxKey),                   // distribution over the key space
value_(minValueLength, maxValueLength), // payload size, in 8-byte words
type_(hotLEDGER, hotTRANSACTION_NODE),  // node object type range
ledger_(minLedger, maxLedger)           // ledger index range
{
}
// Produces the next pseudo-random NodeObject, or nullptr once
// numObjects_ have been handed out.  Each object is a pure function of
// the key drawn from key_(rng_): drawing the same key again regenerates
// an identical object, which is what makes the sequence repeatable.
NodeObject::Ptr next()
{
// Stop when done
if (count_==numObjects_) return nullptr;
count_++;
// Seed from range between minKey and maxKey to ensure repeatability
r_.seed(key_(rng_));
uint256 hash;
// Fill the hash with 64-bit words.  Note std::bind stores a COPY of
// r_, so this fill does not advance r_ itself.
std::generate_n(reinterpret_cast<uint64_t*>(hash.begin()),
hash.size() / sizeof(std::uint64_t),
std::bind(filler_, r_));
// Payload length is value_(r_) 8-byte words (this draw advances r_,
// so the data fill below starts from a different engine state).
Blob data(value_(r_)*8);
std::generate_n(reinterpret_cast<uint64_t*>(data.data()),
data.size() / sizeof(std::uint64_t),
std::bind(filler_, r_));
// Type and ledger index also come from the key-seeded engine, so they
// too are fully determined by the key.
NodeObjectType nodeType(static_cast<NodeObjectType>(type_(r_)));
return NodeObject::createObject(nodeType, ledger_(r_),
std::move(data), hash);
}
// Refills 'batch' with up to batchSize freshly generated objects.
//
// batch     - cleared, then filled with the next objects from this factory.
// batchSize - maximum number of objects to place in the batch.
//
// Returns false when the factory ran out of objects before the batch was
// full (the batch then holds the remaining tail, possibly empty);
// returns true when a full batch was produced.
bool fillBatch(Batch& batch,std::int64_t batchSize)
{
batch.clear();
// Use a signed index: batchSize is signed, and the previous unsigned
// index made this a signed/unsigned comparison that would loop on the
// wrapped value for a negative batchSize.
for (std::int64_t i = 0; i < batchSize; i++)
{
auto node = next();
if (!node)
return false;
batch.emplace_back(node);
}
return true;
}
// Rewinds the factory so next() replays the identical object sequence
// from the beginning.  Only rng_ needs reseeding here: r_ is reseeded
// from the drawn key on every call to next().
void reset()
{
count_ = 0;
rng_.seed(seed_);
}
private:
std::int64_t seed_;        // saved so reset() can rewind rng_
std::int64_t numObjects_;  // total objects next() will produce
std::int64_t count_;       // objects produced since the last reset()
std::mt19937_64 rng_;      // key-selection engine, seeded with seed_
std::mt19937_64 r_;        // per-object engine, reseeded from each key
std::uniform_int_distribution<std::uint64_t> key_;    // key space
std::uniform_int_distribution<std::uint64_t> value_;  // payload size (8-byte words)
std::uniform_int_distribution<std::uint32_t> type_;   // node object type
std::uniform_int_distribution<std::uint32_t> ledger_; // ledger index
std::uniform_int_distribution<std::uint64_t> filler_; // raw 64-bit fill words
}; // end NodeFactory
// Checks that NodeFactory is repeatable: drawing 10000 objects from a
// key space of only 100 keys ([0, 99]) must yield exactly 100 distinct
// objects, and every repeated key must regenerate a clone of the
// object first produced for that key.
void testNodeFactory(std::int64_t const seedValue)
{
testcase("repeatableObject");
NodeFactory factory(seedValue, 10000, 0, 99);
std::set<NodeObject::Ptr, NodeObject::LessThan> out;
for (auto node = factory.next(); node; node = factory.next())
{
auto it = out.find(node);
if (it == out.end())
{
out.insert(node);
}
else
{
// Same key drawn again: the regenerated object must match.
expect(it->get()->isCloneOf(node), "Should be clones");
}
}
// Message fixed: the old "Too many objects created" was misleading,
// since this check also fails when too FEW distinct objects appear.
expect(out.size() == 100, "Wrong number of objects created");
}
class Stopwatch
{
@@ -53,57 +153,121 @@ public:
//--------------------------------------------------------------------------
void testBackend (std::string const& type, std::int64_t const seedValue)
enum
{
std::unique_ptr <Manager> manager (make_Manager ());
batchSize = 128
};
DummyScheduler scheduler;
using check_func = std::function<bool(Status const)>;
using backend_ptr = std::unique_ptr<Backend>;
using manager_ptr = std::unique_ptr<Manager>;
using result_type = std::map<std::string, double>;
testcase ("Testing backend '" + type + "' performance");
// Status predicate: true only when the key was absent from the backend.
// (Stray semicolon after the function body removed.)
static bool checkNotFound(Status const status)
{
return status == notFound;
}
// Status predicate: true only for a successful fetch.
// (Stray semicolon after the function body removed.)
static bool checkOk(Status const status) { return status == ok; }
// Status predicate: accepts found and not-found alike; used for fetches
// over key ranges that were only partially inserted.
// (Stray semicolon after the function body removed.)
static bool checkOkOrNotFound(Status const status)
{
return (status == ok) || (status == notFound);
}
// Fetches every object produced by 'factory' from 'backend' and checks
// each returned status against the predicate 'f'.  Any object that is
// actually found must be a clone of the one the factory generated.
void testFetch(backend_ptr& backend, NodeFactory& factory,
check_func f)
{
factory.reset();
while (auto expected = factory.next())
{
NodeObject::Ptr fetched;
Status const result =
backend->fetch(expected->getHash().cbegin(), &fetched);
expect(f(result), "Wrong status");
if (result != ok)
continue;
expect(fetched != nullptr, "Should not be null");
expect(fetched->isCloneOf(expected), "Should be clones");
}
}
// Stores every object produced by 'factory' into 'backend', one write
// per object (the non-batched insert benchmark).
static void testInsert(backend_ptr& backend, NodeFactory& factory)
{
factory.reset();
for (auto node = factory.next(); node; node = factory.next())
backend->store(node);
}
// Stores the factory's objects into 'backend' in batches of batchSize.
// Bug fix: the previous loop discarded the final partial batch when the
// object count was not a multiple of batchSize (fillBatch returns false
// while leaving the tail in 'batch'); the tail is now stored as well.
static void testBatchInsert(backend_ptr& backend, NodeFactory& factory)
{
factory.reset();
Batch batch;
bool more = true;
while (more)
{
more = factory.fillBatch(batch, batchSize);
if (!batch.empty())
backend->storeBatch(batch);
}
}
result_type benchmarkBackend(std::string const& config,
std::int64_t const seedValue)
{
Stopwatch t;
result_type results;
auto params = parseDelimitedKeyValueString(config, ',');
std::int64_t numObjects = params["num_objects"].getIntValue();
params.remove("num_objects");
auto manager = make_Manager();
beast::StringPairArray params;
beast::UnitTestUtilities::TempDirectory path ("node_db");
params.set ("type", type);
params.set ("path", path.getFullPathName ());
// Create batches
NodeStore::Batch batch1;
createPredictableBatch (batch1, numObjectsToTest, seedValue);
NodeStore::Batch batch2;
createPredictableBatch (batch2, numObjectsToTest, seedValue);
NodeStore::Batch missingBatch;
createPredictableBatch (missingBatch, numObjectsToTest, seedValue+1);
params.set("path", path.getFullPathName());
DummyScheduler scheduler;
beast::Journal j;
// Open the backend
std::unique_ptr <Backend> backend (manager->make_Backend (
params, scheduler, j));
auto backend = manager->make_Backend(params, scheduler, j);
Stopwatch t;
NodeFactory insertFactory(seedValue, numObjects, 0, numObjects);
NodeFactory batchFactory(seedValue, numObjects, numObjects * 10,
numObjects * 11);
// Individual write batch test
t.start ();
storeBatch (*backend, batch1);
log << " Single write: " << std::to_string (t.getElapsed ()) << " seconds";
// Twice the range of insert
NodeFactory mixedFactory(seedValue, numObjects, numObjects,
numObjects * 2);
// Same as batch, different order
NodeFactory randomFactory(seedValue + 1, numObjects, numObjects * 10,
numObjects * 11);
// Don't exist
NodeFactory missingFactory(seedValue, numObjects, numObjects * 3,
numObjects * 4);
// Bulk write batch test
t.start ();
backend->storeBatch (batch2);
log << " Batch write: " << std::to_string (t.getElapsed ()) << " seconds";
t.start();
testInsert(backend, insertFactory);
results["Inserts"] = t.getElapsed();
// Read test
Batch copy;
t.start ();
fetchCopyOfBatch (*backend, &copy, batch1);
fetchCopyOfBatch (*backend, &copy, batch2);
log << " Batch read: " << std::to_string (t.getElapsed ()) << " seconds";
t.start();
testBatchInsert(backend, batchFactory);
results["Batch Insert"] = t.getElapsed();
// Read missing keys test
t.start ();
fetchMissing (*backend, missingBatch);
log << " Batch read missing: " << std::to_string (t.getElapsed ()) << " seconds";
t.start();
testFetch(backend, mixedFactory, checkOkOrNotFound);
results["Fetch 50/50"] = t.getElapsed();
t.start();
testFetch(backend, insertFactory, checkOk);
results["Ordered Fetch"] = t.getElapsed();
t.start();
testFetch(backend, randomFactory, checkOkOrNotFound);
results["Fetch Random"] = t.getElapsed();
t.start();
testFetch(backend, missingFactory, checkNotFound);
results["Fetch Missing"] = t.getElapsed();
return results;
}
//--------------------------------------------------------------------------
@@ -111,24 +275,57 @@ public:
void run ()
{
int const seedValue = 50;
testNodeFactory(seedValue);
testBackend ("leveldb", seedValue);
// Expects a semi-colon delimited list of backend configurations.
// Each configuration is a comma delimited list of key-value pairs.
// Each pair is separated by a '='.
// 'type' defaults to 'rocksdb'
// 'num_objects' defaults to '100000'
// defaultArguments serves as an example.
#if RIPPLE_HYPERLEVELDB_AVAILABLE
testBackend ("hyperleveldb", seedValue);
#endif
std::string defaultArguments =
"type=rocksdb,open_files=2000,filter_bits=12,cache_mb=256,file_size_mb=8,file_size_mult=2,num_objects=100000;"
"type=hyperleveldb,num_objects=100000";
#if RIPPLE_ROCKSDB_AVAILABLE
testBackend ("rocksdb", seedValue);
#endif
auto args = arg();
#if RIPPLE_ENABLE_SQLITE_BACKEND_TESTS
testBackend ("sqlite", seedValue);
#endif
if (args.empty()) args = defaultArguments;
std::vector<std::string> configs;
boost::split (configs, args, boost::algorithm::is_any_of (";"));
std::map<std::string, result_type> results;
for (auto& config : configs)
{
// Trim trailing comma if exists
boost::trim_right_if(config, boost::algorithm::is_any_of(","));
// Defaults
if (config.find("type=") == std::string::npos)
config += ",type=rocksdb";
if (config.find("num_objects") == std::string::npos)
config += ",num_objects=100000";
results[config] = benchmarkBackend(config, seedValue);
}
std::stringstream ss;
ss << std::setprecision(2) << std::fixed;
for (auto const& header : results.begin()->second)
ss << std::setw(14) << header.first << " ";
ss << std::endl;
for (auto const& result : results)
{
for (auto const item : result.second)
ss << std::setw(14) << item.second << " ";
ss << result.first << std::endl;
}
log << ss.str();
}
};
BEAST_DEFINE_TESTSUITE_MANUAL(NodeStoreTiming,bench,ripple);
BEAST_DEFINE_TESTSUITE_MANUAL(NodeStoreTiming,ripple_core,ripple);
}
} // namespace NodeStore
}