mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Fixed cache key for block cache
Summary: Added function to `RandomAccessFile` to generate an unique ID for that file. Currently only `PosixRandomAccessFile` has this behaviour implemented and only on Linux. Changed how key is generated in `Table::BlockReader`. Added tests to check whether the unique ID is stable, unique and not a prefix of another unique ID. Added tests to see that `Table` uses the cache more efficiently. Test Plan: make check Reviewers: chip, vamsi, dhruba Reviewed By: chip CC: leveldb Differential Revision: https://reviews.facebook.net/D8145
This commit is contained in:
@@ -223,6 +223,26 @@ class RandomAccessFile {
|
||||
// Safe for concurrent use by multiple threads.
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const = 0;
|
||||
|
||||
// Tries to get an unique ID for this file that will be the same each time
|
||||
// the file is opened (and will stay the same while the file is open).
|
||||
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
|
||||
// ID can be created this function returns the length of the ID and places it
|
||||
// in "id"; otherwise, this function returns 0, in which case "id" may more
|
||||
// may not have been modified.
|
||||
//
|
||||
// This function guarantees, for IDs from a given environment, two unique ids
|
||||
// cannot be made equal to eachother by adding arbitrary bytes to one of
|
||||
// them. That is, no unique ID is the prefix of another.
|
||||
//
|
||||
// This function guarantees that the returned ID will not be interpretable as
|
||||
// a single varint.
|
||||
//
|
||||
// Note: these IDs are only valid for the duration of the process.
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const {
|
||||
return 0; // Default implementation to prevent issues with backwards
|
||||
// compatibility.
|
||||
};
|
||||
};
|
||||
|
||||
// A file abstraction for sequential writing. The implementation
|
||||
|
||||
@@ -58,6 +58,10 @@ class Table {
|
||||
// be close to the file length.
|
||||
uint64_t ApproximateOffsetOf(const Slice& key) const;
|
||||
|
||||
// Returns true if the block for the specified key is in cache.
|
||||
// REQUIRES: key is in this table.
|
||||
bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);
|
||||
|
||||
private:
|
||||
struct Rep;
|
||||
Rep* rep_;
|
||||
@@ -80,6 +84,8 @@ class Table {
|
||||
void ReadMeta(const Footer& footer);
|
||||
void ReadFilter(const Slice& filter_handle_value);
|
||||
|
||||
static void SetupCacheKeyPrefix(Rep* rep);
|
||||
|
||||
// No copying allowed
|
||||
Table(const Table&);
|
||||
void operator=(const Table&);
|
||||
|
||||
@@ -18,6 +18,11 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// The longest the prefix of the cache key used to identify blocks can be.
|
||||
// We are using the fact that we know for Posix files the unique ID is three
|
||||
// varints.
|
||||
const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1;
|
||||
|
||||
struct Table::Rep {
|
||||
~Rep() {
|
||||
delete filter;
|
||||
@@ -28,7 +33,8 @@ struct Table::Rep {
|
||||
Options options;
|
||||
Status status;
|
||||
unique_ptr<RandomAccessFile> file;
|
||||
uint64_t cache_id;
|
||||
char cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t cache_key_prefix_size;
|
||||
FilterBlockReader* filter;
|
||||
const char* filter_data;
|
||||
|
||||
@@ -36,6 +42,25 @@ struct Table::Rep {
|
||||
Block* index_block;
|
||||
};
|
||||
|
||||
// Helper function to setup the cache key's prefix for the Table.
|
||||
void Table::SetupCacheKeyPrefix(Rep* rep) {
|
||||
assert(kMaxCacheKeyPrefixSize >= 10);
|
||||
rep->cache_key_prefix_size = 0;
|
||||
if (rep->options.block_cache) {
|
||||
rep->cache_key_prefix_size = rep->file->GetUniqueId(rep->cache_key_prefix,
|
||||
kMaxCacheKeyPrefixSize);
|
||||
|
||||
if (rep->cache_key_prefix_size == 0) {
|
||||
// If the prefix wasn't generated or was too long, we create one from the
|
||||
// cache.
|
||||
char* end = EncodeVarint64(rep->cache_key_prefix,
|
||||
rep->options.block_cache->NewId());
|
||||
rep->cache_key_prefix_size =
|
||||
static_cast<size_t>(end - rep->cache_key_prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status Table::Open(const Options& options,
|
||||
unique_ptr<RandomAccessFile>&& file,
|
||||
uint64_t size,
|
||||
@@ -80,7 +105,7 @@ Status Table::Open(const Options& options,
|
||||
rep->file = std::move(file);
|
||||
rep->metaindex_handle = footer.metaindex_handle();
|
||||
rep->index_block = index_block;
|
||||
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
|
||||
SetupCacheKeyPrefix(rep);
|
||||
rep->filter_data = NULL;
|
||||
rep->filter = NULL;
|
||||
table->reset(new Table(rep));
|
||||
@@ -179,10 +204,15 @@ Iterator* Table::BlockReader(void* arg,
|
||||
if (s.ok()) {
|
||||
BlockContents contents;
|
||||
if (block_cache != NULL) {
|
||||
char cache_key_buffer[16];
|
||||
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
|
||||
EncodeFixed64(cache_key_buffer+8, handle.offset());
|
||||
Slice key(cache_key_buffer, sizeof(cache_key_buffer));
|
||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
const size_t cache_key_prefix_size = table->rep_->cache_key_prefix_size;
|
||||
assert(cache_key_prefix_size != 0);
|
||||
assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
|
||||
memcpy(cache_key, table->rep_->cache_key_prefix,
|
||||
cache_key_prefix_size);
|
||||
char* end = EncodeVarint64(cache_key + cache_key_prefix_size,
|
||||
handle.offset());
|
||||
Slice key(cache_key, static_cast<size_t>(end-cache_key));
|
||||
cache_handle = block_cache->Lookup(key);
|
||||
if (cache_handle != NULL) {
|
||||
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
|
||||
@@ -274,6 +304,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
||||
return s;
|
||||
}
|
||||
|
||||
void SaveDidIO(void* arg, const Slice& key, const Slice& value, bool didIO) {
|
||||
*reinterpret_cast<bool*>(arg) = didIO;
|
||||
}
|
||||
bool Table::TEST_KeyInCache(const ReadOptions& options, const Slice& key) {
|
||||
// We use InternalGet() as it has logic that checks whether we read the
|
||||
// block from the disk or not.
|
||||
bool didIO = false;
|
||||
Status s = InternalGet(options, key, &didIO, SaveDidIO);
|
||||
assert(s.ok());
|
||||
return !didIO;
|
||||
}
|
||||
|
||||
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
|
||||
Iterator* index_iter =
|
||||
|
||||
@@ -109,8 +109,8 @@ class StringSink: public WritableFile {
|
||||
|
||||
class StringSource: public RandomAccessFile {
|
||||
public:
|
||||
StringSource(const Slice& contents)
|
||||
: contents_(contents.data(), contents.size()) {
|
||||
StringSource(const Slice& contents, uint64_t uniq_id)
|
||||
: contents_(contents.data(), contents.size()), uniq_id_(uniq_id) {
|
||||
}
|
||||
|
||||
virtual ~StringSource() { }
|
||||
@@ -130,8 +130,20 @@ class StringSource: public RandomAccessFile {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const {
|
||||
if (max_size < 20) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* rid = id;
|
||||
rid = EncodeVarint64(rid, uniq_id_);
|
||||
rid = EncodeVarint64(rid, 0);
|
||||
return static_cast<size_t>(rid-id);
|
||||
}
|
||||
|
||||
private:
|
||||
std::string contents_;
|
||||
uint64_t uniq_id_;
|
||||
};
|
||||
|
||||
typedef std::map<std::string, std::string, anon::STLLessThan> KVMap;
|
||||
@@ -228,8 +240,8 @@ class TableConstructor: public Constructor {
|
||||
}
|
||||
virtual Status FinishImpl(const Options& options, const KVMap& data) {
|
||||
Reset();
|
||||
StringSink sink;
|
||||
TableBuilder builder(options, &sink);
|
||||
sink_.reset(new StringSink());
|
||||
TableBuilder builder(options, sink_.get());
|
||||
|
||||
for (KVMap::const_iterator it = data.begin();
|
||||
it != data.end();
|
||||
@@ -240,15 +252,13 @@ class TableConstructor: public Constructor {
|
||||
Status s = builder.Finish();
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
|
||||
ASSERT_EQ(sink.contents().size(), builder.FileSize());
|
||||
ASSERT_EQ(sink_->contents().size(), builder.FileSize());
|
||||
|
||||
// Open the table
|
||||
source_.reset(new StringSource(sink.contents()));
|
||||
Options table_options;
|
||||
table_options.comparator = options.comparator;
|
||||
table_options.compression_opts = options.compression_opts;
|
||||
return Table::Open(table_options, std::move(source_),
|
||||
sink.contents().size(), &table_);
|
||||
uniq_id_ = cur_uniq_id_++;
|
||||
source_.reset(new StringSource(sink_->contents(), uniq_id_));
|
||||
return Table::Open(options, std::move(source_),
|
||||
sink_->contents().size(), &table_);
|
||||
}
|
||||
|
||||
virtual Iterator* NewIterator() const {
|
||||
@@ -259,17 +269,34 @@ class TableConstructor: public Constructor {
|
||||
return table_->ApproximateOffsetOf(key);
|
||||
}
|
||||
|
||||
virtual Status Reopen(const Options& options) {
|
||||
source_.reset(new StringSource(sink_->contents(), uniq_id_));
|
||||
return Table::Open(options, std::move(source_),
|
||||
sink_->contents().size(), &table_);
|
||||
}
|
||||
|
||||
virtual Table* table() {
|
||||
return table_.get();
|
||||
}
|
||||
|
||||
private:
|
||||
void Reset() {
|
||||
uniq_id_ = 0;
|
||||
table_.reset();
|
||||
sink_.reset();
|
||||
source_.reset();
|
||||
}
|
||||
|
||||
uint64_t uniq_id_;
|
||||
unique_ptr<StringSink> sink_;
|
||||
unique_ptr<StringSource> source_;
|
||||
unique_ptr<Table> table_;
|
||||
|
||||
TableConstructor();
|
||||
|
||||
static uint64_t cur_uniq_id_;
|
||||
};
|
||||
uint64_t TableConstructor::cur_uniq_id_ = 1;
|
||||
|
||||
// A helper class that converts internal format keys into user keys
|
||||
class KeyConvertingIterator: public Iterator {
|
||||
@@ -892,6 +919,44 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
|
||||
|
||||
}
|
||||
|
||||
TEST(TableTest, BlockCacheLeak) {
|
||||
// Check that when we reopen a table we don't lose access to blocks already
|
||||
// in the cache. This test checks whether the Table actually makes use of the
|
||||
// unique ID from the file.
|
||||
|
||||
Options opt;
|
||||
opt.block_size = 1024;
|
||||
opt.compression = kNoCompression;
|
||||
opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever
|
||||
// lose cached values.
|
||||
|
||||
TableConstructor c(BytewiseComparator());
|
||||
c.Add("k01", "hello");
|
||||
c.Add("k02", "hello2");
|
||||
c.Add("k03", std::string(10000, 'x'));
|
||||
c.Add("k04", std::string(200000, 'x'));
|
||||
c.Add("k05", std::string(300000, 'x'));
|
||||
c.Add("k06", "hello3");
|
||||
c.Add("k07", std::string(100000, 'x'));
|
||||
std::vector<std::string> keys;
|
||||
KVMap kvmap;
|
||||
c.Finish(opt, &keys, &kvmap);
|
||||
|
||||
unique_ptr<Iterator> iter(c.NewIterator());
|
||||
iter->SeekToFirst();
|
||||
while (iter->Valid()) {
|
||||
iter->key();
|
||||
iter->value();
|
||||
iter->Next();
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
|
||||
ASSERT_OK(c.Reopen(opt));
|
||||
for (const std::string& key: keys) {
|
||||
ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
@@ -18,6 +18,10 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// The maximum length of a varint in bytes for 32 and 64 bits respectively.
|
||||
const unsigned int kMaxVarint32Length = 5;
|
||||
const unsigned int kMaxVarint64Length = 10;
|
||||
|
||||
// Standard Put... routines append to a string
|
||||
extern void PutFixed32(std::string* dst, uint32_t value);
|
||||
extern void PutFixed64(std::string* dst, uint64_t value);
|
||||
|
||||
@@ -11,18 +11,23 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#if defined(OS_LINUX)
|
||||
#include <linux/fs.h>
|
||||
#endif
|
||||
#if defined(LEVELDB_PLATFORM_ANDROID)
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#include "leveldb/env.h"
|
||||
#include "leveldb/slice.h"
|
||||
#include "port/port.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/posix_logger.h"
|
||||
|
||||
@@ -108,6 +113,35 @@ class PosixRandomAccessFile: public RandomAccessFile {
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const {
|
||||
// TODO: possibly allow this function to handle tighter bounds.
|
||||
if (max_size < kMaxVarint64Length*3) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct stat buf;
|
||||
int result = fstat(fd_, &buf);
|
||||
if (result == -1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
long version = 0;
|
||||
result = ioctl(fd_, FS_IOC_GETVERSION, &version);
|
||||
if (result == -1) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t uversion = (uint64_t)version;
|
||||
|
||||
char* rid = id;
|
||||
rid = EncodeVarint64(rid, buf.st_dev);
|
||||
rid = EncodeVarint64(rid, buf.st_ino);
|
||||
rid = EncodeVarint64(rid, uversion);
|
||||
assert(rid >= id);
|
||||
return static_cast<size_t>(rid-id);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// mmap() based random-access
|
||||
|
||||
142
util/env_test.cc
142
util/env_test.cc
@@ -4,7 +4,9 @@
|
||||
|
||||
#include "leveldb/env.h"
|
||||
|
||||
#include <unordered_set>
|
||||
#include "port/port.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
@@ -97,6 +99,146 @@ TEST(EnvPosixTest, StartThread) {
|
||||
ASSERT_EQ(state.val, 3);
|
||||
}
|
||||
|
||||
bool IsSingleVarint(const std::string& s) {
|
||||
Slice slice(s);
|
||||
|
||||
uint64_t v;
|
||||
if (!GetVarint64(&slice, &v)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return slice.size() == 0;
|
||||
}
|
||||
|
||||
bool IsUniqueIDValid(const std::string& s) {
|
||||
return !s.empty() && !IsSingleVarint(s);
|
||||
}
|
||||
|
||||
const size_t MAX_ID_SIZE = 100;
|
||||
char temp_id[MAX_ID_SIZE];
|
||||
|
||||
TEST(EnvPosixTest, RandomAccessUniqueID) {
|
||||
// Create file.
|
||||
std::string fname = test::TmpDir() + "/" + "testfile";
|
||||
unique_ptr<WritableFile> wfile;
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &wfile));
|
||||
|
||||
unique_ptr<RandomAccessFile> file;
|
||||
|
||||
// Get Unique ID
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
|
||||
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
|
||||
ASSERT_TRUE(id_size > 0);
|
||||
std::string unique_id1(temp_id, id_size);
|
||||
ASSERT_TRUE(IsUniqueIDValid(unique_id1));
|
||||
|
||||
// Get Unique ID again
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
|
||||
id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
|
||||
ASSERT_TRUE(id_size > 0);
|
||||
std::string unique_id2(temp_id, id_size);
|
||||
ASSERT_TRUE(IsUniqueIDValid(unique_id2));
|
||||
|
||||
// Get Unique ID again after waiting some time.
|
||||
env_->SleepForMicroseconds(1000000);
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
|
||||
id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
|
||||
ASSERT_TRUE(id_size > 0);
|
||||
std::string unique_id3(temp_id, id_size);
|
||||
ASSERT_TRUE(IsUniqueIDValid(unique_id3));
|
||||
|
||||
// Check IDs are the same.
|
||||
ASSERT_EQ(unique_id1, unique_id2);
|
||||
ASSERT_EQ(unique_id2, unique_id3);
|
||||
|
||||
// Delete the file
|
||||
env_->DeleteFile(fname);
|
||||
}
|
||||
|
||||
// Returns true if any of the strings in ss are the prefix of another string.
|
||||
bool HasPrefix(const std::unordered_set<std::string>& ss) {
|
||||
for (const std::string& s: ss) {
|
||||
if (s.empty()) {
|
||||
return true;
|
||||
}
|
||||
for (size_t i = 1; i < s.size(); ++i) {
|
||||
if (ss.count(s.substr(0, i)) != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
TEST(EnvPosixTest, RandomAccessUniqueIDConcurrent) {
|
||||
// Check whether a bunch of concurrently existing files have unique IDs.
|
||||
|
||||
// Create the files
|
||||
std::vector<std::string> fnames;
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
fnames.push_back(test::TmpDir() + "/" + "testfile" + std::to_string(i));
|
||||
|
||||
// Create file.
|
||||
unique_ptr<WritableFile> wfile;
|
||||
ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile));
|
||||
}
|
||||
|
||||
// Collect and check whether the IDs are unique.
|
||||
std::unordered_set<std::string> ids;
|
||||
for (const std::string fname: fnames) {
|
||||
unique_ptr<RandomAccessFile> file;
|
||||
std::string unique_id;
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
|
||||
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
|
||||
ASSERT_TRUE(id_size > 0);
|
||||
unique_id = std::string(temp_id, id_size);
|
||||
ASSERT_TRUE(IsUniqueIDValid(unique_id));
|
||||
|
||||
ASSERT_TRUE(ids.count(unique_id) == 0);
|
||||
ids.insert(unique_id);
|
||||
}
|
||||
|
||||
// Delete the files
|
||||
for (const std::string fname: fnames) {
|
||||
ASSERT_OK(env_->DeleteFile(fname));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(!HasPrefix(ids));
|
||||
}
|
||||
|
||||
TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) {
|
||||
std::string fname = test::TmpDir() + "/" + "testfile";
|
||||
|
||||
// Check that after file is deleted we don't get same ID again in a new file.
|
||||
std::unordered_set<std::string> ids;
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
// Create file.
|
||||
{
|
||||
unique_ptr<WritableFile> wfile;
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &wfile));
|
||||
}
|
||||
|
||||
// Get Unique ID
|
||||
std::string unique_id;
|
||||
{
|
||||
unique_ptr<RandomAccessFile> file;
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
|
||||
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
|
||||
ASSERT_TRUE(id_size > 0);
|
||||
unique_id = std::string(temp_id, id_size);
|
||||
}
|
||||
|
||||
ASSERT_TRUE(IsUniqueIDValid(unique_id));
|
||||
ASSERT_TRUE(ids.count(unique_id) == 0);
|
||||
ids.insert(unique_id);
|
||||
|
||||
// Delete the file
|
||||
ASSERT_OK(env_->DeleteFile(fname));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(!HasPrefix(ids));
|
||||
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
Reference in New Issue
Block a user