mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Merge commit 'e8893e96780685b9e39447199d946739e565fef5' as 'src/hyperleveldb'
This commit is contained in:
88
src/hyperleveldb/db/builder.cc
Normal file
88
src/hyperleveldb/db/builder.cc
Normal file
@@ -0,0 +1,88 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "builder.h"
|
||||
|
||||
#include "filename.h"
|
||||
#include "dbformat.h"
|
||||
#include "table_cache.h"
|
||||
#include "version_edit.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/iterator.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Writes the entries produced by *iter into a table file whose name is
// derived from dbname and meta->number.
//
// meta->file_size is reset to zero first.  If the iterator yields at least
// one entry, meta->smallest and meta->largest are decoded from the first and
// last keys seen, and meta->file_size is taken from the finished builder.
// The file is removed again unless the returned status is ok and the
// recorded size is non-zero.
Status BuildTable(const std::string& dbname,
                  Env* env,
                  const Options& options,
                  TableCache* table_cache,
                  Iterator* iter,
                  FileMetaData* meta) {
  Status status;
  meta->file_size = 0;
  iter->SeekToFirst();

  const std::string table_path = TableFileName(dbname, meta->number);
  if (iter->Valid()) {
    WritableFile* out;
    status = env->NewWritableFile(table_path, &out);
    if (!status.ok()) {
      return status;
    }

    TableBuilder* table = new TableBuilder(options, out);
    meta->smallest.DecodeFrom(iter->key());
    while (iter->Valid()) {
      const Slice current = iter->key();
      // Overwritten on every pass, so it ends up holding the last key.
      meta->largest.DecodeFrom(current);
      table->Add(current, iter->value());
      iter->Next();
    }

    // Finish the builder and pick up any error it reports.
    if (!status.ok()) {
      table->Abandon();
    } else {
      status = table->Finish();
      if (status.ok()) {
        meta->file_size = table->FileSize();
        assert(meta->file_size > 0);
      }
    }
    delete table;

    // Flush and close the file, stopping at the first failure.
    if (status.ok()) {
      status = out->Sync();
    }
    if (status.ok()) {
      status = out->Close();
    }
    delete out;
    out = NULL;

    if (status.ok()) {
      // Open the new table through the cache to confirm it is readable.
      Iterator* probe = table_cache->NewIterator(ReadOptions(),
                                                 meta->number,
                                                 meta->file_size);
      status = probe->status();
      delete probe;
    }
  }

  // An error on the input iterator overrides whatever happened above.
  if (!iter->status().ok()) {
    status = iter->status();
  }

  const bool keep_table = status.ok() && meta->file_size > 0;
  if (!keep_table) {
    env->DeleteFile(table_path);
  }
  return status;
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
34
src/hyperleveldb/db/builder.h
Normal file
34
src/hyperleveldb/db/builder.h
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_BUILDER_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_BUILDER_H_
|
||||
|
||||
#include "../hyperleveldb/status.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
struct Options;
|
||||
struct FileMetaData;
|
||||
|
||||
class Env;
|
||||
class Iterator;
|
||||
class TableCache;
|
||||
class VersionEdit;
|
||||
|
||||
// Build a Table file from the contents of *iter. The generated file
|
||||
// will be named according to meta->number. On success, the rest of
|
||||
// *meta will be filled with metadata about the generated table.
|
||||
// If no data is present in *iter, meta->file_size will be set to
|
||||
// zero, and no Table file will be produced.
|
||||
extern Status BuildTable(const std::string& dbname,
|
||||
Env* env,
|
||||
const Options& options,
|
||||
TableCache* table_cache,
|
||||
Iterator* iter,
|
||||
FileMetaData* meta);
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_BUILDER_H_
|
||||
595
src/hyperleveldb/db/c.cc
Normal file
595
src/hyperleveldb/db/c.cc
Normal file
@@ -0,0 +1,595 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "c.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include "../hyperleveldb/cache.h"
|
||||
#include "../hyperleveldb/comparator.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/filter_policy.h"
|
||||
#include "../hyperleveldb/iterator.h"
|
||||
#include "../hyperleveldb/options.h"
|
||||
#include "../hyperleveldb/status.h"
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
|
||||
using leveldb::Cache;
|
||||
using leveldb::Comparator;
|
||||
using leveldb::CompressionType;
|
||||
using leveldb::DB;
|
||||
using leveldb::Env;
|
||||
using leveldb::FileLock;
|
||||
using leveldb::FilterPolicy;
|
||||
using leveldb::Iterator;
|
||||
using leveldb::kMajorVersion;
|
||||
using leveldb::kMinorVersion;
|
||||
using leveldb::Logger;
|
||||
using leveldb::NewBloomFilterPolicy;
|
||||
using leveldb::NewLRUCache;
|
||||
using leveldb::Options;
|
||||
using leveldb::RandomAccessFile;
|
||||
using leveldb::Range;
|
||||
using leveldb::ReadOptions;
|
||||
using leveldb::SequentialFile;
|
||||
using leveldb::Slice;
|
||||
using leveldb::Snapshot;
|
||||
using leveldb::Status;
|
||||
using leveldb::WritableFile;
|
||||
using leveldb::WriteBatch;
|
||||
using leveldb::WriteOptions;
|
||||
|
||||
extern "C" {
|
||||
|
||||
struct leveldb_t { DB* rep; };
|
||||
struct leveldb_iterator_t { Iterator* rep; };
|
||||
struct leveldb_writebatch_t { WriteBatch rep; };
|
||||
struct leveldb_snapshot_t { const Snapshot* rep; };
|
||||
struct leveldb_readoptions_t { ReadOptions rep; };
|
||||
struct leveldb_writeoptions_t { WriteOptions rep; };
|
||||
struct leveldb_options_t { Options rep; };
|
||||
struct leveldb_cache_t { Cache* rep; };
|
||||
struct leveldb_seqfile_t { SequentialFile* rep; };
|
||||
struct leveldb_randomfile_t { RandomAccessFile* rep; };
|
||||
struct leveldb_writablefile_t { WritableFile* rep; };
|
||||
struct leveldb_logger_t { Logger* rep; };
|
||||
struct leveldb_filelock_t { FileLock* rep; };
|
||||
|
||||
struct leveldb_comparator_t : public Comparator {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
int (*compare_)(
|
||||
void*,
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen);
|
||||
const char* (*name_)(void*);
|
||||
|
||||
virtual ~leveldb_comparator_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
virtual int Compare(const Slice& a, const Slice& b) const {
|
||||
return (*compare_)(state_, a.data(), a.size(), b.data(), b.size());
|
||||
}
|
||||
|
||||
virtual const char* Name() const {
|
||||
return (*name_)(state_);
|
||||
}
|
||||
|
||||
// No-ops since the C binding does not support key shortening methods.
|
||||
virtual void FindShortestSeparator(std::string*, const Slice&) const { }
|
||||
virtual void FindShortSuccessor(std::string* key) const { }
|
||||
};
|
||||
|
||||
struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
const char* (*name_)(void*);
|
||||
char* (*create_)(
|
||||
void*,
|
||||
const char* const* key_array, const size_t* key_length_array,
|
||||
int num_keys,
|
||||
size_t* filter_length);
|
||||
unsigned char (*key_match_)(
|
||||
void*,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length);
|
||||
|
||||
virtual ~leveldb_filterpolicy_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
virtual const char* Name() const {
|
||||
return (*name_)(state_);
|
||||
}
|
||||
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||
std::vector<const char*> key_pointers(n);
|
||||
std::vector<size_t> key_sizes(n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
key_pointers[i] = keys[i].data();
|
||||
key_sizes[i] = keys[i].size();
|
||||
}
|
||||
size_t len;
|
||||
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
|
||||
dst->append(filter, len);
|
||||
free(filter);
|
||||
}
|
||||
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||
return (*key_match_)(state_, key.data(), key.size(),
|
||||
filter.data(), filter.size());
|
||||
}
|
||||
};
|
||||
|
||||
struct leveldb_env_t {
|
||||
Env* rep;
|
||||
bool is_default;
|
||||
};
|
||||
|
||||
static bool SaveError(char** errptr, const Status& s) {
|
||||
assert(errptr != NULL);
|
||||
if (s.ok()) {
|
||||
return false;
|
||||
} else if (*errptr == NULL) {
|
||||
*errptr = strdup(s.ToString().c_str());
|
||||
} else {
|
||||
// TODO(sanjay): Merge with existing error?
|
||||
free(*errptr);
|
||||
*errptr = strdup(s.ToString().c_str());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a malloc()'d copy of the bytes of str (not NUL-terminated).
//
// At least one byte is always requested: malloc(0) is allowed to return
// NULL, and callers such as leveldb_get() use NULL to mean "no value", so an
// empty value must still produce a non-NULL pointer.  Returns NULL only when
// the allocation itself fails.
static char* CopyString(const std::string& str) {
  const size_t len = str.size();
  char* result = static_cast<char*>(malloc(len > 0 ? len : 1));
  if (result != NULL && len > 0) {
    memcpy(result, str.data(), len);
  }
  return result;
}
|
||||
|
||||
// Opens the database called name with the given options.
// Returns a new handle, or NULL with the failure stored in *errptr.
leveldb_t* leveldb_open(
    const leveldb_options_t* options,
    const char* name,
    char** errptr) {
  DB* db;
  const Status status = DB::Open(options->rep, std::string(name), &db);
  if (SaveError(errptr, status)) {
    return NULL;
  }
  leveldb_t* handle = new leveldb_t;
  handle->rep = db;
  return handle;
}
|
||||
|
||||
// Deletes the wrapped DB object and then the handle itself.
void leveldb_close(leveldb_t* db) {
  DB* const underlying = db->rep;
  delete underlying;
  delete db;
}
|
||||
|
||||
// Stores val[0..vallen) under key[0..keylen).  A failure is reported through
// *errptr.
void leveldb_put(
    leveldb_t* db,
    const leveldb_writeoptions_t* options,
    const char* key, size_t keylen,
    const char* val, size_t vallen,
    char** errptr) {
  const Status status =
      db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen));
  SaveError(errptr, status);
}
|
||||
|
||||
// Removes the entry for key[0..keylen).  A failure is reported through
// *errptr.
void leveldb_delete(
    leveldb_t* db,
    const leveldb_writeoptions_t* options,
    const char* key, size_t keylen,
    char** errptr) {
  const Status status = db->rep->Delete(options->rep, Slice(key, keylen));
  SaveError(errptr, status);
}
|
||||
|
||||
|
||||
// Applies the operations collected in batch.  A failure is reported through
// *errptr.
void leveldb_write(
    leveldb_t* db,
    const leveldb_writeoptions_t* options,
    leveldb_writebatch_t* batch,
    char** errptr) {
  const Status status = db->rep->Write(options->rep, &batch->rep);
  SaveError(errptr, status);
}
|
||||
|
||||
// Looks up key[0..keylen).
// On success returns a CopyString() copy of the value and stores its length
// in *vallen.  Otherwise returns NULL with *vallen set to 0; *errptr is only
// written when the failure is something other than "not found".
char* leveldb_get(
    leveldb_t* db,
    const leveldb_readoptions_t* options,
    const char* key, size_t keylen,
    size_t* vallen,
    char** errptr) {
  std::string value;
  const Status status = db->rep->Get(options->rep, Slice(key, keylen), &value);
  if (status.ok()) {
    *vallen = value.size();
    return CopyString(value);
  }

  *vallen = 0;
  if (!status.IsNotFound()) {
    SaveError(errptr, status);
  }
  return NULL;
}
|
||||
|
||||
// Returns a new handle wrapping an iterator created with the given read
// options.
leveldb_iterator_t* leveldb_create_iterator(
    leveldb_t* db,
    const leveldb_readoptions_t* options) {
  leveldb_iterator_t* handle = new leveldb_iterator_t;
  DB* const underlying = db->rep;
  handle->rep = underlying->NewIterator(options->rep);
  return handle;
}
|
||||
|
||||
// Returns a new handle wrapping the snapshot obtained from GetSnapshot().
const leveldb_snapshot_t* leveldb_create_snapshot(
    leveldb_t* db) {
  leveldb_snapshot_t* handle = new leveldb_snapshot_t;
  DB* const underlying = db->rep;
  handle->rep = underlying->GetSnapshot();
  return handle;
}
|
||||
|
||||
// Hands the wrapped snapshot back to the database, then deletes the handle.
void leveldb_release_snapshot(
    leveldb_t* db,
    const leveldb_snapshot_t* snapshot) {
  const Snapshot* const wrapped = snapshot->rep;
  db->rep->ReleaseSnapshot(wrapped);
  delete snapshot;
}
|
||||
|
||||
// Returns a strdup()'d copy of the named property, or NULL when
// GetProperty() reports failure.
char* leveldb_property_value(
    leveldb_t* db,
    const char* propname) {
  std::string text;
  if (!db->rep->GetProperty(Slice(propname), &text)) {
    return NULL;
  }
  // strdup() is used because the value is expected to be human readable.
  return strdup(text.c_str());
}
|
||||
|
||||
// Builds num_ranges Range objects from the parallel start/limit arrays and
// passes them to GetApproximateSizes(), which fills in sizes.
void leveldb_approximate_sizes(
    leveldb_t* db,
    int num_ranges,
    const char* const* range_start_key, const size_t* range_start_key_len,
    const char* const* range_limit_key, const size_t* range_limit_key_len,
    uint64_t* sizes) {
  Range* const ranges = new Range[num_ranges];
  for (int i = 0; i < num_ranges; ++i) {
    Range& current = ranges[i];
    current.start = Slice(range_start_key[i], range_start_key_len[i]);
    current.limit = Slice(range_limit_key[i], range_limit_key_len[i]);
  }
  db->rep->GetApproximateSizes(ranges, num_ranges, sizes);
  delete[] ranges;
}
|
||||
|
||||
// Calls CompactRange() for [start_key, limit_key].  A NULL key pointer is
// forwarded as a NULL Slice pointer.
void leveldb_compact_range(
    leveldb_t* db,
    const char* start_key, size_t start_key_len,
    const char* limit_key, size_t limit_key_len) {
  Slice a, b;
  const Slice* begin = NULL;
  const Slice* end = NULL;
  if (start_key) {
    a = Slice(start_key, start_key_len);
    begin = &a;
  }
  if (limit_key) {
    b = Slice(limit_key, limit_key_len);
    end = &b;
  }
  db->rep->CompactRange(begin, end);
}
|
||||
|
||||
void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, DestroyDB(name, options->rep));
|
||||
}
|
||||
|
||||
void leveldb_repair_db(
|
||||
const leveldb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, RepairDB(name, options->rep));
|
||||
}
|
||||
|
||||
// Deletes the wrapped iterator and then the handle itself.
void leveldb_iter_destroy(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  delete wrapped;
  delete iter;
}
|
||||
|
||||
unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) {
|
||||
return iter->rep->Valid();
|
||||
}
|
||||
|
||||
// Forwards to SeekToFirst() on the wrapped iterator.
void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->SeekToFirst();
}
|
||||
|
||||
// Forwards to SeekToLast() on the wrapped iterator.
void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->SeekToLast();
}
|
||||
|
||||
// Forwards to Seek() with the target k[0..klen).
void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) {
  const Slice target(k, klen);
  iter->rep->Seek(target);
}
|
||||
|
||||
// Forwards to Next() on the wrapped iterator.
void leveldb_iter_next(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->Next();
}
|
||||
|
||||
// Forwards to Prev() on the wrapped iterator.
void leveldb_iter_prev(leveldb_iterator_t* iter) {
  Iterator* const wrapped = iter->rep;
  wrapped->Prev();
}
|
||||
|
||||
const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) {
|
||||
Slice s = iter->rep->key();
|
||||
*klen = s.size();
|
||||
return s.data();
|
||||
}
|
||||
|
||||
const char* leveldb_iter_value(const leveldb_iterator_t* iter, size_t* vlen) {
|
||||
Slice s = iter->rep->value();
|
||||
*vlen = s.size();
|
||||
return s.data();
|
||||
}
|
||||
|
||||
void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) {
|
||||
SaveError(errptr, iter->rep->status());
|
||||
}
|
||||
|
||||
// Allocates a new write batch handle.
leveldb_writebatch_t* leveldb_writebatch_create() {
  leveldb_writebatch_t* batch = new leveldb_writebatch_t;
  return batch;
}
|
||||
|
||||
// Deletes a handle obtained from leveldb_writebatch_create().
void leveldb_writebatch_destroy(leveldb_writebatch_t* b) {
  delete b;
}
|
||||
|
||||
// Forwards to Clear() on the wrapped batch.
void leveldb_writebatch_clear(leveldb_writebatch_t* b) {
  WriteBatch& batch = b->rep;
  batch.Clear();
}
|
||||
|
||||
// Adds a Put of key[0..klen) -> val[0..vlen) to the batch.
void leveldb_writebatch_put(
    leveldb_writebatch_t* b,
    const char* key, size_t klen,
    const char* val, size_t vlen) {
  WriteBatch& batch = b->rep;
  batch.Put(Slice(key, klen), Slice(val, vlen));
}
|
||||
|
||||
// Adds a Delete of key[0..klen) to the batch.
void leveldb_writebatch_delete(
    leveldb_writebatch_t* b,
    const char* key, size_t klen) {
  WriteBatch& batch = b->rep;
  batch.Delete(Slice(key, klen));
}
|
||||
|
||||
// Iterates over the batch, invoking put for every Put entry and deleted for
// every Delete entry.  state is passed through as the first argument of
// both callbacks.
void leveldb_writebatch_iterate(
    leveldb_writebatch_t* b,
    void* state,
    void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
    void (*deleted)(void*, const char* k, size_t klen)) {
  // Adapter from WriteBatch::Handler to the two C callbacks.
  class CallbackHandler : public WriteBatch::Handler {
   public:
    void* state_;
    void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen);
    void (*deleted_)(void*, const char* k, size_t klen);

    virtual void Put(const Slice& key, const Slice& value) {
      put_(state_, key.data(), key.size(), value.data(), value.size());
    }
    virtual void Delete(const Slice& key) {
      deleted_(state_, key.data(), key.size());
    }
  };

  CallbackHandler forwarder;
  forwarder.state_ = state;
  forwarder.put_ = put;
  forwarder.deleted_ = deleted;
  b->rep.Iterate(&forwarder);
}
|
||||
|
||||
// Allocates a new options handle.
leveldb_options_t* leveldb_options_create() {
  leveldb_options_t* options = new leveldb_options_t;
  return options;
}
|
||||
|
||||
// Deletes a handle obtained from leveldb_options_create().
void leveldb_options_destroy(leveldb_options_t* options) {
  delete options;
}
|
||||
|
||||
// Stores cmp as the comparator in the wrapped Options.
void leveldb_options_set_comparator(
    leveldb_options_t* opt,
    leveldb_comparator_t* cmp) {
  Options& target = opt->rep;
  target.comparator = cmp;
}
|
||||
|
||||
// Stores policy as the filter policy in the wrapped Options.
void leveldb_options_set_filter_policy(
    leveldb_options_t* opt,
    leveldb_filterpolicy_t* policy) {
  Options& target = opt->rep;
  target.filter_policy = policy;
}
|
||||
|
||||
// Assigns v to create_if_missing in the wrapped Options.
void leveldb_options_set_create_if_missing(
    leveldb_options_t* opt, unsigned char v) {
  Options& target = opt->rep;
  target.create_if_missing = v;
}
|
||||
|
||||
// Assigns v to error_if_exists in the wrapped Options.
void leveldb_options_set_error_if_exists(
    leveldb_options_t* opt, unsigned char v) {
  Options& target = opt->rep;
  target.error_if_exists = v;
}
|
||||
|
||||
// Assigns v to paranoid_checks in the wrapped Options.
void leveldb_options_set_paranoid_checks(
    leveldb_options_t* opt, unsigned char v) {
  Options& target = opt->rep;
  target.paranoid_checks = v;
}
|
||||
|
||||
// Stores the Env wrapped by env in the Options; a NULL handle stores NULL.
void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) {
  if (env) {
    opt->rep.env = env->rep;
  } else {
    opt->rep.env = NULL;
  }
}
|
||||
|
||||
// Stores the Logger wrapped by l in the Options; a NULL handle stores NULL.
void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) {
  if (l) {
    opt->rep.info_log = l->rep;
  } else {
    opt->rep.info_log = NULL;
  }
}
|
||||
|
||||
// Assigns s to write_buffer_size in the wrapped Options.
void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) {
  Options& target = opt->rep;
  target.write_buffer_size = s;
}
|
||||
|
||||
// Assigns n to max_open_files in the wrapped Options.
void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) {
  Options& target = opt->rep;
  target.max_open_files = n;
}
|
||||
|
||||
// Stores the Cache wrapped by c as block_cache in the Options.
// A NULL handle stores NULL instead of being dereferenced, matching how
// leveldb_options_set_env() and leveldb_options_set_info_log() treat a NULL
// argument.
void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) {
  opt->rep.block_cache = (c ? c->rep : NULL);
}
|
||||
|
||||
// Assigns s to block_size in the wrapped Options.
void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) {
  Options& target = opt->rep;
  target.block_size = s;
}
|
||||
|
||||
// Assigns n to block_restart_interval in the wrapped Options.
void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) {
  Options& target = opt->rep;
  target.block_restart_interval = n;
}
|
||||
|
||||
// Casts t to CompressionType and stores it in the wrapped Options.
void leveldb_options_set_compression(leveldb_options_t* opt, int t) {
  const CompressionType type = static_cast<CompressionType>(t);
  opt->rep.compression = type;
}
|
||||
|
||||
// Allocates a comparator whose operations forward to the given callbacks.
// state is stored and passed back as the first argument of each callback.
leveldb_comparator_t* leveldb_comparator_create(
    void* state,
    void (*destructor)(void*),
    int (*compare)(
        void*,
        const char* a, size_t alen,
        const char* b, size_t blen),
    const char* (*name)(void*)) {
  leveldb_comparator_t* cmp = new leveldb_comparator_t;
  cmp->state_ = state;
  cmp->destructor_ = destructor;
  cmp->compare_ = compare;
  cmp->name_ = name;
  return cmp;
}
|
||||
|
||||
// Deletes the comparator; its destructor invokes the stored destructor
// callback.
void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
  delete cmp;
}
|
||||
|
||||
// Allocates a filter policy whose operations forward to the given callbacks.
// state is stored and passed back as the first argument of each callback.
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
    void* state,
    void (*destructor)(void*),
    char* (*create_filter)(
        void*,
        const char* const* key_array, const size_t* key_length_array,
        int num_keys,
        size_t* filter_length),
    unsigned char (*key_may_match)(
        void*,
        const char* key, size_t length,
        const char* filter, size_t filter_length),
    const char* (*name)(void*)) {
  leveldb_filterpolicy_t* policy = new leveldb_filterpolicy_t;
  policy->state_ = state;
  policy->destructor_ = destructor;
  policy->create_ = create_filter;
  policy->key_match_ = key_may_match;
  policy->name_ = name;
  return policy;
}
|
||||
|
||||
// Deletes the filter policy; its destructor invokes the stored destructor
// callback.
void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
  delete filter;
}
|
||||
|
||||
// Returns a filter policy backed by NewBloomFilterPolicy(bits_per_key).
leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
  // The result is a leveldb_filterpolicy_t subclass that overrides every
  // method to delegate to the bloom policy, so none of the user-callback
  // slots except destructor_ are ever consulted.
  struct BloomPolicy : public leveldb_filterpolicy_t {
    const FilterPolicy* rep_;

    ~BloomPolicy() { delete rep_; }

    const char* Name() const { return rep_->Name(); }

    void CreateFilter(const Slice* keys, int n, std::string* dst) const {
      rep_->CreateFilter(keys, n, dst);
    }

    bool KeyMayMatch(const Slice& key, const Slice& filter) const {
      return rep_->KeyMayMatch(key, filter);
    }

    // Installed as destructor_ because the base destructor always calls it.
    static void DoNothing(void*) { }
  };

  BloomPolicy* policy = new BloomPolicy;
  policy->rep_ = NewBloomFilterPolicy(bits_per_key);
  policy->state_ = NULL;
  policy->destructor_ = &BloomPolicy::DoNothing;
  return policy;
}
|
||||
|
||||
// Allocates a new read options handle.
leveldb_readoptions_t* leveldb_readoptions_create() {
  leveldb_readoptions_t* options = new leveldb_readoptions_t;
  return options;
}
|
||||
|
||||
// Deletes a handle obtained from leveldb_readoptions_create().
void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) {
  delete opt;
}
|
||||
|
||||
// Assigns v to verify_checksums in the wrapped ReadOptions.
void leveldb_readoptions_set_verify_checksums(
    leveldb_readoptions_t* opt,
    unsigned char v) {
  ReadOptions& target = opt->rep;
  target.verify_checksums = v;
}
|
||||
|
||||
// Assigns v to fill_cache in the wrapped ReadOptions.
void leveldb_readoptions_set_fill_cache(
    leveldb_readoptions_t* opt, unsigned char v) {
  ReadOptions& target = opt->rep;
  target.fill_cache = v;
}
|
||||
|
||||
// Stores the Snapshot wrapped by snap in the ReadOptions; a NULL handle
// stores NULL.
void leveldb_readoptions_set_snapshot(
    leveldb_readoptions_t* opt,
    const leveldb_snapshot_t* snap) {
  if (snap) {
    opt->rep.snapshot = snap->rep;
  } else {
    opt->rep.snapshot = NULL;
  }
}
|
||||
|
||||
// Allocates a new write options handle.
leveldb_writeoptions_t* leveldb_writeoptions_create() {
  leveldb_writeoptions_t* options = new leveldb_writeoptions_t;
  return options;
}
|
||||
|
||||
// Deletes a handle obtained from leveldb_writeoptions_create().
void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) {
  delete opt;
}
|
||||
|
||||
// Assigns v to sync in the wrapped WriteOptions.
void leveldb_writeoptions_set_sync(
    leveldb_writeoptions_t* opt, unsigned char v) {
  WriteOptions& target = opt->rep;
  target.sync = v;
}
|
||||
|
||||
// Returns a new handle wrapping NewLRUCache(capacity).
leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) {
  leveldb_cache_t* handle = new leveldb_cache_t;
  handle->rep = NewLRUCache(capacity);
  return handle;
}
|
||||
|
||||
// Deletes the wrapped Cache and then the handle itself.
void leveldb_cache_destroy(leveldb_cache_t* cache) {
  Cache* const wrapped = cache->rep;
  delete wrapped;
  delete cache;
}
|
||||
|
||||
// Returns a new handle wrapping Env::Default(), flagged as the default
// environment.
leveldb_env_t* leveldb_create_default_env() {
  leveldb_env_t* handle = new leveldb_env_t;
  handle->rep = Env::Default();
  handle->is_default = true;
  return handle;
}
|
||||
|
||||
// Deletes the handle.  The wrapped Env is deleted too, unless the handle is
// flagged as wrapping the default environment.
void leveldb_env_destroy(leveldb_env_t* env) {
  const bool delete_rep = !env->is_default;
  if (delete_rep) {
    delete env->rep;
  }
  delete env;
}
|
||||
|
||||
// Releases ptr with free().
void leveldb_free(void* ptr) {
  free(ptr);
}
|
||||
|
||||
int leveldb_major_version() {
|
||||
return kMajorVersion;
|
||||
}
|
||||
|
||||
int leveldb_minor_version() {
|
||||
return kMinorVersion;
|
||||
}
|
||||
|
||||
} // end extern "C"
|
||||
390
src/hyperleveldb/db/c_test.c
Normal file
390
src/hyperleveldb/db/c_test.c
Normal file
@@ -0,0 +1,390 @@
|
||||
/* Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
Use of this source code is governed by a BSD-style license that can be
|
||||
found in the LICENSE file. See the AUTHORS file for names of contributors. */
|
||||
|
||||
#include "c.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
const char* phase = "";
|
||||
static char dbname[200];
|
||||
|
||||
static void StartPhase(const char* name) {
|
||||
fprintf(stderr, "=== Test %s\n", name);
|
||||
phase = name;
|
||||
}
|
||||
|
||||
/* Returns the value of TEST_TMPDIR, or "/tmp" when the variable is unset or
   empty. */
static const char* GetTempDir(void) {
  const char* dir = getenv("TEST_TMPDIR");
  if (dir != NULL && dir[0] != '\0') {
    return dir;
  }
  return "/tmp";
}
|
||||
|
||||
/* Aborts with file, line, current phase and the message when err is not
   NULL.  Wrapped in do { } while (0) so the macro expands to exactly one
   statement and can be used safely in an unbraced if/else. */
#define CheckNoError(err)                                                   \
  do {                                                                      \
    if ((err) != NULL) {                                                    \
      fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \
      abort();                                                              \
    }                                                                       \
  } while (0)
|
||||
|
||||
/* Aborts with file, line, current phase and the text of cond when cond is
   false.  Wrapped in do { } while (0) so the macro expands to exactly one
   statement and can be used safely in an unbraced if/else. */
#define CheckCondition(cond)                                                \
  do {                                                                      \
    if (!(cond)) {                                                          \
      fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, #cond); \
      abort();                                                              \
    }                                                                       \
  } while (0)
|
||||
|
||||
static void CheckEqual(const char* expected, const char* v, size_t n) {
|
||||
if (expected == NULL && v == NULL) {
|
||||
// ok
|
||||
} else if (expected != NULL && v != NULL && n == strlen(expected) &&
|
||||
memcmp(expected, v, n) == 0) {
|
||||
// ok
|
||||
return;
|
||||
} else {
|
||||
fprintf(stderr, "%s: expected '%s', got '%s'\n",
|
||||
phase,
|
||||
(expected ? expected : "(null)"),
|
||||
(v ? v : "(null"));
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/* Frees *ptr and resets it to NULL; does nothing when *ptr is already
   NULL. */
static void Free(char** ptr) {
  if (*ptr == NULL) {
    return;
  }
  free(*ptr);
  *ptr = NULL;
}
|
||||
|
||||
static void CheckGet(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
const char* key,
|
||||
const char* expected) {
|
||||
char* err = NULL;
|
||||
size_t val_len;
|
||||
char* val;
|
||||
val = leveldb_get(db, options, key, strlen(key), &val_len, &err);
|
||||
CheckNoError(err);
|
||||
CheckEqual(expected, val, val_len);
|
||||
Free(&val);
|
||||
}
|
||||
|
||||
/* Checks that the iterator's current key and value equal key and val. */
static void CheckIter(leveldb_iterator_t* iter,
                      const char* key, const char* val) {
  size_t len;
  const char* bytes = leveldb_iter_key(iter, &len);
  CheckEqual(key, bytes, len);

  bytes = leveldb_iter_value(iter, &len);
  CheckEqual(val, bytes, len);
}
|
||||
|
||||
/* Put callback for leveldb_writebatch_iterate().  ptr points at an int
   counter: call 0 must be ("bar", "b"), call 1 must be ("box", "c"), and the
   counter is incremented afterwards. */
static void CheckPut(void* ptr,
                     const char* k, size_t klen,
                     const char* v, size_t vlen) {
  int* state = (int*) ptr;
  CheckCondition(*state < 2);
  if (*state == 0) {
    CheckEqual("bar", k, klen);
    CheckEqual("b", v, vlen);
  } else if (*state == 1) {
    CheckEqual("box", k, klen);
    CheckEqual("c", v, vlen);
  }
  (*state)++;
}
|
||||
|
||||
/* Delete callback for leveldb_writebatch_iterate().  ptr points at an int
   counter that must be 2 on entry; the deleted key must be "bar".  The
   counter is incremented afterwards. */
static void CheckDel(void* ptr, const char* k, size_t klen) {
  int* counter = (int*) ptr;
  CheckCondition(*counter == 2);
  CheckEqual("bar", k, klen);
  (*counter)++;
}
|
||||
|
||||
/* Comparator destructor callback; there is no state to release. */
static void CmpDestroy(void* arg) {
  (void) arg;
}
|
||||
|
||||
/* Comparator callback: bytewise comparison of a[0..alen) and b[0..blen).
   When one input is a prefix of the other, the shorter one sorts first.
   Returns a negative, zero or positive value.

   The common length is kept as size_t; storing it in an int would truncate
   the count handed to memcmp() for very long keys. */
static int CmpCompare(void* arg, const char* a, size_t alen,
                      const char* b, size_t blen) {
  size_t n = (alen < blen) ? alen : blen;
  int r = memcmp(a, b, n);
  (void) arg;
  if (r == 0) {
    if (alen < blen) r = -1;
    else if (alen > blen) r = +1;
  }
  return r;
}
|
||||
|
||||
/* Comparator name callback; always reports "foo". */
static const char* CmpName(void* arg) {
  (void) arg;
  return "foo";
}
|
||||
|
||||
// Custom filter policy
|
||||
static unsigned char fake_filter_result = 1;
|
||||
/* Filter policy destructor callback; there is no state to release. */
static void FilterDestroy(void* arg) {
  (void) arg;
}
|
||||
/* Filter policy name callback; always reports "TestFilter". */
static const char* FilterName(void* arg) {
  (void) arg;
  return "TestFilter";
}
|
||||
/* Filter creation callback: ignores the keys and always produces the
   4-byte filter "fake" in a malloc()'d buffer, storing 4 in *filter_length.

   Aborts when the allocation fails rather than copying into a NULL
   pointer. */
static char* FilterCreate(
    void* arg,
    const char* const* key_array, const size_t* key_length_array,
    int num_keys,
    size_t* filter_length) {
  char* result = (char*) malloc(4);
  (void) arg;
  (void) key_array;
  (void) key_length_array;
  (void) num_keys;
  if (result == NULL) {
    abort();
  }
  *filter_length = 4;
  memcpy(result, "fake", 4);
  return result;
}
|
||||
unsigned char FilterKeyMatch(
|
||||
void* arg,
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length) {
|
||||
CheckCondition(filter_length == 4);
|
||||
CheckCondition(memcmp(filter, "fake", 4) == 0);
|
||||
return fake_filter_result;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
leveldb_t* db;
|
||||
leveldb_comparator_t* cmp;
|
||||
leveldb_cache_t* cache;
|
||||
leveldb_env_t* env;
|
||||
leveldb_options_t* options;
|
||||
leveldb_readoptions_t* roptions;
|
||||
leveldb_writeoptions_t* woptions;
|
||||
char* err = NULL;
|
||||
int run = -1;
|
||||
|
||||
CheckCondition(leveldb_major_version() >= 1);
|
||||
CheckCondition(leveldb_minor_version() >= 1);
|
||||
|
||||
snprintf(dbname, sizeof(dbname),
|
||||
"%s/leveldb_c_test-%d",
|
||||
GetTempDir(),
|
||||
((int) geteuid()));
|
||||
|
||||
StartPhase("create_objects");
|
||||
cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
|
||||
env = leveldb_create_default_env();
|
||||
cache = leveldb_cache_create_lru(100000);
|
||||
|
||||
options = leveldb_options_create();
|
||||
leveldb_options_set_comparator(options, cmp);
|
||||
leveldb_options_set_error_if_exists(options, 1);
|
||||
leveldb_options_set_cache(options, cache);
|
||||
leveldb_options_set_env(options, env);
|
||||
leveldb_options_set_info_log(options, NULL);
|
||||
leveldb_options_set_write_buffer_size(options, 100000);
|
||||
leveldb_options_set_paranoid_checks(options, 1);
|
||||
leveldb_options_set_max_open_files(options, 10);
|
||||
leveldb_options_set_block_size(options, 1024);
|
||||
leveldb_options_set_block_restart_interval(options, 8);
|
||||
leveldb_options_set_compression(options, leveldb_no_compression);
|
||||
|
||||
roptions = leveldb_readoptions_create();
|
||||
leveldb_readoptions_set_verify_checksums(roptions, 1);
|
||||
leveldb_readoptions_set_fill_cache(roptions, 0);
|
||||
|
||||
woptions = leveldb_writeoptions_create();
|
||||
leveldb_writeoptions_set_sync(woptions, 1);
|
||||
|
||||
StartPhase("destroy");
|
||||
leveldb_destroy_db(options, dbname, &err);
|
||||
Free(&err);
|
||||
|
||||
StartPhase("open_error");
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckCondition(err != NULL);
|
||||
Free(&err);
|
||||
|
||||
StartPhase("leveldb_free");
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckCondition(err != NULL);
|
||||
leveldb_free(err);
|
||||
err = NULL;
|
||||
|
||||
StartPhase("open");
|
||||
leveldb_options_set_create_if_missing(options, 1);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
|
||||
StartPhase("put");
|
||||
leveldb_put(db, woptions, "foo", 3, "hello", 5, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactall");
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactrange");
|
||||
leveldb_compact_range(db, "a", 1, "z", 1);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("writebatch");
|
||||
{
|
||||
leveldb_writebatch_t* wb = leveldb_writebatch_create();
|
||||
leveldb_writebatch_put(wb, "foo", 3, "a", 1);
|
||||
leveldb_writebatch_clear(wb);
|
||||
leveldb_writebatch_put(wb, "bar", 3, "b", 1);
|
||||
leveldb_writebatch_put(wb, "box", 3, "c", 1);
|
||||
leveldb_writebatch_delete(wb, "bar", 3);
|
||||
leveldb_write(db, woptions, wb, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
int pos = 0;
|
||||
leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
|
||||
CheckCondition(pos == 3);
|
||||
leveldb_writebatch_destroy(wb);
|
||||
}
|
||||
|
||||
StartPhase("iter");
|
||||
{
|
||||
leveldb_iterator_t* iter = leveldb_create_iterator(db, roptions);
|
||||
CheckCondition(!leveldb_iter_valid(iter));
|
||||
leveldb_iter_seek_to_first(iter);
|
||||
CheckCondition(leveldb_iter_valid(iter));
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_next(iter);
|
||||
CheckIter(iter, "foo", "hello");
|
||||
leveldb_iter_prev(iter);
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_prev(iter);
|
||||
CheckCondition(!leveldb_iter_valid(iter));
|
||||
leveldb_iter_seek_to_last(iter);
|
||||
CheckIter(iter, "foo", "hello");
|
||||
leveldb_iter_seek(iter, "b", 1);
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_get_error(iter, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_iter_destroy(iter);
|
||||
}
|
||||
|
||||
StartPhase("approximate_sizes");
|
||||
{
|
||||
int i;
|
||||
int n = 20000;
|
||||
char keybuf[100];
|
||||
char valbuf[100];
|
||||
uint64_t sizes[2];
|
||||
const char* start[2] = { "a", "k00000000000000010000" };
|
||||
size_t start_len[2] = { 1, 21 };
|
||||
const char* limit[2] = { "k00000000000000010000", "z" };
|
||||
size_t limit_len[2] = { 21, 1 };
|
||||
leveldb_writeoptions_set_sync(woptions, 0);
|
||||
for (i = 0; i < n; i++) {
|
||||
snprintf(keybuf, sizeof(keybuf), "k%020d", i);
|
||||
snprintf(valbuf, sizeof(valbuf), "v%020d", i);
|
||||
leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf),
|
||||
&err);
|
||||
CheckNoError(err);
|
||||
}
|
||||
leveldb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes);
|
||||
CheckCondition(sizes[0] > 0);
|
||||
CheckCondition(sizes[1] > 0);
|
||||
}
|
||||
|
||||
StartPhase("property");
|
||||
{
|
||||
char* prop = leveldb_property_value(db, "nosuchprop");
|
||||
CheckCondition(prop == NULL);
|
||||
prop = leveldb_property_value(db, "leveldb.stats");
|
||||
CheckCondition(prop != NULL);
|
||||
Free(&prop);
|
||||
}
|
||||
|
||||
StartPhase("snapshot");
|
||||
{
|
||||
const leveldb_snapshot_t* snap;
|
||||
snap = leveldb_create_snapshot(db);
|
||||
leveldb_delete(db, woptions, "foo", 3, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_readoptions_set_snapshot(roptions, snap);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
leveldb_readoptions_set_snapshot(roptions, NULL);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
leveldb_release_snapshot(db, snap);
|
||||
}
|
||||
|
||||
StartPhase("repair");
|
||||
{
|
||||
leveldb_close(db);
|
||||
leveldb_options_set_create_if_missing(options, 0);
|
||||
leveldb_options_set_error_if_exists(options, 0);
|
||||
leveldb_repair_db(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
leveldb_options_set_create_if_missing(options, 1);
|
||||
leveldb_options_set_error_if_exists(options, 1);
|
||||
}
|
||||
|
||||
StartPhase("filter");
|
||||
for (run = 0; run < 2; run++) {
|
||||
// First run uses custom filter, second run uses bloom filter
|
||||
CheckNoError(err);
|
||||
leveldb_filterpolicy_t* policy;
|
||||
if (run == 0) {
|
||||
policy = leveldb_filterpolicy_create(
|
||||
NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
|
||||
} else {
|
||||
policy = leveldb_filterpolicy_create_bloom(10);
|
||||
}
|
||||
|
||||
// Create new database
|
||||
leveldb_close(db);
|
||||
leveldb_destroy_db(options, dbname, &err);
|
||||
leveldb_options_set_filter_policy(options, policy);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
|
||||
fake_filter_result = 1;
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
CheckGet(db, roptions, "bar", "barvalue");
|
||||
if (phase == 0) {
|
||||
// Must not find value when custom filter returns false
|
||||
fake_filter_result = 0;
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
fake_filter_result = 1;
|
||||
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
CheckGet(db, roptions, "bar", "barvalue");
|
||||
}
|
||||
leveldb_options_set_filter_policy(options, NULL);
|
||||
leveldb_filterpolicy_destroy(policy);
|
||||
}
|
||||
|
||||
StartPhase("cleanup");
|
||||
leveldb_close(db);
|
||||
leveldb_options_destroy(options);
|
||||
leveldb_readoptions_destroy(roptions);
|
||||
leveldb_writeoptions_destroy(woptions);
|
||||
leveldb_cache_destroy(cache);
|
||||
leveldb_comparator_destroy(cmp);
|
||||
leveldb_env_destroy(env);
|
||||
|
||||
fprintf(stderr, "PASS\n");
|
||||
return 0;
|
||||
}
|
||||
360
src/hyperleveldb/db/corruption_test.cc
Normal file
360
src/hyperleveldb/db/corruption_test.cc
Normal file
@@ -0,0 +1,360 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "../hyperleveldb/db.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include "../hyperleveldb/cache.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/table.h"
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
#include "db_impl.h"
|
||||
#include "filename.h"
|
||||
#include "log_format.h"
|
||||
#include "version_set.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/testharness.h"
|
||||
#include "../util/testutil.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
static const int kValueSize = 1000;
|
||||
|
||||
class CorruptionTest {
|
||||
public:
|
||||
test::ErrorEnv env_;
|
||||
std::string dbname_;
|
||||
Cache* tiny_cache_;
|
||||
Options options_;
|
||||
DB* db_;
|
||||
|
||||
CorruptionTest() {
|
||||
tiny_cache_ = NewLRUCache(100);
|
||||
options_.env = &env_;
|
||||
dbname_ = test::TmpDir() + "/db_test";
|
||||
DestroyDB(dbname_, options_);
|
||||
|
||||
db_ = NULL;
|
||||
options_.create_if_missing = true;
|
||||
Reopen();
|
||||
options_.create_if_missing = false;
|
||||
}
|
||||
|
||||
~CorruptionTest() {
|
||||
delete db_;
|
||||
DestroyDB(dbname_, Options());
|
||||
delete tiny_cache_;
|
||||
}
|
||||
|
||||
Status TryReopen(Options* options = NULL) {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
Options opt = (options ? *options : options_);
|
||||
opt.env = &env_;
|
||||
opt.block_cache = tiny_cache_;
|
||||
return DB::Open(opt, dbname_, &db_);
|
||||
}
|
||||
|
||||
void Reopen(Options* options = NULL) {
|
||||
ASSERT_OK(TryReopen(options));
|
||||
}
|
||||
|
||||
void RepairDB() {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
ASSERT_OK(::leveldb::RepairDB(dbname_, options_));
|
||||
}
|
||||
|
||||
void Build(int n) {
|
||||
std::string key_space, value_space;
|
||||
WriteBatch batch;
|
||||
for (int i = 0; i < n; i++) {
|
||||
//if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
|
||||
Slice key = Key(i, &key_space);
|
||||
batch.Clear();
|
||||
batch.Put(key, Value(i, &value_space));
|
||||
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
||||
}
|
||||
}
|
||||
|
||||
void Check(int min_expected, int max_expected) {
|
||||
int next_expected = 0;
|
||||
int missed = 0;
|
||||
int bad_keys = 0;
|
||||
int bad_values = 0;
|
||||
int correct = 0;
|
||||
std::string value_space;
|
||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
uint64_t key;
|
||||
Slice in(iter->key());
|
||||
if (!ConsumeDecimalNumber(&in, &key) ||
|
||||
!in.empty() ||
|
||||
key < next_expected) {
|
||||
bad_keys++;
|
||||
continue;
|
||||
}
|
||||
missed += (key - next_expected);
|
||||
next_expected = key + 1;
|
||||
if (iter->value() != Value(key, &value_space)) {
|
||||
bad_values++;
|
||||
} else {
|
||||
correct++;
|
||||
}
|
||||
}
|
||||
delete iter;
|
||||
|
||||
fprintf(stderr,
|
||||
"expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
|
||||
min_expected, max_expected, correct, bad_keys, bad_values, missed);
|
||||
ASSERT_LE(min_expected, correct);
|
||||
ASSERT_GE(max_expected, correct);
|
||||
}
|
||||
|
||||
void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
|
||||
// Pick file to corrupt
|
||||
std::vector<std::string> filenames;
|
||||
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
std::string fname;
|
||||
int picked_number = -1;
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type) &&
|
||||
type == filetype &&
|
||||
int(number) > picked_number) { // Pick latest file
|
||||
fname = dbname_ + "/" + filenames[i];
|
||||
picked_number = number;
|
||||
}
|
||||
}
|
||||
ASSERT_TRUE(!fname.empty()) << filetype;
|
||||
|
||||
struct stat sbuf;
|
||||
if (stat(fname.c_str(), &sbuf) != 0) {
|
||||
const char* msg = strerror(errno);
|
||||
ASSERT_TRUE(false) << fname << ": " << msg;
|
||||
}
|
||||
|
||||
if (offset < 0) {
|
||||
// Relative to end of file; make it absolute
|
||||
if (-offset > sbuf.st_size) {
|
||||
offset = 0;
|
||||
} else {
|
||||
offset = sbuf.st_size + offset;
|
||||
}
|
||||
}
|
||||
if (offset > sbuf.st_size) {
|
||||
offset = sbuf.st_size;
|
||||
}
|
||||
if (offset + bytes_to_corrupt > sbuf.st_size) {
|
||||
bytes_to_corrupt = sbuf.st_size - offset;
|
||||
}
|
||||
|
||||
// Do it
|
||||
std::string contents;
|
||||
Status s = ReadFileToString(Env::Default(), fname, &contents);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
for (int i = 0; i < bytes_to_corrupt; i++) {
|
||||
contents[i + offset] ^= 0x80;
|
||||
}
|
||||
s = WriteStringToFile(Env::Default(), contents, fname);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
}
|
||||
|
||||
int Property(const std::string& name) {
|
||||
std::string property;
|
||||
int result;
|
||||
if (db_->GetProperty(name, &property) &&
|
||||
sscanf(property.c_str(), "%d", &result) == 1) {
|
||||
return result;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the ith key
|
||||
Slice Key(int i, std::string* storage) {
|
||||
char buf[100];
|
||||
snprintf(buf, sizeof(buf), "%016d", i);
|
||||
storage->assign(buf, strlen(buf));
|
||||
return Slice(*storage);
|
||||
}
|
||||
|
||||
// Return the value to associate with the specified key
|
||||
Slice Value(int k, std::string* storage) {
|
||||
Random r(k);
|
||||
return test::RandomString(&r, kValueSize, storage);
|
||||
}
|
||||
};
|
||||
|
||||
// Damages two places in the log file and verifies how many records survive
// recovery.
TEST(CorruptionTest, Recovery) {
  const int kWritten = 100;
  Build(kWritten);
  Check(kWritten, kWritten);

  // WriteBatch tag for first record
  Corrupt(kLogFile, 19, 1);
  // Somewhere in second block
  const int second_block_offset = log::kBlockSize + 1000;
  Corrupt(kLogFile, second_block_offset, 1);
  Reopen();

  // The 64 records in the first two log blocks are completely lost.
  const int kSurvivors = 36;
  Check(kSurvivors, kSurvivors);
}
|
||||
|
||||
// Reopening must fail while the environment is told to inject writable-file
// errors.
TEST(CorruptionTest, RecoverWriteError) {
  env_.writable_file_error_ = true;
  const Status reopen_status = TryReopen();
  ASSERT_TRUE(!reopen_status.ok());
}
|
||||
|
||||
// Keeps writing with writable-file errors injected until a write fails, then
// checks that the database can be reopened once injection is switched off.
TEST(CorruptionTest, NewFileErrorDuringWrite) {
  // Do enough writing to force minor compaction
  env_.writable_file_error_ = true;
  const int num = 3 + (Options().write_buffer_size / kValueSize);
  std::string value_storage;
  Status s;
  int written = 0;
  while (s.ok() && written < num) {
    WriteBatch batch;
    batch.Put("a", Value(100, &value_storage));
    s = db_->Write(WriteOptions(), &batch);
    written++;
  }
  ASSERT_TRUE(!s.ok());
  ASSERT_GE(env_.num_writable_file_errors_, 1);

  env_.writable_file_error_ = false;
  Reopen();
}
|
||||
|
||||
// Corrupts one byte of a compacted table file and expects exactly one of the
// 100 entries to become unreadable.
TEST(CorruptionTest, TableFile) {
  Build(100);
  DBImpl* impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_CompactMemTable();
  // Compact level 0, then level 1, over the full key range.
  for (int level = 0; level <= 1; level++) {
    impl->TEST_CompactRange(level, NULL, NULL);
  }

  Corrupt(kTableFile, 100, 1);
  Check(99, 99);
}
|
||||
|
||||
// Corrupts 500 bytes starting 2000 bytes before the end of the newest table
// file and expects between 5000 and 9999 intact entries after reopening.
TEST(CorruptionTest, TableFileIndexData) {
  // Enough to build multiple Tables
  const int kEntries = 10000;
  Build(kEntries);
  reinterpret_cast<DBImpl*>(db_)->TEST_CompactMemTable();

  Corrupt(kTableFile, -2000, 500);
  Reopen();
  Check(5000, kEntries - 1);
}
|
||||
|
||||
// After repairing and reopening the database, every written entry must still
// be readable.
TEST(CorruptionTest, MissingDescriptor) {
  const int kEntries = 1000;
  Build(kEntries);
  RepairDB();
  Reopen();
  Check(kEntries, kEntries);
}
|
||||
|
||||
// Overwrites one key several times, repairs the database, and verifies that
// a write made after the repair is the one that is read back.
TEST(CorruptionTest, SequenceNumberRecovery) {
  static const char* const kVersions[] = { "v1", "v2", "v3", "v4", "v5" };
  const size_t kNumVersions = sizeof(kVersions) / sizeof(kVersions[0]);
  for (size_t i = 0; i < kNumVersions; i++) {
    ASSERT_OK(db_->Put(WriteOptions(), "foo", kVersions[i]));
  }
  RepairDB();
  Reopen();

  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v5", v);

  // Write something.  If sequence number was not recovered properly,
  // it will be hidden by an earlier write.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v6", v);

  // The new value must also survive another reopen.
  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("v6", v);
}
|
||||
|
||||
// Corrupts the descriptor file so that opening fails, then checks that a
// repair brings the stored value back.
TEST(CorruptionTest, CorruptedDescriptor) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
  DBImpl* impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_CompactMemTable();
  impl->TEST_CompactRange(0, NULL, NULL);

  Corrupt(kDescriptorFile, 0, 1000);
  const Status open_status = TryReopen();
  ASSERT_TRUE(!open_status.ok());

  RepairDB();
  Reopen();
  std::string found;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &found));
  ASSERT_EQ("hello", found);
}
|
||||
|
||||
// Corrupts a table file that later compactions will read, and verifies that
// freshly written data is nevertheless fully readable.
TEST(CorruptionTest, CompactionInputError) {
  Build(10);
  reinterpret_cast<DBImpl*>(db_)->TEST_CompactMemTable();

  const int last = config::kMaxMemCompactLevel;
  const std::string files_at_last =
      "leveldb.num-files-at-level" + NumberToString(last);
  ASSERT_EQ(1, Property(files_at_last));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Force compactions by writing lots of values
  const int kMany = 10000;
  Build(kMany);
  Check(kMany, kMany);
}
|
||||
|
||||
// With paranoid_checks enabled, a corrupted table file must eventually make
// writes fail.
TEST(CorruptionTest, CompactionInputErrorParanoid) {
  Options paranoid;
  paranoid.paranoid_checks = true;
  paranoid.write_buffer_size = 1048576;
  Reopen(&paranoid);
  DBImpl* impl = reinterpret_cast<DBImpl*>(db_);

  // Fill levels >= 1 so memtable compaction outputs to level 1
  int level = 1;
  while (level < config::kNumLevels) {
    impl->Put(WriteOptions(), "", "begin");
    impl->Put(WriteOptions(), "~", "end");
    impl->TEST_CompactMemTable();
    level++;
  }

  Build(10);
  impl->TEST_CompactMemTable();
  env_.SleepForMicroseconds(1000000);
  ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Write must eventually fail because of corrupted table
  Status s;
  std::string key_buf, value_buf;
  int i = 0;
  while (i < 1000000 && s.ok()) {
    s = db_->Put(WriteOptions(), Key(i, &key_buf), Value(i, &value_buf));
    i++;
  }
  ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}
|
||||
|
||||
// A key written after a table file was corrupted must be readable both
// before and after the memtable holding it is compacted.
TEST(CorruptionTest, UnrelatedKeys) {
  Build(10);
  DBImpl* impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_CompactMemTable();
  Corrupt(kTableFile, 100, 1);

  const int kFreshKey = 1000;
  std::string key_buf, value_buf;
  ASSERT_OK(db_->Put(WriteOptions(),
                     Key(kFreshKey, &key_buf),
                     Value(kFreshKey, &value_buf)));

  std::string found;
  ASSERT_OK(db_->Get(ReadOptions(), Key(kFreshKey, &key_buf), &found));
  ASSERT_EQ(Value(kFreshKey, &value_buf).ToString(), found);

  impl->TEST_CompactMemTable();
  ASSERT_OK(db_->Get(ReadOptions(), Key(kFreshKey, &key_buf), &found));
  ASSERT_EQ(Value(kFreshKey, &value_buf).ToString(), found);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
979
src/hyperleveldb/db/db_bench.cc
Normal file
979
src/hyperleveldb/db/db_bench.cc
Normal file
@@ -0,0 +1,979 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "db_impl.h"
|
||||
#include "version_set.h"
|
||||
#include "../hyperleveldb/cache.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
#include "../port/port.h"
|
||||
#include "../util/crc32c.h"
|
||||
#include "../util/histogram.h"
|
||||
#include "../util/mutexlock.h"
|
||||
#include "../util/random.h"
|
||||
#include "../util/testutil.h"
|
||||
|
||||
// Comma-separated list of operations to run in the specified order
|
||||
// Actual benchmarks:
|
||||
// fillseq -- write N values in sequential key order in async mode
|
||||
// fillrandom -- write N values in random key order in async mode
|
||||
// overwrite -- overwrite N values in random key order in async mode
|
||||
//      fillsync      -- write N/1000 values in random key order in sync mode
|
||||
// fill100K -- write N/1000 100K values in random order in async mode
|
||||
// deleteseq -- delete N keys in sequential order
|
||||
// deleterandom -- delete N keys in random order
|
||||
// readseq -- read N times sequentially
|
||||
// readreverse -- read N times in reverse order
|
||||
// readrandom -- read N times in random order
|
||||
// readmissing -- read N missing keys in random order
|
||||
// readhot -- read N times in random order from 1% section of DB
|
||||
// seekrandom -- N random seeks
|
||||
// crc32c -- repeated crc32c of 4K of data
|
||||
// acquireload -- load N*1000 times
|
||||
// Meta operations:
|
||||
// compact -- Compact the entire DB
|
||||
// stats -- Print DB stats
|
||||
// sstables -- Print sstable info
|
||||
// heapprofile -- Dump a heap profile (if supported by this port)
|
||||
static const char* FLAGS_benchmarks =
|
||||
"fillseq,"
|
||||
"fillsync,"
|
||||
"fillrandom,"
|
||||
"overwrite,"
|
||||
"readrandom,"
|
||||
"readrandom," // Extra run to allow previous compactions to quiesce
|
||||
"readseq,"
|
||||
"readreverse,"
|
||||
"compact,"
|
||||
"readrandom,"
|
||||
"readseq,"
|
||||
"readreverse,"
|
||||
"fill100K,"
|
||||
"crc32c,"
|
||||
"snappycomp,"
|
||||
"snappyuncomp,"
|
||||
"acquireload,"
|
||||
;
|
||||
|
||||
// Number of key/values to place in database
|
||||
static int FLAGS_num = 1000000;
|
||||
|
||||
// Number of read operations to do. If negative, do FLAGS_num reads.
|
||||
static int FLAGS_reads = -1;
|
||||
|
||||
// Number of concurrent threads to run.
|
||||
static int FLAGS_threads = 1;
|
||||
|
||||
// Size of each value
|
||||
static int FLAGS_value_size = 100;
|
||||
|
||||
// Arrange to generate values that shrink to this fraction of
|
||||
// their original size after compression
|
||||
static double FLAGS_compression_ratio = 0.5;
|
||||
|
||||
// Print histogram of operation timings
|
||||
static bool FLAGS_histogram = false;
|
||||
|
||||
// Number of bytes to buffer in memtable before compacting
|
||||
// (initialized to default value by "main")
|
||||
static int FLAGS_write_buffer_size = 0;
|
||||
|
||||
// Number of bytes to use as a cache of uncompressed data.
|
||||
// Negative means use default settings.
|
||||
static int FLAGS_cache_size = -1;
|
||||
|
||||
// Maximum number of files to keep open at the same time (use default if == 0)
|
||||
static int FLAGS_open_files = 0;
|
||||
|
||||
// Bloom filter bits per key.
|
||||
// Negative means use default settings.
|
||||
static int FLAGS_bloom_bits = -1;
|
||||
|
||||
// If true, do not destroy the existing database. If you set this
|
||||
// flag and also specify a benchmark that wants a fresh database, that
|
||||
// benchmark will fail.
|
||||
static bool FLAGS_use_existing_db = false;
|
||||
|
||||
// Use the db with the following name.
|
||||
static const char* FLAGS_db = NULL;
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
namespace {
|
||||
|
||||
// Helper for quickly generating random data.
|
||||
class RandomGenerator {
|
||||
private:
|
||||
std::string data_;
|
||||
int pos_;
|
||||
|
||||
public:
|
||||
RandomGenerator() {
|
||||
// We use a limited amount of data over and over again and ensure
|
||||
// that it is larger than the compression window (32KB), and also
|
||||
// large enough to serve all typical value sizes we want to write.
|
||||
Random rnd(301);
|
||||
std::string piece;
|
||||
while (data_.size() < 1048576) {
|
||||
// Add a short fragment that is as compressible as specified
|
||||
// by FLAGS_compression_ratio.
|
||||
test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
|
||||
data_.append(piece);
|
||||
}
|
||||
pos_ = 0;
|
||||
}
|
||||
|
||||
Slice Generate(int len) {
|
||||
if (pos_ + len > data_.size()) {
|
||||
pos_ = 0;
|
||||
assert(len < data_.size());
|
||||
}
|
||||
pos_ += len;
|
||||
return Slice(data_.data() + pos_ - len, len);
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the part of "s" that remains after dropping leading and trailing
// characters for which isspace() is true.  The result refers to the same
// storage as "s"; nothing is copied.
static Slice TrimSpace(Slice s) {
  // isspace() is only defined for values representable as unsigned char (or
  // EOF), so each character is converted before classification; a plain char
  // holding a byte >= 0x80 could otherwise be passed as a negative int.
  int start = 0;
  while (start < s.size() &&
         isspace(static_cast<unsigned char>(s[start]))) {
    start++;
  }
  int limit = s.size();
  while (limit > start &&
         isspace(static_cast<unsigned char>(s[limit-1]))) {
    limit--;
  }
  return Slice(s.data() + start, limit - start);
}
|
||||
|
||||
static void AppendWithSpace(std::string* str, Slice msg) {
|
||||
if (msg.empty()) return;
|
||||
if (!str->empty()) {
|
||||
str->push_back(' ');
|
||||
}
|
||||
str->append(msg.data(), msg.size());
|
||||
}
|
||||
|
||||
class Stats {
|
||||
private:
|
||||
double start_;
|
||||
double finish_;
|
||||
double seconds_;
|
||||
int done_;
|
||||
int next_report_;
|
||||
int64_t bytes_;
|
||||
double last_op_finish_;
|
||||
Histogram hist_;
|
||||
std::string message_;
|
||||
|
||||
public:
|
||||
Stats() { Start(); }
|
||||
|
||||
void Start() {
|
||||
next_report_ = 100;
|
||||
last_op_finish_ = start_;
|
||||
hist_.Clear();
|
||||
done_ = 0;
|
||||
bytes_ = 0;
|
||||
seconds_ = 0;
|
||||
start_ = Env::Default()->NowMicros();
|
||||
finish_ = start_;
|
||||
message_.clear();
|
||||
}
|
||||
|
||||
void Merge(const Stats& other) {
|
||||
hist_.Merge(other.hist_);
|
||||
done_ += other.done_;
|
||||
bytes_ += other.bytes_;
|
||||
seconds_ += other.seconds_;
|
||||
if (other.start_ < start_) start_ = other.start_;
|
||||
if (other.finish_ > finish_) finish_ = other.finish_;
|
||||
|
||||
// Just keep the messages from one thread
|
||||
if (message_.empty()) message_ = other.message_;
|
||||
}
|
||||
|
||||
void Stop() {
|
||||
finish_ = Env::Default()->NowMicros();
|
||||
seconds_ = (finish_ - start_) * 1e-6;
|
||||
}
|
||||
|
||||
void AddMessage(Slice msg) {
|
||||
AppendWithSpace(&message_, msg);
|
||||
}
|
||||
|
||||
void FinishedSingleOp() {
|
||||
if (FLAGS_histogram) {
|
||||
double now = Env::Default()->NowMicros();
|
||||
double micros = now - last_op_finish_;
|
||||
hist_.Add(micros);
|
||||
if (micros > 20000) {
|
||||
fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
|
||||
fflush(stderr);
|
||||
}
|
||||
last_op_finish_ = now;
|
||||
}
|
||||
|
||||
done_++;
|
||||
if (done_ >= next_report_) {
|
||||
if (next_report_ < 1000) next_report_ += 100;
|
||||
else if (next_report_ < 5000) next_report_ += 500;
|
||||
else if (next_report_ < 10000) next_report_ += 1000;
|
||||
else if (next_report_ < 50000) next_report_ += 5000;
|
||||
else if (next_report_ < 100000) next_report_ += 10000;
|
||||
else if (next_report_ < 500000) next_report_ += 50000;
|
||||
else next_report_ += 100000;
|
||||
fprintf(stderr, "... finished %d ops%30s\r", done_, "");
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
void AddBytes(int64_t n) {
|
||||
bytes_ += n;
|
||||
}
|
||||
|
||||
void Report(const Slice& name) {
|
||||
// Pretend at least one op was done in case we are running a benchmark
|
||||
// that does not call FinishedSingleOp().
|
||||
if (done_ < 1) done_ = 1;
|
||||
|
||||
std::string extra;
|
||||
if (bytes_ > 0) {
|
||||
// Rate is computed on actual elapsed time, not the sum of per-thread
|
||||
// elapsed times.
|
||||
double elapsed = (finish_ - start_) * 1e-6;
|
||||
char rate[100];
|
||||
snprintf(rate, sizeof(rate), "%6.1f MB/s",
|
||||
(bytes_ / 1048576.0) / elapsed);
|
||||
extra = rate;
|
||||
}
|
||||
AppendWithSpace(&extra, message_);
|
||||
|
||||
fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
|
||||
name.ToString().c_str(),
|
||||
seconds_ * 1e6 / done_,
|
||||
(extra.empty() ? "" : " "),
|
||||
extra.c_str());
|
||||
if (FLAGS_histogram) {
|
||||
fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
};
|
||||
|
||||
// State shared by all concurrent executions of the same benchmark.
|
||||
struct SharedState {
|
||||
port::Mutex mu;
|
||||
port::CondVar cv;
|
||||
int total;
|
||||
|
||||
// Each thread goes through the following states:
|
||||
// (1) initializing
|
||||
// (2) waiting for others to be initialized
|
||||
// (3) running
|
||||
// (4) done
|
||||
|
||||
int num_initialized;
|
||||
int num_done;
|
||||
bool start;
|
||||
|
||||
SharedState() : cv(&mu) { }
|
||||
};
|
||||
|
||||
// Per-thread state for concurrent executions of the same benchmark.
|
||||
struct ThreadState {
|
||||
int tid; // 0..n-1 when running in n threads
|
||||
Random rand; // Has different seeds for different threads
|
||||
Stats stats;
|
||||
SharedState* shared;
|
||||
|
||||
ThreadState(int index)
|
||||
: tid(index),
|
||||
rand(1000 + index) {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
class Benchmark {
|
||||
private:
|
||||
Cache* cache_;
|
||||
const FilterPolicy* filter_policy_;
|
||||
DB* db_;
|
||||
int num_;
|
||||
int value_size_;
|
||||
int entries_per_batch_;
|
||||
WriteOptions write_options_;
|
||||
int reads_;
|
||||
int heap_counter_;
|
||||
|
||||
// Prints the environment banner followed by the key/value sizes, the entry
// count and the estimated raw and on-disk sizes of the data set, then any
// build warnings, all to stdout.
void PrintHeader() {
  const int kKeySize = 16;
  const double kBytesPerMB = 1048576.0;

  PrintEnvironment();

  const int compressed_value_size =
      static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5);
  fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
  fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n",
          FLAGS_value_size, compressed_value_size);
  fprintf(stdout, "Entries: %d\n", num_);

  const double raw_mb =
      (static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) / kBytesPerMB;
  fprintf(stdout, "RawSize: %.1f MB (estimated)\n", raw_mb);

  const double file_mb =
      ((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
      / kBytesPerMB;
  fprintf(stdout, "FileSize: %.1f MB (estimated)\n", file_mb);

  PrintWarnings();
  fprintf(stdout, "------------------------------------------------\n");
}
|
||||
|
||||
void PrintWarnings() {
|
||||
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
|
||||
fprintf(stdout,
|
||||
"WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
|
||||
);
|
||||
#endif
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
"WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
|
||||
#endif
|
||||
|
||||
// See if snappy is working by attempting to compress a compressible string
|
||||
const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy";
|
||||
std::string compressed;
|
||||
if (!port::Snappy_Compress(text, sizeof(text), &compressed)) {
|
||||
fprintf(stdout, "WARNING: Snappy compression is not enabled\n");
|
||||
} else if (compressed.size() >= sizeof(text)) {
|
||||
fprintf(stdout, "WARNING: Snappy compression is not effective\n");
|
||||
}
|
||||
}
|
||||
|
||||
void PrintEnvironment() {
|
||||
fprintf(stderr, "LevelDB: version %d.%d\n",
|
||||
kMajorVersion, kMinorVersion);
|
||||
|
||||
#if defined(__linux)
|
||||
time_t now = time(NULL);
|
||||
fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline
|
||||
|
||||
FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
|
||||
if (cpuinfo != NULL) {
|
||||
char line[1000];
|
||||
int num_cpus = 0;
|
||||
std::string cpu_type;
|
||||
std::string cache_size;
|
||||
while (fgets(line, sizeof(line), cpuinfo) != NULL) {
|
||||
const char* sep = strchr(line, ':');
|
||||
if (sep == NULL) {
|
||||
continue;
|
||||
}
|
||||
Slice key = TrimSpace(Slice(line, sep - 1 - line));
|
||||
Slice val = TrimSpace(Slice(sep + 1));
|
||||
if (key == "model name") {
|
||||
++num_cpus;
|
||||
cpu_type = val.ToString();
|
||||
} else if (key == "cache size") {
|
||||
cache_size = val.ToString();
|
||||
}
|
||||
}
|
||||
fclose(cpuinfo);
|
||||
fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str());
|
||||
fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
Benchmark()
|
||||
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
|
||||
filter_policy_(FLAGS_bloom_bits >= 0
|
||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||
: NULL),
|
||||
db_(NULL),
|
||||
num_(FLAGS_num),
|
||||
value_size_(FLAGS_value_size),
|
||||
entries_per_batch_(1),
|
||||
reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
|
||||
heap_counter_(0) {
|
||||
std::vector<std::string> files;
|
||||
Env::Default()->GetChildren(FLAGS_db, &files);
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
if (Slice(files[i]).starts_with("heap-")) {
|
||||
Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
|
||||
}
|
||||
}
|
||||
if (!FLAGS_use_existing_db) {
|
||||
DestroyDB(FLAGS_db, Options());
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the database first, then the cache and the filter policy that
// were created in the constructor.
~Benchmark() {
  delete db_;
  delete cache_;
  delete filter_policy_;
}
|
||||
|
||||
void Run() {
|
||||
PrintHeader();
|
||||
Open();
|
||||
|
||||
const char* benchmarks = FLAGS_benchmarks;
|
||||
while (benchmarks != NULL) {
|
||||
const char* sep = strchr(benchmarks, ',');
|
||||
Slice name;
|
||||
if (sep == NULL) {
|
||||
name = benchmarks;
|
||||
benchmarks = NULL;
|
||||
} else {
|
||||
name = Slice(benchmarks, sep - benchmarks);
|
||||
benchmarks = sep + 1;
|
||||
}
|
||||
|
||||
// Reset parameters that may be overriddden bwlow
|
||||
num_ = FLAGS_num;
|
||||
reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads);
|
||||
value_size_ = FLAGS_value_size;
|
||||
entries_per_batch_ = 1;
|
||||
write_options_ = WriteOptions();
|
||||
|
||||
void (Benchmark::*method)(ThreadState*) = NULL;
|
||||
bool fresh_db = false;
|
||||
int num_threads = FLAGS_threads;
|
||||
|
||||
if (name == Slice("fillseq")) {
|
||||
fresh_db = true;
|
||||
method = &Benchmark::WriteSeq;
|
||||
} else if (name == Slice("fillbatch")) {
|
||||
fresh_db = true;
|
||||
entries_per_batch_ = 1000;
|
||||
method = &Benchmark::WriteSeq;
|
||||
} else if (name == Slice("fillrandom")) {
|
||||
fresh_db = true;
|
||||
method = &Benchmark::WriteRandom;
|
||||
} else if (name == Slice("overwrite")) {
|
||||
fresh_db = false;
|
||||
method = &Benchmark::WriteRandom;
|
||||
} else if (name == Slice("fillsync")) {
|
||||
fresh_db = true;
|
||||
num_ /= 1000;
|
||||
write_options_.sync = true;
|
||||
method = &Benchmark::WriteRandom;
|
||||
} else if (name == Slice("fill100K")) {
|
||||
fresh_db = true;
|
||||
num_ /= 1000;
|
||||
value_size_ = 100 * 1000;
|
||||
method = &Benchmark::WriteRandom;
|
||||
} else if (name == Slice("readseq")) {
|
||||
method = &Benchmark::ReadSequential;
|
||||
} else if (name == Slice("readreverse")) {
|
||||
method = &Benchmark::ReadReverse;
|
||||
} else if (name == Slice("readrandom")) {
|
||||
method = &Benchmark::ReadRandom;
|
||||
} else if (name == Slice("readmissing")) {
|
||||
method = &Benchmark::ReadMissing;
|
||||
} else if (name == Slice("seekrandom")) {
|
||||
method = &Benchmark::SeekRandom;
|
||||
} else if (name == Slice("readhot")) {
|
||||
method = &Benchmark::ReadHot;
|
||||
} else if (name == Slice("readrandomsmall")) {
|
||||
reads_ /= 1000;
|
||||
method = &Benchmark::ReadRandom;
|
||||
} else if (name == Slice("deleteseq")) {
|
||||
method = &Benchmark::DeleteSeq;
|
||||
} else if (name == Slice("deleterandom")) {
|
||||
method = &Benchmark::DeleteRandom;
|
||||
} else if (name == Slice("readwhilewriting")) {
|
||||
num_threads++; // Add extra thread for writing
|
||||
method = &Benchmark::ReadWhileWriting;
|
||||
} else if (name == Slice("compact")) {
|
||||
method = &Benchmark::Compact;
|
||||
} else if (name == Slice("crc32c")) {
|
||||
method = &Benchmark::Crc32c;
|
||||
} else if (name == Slice("acquireload")) {
|
||||
method = &Benchmark::AcquireLoad;
|
||||
} else if (name == Slice("snappycomp")) {
|
||||
method = &Benchmark::SnappyCompress;
|
||||
} else if (name == Slice("snappyuncomp")) {
|
||||
method = &Benchmark::SnappyUncompress;
|
||||
} else if (name == Slice("heapprofile")) {
|
||||
HeapProfile();
|
||||
} else if (name == Slice("stats")) {
|
||||
PrintStats("leveldb.stats");
|
||||
} else if (name == Slice("sstables")) {
|
||||
PrintStats("leveldb.sstables");
|
||||
} else {
|
||||
if (name != Slice()) { // No error message for empty name
|
||||
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (fresh_db) {
|
||||
if (FLAGS_use_existing_db) {
|
||||
fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n",
|
||||
name.ToString().c_str());
|
||||
method = NULL;
|
||||
} else {
|
||||
delete db_;
|
||||
db_ = NULL;
|
||||
DestroyDB(FLAGS_db, Options());
|
||||
Open();
|
||||
}
|
||||
}
|
||||
|
||||
if (method != NULL) {
|
||||
RunBenchmark(num_threads, name, method);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct ThreadArg {
|
||||
Benchmark* bm;
|
||||
SharedState* shared;
|
||||
ThreadState* thread;
|
||||
void (Benchmark::*method)(ThreadState*);
|
||||
};
|
||||
|
||||
static void ThreadBody(void* v) {
|
||||
ThreadArg* arg = reinterpret_cast<ThreadArg*>(v);
|
||||
SharedState* shared = arg->shared;
|
||||
ThreadState* thread = arg->thread;
|
||||
{
|
||||
MutexLock l(&shared->mu);
|
||||
shared->num_initialized++;
|
||||
if (shared->num_initialized >= shared->total) {
|
||||
shared->cv.SignalAll();
|
||||
}
|
||||
while (!shared->start) {
|
||||
shared->cv.Wait();
|
||||
}
|
||||
}
|
||||
|
||||
thread->stats.Start();
|
||||
(arg->bm->*(arg->method))(thread);
|
||||
thread->stats.Stop();
|
||||
|
||||
{
|
||||
MutexLock l(&shared->mu);
|
||||
shared->num_done++;
|
||||
if (shared->num_done >= shared->total) {
|
||||
shared->cv.SignalAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RunBenchmark(int n, Slice name,
|
||||
void (Benchmark::*method)(ThreadState*)) {
|
||||
SharedState shared;
|
||||
shared.total = n;
|
||||
shared.num_initialized = 0;
|
||||
shared.num_done = 0;
|
||||
shared.start = false;
|
||||
|
||||
ThreadArg* arg = new ThreadArg[n];
|
||||
for (int i = 0; i < n; i++) {
|
||||
arg[i].bm = this;
|
||||
arg[i].method = method;
|
||||
arg[i].shared = &shared;
|
||||
arg[i].thread = new ThreadState(i);
|
||||
arg[i].thread->shared = &shared;
|
||||
Env::Default()->StartThread(ThreadBody, &arg[i]);
|
||||
}
|
||||
|
||||
shared.mu.Lock();
|
||||
while (shared.num_initialized < n) {
|
||||
shared.cv.Wait();
|
||||
}
|
||||
|
||||
shared.start = true;
|
||||
shared.cv.SignalAll();
|
||||
while (shared.num_done < n) {
|
||||
shared.cv.Wait();
|
||||
}
|
||||
shared.mu.Unlock();
|
||||
|
||||
for (int i = 1; i < n; i++) {
|
||||
arg[0].thread->stats.Merge(arg[i].thread->stats);
|
||||
}
|
||||
arg[0].thread->stats.Report(name);
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
delete arg[i].thread;
|
||||
}
|
||||
delete[] arg;
|
||||
}
|
||||
|
||||
// Benchmarks crc32c::Value by checksumming one 4096-byte buffer over and
// over until about 500MB have been processed.
void Crc32c(ThreadState* thread) {
  const int kBlockSize = 4096;
  const char* const kLabel = "(4K per op)";
  const std::string block(kBlockSize, 'x');
  int64_t processed = 0;
  uint32_t checksum = 0;
  for (; processed < 500 * 1048576; processed += kBlockSize) {
    checksum = crc32c::Value(block.data(), kBlockSize);
    thread->stats.FinishedSingleOp();
  }
  // Print so result is not dead
  fprintf(stderr, "... crc=0x%x\r", static_cast<unsigned int>(checksum));

  thread->stats.AddBytes(processed);
  thread->stats.AddMessage(kLabel);
}
|
||||
|
||||
// Benchmarks port::AtomicPointer::Acquire_Load: 100000 operations, each
// consisting of 1000 loads.
void AcquireLoad(ThreadState* thread) {
  int target;  // Only its address is used
  port::AtomicPointer ap(&target);
  void* loaded = NULL;
  thread->stats.AddMessage("(each op is 1000 loads)");
  for (int op = 0; op < 100000; op++) {
    for (int i = 0; i < 1000; i++) {
      loaded = ap.Acquire_Load();
    }
    thread->stats.FinishedSingleOp();
  }
  if (loaded == NULL) exit(1);  // Disable unused variable warning.
}
|
||||
|
||||
// Benchmarks port::Snappy_Compress on one generated block of
// Options().block_size bytes, repeated until 1G of input has been consumed
// or a compression call fails. On success the message reports the output
// size as a percentage of the input size.
void SnappyCompress(ThreadState* thread) {
  RandomGenerator gen;
  Slice input = gen.Generate(Options().block_size);
  int64_t consumed = 0;
  int64_t emitted = 0;
  bool ok = true;
  std::string compressed;
  while (ok && consumed < 1024 * 1048576) {  // Compress 1G
    ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
    emitted += compressed.size();
    consumed += input.size();
    thread->stats.FinishedSingleOp();
  }

  if (ok) {
    char buf[100];
    snprintf(buf, sizeof(buf), "(output: %.1f%%)",
             (emitted * 100.0) / consumed);
    thread->stats.AddMessage(buf);
    thread->stats.AddBytes(consumed);
  } else {
    thread->stats.AddMessage("(snappy failure)");
  }
}
|
||||
|
||||
// Benchmarks port::Snappy_Uncompress: one generated block of
// Options().block_size bytes is compressed once, then uncompressed
// repeatedly until 1G of output has been produced or a call fails.
void SnappyUncompress(ThreadState* thread) {
  RandomGenerator gen;
  Slice input = gen.Generate(Options().block_size);
  std::string compressed;
  bool ok = port::Snappy_Compress(input.data(), input.size(), &compressed);
  int64_t bytes = 0;
  // Output buffer sized to the original input. A std::string owns the
  // storage, so it is released on every exit path without a manual delete[].
  std::string uncompressed(input.size(), '\0');
  while (ok && bytes < 1024 * 1048576) {  // Uncompress 1G
    ok = port::Snappy_Uncompress(compressed.data(), compressed.size(),
                                 &uncompressed[0]);
    bytes += input.size();
    thread->stats.FinishedSingleOp();
  }

  if (!ok) {
    thread->stats.AddMessage("(snappy failure)");
  } else {
    thread->stats.AddBytes(bytes);
  }
}
|
||||
|
||||
void Open() {
|
||||
assert(db_ == NULL);
|
||||
Options options;
|
||||
options.create_if_missing = !FLAGS_use_existing_db;
|
||||
options.block_cache = cache_;
|
||||
options.write_buffer_size = FLAGS_write_buffer_size;
|
||||
options.max_open_files = FLAGS_open_files;
|
||||
options.filter_policy = filter_policy_;
|
||||
Status s = DB::Open(options, FLAGS_db, &db_);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Write benchmark using sequential keys.
void WriteSeq(ThreadState* thread) {
  DoWrite(thread, /*seq=*/true);
}
|
||||
|
||||
// Write benchmark using randomly chosen keys.
void WriteRandom(ThreadState* thread) {
  DoWrite(thread, /*seq=*/false);
}
|
||||
|
||||
// Writes entries in batches of entries_per_batch_ until the batch start
// index reaches num_. Keys are 16-digit zero-padded decimals: the running
// index when `seq` is true, otherwise a random value below FLAGS_num.
// Exits the process if a batch write fails.
void DoWrite(ThreadState* thread, bool seq) {
  if (num_ != FLAGS_num) {
    char note[100];
    snprintf(note, sizeof(note), "(%d ops)", num_);
    thread->stats.AddMessage(note);
  }

  RandomGenerator gen;
  WriteBatch batch;
  Status status;
  int64_t bytes = 0;
  for (int base = 0; base < num_; base += entries_per_batch_) {
    batch.Clear();
    for (int j = 0; j < entries_per_batch_; j++) {
      const int k = seq ? base + j : (thread->rand.Next() % FLAGS_num);
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      batch.Put(key, gen.Generate(value_size_));
      bytes += value_size_ + strlen(key);
      thread->stats.FinishedSingleOp();
    }
    status = db_->Write(write_options_, &batch);
    if (!status.ok()) {
      fprintf(stderr, "put error: %s\n", status.ToString().c_str());
      exit(1);
    }
  }
  thread->stats.AddBytes(bytes);
}
|
||||
|
||||
// Iterates forward from the first entry, visiting at most reads_ entries,
// and records the total key and value bytes seen.
void ReadSequential(ThreadState* thread) {
  Iterator* iter = db_->NewIterator(ReadOptions());
  int64_t bytes = 0;
  int visited = 0;
  iter->SeekToFirst();
  while (visited < reads_ && iter->Valid()) {
    bytes += iter->key().size() + iter->value().size();
    thread->stats.FinishedSingleOp();
    ++visited;
    iter->Next();
  }
  delete iter;
  thread->stats.AddBytes(bytes);
}
|
||||
|
||||
// Iterates backward from the last entry, visiting at most reads_ entries,
// and records the total key and value bytes seen.
void ReadReverse(ThreadState* thread) {
  Iterator* iter = db_->NewIterator(ReadOptions());
  int64_t bytes = 0;
  int visited = 0;
  iter->SeekToLast();
  while (visited < reads_ && iter->Valid()) {
    bytes += iter->key().size() + iter->value().size();
    thread->stats.FinishedSingleOp();
    ++visited;
    iter->Prev();
  }
  delete iter;
  thread->stats.AddBytes(bytes);
}
|
||||
|
||||
// Performs reads_ point lookups on random keys below FLAGS_num and reports
// how many lookups succeeded.
void ReadRandom(ThreadState* thread) {
  ReadOptions options;
  std::string value;
  int hits = 0;
  for (int i = 0; i < reads_; i++) {
    const int k = thread->rand.Next() % FLAGS_num;
    char key[100];
    snprintf(key, sizeof(key), "%016d", k);
    const bool present = db_->Get(options, key, &value).ok();
    if (present) {
      hits++;
    }
    thread->stats.FinishedSingleOp();
  }
  // NOTE(review): the denominator printed is num_, while the loop performs
  // reads_ lookups; the two differ whenever reads_ != num_ -- confirm intent.
  char msg[100];
  snprintf(msg, sizeof(msg), "(%d of %d found)", hits, num_);
  thread->stats.AddMessage(msg);
}
|
||||
|
||||
// Performs reads_ point lookups on keys carrying a trailing '.', a form no
// write benchmark in this class produces. The lookup result is ignored.
void ReadMissing(ThreadState* thread) {
  ReadOptions options;
  std::string value;
  int remaining = reads_;
  while (remaining-- > 0) {
    const int k = thread->rand.Next() % FLAGS_num;
    char key[100];
    snprintf(key, sizeof(key), "%016d.", k);
    db_->Get(options, key, &value);
    thread->stats.FinishedSingleOp();
  }
}
|
||||
|
||||
// Performs reads_ point lookups restricted to the lowest 1% of the key
// space (FLAGS_num / 100, rounded up). The lookup result is ignored.
void ReadHot(ThreadState* thread) {
  ReadOptions options;
  std::string value;
  const int hot_range = (FLAGS_num + 99) / 100;
  int remaining = reads_;
  while (remaining-- > 0) {
    const int k = thread->rand.Next() % hot_range;
    char key[100];
    snprintf(key, sizeof(key), "%016d", k);
    db_->Get(options, key, &value);
    thread->stats.FinishedSingleOp();
  }
}
|
||||
|
||||
// Performs reads_ seeks to random keys below FLAGS_num, creating and
// destroying a fresh iterator for every seek, and reports how many seeks
// landed exactly on the requested key.
void SeekRandom(ThreadState* thread) {
  ReadOptions options;
  int found = 0;
  for (int i = 0; i < reads_; i++) {
    Iterator* iter = db_->NewIterator(options);
    char key[100];
    const int k = thread->rand.Next() % FLAGS_num;
    snprintf(key, sizeof(key), "%016d", k);
    iter->Seek(key);
    // Seek may stop at a later key; count only exact matches.
    if (iter->Valid() && iter->key() == key) found++;
    delete iter;
    thread->stats.FinishedSingleOp();
  }
  char msg[100];
  snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
  thread->stats.AddMessage(msg);
}
|
||||
|
||||
// Deletes entries in batches of entries_per_batch_ until the batch start
// index reaches num_. Keys are 16-digit zero-padded decimals: the running
// index when `seq` is true, otherwise a random value below FLAGS_num.
// Exits the process if a batch write fails.
//
// No value generator is constructed here: deletes carry no values, and
// constructing one would only add unrelated work to the timed region.
void DoDelete(ThreadState* thread, bool seq) {
  WriteBatch batch;
  Status s;
  for (int i = 0; i < num_; i += entries_per_batch_) {
    batch.Clear();
    for (int j = 0; j < entries_per_batch_; j++) {
      const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      batch.Delete(key);
      thread->stats.FinishedSingleOp();
    }
    s = db_->Write(write_options_, &batch);
    if (!s.ok()) {
      fprintf(stderr, "del error: %s\n", s.ToString().c_str());
      exit(1);
    }
  }
}
|
||||
|
||||
// Delete benchmark using sequential keys.
void DeleteSeq(ThreadState* thread) {
  DoDelete(thread, /*seq=*/true);
}
|
||||
|
||||
// Delete benchmark using randomly chosen keys.
void DeleteRandom(ThreadState* thread) {
  DoDelete(thread, /*seq=*/false);
}
|
||||
|
||||
// Threads with tid > 0 run ReadRandom. Thread 0 instead keeps writing random
// keys until it is the only thread not yet done, then restarts its stats so
// the writing is not counted.
void ReadWhileWriting(ThreadState* thread) {
  if (thread->tid > 0) {
    ReadRandom(thread);
    return;
  }

  // Special thread that keeps writing until other threads are done.
  RandomGenerator gen;
  for (;;) {
    {
      MutexLock l(&thread->shared->mu);
      if (thread->shared->num_done + 1 >= thread->shared->num_initialized) {
        // Other threads have finished
        break;
      }
    }

    const int k = thread->rand.Next() % FLAGS_num;
    char key[100];
    snprintf(key, sizeof(key), "%016d", k);
    const Status status =
        db_->Put(write_options_, key, gen.Generate(value_size_));
    if (!status.ok()) {
      fprintf(stderr, "put error: %s\n", status.ToString().c_str());
      exit(1);
    }
  }

  // Do not count any of the preceding work/delay in stats.
  thread->stats.Start();
}
|
||||
|
||||
// Compacts the whole key range (NULL begin and end). `thread` is unused; the
// parameter exists so the signature matches the other benchmark methods.
void Compact(ThreadState* thread) {
  const Slice* const whole_range = NULL;
  db_->CompactRange(whole_range, whole_range);
}
|
||||
|
||||
void PrintStats(const char* key) {
|
||||
std::string stats;
|
||||
if (!db_->GetProperty(key, &stats)) {
|
||||
stats = "(failed)";
|
||||
}
|
||||
fprintf(stdout, "\n%s\n", stats.c_str());
|
||||
}
|
||||
|
||||
static void WriteToFile(void* arg, const char* buf, int n) {
|
||||
reinterpret_cast<WritableFile*>(arg)->Append(Slice(buf, n));
|
||||
}
|
||||
|
||||
void HeapProfile() {
|
||||
char fname[100];
|
||||
snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_);
|
||||
WritableFile* file;
|
||||
Status s = Env::Default()->NewWritableFile(fname, &file);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
return;
|
||||
}
|
||||
bool ok = port::GetHeapProfile(WriteToFile, file);
|
||||
delete file;
|
||||
if (!ok) {
|
||||
fprintf(stderr, "heap profiling not supported\n");
|
||||
Env::Default()->DeleteFile(fname);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
|
||||
FLAGS_open_files = leveldb::Options().max_open_files;
|
||||
std::string default_db_path;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
double d;
|
||||
int n;
|
||||
char junk;
|
||||
if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
|
||||
FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
|
||||
} else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
|
||||
FLAGS_compression_ratio = d;
|
||||
} else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_histogram = n;
|
||||
} else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_use_existing_db = n;
|
||||
} else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_num = n;
|
||||
} else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_reads = n;
|
||||
} else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_threads = n;
|
||||
} else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_value_size = n;
|
||||
} else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_write_buffer_size = n;
|
||||
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_cache_size = n;
|
||||
} else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_bloom_bits = n;
|
||||
} else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
|
||||
FLAGS_open_files = n;
|
||||
} else if (strncmp(argv[i], "--db=", 5) == 0) {
|
||||
FLAGS_db = argv[i] + 5;
|
||||
} else {
|
||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Choose a location for the test database if none given with --db=<path>
|
||||
if (FLAGS_db == NULL) {
|
||||
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
|
||||
default_db_path += "/dbbench";
|
||||
FLAGS_db = default_db_path.c_str();
|
||||
}
|
||||
|
||||
leveldb::Benchmark benchmark;
|
||||
benchmark.Run();
|
||||
return 0;
|
||||
}
|
||||
1616
src/hyperleveldb/db/db_impl.cc
Normal file
1616
src/hyperleveldb/db/db_impl.cc
Normal file
File diff suppressed because it is too large
Load Diff
223
src/hyperleveldb/db/db_impl.h
Normal file
223
src/hyperleveldb/db/db_impl.h
Normal file
@@ -0,0 +1,223 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_DB_IMPL_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_DB_IMPL_H_
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include "dbformat.h"
|
||||
#include "log_writer.h"
|
||||
#include "snapshot.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../port/port.h"
|
||||
#include "../port/thread_annotations.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class MemTable;
|
||||
class TableCache;
|
||||
class Version;
|
||||
class VersionEdit;
|
||||
class VersionSet;
|
||||
|
||||
class DBImpl : public DB {
|
||||
public:
|
||||
DBImpl(const Options& options, const std::string& dbname);
|
||||
virtual ~DBImpl();
|
||||
|
||||
// Implementations of the DB interface
|
||||
virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
|
||||
virtual Status Delete(const WriteOptions&, const Slice& key);
|
||||
virtual Status Write(const WriteOptions& options, WriteBatch* updates);
|
||||
virtual Status Get(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value);
|
||||
virtual Iterator* NewIterator(const ReadOptions&);
|
||||
virtual const Snapshot* GetSnapshot();
|
||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||
virtual bool GetProperty(const Slice& property, std::string* value);
|
||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
|
||||
virtual void CompactRange(const Slice* begin, const Slice* end);
|
||||
|
||||
// Extra methods (for testing) that are not in the public DB interface
|
||||
|
||||
// Compact any files in the named level that overlap [*begin,*end]
|
||||
void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
|
||||
|
||||
// Force current memtable contents to be compacted.
|
||||
Status TEST_CompactMemTable();
|
||||
|
||||
// Return an internal iterator over the current state of the database.
|
||||
// The keys of this iterator are internal keys (see format.h).
|
||||
// The returned iterator should be deleted when no longer needed.
|
||||
Iterator* TEST_NewInternalIterator();
|
||||
|
||||
// Return the maximum overlapping data (in bytes) at next level for any
|
||||
// file at a level >= 1.
|
||||
int64_t TEST_MaxNextLevelOverlappingBytes();
|
||||
|
||||
private:
|
||||
friend class DB;
|
||||
struct CompactionState;
|
||||
struct Writer;
|
||||
|
||||
Iterator* NewInternalIterator(const ReadOptions&,
|
||||
SequenceNumber* latest_snapshot);
|
||||
|
||||
Status NewDB();
|
||||
|
||||
// Recover the descriptor from persistent storage. May do a significant
|
||||
// amount of work to recover recently logged updates. Any changes to
|
||||
// be made to the descriptor are added to *edit.
|
||||
Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
void MaybeIgnoreError(Status* s) const;
|
||||
|
||||
// Delete any unneeded files and stale in-memory entries.
|
||||
void DeleteObsoleteFiles();
|
||||
|
||||
// A background thread to compact the in-memory write buffer to disk.
|
||||
// Switches to a new log-file/memtable and writes a new descriptor iff
|
||||
// successful.
|
||||
static void CompactMemTableWrapper(void* db)
|
||||
{ reinterpret_cast<DBImpl*>(db)->CompactMemTableThread(); }
|
||||
void CompactMemTableThread();
|
||||
|
||||
Status RecoverLogFile(uint64_t log_number,
|
||||
VersionEdit* edit,
|
||||
SequenceNumber* max_sequence)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base, uint64_t* number)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
Status SequenceWriteBegin(Writer* w, WriteBatch* updates)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
void SequenceWriteEnd(Writer* w)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
static void CompactLevelWrapper(void* db)
|
||||
{ reinterpret_cast<DBImpl*>(db)->CompactLevelThread(); }
|
||||
void CompactLevelThread();
|
||||
Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
static void CompactOptimisticWrapper(void* db)
|
||||
{ reinterpret_cast<DBImpl*>(db)->CompactOptimisticThread(); }
|
||||
void CompactOptimisticThread();
|
||||
Status OptimisticCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
void CleanupCompaction(CompactionState* compact)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
Status DoCompactionWork(CompactionState* compact)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
Status OpenCompactionOutputFile(CompactionState* compact);
|
||||
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
|
||||
Status InstallCompactionResults(CompactionState* compact)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
// Constant after construction
|
||||
Env* const env_;
|
||||
const InternalKeyComparator internal_comparator_;
|
||||
const InternalFilterPolicy internal_filter_policy_;
|
||||
const Options options_; // options_.comparator == &internal_comparator_
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
const std::string dbname_;
|
||||
|
||||
// table_cache_ provides its own synchronization
|
||||
TableCache* table_cache_;
|
||||
|
||||
// Lock over the persistent DB state. Non-NULL iff successfully acquired.
|
||||
FileLock* db_lock_;
|
||||
|
||||
// State below is protected by mutex_
|
||||
port::Mutex mutex_;
|
||||
port::AtomicPointer shutting_down_;
|
||||
MemTable* mem_;
|
||||
MemTable* imm_; // Memtable being compacted
|
||||
port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
|
||||
WritableFile* logfile_;
|
||||
uint64_t logfile_number_;
|
||||
log::Writer* log_;
|
||||
|
||||
// Synchronize writers
|
||||
uint64_t __attribute__ ((aligned (8))) writers_lower_;
|
||||
uint64_t __attribute__ ((aligned (8))) writers_upper_;
|
||||
|
||||
SnapshotList snapshots_;
|
||||
|
||||
// Set of table files to protect from deletion because they are
|
||||
// part of ongoing compactions.
|
||||
std::set<uint64_t> pending_outputs_;
|
||||
|
||||
bool allow_background_activity_;
|
||||
bool levels_locked_[config::kNumLevels];
|
||||
int num_bg_threads_;
|
||||
// Tell the foreground that background has done something of note
|
||||
port::CondVar bg_fg_cv_;
|
||||
// Communicate with compaction background thread
|
||||
port::CondVar bg_compaction_cv_;
|
||||
// Communicate with memtable->L0 background thread
|
||||
port::CondVar bg_memtable_cv_;
|
||||
// Communicate with the optimistic background thread
|
||||
bool bg_optimistic_trip_;
|
||||
port::CondVar bg_optimistic_cv_;
|
||||
// Mutual exlusion protecting the LogAndApply func
|
||||
port::CondVar bg_log_cv_;
|
||||
bool bg_log_occupied_;
|
||||
|
||||
// Information for a manual compaction
|
||||
struct ManualCompaction {
|
||||
int level;
|
||||
bool done;
|
||||
const InternalKey* begin; // NULL means beginning of key range
|
||||
const InternalKey* end; // NULL means end of key range
|
||||
InternalKey tmp_storage; // Used to keep track of compaction progress
|
||||
};
|
||||
ManualCompaction* manual_compaction_;
|
||||
|
||||
VersionSet* versions_;
|
||||
|
||||
// Have we encountered a background error in paranoid mode?
|
||||
Status bg_error_;
|
||||
int consecutive_compaction_errors_;
|
||||
|
||||
// Per level compaction stats. stats_[level] stores the stats for
|
||||
// compactions that produced data for the specified "level".
|
||||
struct CompactionStats {
|
||||
int64_t micros;
|
||||
int64_t bytes_read;
|
||||
int64_t bytes_written;
|
||||
|
||||
CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }
|
||||
|
||||
void Add(const CompactionStats& c) {
|
||||
this->micros += c.micros;
|
||||
this->bytes_read += c.bytes_read;
|
||||
this->bytes_written += c.bytes_written;
|
||||
}
|
||||
};
|
||||
CompactionStats stats_[config::kNumLevels];
|
||||
|
||||
// No copying allowed
|
||||
DBImpl(const DBImpl&);
|
||||
void operator=(const DBImpl&);
|
||||
|
||||
const Comparator* user_comparator() const {
|
||||
return internal_comparator_.user_comparator();
|
||||
}
|
||||
};
|
||||
|
||||
// Sanitize db options. The caller should delete result.info_log if
|
||||
// it is not equal to src.info_log.
|
||||
extern Options SanitizeOptions(const std::string& db,
|
||||
const InternalKeyComparator* icmp,
|
||||
const InternalFilterPolicy* ipolicy,
|
||||
const Options& src);
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_DB_IMPL_H_
|
||||
299
src/hyperleveldb/db/db_iter.cc
Normal file
299
src/hyperleveldb/db/db_iter.cc
Normal file
@@ -0,0 +1,299 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db_iter.h"
|
||||
|
||||
#include "filename.h"
|
||||
#include "dbformat.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/iterator.h"
|
||||
#include "../port/port.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/mutexlock.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
#if 0
|
||||
// Debugging aid (compiled out): prints every entry of `iter` to stderr,
// flagging keys that fail to parse as internal keys.
static void DumpInternalIter(Iterator* iter) {
  iter->SeekToFirst();
  while (iter->Valid()) {
    ParsedInternalKey parsed;
    if (ParseInternalKey(iter->key(), &parsed)) {
      fprintf(stderr, "@ '%s'\n", parsed.DebugString().c_str());
    } else {
      fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
    }
    iter->Next();
  }
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Memtables and sstables that make the DB representation contain
|
||||
// (userkey,seq,type) => uservalue entries. DBIter
|
||||
// combines multiple entries for the same userkey found in the DB
|
||||
// representation into a single entry while accounting for sequence
|
||||
// numbers, deletion markers, overwrites, etc.
|
||||
class DBIter: public Iterator {
|
||||
public:
|
||||
// Which direction is the iterator currently moving?
|
||||
// (1) When moving forward, the internal iterator is positioned at
|
||||
// the exact entry that yields this->key(), this->value()
|
||||
// (2) When moving backwards, the internal iterator is positioned
|
||||
// just before all entries whose user key == this->key().
|
||||
enum Direction {
|
||||
kForward,
|
||||
kReverse
|
||||
};
|
||||
|
||||
DBIter(const std::string* dbname, Env* env,
|
||||
const Comparator* cmp, Iterator* iter, SequenceNumber s)
|
||||
: dbname_(dbname),
|
||||
env_(env),
|
||||
user_comparator_(cmp),
|
||||
iter_(iter),
|
||||
sequence_(s),
|
||||
direction_(kForward),
|
||||
valid_(false) {
|
||||
}
|
||||
virtual ~DBIter() {
|
||||
delete iter_;
|
||||
}
|
||||
virtual bool Valid() const { return valid_; }
|
||||
virtual Slice key() const {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_;
|
||||
}
|
||||
virtual Slice value() const {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||
}
|
||||
virtual Status status() const {
|
||||
if (status_.ok()) {
|
||||
return iter_->status();
|
||||
} else {
|
||||
return status_;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Next();
|
||||
virtual void Prev();
|
||||
virtual void Seek(const Slice& target);
|
||||
virtual void SeekToFirst();
|
||||
virtual void SeekToLast();
|
||||
|
||||
private:
|
||||
void FindNextUserEntry(bool skipping, std::string* skip);
|
||||
void FindPrevUserEntry();
|
||||
bool ParseKey(ParsedInternalKey* key);
|
||||
|
||||
inline void SaveKey(const Slice& k, std::string* dst) {
|
||||
dst->assign(k.data(), k.size());
|
||||
}
|
||||
|
||||
inline void ClearSavedValue() {
|
||||
if (saved_value_.capacity() > 1048576) {
|
||||
std::string empty;
|
||||
swap(empty, saved_value_);
|
||||
} else {
|
||||
saved_value_.clear();
|
||||
}
|
||||
}
|
||||
|
||||
const std::string* const dbname_;
|
||||
Env* const env_;
|
||||
const Comparator* const user_comparator_;
|
||||
Iterator* const iter_;
|
||||
SequenceNumber const sequence_;
|
||||
|
||||
Status status_;
|
||||
std::string saved_key_; // == current key when direction_==kReverse
|
||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||
Direction direction_;
|
||||
bool valid_;
|
||||
|
||||
// No copying allowed
|
||||
DBIter(const DBIter&);
|
||||
void operator=(const DBIter&);
|
||||
};
|
||||
|
||||
// Parses the internal iterator's current key into *ikey. Returns true on
// success; on failure records a Corruption status and returns false.
inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
  if (ParseInternalKey(iter_->key(), ikey)) {
    return true;
  }
  status_ = Status::Corruption("corrupted internal key in DBIter");
  return false;
}
|
||||
|
||||
void DBIter::Next() {
|
||||
assert(valid_);
|
||||
|
||||
if (direction_ == kReverse) { // Switch directions?
|
||||
direction_ = kForward;
|
||||
// iter_ is pointing just before the entries for this->key(),
|
||||
// so advance into the range of entries for this->key() and then
|
||||
// use the normal skipping code below.
|
||||
if (!iter_->Valid()) {
|
||||
iter_->SeekToFirst();
|
||||
} else {
|
||||
iter_->Next();
|
||||
}
|
||||
if (!iter_->Valid()) {
|
||||
valid_ = false;
|
||||
saved_key_.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Temporarily use saved_key_ as storage for key to skip.
|
||||
std::string* skip = &saved_key_;
|
||||
SaveKey(ExtractUserKey(iter_->key()), skip);
|
||||
FindNextUserEntry(true, skip);
|
||||
}
|
||||
|
||||
void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||
// Loop until we hit an acceptable entry to yield
|
||||
assert(iter_->Valid());
|
||||
assert(direction_ == kForward);
|
||||
do {
|
||||
ParsedInternalKey ikey;
|
||||
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
||||
switch (ikey.type) {
|
||||
case kTypeDeletion:
|
||||
// Arrange to skip all upcoming entries for this key since
|
||||
// they are hidden by this deletion.
|
||||
SaveKey(ikey.user_key, skip);
|
||||
skipping = true;
|
||||
break;
|
||||
case kTypeValue:
|
||||
if (skipping &&
|
||||
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
||||
// Entry hidden
|
||||
} else {
|
||||
valid_ = true;
|
||||
saved_key_.clear();
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
iter_->Next();
|
||||
} while (iter_->Valid());
|
||||
saved_key_.clear();
|
||||
valid_ = false;
|
||||
}
|
||||
|
||||
void DBIter::Prev() {
|
||||
assert(valid_);
|
||||
|
||||
if (direction_ == kForward) { // Switch directions?
|
||||
// iter_ is pointing at the current entry. Scan backwards until
|
||||
// the key changes so we can use the normal reverse scanning code.
|
||||
assert(iter_->Valid()); // Otherwise valid_ would have been false
|
||||
SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
|
||||
while (true) {
|
||||
iter_->Prev();
|
||||
if (!iter_->Valid()) {
|
||||
valid_ = false;
|
||||
saved_key_.clear();
|
||||
ClearSavedValue();
|
||||
return;
|
||||
}
|
||||
if (user_comparator_->Compare(ExtractUserKey(iter_->key()),
|
||||
saved_key_) < 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
direction_ = kReverse;
|
||||
}
|
||||
|
||||
FindPrevUserEntry();
|
||||
}
|
||||
|
||||
void DBIter::FindPrevUserEntry() {
|
||||
assert(direction_ == kReverse);
|
||||
|
||||
ValueType value_type = kTypeDeletion;
|
||||
if (iter_->Valid()) {
|
||||
do {
|
||||
ParsedInternalKey ikey;
|
||||
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
||||
if ((value_type != kTypeDeletion) &&
|
||||
user_comparator_->Compare(ikey.user_key, saved_key_) < 0) {
|
||||
// We encountered a non-deleted value in entries for previous keys,
|
||||
break;
|
||||
}
|
||||
value_type = ikey.type;
|
||||
if (value_type == kTypeDeletion) {
|
||||
saved_key_.clear();
|
||||
ClearSavedValue();
|
||||
} else {
|
||||
Slice raw_value = iter_->value();
|
||||
if (saved_value_.capacity() > raw_value.size() + 1048576) {
|
||||
std::string empty;
|
||||
swap(empty, saved_value_);
|
||||
}
|
||||
SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
|
||||
saved_value_.assign(raw_value.data(), raw_value.size());
|
||||
}
|
||||
}
|
||||
iter_->Prev();
|
||||
} while (iter_->Valid());
|
||||
}
|
||||
|
||||
if (value_type == kTypeDeletion) {
|
||||
// End
|
||||
valid_ = false;
|
||||
saved_key_.clear();
|
||||
ClearSavedValue();
|
||||
direction_ = kForward;
|
||||
} else {
|
||||
valid_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void DBIter::Seek(const Slice& target) {
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
saved_key_.clear();
|
||||
AppendInternalKey(
|
||||
&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
|
||||
iter_->Seek(saved_key_);
|
||||
if (iter_->Valid()) {
|
||||
FindNextUserEntry(false, &saved_key_ /* temporary storage */);
|
||||
} else {
|
||||
valid_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void DBIter::SeekToFirst() {
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
iter_->SeekToFirst();
|
||||
if (iter_->Valid()) {
|
||||
FindNextUserEntry(false, &saved_key_ /* temporary storage */);
|
||||
} else {
|
||||
valid_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Positions the iterator at the last visible entry by moving the internal
// iterator to its end and running the reverse scan from there.
void DBIter::SeekToLast() {
  direction_ = kReverse;
  ClearSavedValue();
  iter_->SeekToLast();
  FindPrevUserEntry();
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Creates a DBIter over `internal_iter`. The new iterator deletes
// `internal_iter` when it is itself destroyed; the caller owns the result.
Iterator* NewDBIterator(
    const std::string* dbname,
    Env* env,
    const Comparator* user_key_comparator,
    Iterator* internal_iter,
    const SequenceNumber& sequence) {
  DBIter* const db_iter =
      new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
  return db_iter;
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
26
src/hyperleveldb/db/db_iter.h
Normal file
26
src/hyperleveldb/db/db_iter.h
Normal file
@@ -0,0 +1,26 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_DB_ITER_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_DB_ITER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "dbformat.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Return a new iterator that converts internal keys (yielded by
|
||||
// "*internal_iter") that were live at the specified "sequence" number
|
||||
// into appropriate user keys.
|
||||
extern Iterator* NewDBIterator(
|
||||
const std::string* dbname,
|
||||
Env* env,
|
||||
const Comparator* user_key_comparator,
|
||||
Iterator* internal_iter,
|
||||
const SequenceNumber& sequence);
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_DB_ITER_H_
|
||||
2064
src/hyperleveldb/db/db_test.cc
Normal file
2064
src/hyperleveldb/db/db_test.cc
Normal file
File diff suppressed because it is too large
Load Diff
140
src/hyperleveldb/db/dbformat.cc
Normal file
140
src/hyperleveldb/db/dbformat.cc
Normal file
@@ -0,0 +1,140 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <stdio.h>
|
||||
#include "dbformat.h"
|
||||
#include "../port/port.h"
|
||||
#include "../util/coding.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Combine a sequence number and a value type into one 64-bit tag:
// the sequence occupies the upper 56 bits, the type the low 8 bits.
static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
  assert(seq <= kMaxSequenceNumber);
  assert(t <= kValueTypeForSeek);
  const uint64_t shifted_sequence = seq << 8;
  const uint64_t type_bits = static_cast<uint64_t>(t);
  return shifted_sequence | type_bits;
}
|
||||
|
||||
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
|
||||
result->append(key.user_key.data(), key.user_key.size());
|
||||
PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
|
||||
}
|
||||
|
||||
// Human-readable rendering of the form '<escaped user key>' @ <seq> : <type>.
std::string ParsedInternalKey::DebugString() const {
  // The closing quote, sequence number and numeric type form the suffix.
  char suffix[50];
  snprintf(suffix, sizeof(suffix), "' @ %llu : %d",
           static_cast<unsigned long long>(sequence),
           static_cast<int>(type));

  std::string text("'");
  text.append(EscapeString(user_key.ToString()));
  text.append(suffix);
  return text;
}
|
||||
|
||||
std::string InternalKey::DebugString() const {
|
||||
std::string result;
|
||||
ParsedInternalKey parsed;
|
||||
if (ParseInternalKey(rep_, &parsed)) {
|
||||
result = parsed.DebugString();
|
||||
} else {
|
||||
result = "(bad)";
|
||||
result.append(EscapeString(rep_));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Identifier reported for this comparator. The literal must be preserved
// exactly as written.
const char* InternalKeyComparator::Name() const {
  static const char kComparatorName[] = "leveldb.InternalKeyComparator";
  return kComparatorName;
}
|
||||
|
||||
int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
|
||||
// Order by:
|
||||
// increasing user key (according to user-supplied comparator)
|
||||
// decreasing sequence number
|
||||
// decreasing type (though sequence# should be enough to disambiguate)
|
||||
int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
|
||||
if (r == 0) {
|
||||
const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
|
||||
const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
|
||||
if (anum > bnum) {
|
||||
r = -1;
|
||||
} else if (anum < bnum) {
|
||||
r = +1;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
void InternalKeyComparator::FindShortestSeparator(
|
||||
std::string* start,
|
||||
const Slice& limit) const {
|
||||
// Attempt to shorten the user portion of the key
|
||||
Slice user_start = ExtractUserKey(*start);
|
||||
Slice user_limit = ExtractUserKey(limit);
|
||||
std::string tmp(user_start.data(), user_start.size());
|
||||
user_comparator_->FindShortestSeparator(&tmp, user_limit);
|
||||
if (tmp.size() < user_start.size() &&
|
||||
user_comparator_->Compare(user_start, tmp) < 0) {
|
||||
// User key has become shorter physically, but larger logically.
|
||||
// Tack on the earliest possible number to the shortened user key.
|
||||
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
|
||||
assert(this->Compare(*start, tmp) < 0);
|
||||
assert(this->Compare(tmp, limit) < 0);
|
||||
start->swap(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||
Slice user_key = ExtractUserKey(*key);
|
||||
std::string tmp(user_key.data(), user_key.size());
|
||||
user_comparator_->FindShortSuccessor(&tmp);
|
||||
if (tmp.size() < user_key.size() &&
|
||||
user_comparator_->Compare(user_key, tmp) < 0) {
|
||||
// User key has become shorter physically, but larger logically.
|
||||
// Tack on the earliest possible number to the shortened user key.
|
||||
PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek));
|
||||
assert(this->Compare(*key, tmp) < 0);
|
||||
key->swap(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
const char* InternalFilterPolicy::Name() const {
|
||||
return user_policy_->Name();
|
||||
}
|
||||
|
||||
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
|
||||
std::string* dst) const {
|
||||
// We rely on the fact that the code in table.cc does not mind us
|
||||
// adjusting keys[].
|
||||
Slice* mkey = const_cast<Slice*>(keys);
|
||||
for (int i = 0; i < n; i++) {
|
||||
mkey[i] = ExtractUserKey(keys[i]);
|
||||
// TODO(sanjay): Suppress dups?
|
||||
}
|
||||
user_policy_->CreateFilter(keys, n, dst);
|
||||
}
|
||||
|
||||
bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
|
||||
return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
|
||||
}
|
||||
|
||||
// Lay out  [varint32 length][user key bytes][8-byte tag]  in one buffer.
// The inline space_ array is used when it is large enough; otherwise a heap
// buffer is allocated (released by the destructor when start_ != space_).
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
  const size_t usize = user_key.size();
  const size_t needed = usize + 13;  // A conservative estimate
  char* cursor = (needed <= sizeof(space_)) ? space_ : new char[needed];

  start_ = cursor;
  // The length prefix covers the user key plus the 8-byte tag.
  cursor = EncodeVarint32(cursor, usize + 8);
  kstart_ = cursor;

  memcpy(cursor, user_key.data(), usize);
  cursor += usize;

  EncodeFixed64(cursor, PackSequenceAndType(s, kValueTypeForSeek));
  cursor += 8;
  end_ = cursor;
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
229
src/hyperleveldb/db/dbformat.h
Normal file
229
src/hyperleveldb/db/dbformat.h
Normal file
@@ -0,0 +1,229 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_FORMAT_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_FORMAT_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include "../hyperleveldb/comparator.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/filter_policy.h"
|
||||
#include "../hyperleveldb/slice.h"
|
||||
#include "../hyperleveldb/table_builder.h"
|
||||
#include "../util/coding.h"
|
||||
#include "../util/logging.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Grouping of constants. We may want to make some of these
|
||||
// parameters set via options.
|
||||
namespace config {

// Number of levels.
static const int kNumLevels = 7;

// A level-0 compaction is started once this many level-0 files exist.
static const int kL0_CompactionTrigger = 4;

// Soft limit on the number of level-0 files.  Writes could be slowed down
// at this point, but are not.
static const int kL0_SlowdownWritesTrigger = 8;

// Maximum number of level-0 files.  Writes could be stopped at this point,
// but are not.
static const int kL0_StopWritesTrigger = 12;

// Highest level a freshly compacted memtable is pushed to when doing so
// creates no overlap.  Level 2 is targeted to avoid the relatively expensive
// level 0=>1 compactions and some expensive manifest file operations.  We do
// not push all the way to the largest level since that can waste a lot of
// disk space when the same key space is repeatedly overwritten.
static const int kMaxMemCompactLevel = 2;

}  // namespace config
|
||||
|
||||
class InternalKey;
|
||||
|
||||
// Value types encoded as the last component of internal keys.
|
||||
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
|
||||
// data structures.
|
||||
enum ValueType {
  kTypeDeletion = 0x0,
  kTypeValue = 0x1
};

// ValueType to use when building a ParsedInternalKey for seeking to a
// particular sequence number.  Sequence numbers sort in decreasing order and
// the type lives in the low 8 bits of the same 64-bit tag, so the seek key
// must carry the highest-numbered ValueType, not the lowest.
static const ValueType kValueTypeForSeek = kTypeValue;

typedef uint64_t SequenceNumber;

// Only 56 bits are available for the sequence number: the bottom eight bits
// of the 64-bit tag are reserved for the value type.
static const SequenceNumber kMaxSequenceNumber =
    (static_cast<SequenceNumber>(1) << 56) - 1;
|
||||
|
||||
struct ParsedInternalKey {
|
||||
Slice user_key;
|
||||
SequenceNumber sequence;
|
||||
ValueType type;
|
||||
|
||||
ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
|
||||
ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
|
||||
: user_key(u), sequence(seq), type(t) { }
|
||||
std::string DebugString() const;
|
||||
};
|
||||
|
||||
// Return the length of the encoding of "key".
|
||||
inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
|
||||
return key.user_key.size() + 8;
|
||||
}
|
||||
|
||||
// Append the serialization of "key" to *result.
|
||||
extern void AppendInternalKey(std::string* result,
|
||||
const ParsedInternalKey& key);
|
||||
|
||||
// Attempt to parse an internal key from "internal_key". On success,
|
||||
// stores the parsed data in "*result", and returns true.
|
||||
//
|
||||
// On error, returns false, leaves "*result" in an undefined state.
|
||||
extern bool ParseInternalKey(const Slice& internal_key,
|
||||
ParsedInternalKey* result);
|
||||
|
||||
// Returns the user key portion of an internal key.
|
||||
// View of the user-key portion of an internal key, i.e. everything except
// the trailing 8-byte tag.  The input must be at least 8 bytes long.
inline Slice ExtractUserKey(const Slice& internal_key) {
  assert(internal_key.size() >= 8);
  const size_t user_len = internal_key.size() - 8;
  return Slice(internal_key.data(), user_len);
}
|
||||
|
||||
// Read the value type stored in the low byte of an internal key's trailing
// 8-byte tag.  The input must be at least 8 bytes long.
inline ValueType ExtractValueType(const Slice& internal_key) {
  assert(internal_key.size() >= 8);
  const char* const tag_start =
      internal_key.data() + internal_key.size() - 8;
  const uint64_t tag = DecodeFixed64(tag_start);
  const unsigned char type_byte = tag & 0xff;
  return static_cast<ValueType>(type_byte);
}
|
||||
|
||||
// A comparator for internal keys that uses a specified comparator for
|
||||
// the user key portion and breaks ties by decreasing sequence number.
|
||||
class InternalKeyComparator : public Comparator {
|
||||
private:
|
||||
const Comparator* user_comparator_;
|
||||
public:
|
||||
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
|
||||
virtual const char* Name() const;
|
||||
virtual int Compare(const Slice& a, const Slice& b) const;
|
||||
virtual void FindShortestSeparator(
|
||||
std::string* start,
|
||||
const Slice& limit) const;
|
||||
virtual void FindShortSuccessor(std::string* key) const;
|
||||
|
||||
const Comparator* user_comparator() const { return user_comparator_; }
|
||||
|
||||
int Compare(const InternalKey& a, const InternalKey& b) const;
|
||||
};
|
||||
|
||||
// Filter policy wrapper that converts from internal keys to user keys
|
||||
class InternalFilterPolicy : public FilterPolicy {
|
||||
private:
|
||||
const FilterPolicy* const user_policy_;
|
||||
public:
|
||||
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
|
||||
virtual const char* Name() const;
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
|
||||
};
|
||||
|
||||
// Modules in this directory should keep internal keys wrapped inside
|
||||
// the following class instead of plain strings so that we do not
|
||||
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
||||
class InternalKey {
|
||||
private:
|
||||
std::string rep_;
|
||||
public:
|
||||
InternalKey() { } // Leave rep_ as empty to indicate it is invalid
|
||||
InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
|
||||
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
|
||||
}
|
||||
|
||||
void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
|
||||
Slice Encode() const {
|
||||
assert(!rep_.empty());
|
||||
return rep_;
|
||||
}
|
||||
|
||||
Slice user_key() const { return ExtractUserKey(rep_); }
|
||||
|
||||
void SetFrom(const ParsedInternalKey& p) {
|
||||
rep_.clear();
|
||||
AppendInternalKey(&rep_, p);
|
||||
}
|
||||
|
||||
void Clear() { rep_.clear(); }
|
||||
|
||||
std::string DebugString() const;
|
||||
};
|
||||
|
||||
inline int InternalKeyComparator::Compare(
|
||||
const InternalKey& a, const InternalKey& b) const {
|
||||
return Compare(a.Encode(), b.Encode());
|
||||
}
|
||||
|
||||
inline bool ParseInternalKey(const Slice& internal_key,
|
||||
ParsedInternalKey* result) {
|
||||
const size_t n = internal_key.size();
|
||||
if (n < 8) return false;
|
||||
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
|
||||
unsigned char c = num & 0xff;
|
||||
result->sequence = num >> 8;
|
||||
result->type = static_cast<ValueType>(c);
|
||||
result->user_key = Slice(internal_key.data(), n - 8);
|
||||
return (c <= static_cast<unsigned char>(kTypeValue));
|
||||
}
|
||||
|
||||
// A helper class useful for DBImpl::Get()
|
||||
class LookupKey {
|
||||
public:
|
||||
// Initialize *this for looking up user_key at a snapshot with
|
||||
// the specified sequence number.
|
||||
LookupKey(const Slice& user_key, SequenceNumber sequence);
|
||||
|
||||
~LookupKey();
|
||||
|
||||
// Return a key suitable for lookup in a MemTable.
|
||||
Slice memtable_key() const { return Slice(start_, end_ - start_); }
|
||||
|
||||
// Return an internal key (suitable for passing to an internal iterator)
|
||||
Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
|
||||
|
||||
// Return the user key
|
||||
Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
|
||||
|
||||
private:
|
||||
// We construct a char array of the form:
|
||||
// klength varint32 <-- start_
|
||||
// userkey char[klength] <-- kstart_
|
||||
// tag uint64
|
||||
// <-- end_
|
||||
// The array is a suitable MemTable key.
|
||||
// The suffix starting with "userkey" can be used as an InternalKey.
|
||||
const char* start_;
|
||||
const char* kstart_;
|
||||
const char* end_;
|
||||
char space_[200]; // Avoid allocation for short keys
|
||||
|
||||
// No copying allowed
|
||||
LookupKey(const LookupKey&);
|
||||
void operator=(const LookupKey&);
|
||||
};
|
||||
|
||||
// Release the buffer only when it is not the inline space_ array.
inline LookupKey::~LookupKey() {
  const bool uses_inline_storage = (start_ == space_);
  if (!uses_inline_storage) {
    delete[] start_;
  }
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_FORMAT_H_
|
||||
112
src/hyperleveldb/db/dbformat_test.cc
Normal file
112
src/hyperleveldb/db/dbformat_test.cc
Normal file
@@ -0,0 +1,112 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "dbformat.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
static std::string IKey(const std::string& user_key,
|
||||
uint64_t seq,
|
||||
ValueType vt) {
|
||||
std::string encoded;
|
||||
AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt));
|
||||
return encoded;
|
||||
}
|
||||
|
||||
static std::string Shorten(const std::string& s, const std::string& l) {
|
||||
std::string result = s;
|
||||
InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l);
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string ShortSuccessor(const std::string& s) {
|
||||
std::string result = s;
|
||||
InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void TestKey(const std::string& key,
|
||||
uint64_t seq,
|
||||
ValueType vt) {
|
||||
std::string encoded = IKey(key, seq, vt);
|
||||
|
||||
Slice in(encoded);
|
||||
ParsedInternalKey decoded("", 0, kTypeValue);
|
||||
|
||||
ASSERT_TRUE(ParseInternalKey(in, &decoded));
|
||||
ASSERT_EQ(key, decoded.user_key.ToString());
|
||||
ASSERT_EQ(seq, decoded.sequence);
|
||||
ASSERT_EQ(vt, decoded.type);
|
||||
|
||||
ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded));
|
||||
}
|
||||
|
||||
class FormatTest { };
|
||||
|
||||
TEST(FormatTest, InternalKey_EncodeDecode) {
|
||||
const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" };
|
||||
const uint64_t seq[] = {
|
||||
1, 2, 3,
|
||||
(1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1,
|
||||
(1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1,
|
||||
(1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1
|
||||
};
|
||||
for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
|
||||
for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) {
|
||||
TestKey(keys[k], seq[s], kTypeValue);
|
||||
TestKey("hello", 1, kTypeDeletion);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Each case checks what FindShortestSeparator does to a start key for a
// particular relationship between the start and limit user keys.
TEST(FormatTest, InternalKeyShortSeparator) {
  const std::string foo100 = IKey("foo", 100, kTypeValue);

  // When user keys are same
  ASSERT_EQ(foo100, Shorten(foo100, IKey("foo", 99, kTypeValue)));
  ASSERT_EQ(foo100, Shorten(foo100, IKey("foo", 101, kTypeValue)));
  ASSERT_EQ(foo100, Shorten(foo100, IKey("foo", 100, kTypeValue)));
  ASSERT_EQ(foo100, Shorten(foo100, IKey("foo", 100, kTypeDeletion)));

  // When user keys are misordered
  ASSERT_EQ(foo100, Shorten(foo100, IKey("bar", 99, kTypeValue)));

  // When user keys are different, but correctly ordered
  ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
            Shorten(foo100, IKey("hello", 200, kTypeValue)));

  // When start user key is prefix of limit user key
  ASSERT_EQ(foo100, Shorten(foo100, IKey("foobar", 200, kTypeValue)));

  // When limit user key is prefix of start user key
  const std::string foobar100 = IKey("foobar", 100, kTypeValue);
  ASSERT_EQ(foobar100, Shorten(foobar100, IKey("foo", 200, kTypeValue)));
}
|
||||
|
||||
// "foo" is expected to shorten to "g" with the maximal tag; a user key of
// "\xff\xff" is expected to come back unchanged.
TEST(FormatTest, InternalKeyShortestSuccessor) {
  const std::string foo100 = IKey("foo", 100, kTypeValue);
  ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek),
            ShortSuccessor(foo100));

  const std::string all_ff = IKey("\xff\xff", 100, kTypeValue);
  ASSERT_EQ(all_ff, ShortSuccessor(all_ff));
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
139
src/hyperleveldb/db/filename.cc
Normal file
139
src/hyperleveldb/db/filename.cc
Normal file
@@ -0,0 +1,139 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include "filename.h"
|
||||
#include "dbformat.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/logging.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// A utility routine: write "data" to the named file and Sync() it.
|
||||
extern Status WriteStringToFileSync(Env* env, const Slice& data,
|
||||
const std::string& fname);
|
||||
|
||||
// Build "<name>/<number>.<suffix>", with the number zero-padded to at least
// six digits.
static std::string MakeFileName(const std::string& name, uint64_t number,
                                const char* suffix) {
  char tail[100];
  const unsigned long long printable_number =
      static_cast<unsigned long long>(number);
  snprintf(tail, sizeof(tail), "/%06llu.%s", printable_number, suffix);

  std::string path(name);
  path.append(tail);
  return path;
}
|
||||
|
||||
std::string LogFileName(const std::string& name, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(name, number, "log");
|
||||
}
|
||||
|
||||
std::string TableFileName(const std::string& name, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(name, number, "sst");
|
||||
}
|
||||
|
||||
// "<dbname>/MANIFEST-<number>", with the number zero-padded to at least six
// digits; the number must be positive.
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
  assert(number > 0);

  char tail[100];
  const unsigned long long printable_number =
      static_cast<unsigned long long>(number);
  snprintf(tail, sizeof(tail), "/MANIFEST-%06llu", printable_number);

  std::string path(dbname);
  path.append(tail);
  return path;
}
|
||||
|
||||
// "<dbname>/CURRENT"
std::string CurrentFileName(const std::string& dbname) {
  std::string path(dbname);
  path += "/CURRENT";
  return path;
}
|
||||
|
||||
// "<dbname>/LOCK"
std::string LockFileName(const std::string& dbname) {
  std::string path(dbname);
  path += "/LOCK";
  return path;
}
|
||||
|
||||
std::string TempFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
return MakeFileName(dbname, number, "dbtmp");
|
||||
}
|
||||
|
||||
// "<dbname>/LOG"
std::string InfoLogFileName(const std::string& dbname) {
  std::string path(dbname);
  path += "/LOG";
  return path;
}
|
||||
|
||||
// Return the name of the old info log file for "dbname".
|
||||
std::string OldInfoLogFileName(const std::string& dbname) {
  // "<dbname>/LOG.old"
  std::string path(dbname);
  path += "/LOG.old";
  return path;
}
|
||||
|
||||
|
||||
// Owned filenames have the form:
|
||||
// dbname/CURRENT
|
||||
// dbname/LOCK
|
||||
// dbname/LOG
|
||||
// dbname/LOG.old
|
||||
// dbname/MANIFEST-[0-9]+
|
||||
//    dbname/[0-9]+.(log|sst|dbtmp)
|
||||
bool ParseFileName(const std::string& fname,
|
||||
uint64_t* number,
|
||||
FileType* type) {
|
||||
Slice rest(fname);
|
||||
if (rest == "CURRENT") {
|
||||
*number = 0;
|
||||
*type = kCurrentFile;
|
||||
} else if (rest == "LOCK") {
|
||||
*number = 0;
|
||||
*type = kDBLockFile;
|
||||
} else if (rest == "LOG" || rest == "LOG.old") {
|
||||
*number = 0;
|
||||
*type = kInfoLogFile;
|
||||
} else if (rest.starts_with("MANIFEST-")) {
|
||||
rest.remove_prefix(strlen("MANIFEST-"));
|
||||
uint64_t num;
|
||||
if (!ConsumeDecimalNumber(&rest, &num)) {
|
||||
return false;
|
||||
}
|
||||
if (!rest.empty()) {
|
||||
return false;
|
||||
}
|
||||
*type = kDescriptorFile;
|
||||
*number = num;
|
||||
} else {
|
||||
// Avoid strtoull() to keep filename format independent of the
|
||||
// current locale
|
||||
uint64_t num;
|
||||
if (!ConsumeDecimalNumber(&rest, &num)) {
|
||||
return false;
|
||||
}
|
||||
Slice suffix = rest;
|
||||
if (suffix == Slice(".log")) {
|
||||
*type = kLogFile;
|
||||
} else if (suffix == Slice(".sst")) {
|
||||
*type = kTableFile;
|
||||
} else if (suffix == Slice(".dbtmp")) {
|
||||
*type = kTempFile;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
*number = num;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Point CURRENT at the manifest with the given number: the manifest's base
// name plus a newline is written to a temp file, which is then renamed over
// CURRENT.  If either step fails the temp file is deleted and the failing
// status is returned.
Status SetCurrentFile(Env* env, const std::string& dbname,
                      uint64_t descriptor_number) {
  // Remove leading "dbname/" and add newline to manifest file name
  const std::string manifest_path =
      DescriptorFileName(dbname, descriptor_number);
  Slice manifest_name(manifest_path);
  assert(manifest_name.starts_with(dbname + "/"));
  manifest_name.remove_prefix(dbname.size() + 1);

  const std::string tmp_path = TempFileName(dbname, descriptor_number);
  Status status =
      WriteStringToFileSync(env, manifest_name.ToString() + "\n", tmp_path);
  if (status.ok()) {
    status = env->RenameFile(tmp_path, CurrentFileName(dbname));
  }
  if (!status.ok()) {
    env->DeleteFile(tmp_path);
  }
  return status;
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
80
src/hyperleveldb/db/filename.h
Normal file
80
src/hyperleveldb/db/filename.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// File names used by DB code
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_FILENAME_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_FILENAME_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include "../hyperleveldb/slice.h"
|
||||
#include "../hyperleveldb/status.h"
|
||||
#include "../port/port.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class Env;
|
||||
|
||||
// Kinds of files a database directory can contain.
enum FileType {
  kLogFile,
  kDBLockFile,
  kTableFile,
  kDescriptorFile,
  kCurrentFile,
  kTempFile,
  kInfoLogFile    // Either the current one, or an old one
};
|
||||
|
||||
// Return the name of the log file with the specified number
|
||||
// in the db named by "dbname". The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the sstable with the specified number
|
||||
// in the db named by "dbname". The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the descriptor file for the db named by
|
||||
// "dbname" and the specified incarnation number. The result will be
|
||||
// prefixed with "dbname".
|
||||
extern std::string DescriptorFileName(const std::string& dbname,
|
||||
uint64_t number);
|
||||
|
||||
// Return the name of the current file. This file contains the name
|
||||
// of the current manifest file. The result will be prefixed with
|
||||
// "dbname".
|
||||
extern std::string CurrentFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of the lock file for the db named by
|
||||
// "dbname". The result will be prefixed with "dbname".
|
||||
extern std::string LockFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of a temporary file owned by the db named "dbname".
|
||||
// The result will be prefixed with "dbname".
|
||||
extern std::string TempFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the info log file for "dbname".
|
||||
extern std::string InfoLogFileName(const std::string& dbname);
|
||||
|
||||
// Return the name of the old info log file for "dbname".
|
||||
extern std::string OldInfoLogFileName(const std::string& dbname);
|
||||
|
||||
// If filename is a leveldb file, store the type of the file in *type.
|
||||
// The number encoded in the filename is stored in *number. If the
|
||||
// filename was successfully parsed, returns true. Else return false.
|
||||
extern bool ParseFileName(const std::string& filename,
|
||||
uint64_t* number,
|
||||
FileType* type);
|
||||
|
||||
// Make the CURRENT file point to the descriptor file with the
|
||||
// specified number.
|
||||
extern Status SetCurrentFile(Env* env, const std::string& dbname,
|
||||
uint64_t descriptor_number);
|
||||
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_FILENAME_H_
|
||||
122
src/hyperleveldb/db/filename_test.cc
Normal file
122
src/hyperleveldb/db/filename_test.cc
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "filename.h"
|
||||
|
||||
#include "dbformat.h"
|
||||
#include "../port/port.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class FileNameTest { };
|
||||
|
||||
TEST(FileNameTest, Parse) {
|
||||
Slice db;
|
||||
FileType type;
|
||||
uint64_t number;
|
||||
|
||||
// Successful parses
|
||||
static struct {
|
||||
const char* fname;
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
} cases[] = {
|
||||
{ "100.log", 100, kLogFile },
|
||||
{ "0.log", 0, kLogFile },
|
||||
{ "0.sst", 0, kTableFile },
|
||||
{ "CURRENT", 0, kCurrentFile },
|
||||
{ "LOCK", 0, kDBLockFile },
|
||||
{ "MANIFEST-2", 2, kDescriptorFile },
|
||||
{ "MANIFEST-7", 7, kDescriptorFile },
|
||||
{ "LOG", 0, kInfoLogFile },
|
||||
{ "LOG.old", 0, kInfoLogFile },
|
||||
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
|
||||
};
|
||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||
std::string f = cases[i].fname;
|
||||
ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
|
||||
ASSERT_EQ(cases[i].type, type) << f;
|
||||
ASSERT_EQ(cases[i].number, number) << f;
|
||||
}
|
||||
|
||||
// Errors
|
||||
static const char* errors[] = {
|
||||
"",
|
||||
"foo",
|
||||
"foo-dx-100.log",
|
||||
".log",
|
||||
"",
|
||||
"manifest",
|
||||
"CURREN",
|
||||
"CURRENTX",
|
||||
"MANIFES",
|
||||
"MANIFEST",
|
||||
"MANIFEST-",
|
||||
"XMANIFEST-3",
|
||||
"MANIFEST-3x",
|
||||
"LOC",
|
||||
"LOCKx",
|
||||
"LO",
|
||||
"LOGx",
|
||||
"18446744073709551616.log",
|
||||
"184467440737095516150.log",
|
||||
"100",
|
||||
"100.",
|
||||
"100.lop"
|
||||
};
|
||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||
std::string f = errors[i];
|
||||
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
||||
}
|
||||
}
|
||||
|
||||
// For each name builder: the result must start with "<dbname>/" and the
// remainder must parse back to the expected number and file type.
TEST(FileNameTest, Construction) {
  uint64_t number;
  FileType type;
  std::string fname;

  fname = CurrentFileName("foo");
  ASSERT_EQ("foo/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(0, number);
  ASSERT_EQ(kCurrentFile, type);

  fname = LockFileName("foo");
  ASSERT_EQ("foo/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(0, number);
  ASSERT_EQ(kDBLockFile, type);

  fname = LogFileName("foo", 192);
  ASSERT_EQ("foo/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(192, number);
  ASSERT_EQ(kLogFile, type);

  fname = TableFileName("bar", 200);
  ASSERT_EQ("bar/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(200, number);
  ASSERT_EQ(kTableFile, type);

  fname = DescriptorFileName("bar", 100);
  ASSERT_EQ("bar/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(100, number);
  ASSERT_EQ(kDescriptorFile, type);

  fname = TempFileName("tmp", 999);
  ASSERT_EQ("tmp/", fname.substr(0, 4));
  ASSERT_TRUE(ParseFileName(fname.substr(4), &number, &type));
  ASSERT_EQ(999, number);
  ASSERT_EQ(kTempFile, type);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
238
src/hyperleveldb/db/leveldb_main.cc
Normal file
238
src/hyperleveldb/db/leveldb_main.cc
Normal file
@@ -0,0 +1,238 @@
|
||||
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <stdio.h>
|
||||
#include "dbformat.h"
|
||||
#include "filename.h"
|
||||
#include "log_reader.h"
|
||||
#include "version_edit.h"
|
||||
#include "write_batch_internal.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/iterator.h"
|
||||
#include "../hyperleveldb/options.h"
|
||||
#include "../hyperleveldb/status.h"
|
||||
#include "../hyperleveldb/table.h"
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
#include "../util/logging.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
namespace {
|
||||
|
||||
bool GuessType(const std::string& fname, FileType* type) {
|
||||
size_t pos = fname.rfind('/');
|
||||
std::string basename;
|
||||
if (pos == std::string::npos) {
|
||||
basename = fname;
|
||||
} else {
|
||||
basename = std::string(fname.data() + pos + 1, fname.size() - pos - 1);
|
||||
}
|
||||
uint64_t ignored;
|
||||
return ParseFileName(basename, &ignored, type);
|
||||
}
|
||||
|
||||
// Notified when log reader encounters corruption.
|
||||
class CorruptionReporter : public log::Reader::Reporter {
|
||||
public:
|
||||
virtual void Corruption(size_t bytes, const Status& status) {
|
||||
printf("corruption: %d bytes; %s\n",
|
||||
static_cast<int>(bytes),
|
||||
status.ToString().c_str());
|
||||
}
|
||||
};
|
||||
|
||||
// Print contents of a log file. (*func)() is called on every record.
|
||||
bool PrintLogContents(Env* env, const std::string& fname,
|
||||
void (*func)(Slice)) {
|
||||
SequentialFile* file;
|
||||
Status s = env->NewSequentialFile(fname, &file);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "%s\n", s.ToString().c_str());
|
||||
return false;
|
||||
}
|
||||
CorruptionReporter reporter;
|
||||
log::Reader reader(file, &reporter, true, 0);
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
while (reader.ReadRecord(&record, &scratch)) {
|
||||
printf("--- offset %llu; ",
|
||||
static_cast<unsigned long long>(reader.LastRecordOffset()));
|
||||
(*func)(record);
|
||||
}
|
||||
delete file;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Called on every item found in a WriteBatch.
|
||||
class WriteBatchItemPrinter : public WriteBatch::Handler {
|
||||
public:
|
||||
uint64_t offset_;
|
||||
uint64_t sequence_;
|
||||
|
||||
virtual void Put(const Slice& key, const Slice& value) {
|
||||
printf(" put '%s' '%s'\n",
|
||||
EscapeString(key).c_str(),
|
||||
EscapeString(value).c_str());
|
||||
}
|
||||
virtual void Delete(const Slice& key) {
|
||||
printf(" del '%s'\n",
|
||||
EscapeString(key).c_str());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Called on every log record (each one of which is a WriteBatch)
|
||||
// found in a kLogFile.
|
||||
static void WriteBatchPrinter(Slice record) {
|
||||
if (record.size() < 12) {
|
||||
printf("log record length %d is too small\n",
|
||||
static_cast<int>(record.size()));
|
||||
return;
|
||||
}
|
||||
WriteBatch batch;
|
||||
WriteBatchInternal::SetContents(&batch, record);
|
||||
printf("sequence %llu\n",
|
||||
static_cast<unsigned long long>(WriteBatchInternal::Sequence(&batch)));
|
||||
WriteBatchItemPrinter batch_item_printer;
|
||||
Status s = batch.Iterate(&batch_item_printer);
|
||||
if (!s.ok()) {
|
||||
printf(" error: %s\n", s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Dumps a log file by printing every record through WriteBatchPrinter.
// Returns the result of PrintLogContents().
bool DumpLog(Env* env, const std::string& fname) {
  const bool dumped = PrintLogContents(env, fname, WriteBatchPrinter);
  return dumped;
}
|
||||
|
||||
// Called on every log record (each one of which is a WriteBatch)
|
||||
// found in a kDescriptorFile.
|
||||
static void VersionEditPrinter(Slice record) {
|
||||
VersionEdit edit;
|
||||
Status s = edit.DecodeFrom(record);
|
||||
if (!s.ok()) {
|
||||
printf("%s\n", s.ToString().c_str());
|
||||
return;
|
||||
}
|
||||
printf("%s", edit.DebugString().c_str());
|
||||
}
|
||||
|
||||
// Dumps a descriptor file by printing every record through
// VersionEditPrinter.  Returns the result of PrintLogContents().
bool DumpDescriptor(Env* env, const std::string& fname) {
  const bool dumped = PrintLogContents(env, fname, VersionEditPrinter);
  return dumped;
}
|
||||
|
||||
// Dumps every entry of the table file "fname" to stdout.
// Returns false (after printing the status to stderr) if the file size
// cannot be obtained, the file cannot be opened, or the table cannot be
// opened; returns true otherwise, even if the iterator reported an error
// (which is printed).
bool DumpTable(Env* env, const std::string& fname) {
  uint64_t table_size;
  RandomAccessFile* table_file = NULL;
  Table* sstable = NULL;

  Status status = env->GetFileSize(fname, &table_size);
  if (status.ok()) {
    status = env->NewRandomAccessFile(fname, &table_file);
  }
  if (status.ok()) {
    // The default comparator is used here and may differ from the one the
    // database was written with.  That should not matter because only
    // Table operations that need no comparisons are used; in particular
    // neither Seek nor Prev is called.
    status = Table::Open(Options(), table_file, table_size, &sstable);
  }
  if (!status.ok()) {
    fprintf(stderr, "%s\n", status.ToString().c_str());
    delete sstable;
    delete table_file;
    return false;
  }

  ReadOptions read_options;
  read_options.fill_cache = false;
  Iterator* it = sstable->NewIterator(read_options);
  it->SeekToFirst();
  while (it->Valid()) {
    ParsedInternalKey parsed;
    if (ParseInternalKey(it->key(), &parsed)) {
      // Known value types get a symbolic name; anything else is printed as
      // its numeric value.
      char numeric_type[20];
      const char* type_name;
      switch (parsed.type) {
        case kTypeDeletion:
          type_name = "del";
          break;
        case kTypeValue:
          type_name = "val";
          break;
        default:
          snprintf(numeric_type, sizeof(numeric_type), "%d",
                   static_cast<int>(parsed.type));
          type_name = numeric_type;
          break;
      }
      printf("'%s' @ %8llu : %s => '%s'\n",
             EscapeString(parsed.user_key).c_str(),
             static_cast<unsigned long long>(parsed.sequence),
             type_name,
             EscapeString(it->value()).c_str());
    } else {
      printf("badkey '%s' => '%s'\n",
             EscapeString(it->key()).c_str(),
             EscapeString(it->value()).c_str());
    }
    it->Next();
  }

  status = it->status();
  if (!status.ok()) {
    printf("iterator error: %s\n", status.ToString().c_str());
  }

  delete it;
  delete sstable;
  delete table_file;
  return true;
}
|
||||
|
||||
// Dumps a single file, choosing the dumper from the type guessed from its
// name.  Returns false when the type is unknown or not dump-able (a message
// is printed to stderr), otherwise the chosen dumper's result.
bool DumpFile(Env* env, const std::string& fname) {
  FileType detected;
  if (!GuessType(fname, &detected)) {
    fprintf(stderr, "%s: unknown file type\n", fname.c_str());
    return false;
  }

  if (detected == kLogFile) {
    return DumpLog(env, fname);
  }
  if (detected == kDescriptorFile) {
    return DumpDescriptor(env, fname);
  }
  if (detected == kTableFile) {
    return DumpTable(env, fname);
  }

  fprintf(stderr, "%s: not a dump-able file type\n", fname.c_str());
  return false;
}
|
||||
|
||||
// Dumps each of the "num" files named in "files".  Every file is attempted
// even after a failure; returns true only if all dumps succeeded.
bool HandleDumpCommand(Env* env, char** files, int num) {
  bool all_dumped = true;
  int index = 0;
  while (index < num) {
    if (!DumpFile(env, files[index])) {
      all_dumped = false;
    }
    ++index;
  }
  return all_dumped;
}
|
||||
|
||||
}
|
||||
} // namespace hyperleveldb
|
||||
|
||||
// Prints the command-line help text to stderr.
static void Usage() {
  static const char kUsageText[] =
      "Usage: leveldbutil command...\n"
      " dump files... -- dump contents of specified files\n";
  fprintf(stderr, "%s", kUsageText);
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
leveldb::Env* env = leveldb::Env::Default();
|
||||
bool ok = true;
|
||||
if (argc < 2) {
|
||||
Usage();
|
||||
ok = false;
|
||||
} else {
|
||||
std::string command = argv[1];
|
||||
if (command == "dump") {
|
||||
ok = leveldb::HandleDumpCommand(env, argv+2, argc-2);
|
||||
} else {
|
||||
Usage();
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
return (ok ? 0 : 1);
|
||||
}
|
||||
35
src/hyperleveldb/db/log_format.h
Normal file
35
src/hyperleveldb/db/log_format.h
Normal file
@@ -0,0 +1,35 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Log format information shared by reader and writer.
|
||||
// See ../doc/log_format.txt for more detail.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_LOG_FORMAT_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_LOG_FORMAT_H_
|
||||
|
||||
namespace hyperleveldb {
|
||||
namespace log {
|
||||
|
||||
// Type tag of a physical log record.
enum RecordType {
  kZeroType = 0,    // Reserved for preallocated files.
  kFullType = 1,

  // Types used for fragments.
  kFirstType = 2,
  kMiddleType = 3,
  kLastType = 4
};
|
||||
// Largest value in RecordType.
static const int kMaxRecordType = kLastType;

// Size in bytes of one log block.
static const int kBlockSize = 32 * 1024;

// Record header layout: checksum (4 bytes), type (1 byte), length (2 bytes).
static const int kHeaderSize = 4 + 1 + 2;
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_LOG_FORMAT_H_
|
||||
259
src/hyperleveldb/db/log_reader.cc
Normal file
259
src/hyperleveldb/db/log_reader.cc
Normal file
@@ -0,0 +1,259 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "log_reader.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/coding.h"
|
||||
#include "../util/crc32c.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
namespace log {
|
||||
|
||||
// Out-of-line definition of the Reporter destructor; nothing to release.
Reader::Reporter::~Reporter() {}
|
||||
|
||||
// Stores the file, reporter, checksum flag and initial offset, allocates a
// kBlockSize read buffer, and starts with an empty buffer, eof_ cleared and
// both tracked offsets at zero.
Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
               uint64_t initial_offset)
    : file_(file),
      reporter_(reporter),
      checksum_(checksum),
      backing_store_(new char[kBlockSize]),  // released in ~Reader()
      buffer_(),
      eof_(false),
      last_record_offset_(0),
      end_of_buffer_offset_(0),
      initial_offset_(initial_offset) {}
|
||||
|
||||
// Releases the read buffer allocated by the constructor.  The file and the
// reporter are not deleted here.
Reader::~Reader() { delete[] backing_store_; }
|
||||
|
||||
bool Reader::SkipToInitialBlock() {
|
||||
size_t offset_in_block = initial_offset_ % kBlockSize;
|
||||
uint64_t block_start_location = initial_offset_ - offset_in_block;
|
||||
|
||||
// Don't search a block if we'd be in the trailer
|
||||
if (offset_in_block > kBlockSize - 6) {
|
||||
offset_in_block = 0;
|
||||
block_start_location += kBlockSize;
|
||||
}
|
||||
|
||||
end_of_buffer_offset_ = block_start_location;
|
||||
|
||||
// Skip to start of first block that can contain the initial record
|
||||
if (block_start_location > 0) {
|
||||
Status skip_status = file_->Skip(block_start_location);
|
||||
if (!skip_status.ok()) {
|
||||
ReportDrop(block_start_location, skip_status);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads the next logical record into *record, reassembling fragmented
// records in *scratch.  Returns true when a record was produced and false
// at end of input (or when the initial skip fails).  Corrupted or
// incomplete data is reported through ReportCorruption() and skipped.
bool Reader::ReadRecord(Slice* record, std::string* scratch) {
  if (last_record_offset_ < initial_offset_ && !SkipToInitialBlock()) {
    return false;
  }

  scratch->clear();
  record->clear();

  // True while the fragments of a multi-part record are being collected.
  bool assembling = false;
  // Offset of the logical record being read; 0 is only a placeholder that
  // keeps compilers from warning.
  uint64_t logical_start = 0;

  Slice piece;
  for (;;) {
    const uint64_t physical_start = end_of_buffer_offset_ - buffer_.size();
    const unsigned int kind = ReadPhysicalRecord(&piece);
    switch (kind) {
      case kFullType:
        // Earlier versions of log::Writer could emit an empty kFirstType
        // record at the tail of a block followed by a kFullType or
        // kFirstType record at the start of the next block.  Such an empty
        // partial record is tolerated; a non-empty one is corruption.
        if (assembling && !scratch->empty()) {
          ReportCorruption(scratch->size(), "partial record without end(1)");
        }
        scratch->clear();
        *record = piece;
        last_record_offset_ = physical_start;
        return true;

      case kFirstType:
        // Same tolerance for an empty leading fragment as for kFullType.
        if (assembling && !scratch->empty()) {
          ReportCorruption(scratch->size(), "partial record without end(2)");
        }
        logical_start = physical_start;
        scratch->assign(piece.data(), piece.size());
        assembling = true;
        break;

      case kMiddleType:
        if (assembling) {
          scratch->append(piece.data(), piece.size());
        } else {
          ReportCorruption(piece.size(),
                           "missing start of fragmented record(1)");
        }
        break;

      case kLastType:
        if (assembling) {
          scratch->append(piece.data(), piece.size());
          *record = Slice(*scratch);
          last_record_offset_ = logical_start;
          return true;
        }
        ReportCorruption(piece.size(),
                         "missing start of fragmented record(2)");
        break;

      case kEof:
        if (assembling) {
          ReportCorruption(scratch->size(), "partial record without end(3)");
          scratch->clear();
        }
        return false;

      case kBadRecord:
        if (assembling) {
          ReportCorruption(scratch->size(), "error in middle of record");
          assembling = false;
          scratch->clear();
        }
        break;

      default: {
        char message[40];
        snprintf(message, sizeof(message), "unknown record type %u", kind);
        const size_t dropped =
            piece.size() + (assembling ? scratch->size() : 0);
        ReportCorruption(dropped, message);
        assembling = false;
        scratch->clear();
        break;
      }
    }
  }
  return false;
}
|
||||
|
||||
uint64_t Reader::LastRecordOffset() {
|
||||
return last_record_offset_;
|
||||
}
|
||||
|
||||
void Reader::ReportCorruption(size_t bytes, const char* reason) {
|
||||
ReportDrop(bytes, Status::Corruption(reason));
|
||||
}
|
||||
|
||||
void Reader::ReportDrop(size_t bytes, const Status& reason) {
|
||||
if (reporter_ != NULL &&
|
||||
end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) {
|
||||
reporter_->Corruption(bytes, reason);
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the next physical record from the file.
//
// On success stores the record payload in *result and returns the record's
// type byte.  Returns kEof when no further data is available (or a read
// failed), and kBadRecord for a record that must be skipped: bad length,
// checksum mismatch, zero-type/zero-length filler, or a record that started
// before initial_offset_.
unsigned int Reader::ReadPhysicalRecord(Slice* result) {
  while (true) {
    if (buffer_.size() < kHeaderSize) {
      if (!eof_) {
        // Last read was a full read, so this is a trailer to skip
        buffer_.clear();
        Status status = file_->Read(kBlockSize, &buffer_, backing_store_);
        end_of_buffer_offset_ += buffer_.size();
        if (!status.ok()) {
          buffer_.clear();
          ReportDrop(kBlockSize, status);
          eof_ = true;
          return kEof;
        } else if (buffer_.size() < kBlockSize) {
          // A short read marks the end of the file; what was read is still
          // processed on the next loop iteration.
          eof_ = true;
        }
        continue;
      } else if (buffer_.size() == 0) {
        // End of file
        return kEof;
      } else {
        // Fewer than kHeaderSize bytes remain at the end of the file.
        size_t drop_size = buffer_.size();
        buffer_.clear();
        ReportCorruption(drop_size, "truncated record at end of file");
        return kEof;
      }
    }

    // Parse the header
    const char* header = buffer_.data();
    const uint32_t a = static_cast<uint32_t>(header[4]) & 0xff;
    const uint32_t b = static_cast<uint32_t>(header[5]) & 0xff;
    // Read the type byte as unsigned: with a signed plain char, a byte of
    // 0x80 or above would otherwise sign-extend into a huge value.
    const unsigned int type = static_cast<unsigned char>(header[6]);
    const uint32_t length = a | (b << 8);
    if (kHeaderSize + length > buffer_.size()) {
      size_t drop_size = buffer_.size();
      buffer_.clear();
      ReportCorruption(drop_size, "bad record length");
      return kBadRecord;
    }

    if (type == kZeroType && length == 0) {
      // Skip zero length record without reporting any drops since
      // such records are produced by the mmap based writing code in
      // env_posix.cc that preallocates file regions.
      buffer_.clear();
      return kBadRecord;
    }

    // Check crc
    if (checksum_) {
      uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
      // The checksum covers the type byte and the payload.
      uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
      if (actual_crc != expected_crc) {
        // Drop the rest of the buffer since "length" itself may have
        // been corrupted and if we trust it, we could find some
        // fragment of a real log record that just happens to look
        // like a valid log record.
        size_t drop_size = buffer_.size();
        buffer_.clear();
        ReportCorruption(drop_size, "checksum mismatch");
        return kBadRecord;
      }
    }

    buffer_.remove_prefix(kHeaderSize + length);

    // Skip physical record that started before initial_offset_
    if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length <
        initial_offset_) {
      result->clear();
      return kBadRecord;
    }

    *result = Slice(header + kHeaderSize, length);
    return type;
  }
}
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
108
src/hyperleveldb/db/log_reader.h
Normal file
108
src/hyperleveldb/db/log_reader.h
Normal file
@@ -0,0 +1,108 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_LOG_READER_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_LOG_READER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "log_format.h"
|
||||
#include "../hyperleveldb/slice.h"
|
||||
#include "../hyperleveldb/status.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class SequentialFile;
|
||||
|
||||
namespace log {
|
||||
|
||||
class Reader {
|
||||
public:
|
||||
// Interface for reporting errors.
|
||||
class Reporter {
|
||||
public:
|
||||
virtual ~Reporter();
|
||||
|
||||
// Some corruption was detected. "size" is the approximate number
|
||||
// of bytes dropped due to the corruption.
|
||||
virtual void Corruption(size_t bytes, const Status& status) = 0;
|
||||
};
|
||||
|
||||
// Create a reader that will return log records from "*file".
|
||||
// "*file" must remain live while this Reader is in use.
|
||||
//
|
||||
// If "reporter" is non-NULL, it is notified whenever some data is
|
||||
// dropped due to a detected corruption. "*reporter" must remain
|
||||
// live while this Reader is in use.
|
||||
//
|
||||
// If "checksum" is true, verify checksums if available.
|
||||
//
|
||||
// The Reader will start reading at the first record located at physical
|
||||
// position >= initial_offset within the file.
|
||||
Reader(SequentialFile* file, Reporter* reporter, bool checksum,
|
||||
uint64_t initial_offset);
|
||||
|
||||
~Reader();
|
||||
|
||||
// Read the next record into *record. Returns true if read
|
||||
// successfully, false if we hit end of the input. May use
|
||||
// "*scratch" as temporary storage. The contents filled in *record
|
||||
// will only be valid until the next mutating operation on this
|
||||
// reader or the next mutation to *scratch.
|
||||
bool ReadRecord(Slice* record, std::string* scratch);
|
||||
|
||||
// Returns the physical offset of the last record returned by ReadRecord.
|
||||
//
|
||||
// Undefined before the first call to ReadRecord.
|
||||
uint64_t LastRecordOffset();
|
||||
|
||||
private:
|
||||
SequentialFile* const file_;
|
||||
Reporter* const reporter_;
|
||||
bool const checksum_;
|
||||
char* const backing_store_;
|
||||
Slice buffer_;
|
||||
bool eof_; // Last Read() indicated EOF by returning < kBlockSize
|
||||
|
||||
// Offset of the last record returned by ReadRecord.
|
||||
uint64_t last_record_offset_;
|
||||
// Offset of the first location past the end of buffer_.
|
||||
uint64_t end_of_buffer_offset_;
|
||||
|
||||
// Offset at which to start looking for the first record to return
|
||||
uint64_t const initial_offset_;
|
||||
|
||||
// Extend record types with the following special values
|
||||
enum {
|
||||
kEof = kMaxRecordType + 1,
|
||||
// Returned whenever we find an invalid physical record.
|
||||
// Currently there are three situations in which this happens:
|
||||
// * The record has an invalid CRC (ReadPhysicalRecord reports a drop)
|
||||
// * The record is a 0-length record (No drop is reported)
|
||||
// * The record is below constructor's initial_offset (No drop is reported)
|
||||
kBadRecord = kMaxRecordType + 2
|
||||
};
|
||||
|
||||
// Skips all blocks that are completely before "initial_offset_".
|
||||
//
|
||||
// Returns true on success. Handles reporting.
|
||||
bool SkipToInitialBlock();
|
||||
|
||||
// Return type, or one of the preceding special values
|
||||
unsigned int ReadPhysicalRecord(Slice* result);
|
||||
|
||||
// Reports dropped bytes to the reporter.
|
||||
// buffer_ must be updated to remove the dropped bytes prior to invocation.
|
||||
void ReportCorruption(size_t bytes, const char* reason);
|
||||
void ReportDrop(size_t bytes, const Status& reason);
|
||||
|
||||
// No copying allowed
|
||||
Reader(const Reader&);
|
||||
void operator=(const Reader&);
|
||||
};
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_LOG_READER_H_
|
||||
509
src/hyperleveldb/db/log_test.cc
Normal file
509
src/hyperleveldb/db/log_test.cc
Normal file
@@ -0,0 +1,509 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "log_reader.h"
|
||||
#include "log_writer.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/coding.h"
|
||||
#include "../util/crc32c.h"
|
||||
#include "../util/random.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
namespace log {
|
||||
|
||||
// Construct a string of exactly "n" characters by repeating the supplied
// partial string and truncating the final repetition as needed.
// If "partial_string" is empty there is nothing to repeat; the result is
// then "n" NUL characters (previously this case never terminated).
static std::string BigString(const std::string& partial_string, size_t n) {
  std::string result;
  if (!partial_string.empty()) {
    while (result.size() < n) {
      result.append(partial_string);
    }
  }
  result.resize(n);
  return result;
}
|
||||
|
||||
// Formats "n" in decimal followed by a '.', e.g. 42 -> "42.".
static std::string NumberString(int n) {
  char text[50];
  snprintf(text, sizeof(text), "%d.", n);
  std::string formatted(text);
  return formatted;
}
|
||||
|
||||
// Return a skewed potentially long string
|
||||
static std::string RandomSkewedString(int i, Random* rnd) {
|
||||
return BigString(NumberString(i), rnd->Skewed(17));
|
||||
}
|
||||
|
||||
class LogTest {
|
||||
private:
|
||||
class StringDest : public WritableFile {
|
||||
public:
|
||||
std::string contents_;
|
||||
|
||||
virtual Status Close() { return Status::OK(); }
|
||||
virtual Status Flush() { return Status::OK(); }
|
||||
virtual Status Sync() { return Status::OK(); }
|
||||
virtual Status WriteAt(uint64_t offset, const Slice& slice) {
|
||||
std::string tmp = contents_.substr(0, offset);
|
||||
tmp.append(slice.data(), slice.size());
|
||||
if (contents_.size() > offset + slice.size()) {
|
||||
tmp += contents_.substr(offset + slice.size());
|
||||
}
|
||||
contents_ = tmp;
|
||||
return Status::OK();
|
||||
}
|
||||
virtual Status Append(const Slice& slice) {
|
||||
contents_.append(slice.data(), slice.size());
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
class StringSource : public SequentialFile {
|
||||
public:
|
||||
Slice contents_;
|
||||
bool force_error_;
|
||||
bool returned_partial_;
|
||||
StringSource() : force_error_(false), returned_partial_(false) { }
|
||||
|
||||
virtual Status Read(size_t n, Slice* result, char* scratch) {
|
||||
ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error";
|
||||
|
||||
if (force_error_) {
|
||||
force_error_ = false;
|
||||
returned_partial_ = true;
|
||||
return Status::Corruption("read error");
|
||||
}
|
||||
|
||||
if (contents_.size() < n) {
|
||||
n = contents_.size();
|
||||
returned_partial_ = true;
|
||||
}
|
||||
*result = Slice(contents_.data(), n);
|
||||
contents_.remove_prefix(n);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status Skip(uint64_t n) {
|
||||
if (n > contents_.size()) {
|
||||
contents_.clear();
|
||||
return Status::NotFound("in-memory file skipepd past end");
|
||||
}
|
||||
|
||||
contents_.remove_prefix(n);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
class ReportCollector : public Reader::Reporter {
|
||||
public:
|
||||
size_t dropped_bytes_;
|
||||
std::string message_;
|
||||
|
||||
ReportCollector() : dropped_bytes_(0) { }
|
||||
virtual void Corruption(size_t bytes, const Status& status) {
|
||||
dropped_bytes_ += bytes;
|
||||
message_.append(status.ToString());
|
||||
}
|
||||
};
|
||||
|
||||
StringDest dest_;
|
||||
StringSource source_;
|
||||
ReportCollector report_;
|
||||
bool reading_;
|
||||
Writer writer_;
|
||||
Reader reader_;
|
||||
|
||||
// Record metadata for testing initial offset functionality
|
||||
static size_t initial_offset_record_sizes_[];
|
||||
static uint64_t initial_offset_last_record_offsets_[];
|
||||
|
||||
public:
|
||||
LogTest() : reading_(false),
|
||||
writer_(&dest_),
|
||||
reader_(&source_, &report_, true/*checksum*/,
|
||||
0/*initial_offset*/) {
|
||||
}
|
||||
|
||||
void Write(const std::string& msg) {
|
||||
ASSERT_TRUE(!reading_) << "Write() after starting to read";
|
||||
writer_.AddRecord(Slice(msg));
|
||||
}
|
||||
|
||||
size_t WrittenBytes() const {
|
||||
return dest_.contents_.size();
|
||||
}
|
||||
|
||||
std::string Read() {
|
||||
if (!reading_) {
|
||||
reading_ = true;
|
||||
source_.contents_ = Slice(dest_.contents_);
|
||||
}
|
||||
std::string scratch;
|
||||
Slice record;
|
||||
if (reader_.ReadRecord(&record, &scratch)) {
|
||||
return record.ToString();
|
||||
} else {
|
||||
return "EOF";
|
||||
}
|
||||
}
|
||||
|
||||
void IncrementByte(int offset, int delta) {
|
||||
dest_.contents_[offset] += delta;
|
||||
}
|
||||
|
||||
void SetByte(int offset, char new_byte) {
|
||||
dest_.contents_[offset] = new_byte;
|
||||
}
|
||||
|
||||
void ShrinkSize(int bytes) {
|
||||
dest_.contents_.resize(dest_.contents_.size() - bytes);
|
||||
}
|
||||
|
||||
void FixChecksum(int header_offset, int len) {
|
||||
// Compute crc of type/len/data
|
||||
uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len);
|
||||
crc = crc32c::Mask(crc);
|
||||
EncodeFixed32(&dest_.contents_[header_offset], crc);
|
||||
}
|
||||
|
||||
void ForceError() {
|
||||
source_.force_error_ = true;
|
||||
}
|
||||
|
||||
size_t DroppedBytes() const {
|
||||
return report_.dropped_bytes_;
|
||||
}
|
||||
|
||||
std::string ReportMessage() const {
|
||||
return report_.message_;
|
||||
}
|
||||
|
||||
// Returns OK iff recorded error message contains "msg"
|
||||
std::string MatchError(const std::string& msg) const {
|
||||
if (report_.message_.find(msg) == std::string::npos) {
|
||||
return report_.message_;
|
||||
} else {
|
||||
return "OK";
|
||||
}
|
||||
}
|
||||
|
||||
void WriteInitialOffsetLog() {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
std::string record(initial_offset_record_sizes_[i],
|
||||
static_cast<char>('a' + i));
|
||||
Write(record);
|
||||
}
|
||||
}
|
||||
|
||||
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
|
||||
WriteInitialOffsetLog();
|
||||
reading_ = true;
|
||||
source_.contents_ = Slice(dest_.contents_);
|
||||
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
|
||||
WrittenBytes() + offset_past_end);
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch));
|
||||
delete offset_reader;
|
||||
}
|
||||
|
||||
void CheckInitialOffsetRecord(uint64_t initial_offset,
|
||||
int expected_record_offset) {
|
||||
WriteInitialOffsetLog();
|
||||
reading_ = true;
|
||||
source_.contents_ = Slice(dest_.contents_);
|
||||
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
|
||||
initial_offset);
|
||||
Slice record;
|
||||
std::string scratch;
|
||||
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
|
||||
ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset],
|
||||
record.size());
|
||||
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
|
||||
offset_reader->LastRecordOffset());
|
||||
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
|
||||
delete offset_reader;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
size_t LogTest::initial_offset_record_sizes_[] =
|
||||
{10000, // Two sizable records in first block
|
||||
10000,
|
||||
2 * log::kBlockSize - 1000, // Span three blocks
|
||||
1};
|
||||
|
||||
uint64_t LogTest::initial_offset_last_record_offsets_[] =
|
||||
{0,
|
||||
kHeaderSize + 10000,
|
||||
2 * (kHeaderSize + 10000),
|
||||
2 * (kHeaderSize + 10000) +
|
||||
(2 * log::kBlockSize - 1000) + 3 * kHeaderSize};
|
||||
|
||||
|
||||
// Reading a log to which nothing was written yields "EOF" immediately.
TEST(LogTest, Empty) {
  const std::string first = Read();
  ASSERT_EQ("EOF", first);
}
|
||||
|
||||
// Records, including an empty one, are read back in the order written.
TEST(LogTest, ReadWrite) {
  static const char* const kRecords[] = {"foo", "bar", "", "xxxx"};
  const size_t kCount = sizeof(kRecords) / sizeof(kRecords[0]);

  size_t i;
  for (i = 0; i < kCount; i++) {
    Write(kRecords[i]);
  }
  for (i = 0; i < kCount; i++) {
    ASSERT_EQ(std::string(kRecords[i]), Read());
  }
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ("EOF", Read());  // Make sure reads at eof work
}
|
||||
|
||||
// Writes 100000 small records and reads them all back in order.
TEST(LogTest, ManyBlocks) {
  const int kRecordCount = 100000;
  int i;
  for (i = 0; i < kRecordCount; ++i) {
    Write(NumberString(i));
  }
  for (i = 0; i < kRecordCount; ++i) {
    ASSERT_EQ(NumberString(i), Read());
  }
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// Records of 5, 50000 and 100000 bytes are read back intact.
TEST(LogTest, Fragmentation) {
  const std::string medium = BigString("medium", 50000);
  const std::string large = BigString("large", 100000);

  Write("small");
  Write(medium);
  Write(large);

  ASSERT_EQ("small", Read());
  ASSERT_EQ(medium, Read());
  ASSERT_EQ(large, Read());
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// The first record leaves exactly kHeaderSize bytes in the block, i.e. a
// trailer exactly as long as an empty record; an empty record and a normal
// one follow.
TEST(LogTest, MarginalTrailer) {
  const int payload = kBlockSize - 2 * kHeaderSize;
  const std::string filler = BigString("foo", payload);

  Write(filler);
  ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
  Write("");
  Write("bar");

  ASSERT_EQ(filler, Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// Same layout as MarginalTrailer (trailer exactly as long as an empty
// record) but followed directly by a non-empty record; nothing may be
// reported as dropped.
TEST(LogTest, MarginalTrailer2) {
  const int payload = kBlockSize - 2 * kHeaderSize;
  const std::string filler = BigString("foo", payload);

  Write(filler);
  ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes());
  Write("bar");

  ASSERT_EQ(filler, Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(0, DroppedBytes());
  ASSERT_EQ("", ReportMessage());
}
|
||||
|
||||
// The first record leaves fewer than kHeaderSize bytes in the block; the
// records written afterwards must still be read back.
TEST(LogTest, ShortTrailer) {
  const int payload = kBlockSize - 2 * kHeaderSize + 4;
  const std::string filler = BigString("foo", payload);

  Write(filler);
  ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());
  Write("");
  Write("bar");

  ASSERT_EQ(filler, Read());
  ASSERT_EQ("", Read());
  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// The log ends with fewer than kHeaderSize bytes left in the block; the
// single record is returned and then EOF is reported.
TEST(LogTest, AlignedEof) {
  const int payload = kBlockSize - 2 * kHeaderSize + 4;
  const std::string filler = BigString("foo", payload);

  Write(filler);
  ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes());

  ASSERT_EQ(filler, Read());
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// Writes 500 pseudo-random records, then regenerates the same sequence from
// an identically seeded generator and compares it with what is read back.
TEST(LogTest, RandomRead) {
  const int kNumRecords = 500;
  const int kSeed = 301;

  Random write_rnd(kSeed);
  int index = 0;
  while (index < kNumRecords) {
    Write(RandomSkewedString(index, &write_rnd));
    ++index;
  }

  Random read_rnd(kSeed);
  for (index = 0; index != kNumRecords; ++index) {
    ASSERT_EQ(RandomSkewedString(index, &read_rnd), Read());
  }
  ASSERT_EQ("EOF", Read());
}
|
||||
|
||||
// Tests of all the error paths in log_reader.cc follow:
|
||||
|
||||
// After ForceError() the single "foo" record must not be returned: the test
// expects EOF, kBlockSize dropped bytes and a reported "read error".
TEST(LogTest, ReadError) {
  Write("foo");
  ForceError();

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kBlockSize, DroppedBytes());
  ASSERT_EQ("OK", MatchError("read error"));
}
|
||||
|
||||
// Corrupts the record type of the only record and expects the 3-byte payload
// to be dropped with an "unknown record type" error.
TEST(LogTest, BadRecordType) {
  Write("foo");

  // The type lives in header[6]; bump it by 100.
  IncrementByte(6, 100);
  // NOTE(review): presumably recomputes the checksum for the 3-byte payload
  // of the record at offset 0 -- confirm against the fixture.
  FixChecksum(0, 3);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("unknown record type"));
}
|
||||
|
||||
// Chops the final record off in the middle of its header and expects the
// remaining kHeaderSize - 1 bytes to be reported as a truncated record.
TEST(LogTest, TruncatedTrailingRecord) {
  Write("foo");
  // Drop all payload as well as a header byte.
  ShrinkSize(4);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kHeaderSize - 1, DroppedBytes());
  ASSERT_EQ("OK", MatchError("truncated record at end of file"));
}
|
||||
|
||||
// Shrinks the file by one byte after writing "foo", so the record header
// describes more payload than is present. Expects the remaining
// kHeaderSize + 2 bytes to be dropped with a "bad record length" error.
TEST(LogTest, BadLength) {
  Write("foo");
  ShrinkSize(1);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(kHeaderSize + 2, DroppedBytes());
  ASSERT_EQ("OK", MatchError("bad record length"));
}
|
||||
|
||||
// Alters byte 0 of the file without fixing the checksum afterwards and
// expects the record to be rejected with a "checksum mismatch" error and
// 10 bytes reported as dropped.
TEST(LogTest, ChecksumMismatch) {
  Write("foo");
  IncrementByte(0, 10);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(10, DroppedBytes());
  ASSERT_EQ("OK", MatchError("checksum mismatch"));
}
|
||||
|
||||
// Relabels a complete record as a "middle" fragment (header byte 6) and
// expects it to be dropped with a "missing start" error.
TEST(LogTest, UnexpectedMiddleType) {
  Write("foo");
  SetByte(6, kMiddleType);
  FixChecksum(0, 3);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}
|
||||
|
||||
// Relabels a complete record as a "last" fragment (header byte 6) and
// expects it to be dropped with a "missing start" error.
TEST(LogTest, UnexpectedLastType) {
  Write("foo");
  SetByte(6, kLastType);
  FixChecksum(0, 3);

  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("missing start"));
}
|
||||
|
||||
// Turns the first record ("foo") into a "first" fragment so that the full
// record "bar" arrives while a fragmented record is still open. Expects
// "bar" to be returned and the 3 bytes of "foo" to be dropped with a
// "partial record without end" error.
TEST(LogTest, UnexpectedFullType) {
  Write("foo");
  Write("bar");
  SetByte(6, kFirstType);
  FixChecksum(0, 3);

  ASSERT_EQ("bar", Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}
|
||||
|
||||
// Turns the first record ("foo") into a "first" fragment and follows it with
// a 100000-byte record, which is larger than one block. Expects the large
// record to be returned and the 3 bytes of "foo" to be dropped with a
// "partial record without end" error.
TEST(LogTest, UnexpectedFirstType) {
  const std::string big = BigString("bar", 100000);

  Write("foo");
  Write(big);
  SetByte(6, kFirstType);
  FixChecksum(0, 3);

  ASSERT_EQ(big, Read());
  ASSERT_EQ("EOF", Read());
  ASSERT_EQ(3, DroppedBytes());
  ASSERT_EQ("OK", MatchError("partial record without end"));
}
|
||||
|
||||
// Consider two fragmented records:
//    first(R1) last(R1) first(R2) last(R2)
// where the middle two fragments disappear.  first(R1) and last(R2) must not
// be joined and returned as a valid record.
TEST(LogTest, ErrorJoinsRecords) {
  // Two records that each span two blocks, then a small one.
  Write(BigString("foo", kBlockSize));
  Write(BigString("bar", kBlockSize));
  Write("correct");

  // Overwrite every byte of the middle (second) block.
  const int wipe_begin = kBlockSize;
  const int wipe_end = 2*kBlockSize;
  int offset = wipe_begin;
  while (offset < wipe_end) {
    SetByte(offset, 'x');
    ++offset;
  }

  // Only the small trailing record survives.
  ASSERT_EQ("correct", Read());
  ASSERT_EQ("EOF", Read());

  // Between two blocks and two blocks plus 100 bytes must be reported lost.
  const int dropped = DroppedBytes();
  ASSERT_LE(dropped, 2*kBlockSize + 100);
  ASSERT_GE(dropped, 2*kBlockSize);
}
|
||||
|
||||
// Initial-offset tests.  Each one forwards a byte offset and a record index
// to a fixture helper.
// NOTE(review): presumably the helper starts reading at the given offset and
// expects the record with the given index to be the first one returned --
// confirm against CheckInitialOffsetRecord in the fixture.

TEST(LogTest, ReadStart) {
  CheckInitialOffsetRecord(0, 0);
}

TEST(LogTest, ReadSecondOneOff) {
  CheckInitialOffsetRecord(1, 1);
}

TEST(LogTest, ReadSecondTenThousand) {
  CheckInitialOffsetRecord(10000, 1);
}

TEST(LogTest, ReadSecondStart) {
  CheckInitialOffsetRecord(10007, 1);
}

TEST(LogTest, ReadThirdOneOff) {
  CheckInitialOffsetRecord(10008, 2);
}

TEST(LogTest, ReadThirdStart) {
  CheckInitialOffsetRecord(20014, 2);
}

TEST(LogTest, ReadFourthOneOff) {
  CheckInitialOffsetRecord(20015, 3);
}

TEST(LogTest, ReadFourthFirstBlockTrailer) {
  CheckInitialOffsetRecord(log::kBlockSize - 4, 3);
}

TEST(LogTest, ReadFourthMiddleBlock) {
  CheckInitialOffsetRecord(log::kBlockSize + 1, 3);
}

TEST(LogTest, ReadFourthLastBlock) {
  CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3);
}

TEST(LogTest, ReadFourthStart) {
  CheckInitialOffsetRecord(
      2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
      3);
}

// NOTE(review): the two tests below presumably pass an offset measured from
// the end of the written data -- confirm against the fixture.
TEST(LogTest, ReadEnd) {
  CheckOffsetPastEndReturnsNoRecords(0);
}

TEST(LogTest, ReadPastEnd) {
  CheckOffsetPastEndReturnsNoRecords(5);
}
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
132
src/hyperleveldb/db/log_writer.cc
Normal file
132
src/hyperleveldb/db/log_writer.cc
Normal file
@@ -0,0 +1,132 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "log_writer.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/coding.h"
|
||||
#include "../util/crc32c.h"
|
||||
#include "../util/mutexlock.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
namespace log {
|
||||
|
||||
// Binds the writer to "dest", starts at file offset 0 and pre-computes the
// crc32c of every record-type byte so that the per-record checksum can later
// be extended from that cached value.
Writer::Writer(WritableFile* dest)
    : dest_(dest),
      offset_mtx_(),
      offset_(0) {
  int type = 0;
  while (type <= kMaxRecordType) {
    const char type_byte = static_cast<char>(type);
    type_crc_[type] = crc32c::Value(&type_byte, sizeof(type_byte));
    ++type;
  }
}

// Nothing to release here: the destructor does not touch dest_.
Writer::~Writer() {
}
|
||||
|
||||
// Appends "slice" to the log as one logical record and returns the status of
// the underlying file writes (the first failure stops the operation).
//
// The call works in two phases:
//   1. Under offset_mtx_, walk the block layout to work out how many bytes
//      the record occupies (headers, payload and any block-trailer padding)
//      and advance offset_ past that range.
//   2. With the mutex released, write the trailer padding and the physical
//      record fragments at the reserved offsets through WriteAt().
// Both phases walk the same layout, so they must stay in sync.
Status Writer::AddRecord(const Slice& slice) {
  // computation of block_offset requires a pow2
  assert(kBlockSize == 32768);
  uint64_t start_offset;
  uint64_t end_offset;

  {
    MutexLock l(&offset_mtx_);
    start_offset = offset_;
    end_offset = offset_;
    // compute the new offset_
    uint64_t left = slice.size();
    do {
      uint64_t block_offset = end_offset & (kBlockSize - 1);
      const uint64_t leftover = kBlockSize - block_offset;
      assert(leftover > 0);
      if (leftover < kHeaderSize) {
        // Too little room for a header: the rest of the block is padding.
        end_offset += leftover;
        block_offset = 0;
      }
      // Invariant: we never leave < kHeaderSize bytes in a block.
      // (Written as an addition: the subtraction form is unsigned here and
      // could never fail.)
      assert(block_offset + kHeaderSize <= static_cast<uint64_t>(kBlockSize));

      const uint64_t avail = kBlockSize - block_offset - kHeaderSize;
      const uint64_t fragment_length = (left < avail) ? left : avail;

      end_offset += kHeaderSize + fragment_length;
      left -= fragment_length;
    } while (left > 0);
    offset_ = end_offset;
  }

  const char* ptr = slice.data();
  size_t left = slice.size();
  uint64_t offset = start_offset;

  // Fragment the record if necessary and emit it.  Note that if slice
  // is empty, we still want to iterate once to emit a single
  // zero-length record
  Status s;
  bool begin = true;
  do {
    uint64_t block_offset = offset & (kBlockSize - 1);
    const uint64_t leftover = kBlockSize - block_offset;
    assert(leftover > 0);
    if (leftover < kHeaderSize) {
      // Switch to a new block
      // Fill the trailer (literal below relies on kHeaderSize being 7)
      assert(kHeaderSize == 7);
      s = dest_->WriteAt(offset, Slice("\x00\x00\x00\x00\x00\x00", leftover));
      if (!s.ok()) {
        // Report the failed padding write instead of silently ignoring it.
        break;
      }
      block_offset = 0;
      offset += leftover;
    }
    // Invariant: we never leave < kHeaderSize bytes in a block.
    assert(block_offset + kHeaderSize <= static_cast<uint64_t>(kBlockSize));

    const size_t avail = kBlockSize - block_offset - kHeaderSize;
    const size_t fragment_length = (left < avail) ? left : avail;

    // Pick the fragment type from its position within the logical record.
    RecordType type;
    const bool end = (left == fragment_length);
    if (begin && end) {
      type = kFullType;
    } else if (begin) {
      type = kFirstType;
    } else if (end) {
      type = kLastType;
    } else {
      type = kMiddleType;
    }

    s = EmitPhysicalRecordAt(type, ptr, offset, fragment_length);
    offset += kHeaderSize + fragment_length;
    ptr += fragment_length;
    left -= fragment_length;
    begin = false;
  } while (s.ok() && left > 0);
  return s;
}
|
||||
|
||||
// Writes one physical record (header followed by "n" payload bytes taken
// from "ptr") at absolute file position "offset".
//
// Header layout: bytes 0-3 hold the masked crc32c over the type byte and the
// payload, bytes 4-5 the payload length (low byte first), byte 6 the type.
// Returns the status of the first write that fails, or of the payload write.
Status Writer::EmitPhysicalRecordAt(RecordType t, const char* ptr, uint64_t offset, size_t n) {
  assert(n <= 0xffff);  // Must fit in two bytes

  // Checksum first: extend the cached crc of the type byte over the payload,
  // then mask it for storage.
  const uint32_t stored_crc =
      crc32c::Mask(crc32c::Extend(type_crc_[t], ptr, n));

  char header[kHeaderSize];
  EncodeFixed32(header, stored_crc);
  header[4] = static_cast<char>(n & 0xff);
  header[5] = static_cast<char>(n >> 8);
  header[6] = static_cast<char>(t);

  // Header and payload are written as two positional writes.
  const Status header_status = dest_->WriteAt(offset, Slice(header, kHeaderSize));
  if (!header_status.ok()) {
    return header_status;
  }
  return dest_->WriteAt(offset + kHeaderSize, Slice(ptr, n));
}
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
50
src/hyperleveldb/db/log_writer.h
Normal file
50
src/hyperleveldb/db/log_writer.h
Normal file
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_LOG_WRITER_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_LOG_WRITER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "log_format.h"
|
||||
#include "../hyperleveldb/slice.h"
|
||||
#include "../hyperleveldb/status.h"
|
||||
#include "../port/port.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class WritableFile;
|
||||
|
||||
namespace log {
|
||||
|
||||
class Writer {
|
||||
public:
|
||||
// Create a writer that will append data to "*dest".
|
||||
// "*dest" must be initially empty.
|
||||
// "*dest" must remain live while this Writer is in use.
|
||||
explicit Writer(WritableFile* dest);
|
||||
~Writer();
|
||||
|
||||
Status AddRecord(const Slice& slice);
|
||||
|
||||
private:
|
||||
WritableFile* dest_;
|
||||
port::Mutex offset_mtx_;
|
||||
uint64_t offset_; // Current offset in file
|
||||
|
||||
// crc32c values for all supported record types. These are
|
||||
// pre-computed to reduce the overhead of computing the crc of the
|
||||
// record type stored in the header.
|
||||
uint32_t type_crc_[kMaxRecordType + 1];
|
||||
|
||||
Status EmitPhysicalRecordAt(RecordType type, const char* ptr, uint64_t offset, size_t length);
|
||||
|
||||
// No copying allowed
|
||||
Writer(const Writer&);
|
||||
void operator=(const Writer&);
|
||||
};
|
||||
|
||||
} // namespace log
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_LOG_WRITER_H_
|
||||
160
src/hyperleveldb/db/memtable.cc
Normal file
160
src/hyperleveldb/db/memtable.cc
Normal file
@@ -0,0 +1,160 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "memtable.h"
|
||||
#include "dbformat.h"
|
||||
#include "../hyperleveldb/comparator.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/iterator.h"
|
||||
#include "../util/coding.h"
|
||||
#include "../util/mutexlock.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Decodes the varint32 length stored at "data" and returns a Slice covering
// the bytes that follow it.  The varint is looked for within the first 5
// bytes only; the input is assumed not to be corrupted, so the decode result
// is not checked.
static Slice GetLengthPrefixedSlice(const char* data) {
  uint32_t length;
  const char* const payload = GetVarint32Ptr(data, data + 5, &length);
  return Slice(payload, length);
}
|
||||
|
||||
// Builds an empty memtable ordered by "cmp".  The reference count starts at
// zero, and the skip list is handed this table's comparator and arena.
MemTable::MemTable(const InternalKeyComparator& cmp)
    : comparator_(cmp),
      refs_(0),
      table_(comparator_, &arena_) {
}

// Must only run once every reference has been dropped.
MemTable::~MemTable() {
  assert(refs_ == 0);
}
|
||||
|
||||
size_t MemTable::ApproximateMemoryUsage() {
|
||||
MutexLock l(&mtx_);
|
||||
return arena_.MemoryUsage();
|
||||
}
|
||||
|
||||
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
||||
const {
|
||||
// Internal keys are encoded as length-prefixed strings.
|
||||
Slice a = GetLengthPrefixedSlice(aptr);
|
||||
Slice b = GetLengthPrefixedSlice(bptr);
|
||||
return comparator.Compare(a, b);
|
||||
}
|
||||
|
||||
// Encode a suitable internal key target for "target" and return it.
|
||||
// Uses *scratch as scratch space, and the returned pointer will point
|
||||
// into this scratch space.
|
||||
static const char* EncodeKey(std::string* scratch, const Slice& target) {
|
||||
scratch->clear();
|
||||
PutVarint32(scratch, target.size());
|
||||
scratch->append(target.data(), target.size());
|
||||
return scratch->data();
|
||||
}
|
||||
|
||||
class MemTableIterator: public Iterator {
|
||||
public:
|
||||
explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
|
||||
|
||||
virtual bool Valid() const { return iter_.Valid(); }
|
||||
virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
|
||||
virtual void SeekToFirst() { iter_.SeekToFirst(); }
|
||||
virtual void SeekToLast() { iter_.SeekToLast(); }
|
||||
virtual void Next() { iter_.Next(); }
|
||||
virtual void Prev() { iter_.Prev(); }
|
||||
virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
|
||||
virtual Slice value() const {
|
||||
Slice key_slice = GetLengthPrefixedSlice(iter_.key());
|
||||
return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
|
||||
}
|
||||
|
||||
virtual Status status() const { return Status::OK(); }
|
||||
|
||||
private:
|
||||
MemTable::Table::Iterator iter_;
|
||||
std::string tmp_; // For passing to EncodeKey
|
||||
|
||||
// No copying allowed
|
||||
MemTableIterator(const MemTableIterator&);
|
||||
void operator=(const MemTableIterator&);
|
||||
};
|
||||
|
||||
// Returns a heap-allocated iterator over this memtable's skip list.
Iterator* MemTable::NewIterator() {
  Iterator* const result = new MemTableIterator(&table_);
  return result;
}
|
||||
|
||||
// Encodes (key, sequence number, type, value) as one entry in the arena and
// inserts it into the skip list.
//
// mtx_ is held only for the arena allocation and for the final skip-list
// insertion; the encoding and the construction of the insert hint happen
// without it.
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  const size_t kTagSize = 8;  // fixed64 holding (sequence << 8) | type
  const size_t user_key_len = key.size();
  const size_t value_len = value.size();
  const size_t internal_key_len = user_key_len + kTagSize;
  const size_t encoded_len =
      VarintLength(internal_key_len) + internal_key_len +
      VarintLength(value_len) + value_len;

  char* buf = NULL;
  {
    MutexLock l(&mtx_);
    buf = arena_.Allocate(encoded_len);
  }

  // Internal key: length prefix, user key bytes, then the 8-byte tag.
  char* cursor = EncodeVarint32(buf, internal_key_len);
  memcpy(cursor, key.data(), user_key_len);
  cursor += user_key_len;
  EncodeFixed64(cursor, (s << 8) | type);
  cursor += kTagSize;

  // Value: length prefix, then the bytes.
  cursor = EncodeVarint32(cursor, value_len);
  memcpy(cursor, value.data(), value_len);
  assert((cursor + value_len) - buf == encoded_len);

  Table::InsertHint ih(&table_, buf);

  {
    MutexLock l(&mtx_);
    table_.InsertWithHint(&ih, buf);
  }
}
|
||||
|
||||
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
|
||||
Slice memkey = key.memtable_key();
|
||||
Table::Iterator iter(&table_);
|
||||
iter.Seek(memkey.data());
|
||||
if (iter.Valid()) {
|
||||
// entry format is:
|
||||
// klength varint32
|
||||
// userkey char[klength]
|
||||
// tag uint64
|
||||
// vlength varint32
|
||||
// value char[vlength]
|
||||
// Check that it belongs to same user key. We do not check the
|
||||
// sequence number since the Seek() call above should have skipped
|
||||
// all entries with overly large sequence numbers.
|
||||
const char* entry = iter.key();
|
||||
uint32_t key_length;
|
||||
const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
|
||||
if (comparator_.comparator.user_comparator()->Compare(
|
||||
Slice(key_ptr, key_length - 8),
|
||||
key.user_key()) == 0) {
|
||||
// Correct user key
|
||||
const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
|
||||
switch (static_cast<ValueType>(tag & 0xff)) {
|
||||
case kTypeValue: {
|
||||
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
||||
value->assign(v.data(), v.size());
|
||||
return true;
|
||||
}
|
||||
case kTypeDeletion:
|
||||
*s = Status::NotFound(Slice());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
94
src/hyperleveldb/db/memtable.h
Normal file
94
src/hyperleveldb/db/memtable.h
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_MEMTABLE_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_MEMTABLE_H_
|
||||
|
||||
#include <string>
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "dbformat.h"
|
||||
#include "skiplist.h"
|
||||
#include "../util/arena.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class InternalKeyComparator;
|
||||
class Mutex;
|
||||
class MemTableIterator;
|
||||
|
||||
class MemTable {
|
||||
public:
|
||||
// MemTables are reference counted. The initial reference count
|
||||
// is zero and the caller must call Ref() at least once.
|
||||
explicit MemTable(const InternalKeyComparator& comparator);
|
||||
|
||||
// Increase reference count.
|
||||
// XXX use a release increment if not using GCC intrinsics
|
||||
void Ref() { __sync_add_and_fetch(&refs_, 1); }
|
||||
|
||||
// Drop reference count. Delete if no more references exist.
|
||||
// XXX use an acquire decrement if not using GCC intrinsics
|
||||
void Unref() {
|
||||
int refs = __sync_sub_and_fetch(&refs_, 1);
|
||||
assert(refs >= 0);
|
||||
if (refs <= 0) {
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns an estimate of the number of bytes of data in use by this
|
||||
// data structure.
|
||||
//
|
||||
// REQUIRES: external synchronization to prevent simultaneous
|
||||
// operations on the same MemTable.
|
||||
size_t ApproximateMemoryUsage();
|
||||
|
||||
// Return an iterator that yields the contents of the memtable.
|
||||
//
|
||||
// The caller must ensure that the underlying MemTable remains live
|
||||
// while the returned iterator is live. The keys returned by this
|
||||
// iterator are internal keys encoded by AppendInternalKey in the
|
||||
// db/format.{h,cc} module.
|
||||
Iterator* NewIterator();
|
||||
|
||||
// Add an entry into memtable that maps key to value at the
|
||||
// specified sequence number and with the specified type.
|
||||
// Typically value will be empty if type==kTypeDeletion.
|
||||
void Add(SequenceNumber seq, ValueType type,
|
||||
const Slice& key,
|
||||
const Slice& value);
|
||||
|
||||
// If memtable contains a value for key, store it in *value and return true.
|
||||
// If memtable contains a deletion for key, store a NotFound() error
|
||||
// in *status and return true.
|
||||
// Else, return false.
|
||||
bool Get(const LookupKey& key, std::string* value, Status* s);
|
||||
|
||||
private:
|
||||
~MemTable(); // Private since only Unref() should be used to delete it
|
||||
|
||||
struct KeyComparator {
|
||||
const InternalKeyComparator comparator;
|
||||
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
|
||||
int operator()(const char* a, const char* b) const;
|
||||
};
|
||||
friend class MemTableIterator;
|
||||
friend class MemTableBackwardIterator;
|
||||
|
||||
typedef SkipList<const char*, KeyComparator> Table;
|
||||
|
||||
KeyComparator comparator_;
|
||||
int refs_;
|
||||
port::Mutex mtx_;
|
||||
Arena arena_;
|
||||
Table table_;
|
||||
|
||||
// No copying allowed
|
||||
MemTable(const MemTable&);
|
||||
void operator=(const MemTable&);
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_MEMTABLE_H_
|
||||
389
src/hyperleveldb/db/repair.cc
Normal file
389
src/hyperleveldb/db/repair.cc
Normal file
@@ -0,0 +1,389 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// We recover the contents of the descriptor from the other files we find.
|
||||
// (1) Any log files are first converted to tables
|
||||
// (2) We scan every table to compute
|
||||
// (a) smallest/largest for the table
|
||||
// (b) largest sequence number in the table
|
||||
// (3) We generate descriptor contents:
|
||||
// - log number is set to zero
|
||||
// - next-file-number is set to 1 + largest file number we found
|
||||
// - last-sequence-number is set to largest sequence# found across
|
||||
// all tables (see 2b)
|
||||
// - compaction pointers are cleared
|
||||
// - every table file is added at level 0
|
||||
//
|
||||
// Possible optimization 1:
|
||||
// (a) Compute total size and use to pick appropriate max-level M
|
||||
// (b) Sort tables by largest sequence# in the table
|
||||
// (c) For each table: if it overlaps earlier table, place in level-0,
|
||||
// else place in level-M.
|
||||
// Possible optimization 2:
|
||||
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
||||
// in the table's meta section to speed up ScanTable.
|
||||
|
||||
#include "builder.h"
|
||||
#include "db_impl.h"
|
||||
#include "dbformat.h"
|
||||
#include "filename.h"
|
||||
#include "log_reader.h"
|
||||
#include "log_writer.h"
|
||||
#include "memtable.h"
|
||||
#include "table_cache.h"
|
||||
#include "version_edit.h"
|
||||
#include "write_batch_internal.h"
|
||||
#include "../hyperleveldb/comparator.h"
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
namespace {
|
||||
|
||||
class Repairer {
|
||||
public:
|
||||
Repairer(const std::string& dbname, const Options& options)
|
||||
: dbname_(dbname),
|
||||
env_(options.env),
|
||||
icmp_(options.comparator),
|
||||
ipolicy_(options.filter_policy),
|
||||
options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
|
||||
owns_info_log_(options_.info_log != options.info_log),
|
||||
owns_cache_(options_.block_cache != options.block_cache),
|
||||
next_file_number_(1) {
|
||||
// TableCache can be small since we expect each table to be opened once.
|
||||
table_cache_ = new TableCache(dbname_, &options_, 10);
|
||||
}
|
||||
|
||||
// Frees the table cache, then the info log and block cache -- the latter two
// only when this object owns them (see the owns_* flags).
~Repairer() {
  delete table_cache_;
  if (owns_info_log_) {
    delete options_.info_log;
  }
  if (owns_cache_) {
    delete options_.block_cache;
  }
}
|
||||
|
||||
// Drives the repair: find the files, convert logs to tables, scan the
// tables, then write a new descriptor.  On success a summary line is logged.
// Returns the first failing status from FindFiles() or WriteDescriptor().
Status Run() {
  Status status = FindFiles();
  if (!status.ok()) {
    return status;
  }

  ConvertLogFilesToTables();
  ExtractMetaData();
  status = WriteDescriptor();
  if (!status.ok()) {
    return status;
  }

  // Total size of all tables that made it into the new descriptor.
  unsigned long long total_bytes = 0;
  for (std::vector<TableInfo>::const_iterator it = tables_.begin();
       it != tables_.end(); ++it) {
    total_bytes += it->meta.file_size;
  }
  Log(options_.info_log,
      "**** Repaired leveldb %s; "
      "recovered %d files; %llu bytes. "
      "Some data may have been lost. "
      "****",
      dbname_.c_str(),
      static_cast<int>(tables_.size()),
      total_bytes);
  return status;
}
|
||||
|
||||
private:
|
||||
struct TableInfo {
|
||||
FileMetaData meta;
|
||||
SequenceNumber max_sequence;
|
||||
};
|
||||
|
||||
std::string const dbname_;
|
||||
Env* const env_;
|
||||
InternalKeyComparator const icmp_;
|
||||
InternalFilterPolicy const ipolicy_;
|
||||
Options const options_;
|
||||
bool owns_info_log_;
|
||||
bool owns_cache_;
|
||||
TableCache* table_cache_;
|
||||
VersionEdit edit_;
|
||||
|
||||
std::vector<std::string> manifests_;
|
||||
std::vector<uint64_t> table_numbers_;
|
||||
std::vector<uint64_t> logs_;
|
||||
std::vector<TableInfo> tables_;
|
||||
uint64_t next_file_number_;
|
||||
|
||||
// Lists the database directory and sorts the recognised files into
// manifests_, logs_ and table_numbers_.  next_file_number_ is raised past the
// number of every recognised non-descriptor file.  Fails if the directory
// cannot be listed or is empty.
Status FindFiles() {
  std::vector<std::string> filenames;
  Status status = env_->GetChildren(dbname_, &filenames);
  if (!status.ok()) {
    return status;
  }
  if (filenames.empty()) {
    return Status::IOError(dbname_, "repair found no files");
  }

  uint64_t number;
  FileType type;
  for (size_t i = 0; i < filenames.size(); i++) {
    const std::string& name = filenames[i];
    if (!ParseFileName(name, &number, &type)) {
      continue;  // Not one of our files
    }
    if (type == kDescriptorFile) {
      manifests_.push_back(name);
      continue;
    }

    if (number + 1 > next_file_number_) {
      next_file_number_ = number + 1;
    }
    if (type == kLogFile) {
      logs_.push_back(number);
    } else if (type == kTableFile) {
      table_numbers_.push_back(number);
    }
    // Other file types are ignored.
  }
  return status;
}
|
||||
|
||||
void ConvertLogFilesToTables() {
|
||||
for (size_t i = 0; i < logs_.size(); i++) {
|
||||
std::string logname = LogFileName(dbname_, logs_[i]);
|
||||
Status status = ConvertLogToTable(logs_[i]);
|
||||
if (!status.ok()) {
|
||||
Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
|
||||
(unsigned long long) logs_[i],
|
||||
status.ToString().c_str());
|
||||
}
|
||||
ArchiveFile(logname);
|
||||
}
|
||||
}
|
||||
|
||||
// Replays log file number "log" into a fresh memtable and writes the result
// out as a new table file.
//
// Corrupt or undersized records are reported through the info log and
// skipped, as are batches that fail to apply.  Returns the status of opening
// the log or of building the table.  The new table number is added to
// table_numbers_ only when a non-empty table was written.
Status ConvertLogToTable(uint64_t log) {
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    Logger* info_log;
    uint64_t lognum;
    virtual void Corruption(size_t bytes, const Status& s) {
      // We print error messages for corruption, but continue repairing.
      Log(info_log, "Log #%llu: dropping %d bytes; %s",
          (unsigned long long) lognum,
          static_cast<int>(bytes),
          s.ToString().c_str());
    }
  };

  // Open the log file
  std::string logname = LogFileName(dbname_, log);
  SequentialFile* lfile;
  Status status = env_->NewSequentialFile(logname, &lfile);
  if (!status.ok()) {
    return status;
  }

  // Create the log reader.
  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = options_.info_log;
  reporter.lognum = log;
  // We intentionally make log::Reader do checksumming so that
  // corruptions cause entire commits to be skipped instead of
  // propagating bad information (like overly large sequence
  // numbers).
  log::Reader reader(lfile, &reporter, true/*checksum*/,
                     0/*initial_offset*/);

  // Read all the records and add to a memtable
  std::string scratch;
  Slice record;
  WriteBatch batch;
  MemTable* mem = new MemTable(icmp_);
  mem->Ref();
  int counter = 0;
  while (reader.ReadRecord(&record, &scratch)) {
    // NOTE(review): 12 is presumably the size of the write-batch header --
    // confirm against WriteBatchInternal.
    if (record.size() < 12) {
      reporter.Corruption(
          record.size(), Status::Corruption("log record too small"));
      continue;
    }
    WriteBatchInternal::SetContents(&batch, record);
    status = WriteBatchInternal::InsertInto(&batch, mem);
    if (status.ok()) {
      counter += WriteBatchInternal::Count(&batch);
    } else {
      Log(options_.info_log, "Log #%llu: ignoring %s",
          (unsigned long long) log,
          status.ToString().c_str());
      status = Status::OK();  // Keep going with rest of file
    }
  }
  delete lfile;

  // Do not record a version edit for this conversion to a Table
  // since ExtractMetaData() will also generate edits.
  FileMetaData meta;
  meta.number = next_file_number_++;
  Iterator* iter = mem->NewIterator();
  status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
  delete iter;
  mem->Unref();
  mem = NULL;
  if (status.ok()) {
    if (meta.file_size > 0) {
      table_numbers_.push_back(meta.number);
    }
  }
  Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
      (unsigned long long) log,
      counter,
      (unsigned long long) meta.number,
      status.ToString().c_str());
  return status;
}
|
||||
|
||||
void ExtractMetaData() {
|
||||
std::vector<TableInfo> kept;
|
||||
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
||||
TableInfo t;
|
||||
t.meta.number = table_numbers_[i];
|
||||
Status status = ScanTable(&t);
|
||||
if (!status.ok()) {
|
||||
std::string fname = TableFileName(dbname_, table_numbers_[i]);
|
||||
Log(options_.info_log, "Table #%llu: ignoring %s",
|
||||
(unsigned long long) table_numbers_[i],
|
||||
status.ToString().c_str());
|
||||
ArchiveFile(fname);
|
||||
} else {
|
||||
tables_.push_back(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fills in *t for the table whose number is already stored in t->meta.number:
// the file size, the smallest and largest parsable key, and the largest
// sequence number.  Keys that cannot be parsed are logged and skipped.
// Returns the file-size lookup error or the iterator's final status.
Status ScanTable(TableInfo* t) {
  const std::string fname = TableFileName(dbname_, t->meta.number);
  int num_entries = 0;
  Status status = env_->GetFileSize(fname, &t->meta.file_size);
  if (status.ok()) {
    Iterator* iter = table_cache_->NewIterator(
        ReadOptions(), t->meta.number, t->meta.file_size);
    bool have_smallest = false;
    ParsedInternalKey parsed;
    t->max_sequence = 0;
    iter->SeekToFirst();
    while (iter->Valid()) {
      const Slice key = iter->key();
      if (ParseInternalKey(key, &parsed)) {
        ++num_entries;
        // The first parsable key is the smallest; every later one
        // overwrites the largest.
        if (!have_smallest) {
          have_smallest = true;
          t->meta.smallest.DecodeFrom(key);
        }
        t->meta.largest.DecodeFrom(key);
        if (parsed.sequence > t->max_sequence) {
          t->max_sequence = parsed.sequence;
        }
      } else {
        Log(options_.info_log, "Table #%llu: unparsable key %s",
            (unsigned long long) t->meta.number,
            EscapeString(key).c_str());
      }
      iter->Next();
    }
    if (!iter->status().ok()) {
      status = iter->status();
    }
    delete iter;
  }
  Log(options_.info_log, "Table #%llu: %d entries %s",
      (unsigned long long) t->meta.number,
      num_entries,
      status.ToString().c_str());
  return status;
}
|
||||
|
||||
// Writes a new descriptor that lists every recovered table at level 0, then
// installs it as descriptor number 1 and points CURRENT at it.  Older
// manifests are archived first.  On failure the temporary file is deleted.
Status WriteDescriptor() {
  const std::string tmp = TempFileName(dbname_, 1);
  WritableFile* file;
  Status status = env_->NewWritableFile(tmp, &file);
  if (!status.ok()) {
    return status;
  }

  // Largest sequence number across all recovered tables.
  SequenceNumber max_sequence = 0;
  for (std::vector<TableInfo>::const_iterator it = tables_.begin();
       it != tables_.end(); ++it) {
    if (max_sequence < it->max_sequence) {
      max_sequence = it->max_sequence;
    }
  }

  edit_.SetComparatorName(icmp_.user_comparator()->Name());
  edit_.SetLogNumber(0);
  edit_.SetNextFile(next_file_number_);
  edit_.SetLastSequence(max_sequence);

  for (std::vector<TableInfo>::const_iterator it = tables_.begin();
       it != tables_.end(); ++it) {
    // TODO(opt): separate out into multiple levels
    edit_.AddFile(0, it->meta.number, it->meta.file_size,
                  it->meta.smallest, it->meta.largest);
  }

  //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
  {
    // The writer only lives for the single record.
    log::Writer log(file);
    std::string record;
    edit_.EncodeTo(&record);
    status = log.AddRecord(record);
  }
  if (status.ok()) {
    status = file->Close();
  }
  delete file;
  file = NULL;

  if (!status.ok()) {
    env_->DeleteFile(tmp);
    return status;
  }

  // Discard older manifests
  for (size_t i = 0; i < manifests_.size(); i++) {
    ArchiveFile(dbname_ + "/" + manifests_[i]);
  }

  // Install new manifest
  status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
  if (status.ok()) {
    status = SetCurrentFile(env_, dbname_, 1);
  } else {
    env_->DeleteFile(tmp);
  }
  return status;
}
|
||||
|
||||
// Moves "fname" into a "lost" subdirectory next to it. E.g., for
//    dir/foo
// rename to
//    dir/lost/foo
// A name with no directory component ("foo") is moved to "lost/foo",
// relative to the current directory, rather than into "/lost" at the
// filesystem root.
//
// Errors from creating the directory are ignored; the result of the rename
// is only logged.
void ArchiveFile(const std::string& fname) {
  const char* slash = strrchr(fname.c_str(), '/');
  std::string new_dir;
  if (slash != NULL) {
    // Keep everything before the last '/', then descend into "lost".
    new_dir.assign(fname.data(), slash - fname.data());
    new_dir.append("/lost");
  } else {
    // No directory part: archive beside the file, not under "/".
    new_dir.assign("lost");
  }
  env_->CreateDir(new_dir);  // Ignore error
  std::string new_file = new_dir;
  new_file.append("/");
  new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
  Status s = env_->RenameFile(fname, new_file);
  Log(options_.info_log, "Archiving %s: %s\n",
      fname.c_str(), s.ToString().c_str());
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Public entry point: constructs a Repairer for "dbname" with "options",
// runs it, and returns the status of that run.
Status RepairDB(const std::string& dbname, const Options& options) {
  return Repairer(dbname, options).Run();
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
460
src/hyperleveldb/db/skiplist.h
Normal file
460
src/hyperleveldb/db/skiplist.h
Normal file
@@ -0,0 +1,460 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Thread safety
|
||||
// -------------
|
||||
//
|
||||
// Writes require external synchronization, most likely a mutex.
|
||||
// Reads require a guarantee that the SkipList will not be destroyed
|
||||
// while the read is in progress. Apart from that, reads progress
|
||||
// without any internal locking or synchronization.
|
||||
//
|
||||
// Invariants:
|
||||
//
|
||||
// (1) Allocated nodes are never deleted until the SkipList is
|
||||
// destroyed. This is trivially guaranteed by the code since we
|
||||
// never delete any skip list nodes.
|
||||
//
|
||||
// (2) The contents of a Node except for the next/prev pointers are
|
||||
// immutable after the Node has been linked into the SkipList.
|
||||
// Only Insert() modifies the list, and it is careful to initialize
|
||||
// a node and use release-stores to publish the nodes in one or
|
||||
// more lists.
|
||||
//
|
||||
// ... prev vs. next pointer ordering ...
|
||||
|
||||
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "../port/port.h"
#include "../util/arena.h"
#include "../util/random.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class Arena;
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
class SkipList {
|
||||
private:
|
||||
struct Node;
|
||||
enum { kMaxHeight = 12 };
|
||||
|
||||
public:
|
||||
// Create a new SkipList object that will use "cmp" for comparing keys,
|
||||
// and will allocate memory using "*arena". Objects allocated in the arena
|
||||
// must remain allocated for the lifetime of the skiplist object.
|
||||
explicit SkipList(Comparator cmp, Arena* arena);
|
||||
|
||||
// Insert key into the list.
|
||||
// REQUIRES: nothing that compares equal to key is currently in the list.
|
||||
// REQUIRES: external synchronization.
|
||||
void Insert(const Key& key);
|
||||
|
||||
// Insert key into the list using the iterator as a hint.
|
||||
// REQUIRES: nothing that compares equal to key is currently in the list.
|
||||
// REQUIRES: external synchronization.
|
||||
class InsertHint;
|
||||
void InsertWithHint(InsertHint* ih, const Key& key);
|
||||
|
||||
// Returns true iff an entry that compares equal to key is in the list.
|
||||
bool Contains(const Key& key) const;
|
||||
|
||||
// Perform expensive iteration over the skip list prior to insert so that the
|
||||
// cost of a synchronized insert is reduced when the structure is full.
|
||||
// REQUIRES: same synchronization as is necessary for a read.
|
||||
class InsertHint {
|
||||
public:
|
||||
InsertHint(const SkipList* list, const Key& key);
|
||||
|
||||
private:
|
||||
const SkipList* list_;
|
||||
Node* x_;
|
||||
Node* prev_[kMaxHeight];
|
||||
Node* obs_[kMaxHeight];
|
||||
|
||||
// No copying allowed
|
||||
InsertHint(const InsertHint&);
|
||||
void operator=(const InsertHint&);
|
||||
friend class SkipList;
|
||||
};
|
||||
|
||||
// Iteration over the contents of a skip list
|
||||
class Iterator {
|
||||
public:
|
||||
// Initialize an iterator over the specified list.
|
||||
// The returned iterator is not valid.
|
||||
explicit Iterator(const SkipList* list);
|
||||
|
||||
// Returns true iff the iterator is positioned at a valid node.
|
||||
bool Valid() const;
|
||||
|
||||
// Returns the key at the current position.
|
||||
// REQUIRES: Valid()
|
||||
const Key& key() const;
|
||||
|
||||
// Advances to the next position.
|
||||
// REQUIRES: Valid()
|
||||
void Next();
|
||||
|
||||
// Advances to the previous position.
|
||||
// REQUIRES: Valid()
|
||||
void Prev();
|
||||
|
||||
// Advance to the first entry with a key >= target
|
||||
void Seek(const Key& target);
|
||||
|
||||
// Position at the first entry in list.
|
||||
// Final state of iterator is Valid() iff list is not empty.
|
||||
void SeekToFirst();
|
||||
|
||||
// Position at the last entry in list.
|
||||
// Final state of iterator is Valid() iff list is not empty.
|
||||
void SeekToLast();
|
||||
|
||||
private:
|
||||
const SkipList* list_;
|
||||
Node* node_;
|
||||
// Intentionally copyable
|
||||
};
|
||||
|
||||
private:
|
||||
// Immutable after construction
|
||||
Comparator const compare_;
|
||||
Arena* const arena_; // Arena used for allocations of nodes
|
||||
|
||||
Node* const head_;
|
||||
|
||||
// Modified only by Insert(). Read racily by readers, but stale
|
||||
// values are ok.
|
||||
port::AtomicPointer max_height_; // Height of the entire list
|
||||
|
||||
inline int GetMaxHeight() const {
|
||||
return static_cast<int>(
|
||||
reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
|
||||
}
|
||||
|
||||
// Read/written only by Insert().
|
||||
Random rnd_;
|
||||
|
||||
Node* NewNode(const Key& key, int height);
|
||||
int RandomHeight();
|
||||
bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
|
||||
|
||||
// Return true if key is greater than the data stored in "n"
|
||||
bool KeyIsAfterNode(const Key& key, Node* n) const;
|
||||
|
||||
// Return the earliest node that comes at or after key.
|
||||
// Return NULL if there is no such node.
|
||||
//
|
||||
// If prev is non-NULL, fills prev[level] with pointer to previous
|
||||
// node at "level" for every level in [0..max_height_-1].
|
||||
Node* FindGreaterOrEqual(const Key& key, Node** prev, Node** obs) const;
|
||||
|
||||
// Return the latest node with a key < key.
|
||||
// Return head_ if there is no such node.
|
||||
Node* FindLessThan(const Key& key) const;
|
||||
|
||||
// Return the last node in the list.
|
||||
// Return head_ if list is empty.
|
||||
Node* FindLast() const;
|
||||
|
||||
// Update the state of the InsertHint to reflect the latest values
|
||||
void UpdateHint(InsertHint* ih, const Key& k);
|
||||
|
||||
// No copying allowed
|
||||
SkipList(const SkipList&);
|
||||
void operator=(const SkipList&);
|
||||
};
|
||||
|
||||
// Implementation details follow
|
||||
template<typename Key, class Comparator>
|
||||
struct SkipList<Key,Comparator>::Node {
|
||||
explicit Node(const Key& k) : key(k) { }
|
||||
|
||||
Key const key;
|
||||
|
||||
// Accessors/mutators for links. Wrapped in methods so we can
|
||||
// add the appropriate barriers as necessary.
|
||||
Node* Next(int n) {
|
||||
assert(n >= 0);
|
||||
// Use an 'acquire load' so that we observe a fully initialized
|
||||
// version of the returned Node.
|
||||
return reinterpret_cast<Node*>(next_[n].Acquire_Load());
|
||||
}
|
||||
void SetNext(int n, Node* x) {
|
||||
assert(n >= 0);
|
||||
// Use a 'release store' so that anybody who reads through this
|
||||
// pointer observes a fully initialized version of the inserted node.
|
||||
next_[n].Release_Store(x);
|
||||
}
|
||||
|
||||
// No-barrier variants that can be safely used in a few locations.
|
||||
Node* NoBarrier_Next(int n) {
|
||||
assert(n >= 0);
|
||||
return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
|
||||
}
|
||||
void NoBarrier_SetNext(int n, Node* x) {
|
||||
assert(n >= 0);
|
||||
next_[n].NoBarrier_Store(x);
|
||||
}
|
||||
|
||||
private:
|
||||
// Array of length equal to the node height. next_[0] is lowest level link.
|
||||
port::AtomicPointer next_[1];
|
||||
};
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node*
|
||||
SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
|
||||
char* mem = arena_->AllocateAligned(
|
||||
sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
|
||||
return new (mem) Node(key);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
|
||||
list_ = list;
|
||||
node_ = NULL;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
|
||||
return node_ != NULL;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
|
||||
assert(Valid());
|
||||
return node_->key;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Next() {
|
||||
assert(Valid());
|
||||
node_ = node_->Next(0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Prev() {
|
||||
// Instead of using explicit "prev" links, we just search for the
|
||||
// last node that falls before key.
|
||||
assert(Valid());
|
||||
node_ = list_->FindLessThan(node_->key);
|
||||
if (node_ == list_->head_) {
|
||||
node_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
|
||||
node_ = list_->FindGreaterOrEqual(target, NULL, NULL);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
|
||||
node_ = list_->head_->Next(0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
|
||||
node_ = list_->FindLast();
|
||||
if (node_ == list_->head_) {
|
||||
node_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
int SkipList<Key,Comparator>::RandomHeight() {
|
||||
// Increase height with probability 1 in kBranching
|
||||
static const unsigned int kBranching = 4;
|
||||
int height = 1;
|
||||
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
|
||||
height++;
|
||||
}
|
||||
assert(height > 0);
|
||||
assert(height <= kMaxHeight);
|
||||
return height;
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
|
||||
// NULL n is considered infinite
|
||||
return (n != NULL) && (compare_(n->key, key) < 0);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev, Node** obs)
|
||||
const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
Node* next = x->Next(level);
|
||||
if (KeyIsAfterNode(key, next)) {
|
||||
// Keep searching in this list
|
||||
x = next;
|
||||
} else {
|
||||
if (prev != NULL) prev[level] = x;
|
||||
if (obs != NULL) obs[level] = next;
|
||||
if (level == 0) {
|
||||
return next;
|
||||
} else {
|
||||
// Switch to next list
|
||||
level--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node*
|
||||
SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
assert(x == head_ || compare_(x->key, key) < 0);
|
||||
Node* next = x->Next(level);
|
||||
if (next == NULL || compare_(next->key, key) >= 0) {
|
||||
if (level == 0) {
|
||||
return x;
|
||||
} else {
|
||||
// Switch to next list
|
||||
level--;
|
||||
}
|
||||
} else {
|
||||
x = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
|
||||
const {
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
Node* next = x->Next(level);
|
||||
if (next == NULL) {
|
||||
if (level == 0) {
|
||||
return x;
|
||||
} else {
|
||||
// Switch to next list
|
||||
level--;
|
||||
}
|
||||
} else {
|
||||
x = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
|
||||
: compare_(cmp),
|
||||
arena_(arena),
|
||||
head_(NewNode(0 /* any key will do */, kMaxHeight)),
|
||||
max_height_(reinterpret_cast<void*>(1)),
|
||||
rnd_(0xdeadbeef) {
|
||||
for (int i = 0; i < kMaxHeight; i++) {
|
||||
head_->SetNext(i, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
void SkipList<Key,Comparator>::Insert(const Key& key) {
|
||||
InsertHint ih(this, key);
|
||||
return InsertWithHint(&ih, key);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
SkipList<Key,Comparator>::InsertHint::InsertHint(const SkipList* list, const Key& key)
|
||||
: list_(list),
|
||||
x_(NULL) {
|
||||
for (int i = 0; i < kMaxHeight; ++i)
|
||||
{
|
||||
prev_[i] = list_->head_;
|
||||
obs_[i] = NULL;
|
||||
}
|
||||
x_ = list_->FindGreaterOrEqual(key, prev_, obs_);
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
void SkipList<Key,Comparator>::UpdateHint(InsertHint* ih, const Key& key) {
|
||||
// TODO(opt): We can be smarter here by using the skip list structure to
|
||||
// advance. It's assumed that a small number of insertions to the SkipList
|
||||
// happen between the time ih was created and now.
|
||||
for (int level = 0; level < kMaxHeight; ++level) {
|
||||
Node* x = ih->prev_[level];
|
||||
while (true) {
|
||||
Node* next = x->Next(level);
|
||||
if (next == ih->obs_[level] || !KeyIsAfterNode(key, next)) {
|
||||
ih->prev_[level] = x;
|
||||
ih->obs_[level] = next;
|
||||
break;
|
||||
}
|
||||
x = next;
|
||||
}
|
||||
}
|
||||
ih->x_ = ih->obs_[0];
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
void SkipList<Key,Comparator>::InsertWithHint(InsertHint* ih, const Key& key) {
|
||||
// Advance pointers to account for any data written between the creation of
|
||||
// the InsertHint and this call.
|
||||
UpdateHint(ih, key);
|
||||
Node* prev[kMaxHeight];
|
||||
Node* x = ih->x_;
|
||||
for (int i = 0; i < kMaxHeight; ++i) {
|
||||
prev[i] = ih->prev_[i];
|
||||
}
|
||||
|
||||
#if 0
|
||||
Node* check_prev[kMaxHeight];
|
||||
Node* check_x = FindGreaterOrEqual(key, check_prev, NULL);
|
||||
|
||||
for (int i = 0; i < GetMaxHeight(); ++i) {
|
||||
assert(check_prev[i] == prev[i]);
|
||||
assert(check_x == x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Our data structure does not allow duplicate insertion
|
||||
assert(x == NULL || !Equal(key, x->key));
|
||||
|
||||
int height = RandomHeight();
|
||||
if (height > GetMaxHeight()) {
|
||||
for (int i = GetMaxHeight(); i < height; i++) {
|
||||
prev[i] = head_;
|
||||
}
|
||||
//fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
|
||||
|
||||
// It is ok to mutate max_height_ without any synchronization
|
||||
// with concurrent readers. A concurrent reader that observes
|
||||
// the new value of max_height_ will see either the old value of
|
||||
// new level pointers from head_ (NULL), or a new value set in
|
||||
// the loop below. In the former case the reader will
|
||||
// immediately drop to the next level since NULL sorts after all
|
||||
// keys. In the latter case the reader will use the new node.
|
||||
max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
|
||||
}
|
||||
|
||||
x = NewNode(key, height);
|
||||
for (int i = 0; i < height; i++) {
|
||||
// NoBarrier_SetNext() suffices since we will add a barrier when
|
||||
// we publish a pointer to "x" in prev[i].
|
||||
x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
|
||||
prev[i]->SetNext(i, x);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
bool SkipList<Key,Comparator>::Contains(const Key& key) const {
  // The first node at or after "key" is the only possible match.
  Node* candidate = FindGreaterOrEqual(key, NULL, NULL);
  return candidate != NULL && Equal(key, candidate->key);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
378
src/hyperleveldb/db/skiplist_test.cc
Normal file
378
src/hyperleveldb/db/skiplist_test.cc
Normal file
@@ -0,0 +1,378 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "skiplist.h"
|
||||
#include <set>
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/arena.h"
|
||||
#include "../util/hash.h"
|
||||
#include "../util/random.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
typedef uint64_t Key;

// Three-way comparison of keys by numeric value:
// -1 if a < b, +1 if a > b, 0 if they are equal.
struct Comparator {
  int operator()(const Key& a, const Key& b) const {
    if (a == b) {
      return 0;
    }
    return (a < b) ? -1 : +1;
  }
};
|
||||
|
||||
class SkipTest { };
|
||||
|
||||
// An empty list contains nothing, and every way of positioning an iterator
// on it leaves the iterator invalid.
TEST(SkipTest, Empty) {
  Arena arena;
  Comparator cmp;
  SkipList<Key, Comparator> list(cmp, &arena);
  ASSERT_TRUE(!list.Contains(10));

  SkipList<Key, Comparator>::Iterator it(&list);
  ASSERT_TRUE(!it.Valid());   // freshly constructed
  it.SeekToFirst();
  ASSERT_TRUE(!it.Valid());
  it.Seek(100);
  ASSERT_TRUE(!it.Valid());
  it.SeekToLast();
  ASSERT_TRUE(!it.Valid());
}
|
||||
|
||||
// Inserts up to N distinct pseudo-random keys in [0, R) and checks the skip
// list against a std::set holding the same keys: membership, seeks, and
// forward / backward iteration.
TEST(SkipTest, InsertAndLookup) {
  const int N = 2000;
  const int R = 5000;
  Random rnd(1000);
  std::set<Key> keys;
  Arena arena;
  Comparator cmp;
  SkipList<Key, Comparator> list(cmp, &arena);
  for (int n = 0; n < N; n++) {
    Key candidate = rnd.Next() % R;
    // Only insert keys the model has not seen; duplicates are not allowed.
    if (keys.insert(candidate).second) {
      list.Insert(candidate);
    }
  }

  // Membership must agree with the model for every possible key.
  for (int probe = 0; probe < R; probe++) {
    const size_t expected = list.Contains(probe) ? 1 : 0;
    ASSERT_EQ(keys.count(probe), expected);
  }

  // Simple iterator tests
  {
    SkipList<Key, Comparator>::Iterator it(&list);
    ASSERT_TRUE(!it.Valid());

    it.Seek(0);
    ASSERT_TRUE(it.Valid());
    ASSERT_EQ(*(keys.begin()), it.key());

    it.SeekToFirst();
    ASSERT_TRUE(it.Valid());
    ASSERT_EQ(*(keys.begin()), it.key());

    it.SeekToLast();
    ASSERT_TRUE(it.Valid());
    ASSERT_EQ(*(keys.rbegin()), it.key());
  }

  // Forward iteration test: from every start point, the next three entries
  // must match the model.
  for (int start = 0; start < R; start++) {
    SkipList<Key, Comparator>::Iterator it(&list);
    it.Seek(start);

    // Compare against model iterator
    std::set<Key>::iterator model = keys.lower_bound(start);
    for (int step = 0; step < 3; step++) {
      if (model == keys.end()) {
        ASSERT_TRUE(!it.Valid());
        break;
      }
      ASSERT_TRUE(it.Valid());
      ASSERT_EQ(*model, it.key());
      ++model;
      it.Next();
    }
  }

  // Backward iteration test
  {
    SkipList<Key, Comparator>::Iterator it(&list);
    it.SeekToLast();

    // Compare against model iterator
    std::set<Key>::reverse_iterator model = keys.rbegin();
    while (model != keys.rend()) {
      ASSERT_TRUE(it.Valid());
      ASSERT_EQ(*model, it.key());
      it.Prev();
      ++model;
    }
    ASSERT_TRUE(!it.Valid());
  }
}
|
||||
|
||||
// We want to make sure that with a single writer and multiple
|
||||
// concurrent readers (with no synchronization other than when a
|
||||
// reader's iterator is created), the reader always observes all the
|
||||
// data that was present in the skip list when the iterator was
|
||||
// constructed.  Because insertions are happening concurrently, we may
|
||||
// also observe new values that were inserted since the iterator was
|
||||
// constructed, but we should never miss any values that were present
|
||||
// at iterator construction time.
|
||||
//
|
||||
// We generate multi-part keys:
|
||||
// <key,gen,hash>
|
||||
// where:
|
||||
// key is in range [0..K-1]
|
||||
// gen is a generation number for key
|
||||
// hash is hash(key,gen)
|
||||
//
|
||||
// The insertion code picks a random key, sets gen to be 1 + the last
|
||||
// generation number inserted for that key, and sets hash to Hash(key,gen).
|
||||
//
|
||||
// At the beginning of a read, we snapshot the last inserted
|
||||
// generation number for each key. We then iterate, including random
|
||||
// calls to Next() and Seek(). For every key we encounter, we
|
||||
// check that it is either expected given the initial snapshot or has
|
||||
// been concurrently added since the iterator started.
|
||||
class ConcurrentTest {
|
||||
private:
|
||||
static const uint32_t K = 4;
|
||||
|
||||
static uint64_t key(Key key) { return (key >> 40); }
|
||||
static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; }
|
||||
static uint64_t hash(Key key) { return key & 0xff; }
|
||||
|
||||
static uint64_t HashNumbers(uint64_t k, uint64_t g) {
|
||||
uint64_t data[2] = { k, g };
|
||||
return Hash(reinterpret_cast<char*>(data), sizeof(data), 0);
|
||||
}
|
||||
|
||||
static Key MakeKey(uint64_t k, uint64_t g) {
|
||||
assert(sizeof(Key) == sizeof(uint64_t));
|
||||
assert(k <= K); // We sometimes pass K to seek to the end of the skiplist
|
||||
assert(g <= 0xffffffffu);
|
||||
return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff));
|
||||
}
|
||||
|
||||
static bool IsValidKey(Key k) {
|
||||
return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff);
|
||||
}
|
||||
|
||||
static Key RandomTarget(Random* rnd) {
|
||||
switch (rnd->Next() % 10) {
|
||||
case 0:
|
||||
// Seek to beginning
|
||||
return MakeKey(0, 0);
|
||||
case 1:
|
||||
// Seek to end
|
||||
return MakeKey(K, 0);
|
||||
default:
|
||||
// Seek to middle
|
||||
return MakeKey(rnd->Next() % K, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Per-key generation
|
||||
struct State {
|
||||
port::AtomicPointer generation[K];
|
||||
void Set(int k, intptr_t v) {
|
||||
generation[k].Release_Store(reinterpret_cast<void*>(v));
|
||||
}
|
||||
intptr_t Get(int k) {
|
||||
return reinterpret_cast<intptr_t>(generation[k].Acquire_Load());
|
||||
}
|
||||
|
||||
State() {
|
||||
for (int k = 0; k < K; k++) {
|
||||
Set(k, 0);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Current state of the test
|
||||
State current_;
|
||||
|
||||
Arena arena_;
|
||||
|
||||
// SkipList is not protected by mu_. We just use a single writer
|
||||
// thread to modify it.
|
||||
SkipList<Key, Comparator> list_;
|
||||
|
||||
public:
|
||||
ConcurrentTest() : list_(Comparator(), &arena_) { }
|
||||
|
||||
// REQUIRES: External synchronization
|
||||
void WriteStep(Random* rnd) {
|
||||
const uint32_t k = rnd->Next() % K;
|
||||
const intptr_t g = current_.Get(k) + 1;
|
||||
const Key key = MakeKey(k, g);
|
||||
list_.Insert(key);
|
||||
current_.Set(k, g);
|
||||
}
|
||||
|
||||
void ReadStep(Random* rnd) {
|
||||
// Remember the initial committed state of the skiplist.
|
||||
State initial_state;
|
||||
for (int k = 0; k < K; k++) {
|
||||
initial_state.Set(k, current_.Get(k));
|
||||
}
|
||||
|
||||
Key pos = RandomTarget(rnd);
|
||||
SkipList<Key, Comparator>::Iterator iter(&list_);
|
||||
iter.Seek(pos);
|
||||
while (true) {
|
||||
Key current;
|
||||
if (!iter.Valid()) {
|
||||
current = MakeKey(K, 0);
|
||||
} else {
|
||||
current = iter.key();
|
||||
ASSERT_TRUE(IsValidKey(current)) << current;
|
||||
}
|
||||
ASSERT_LE(pos, current) << "should not go backwards";
|
||||
|
||||
// Verify that everything in [pos,current) was not present in
|
||||
// initial_state.
|
||||
while (pos < current) {
|
||||
ASSERT_LT(key(pos), K) << pos;
|
||||
|
||||
// Note that generation 0 is never inserted, so it is ok if
|
||||
// <*,0,*> is missing.
|
||||
ASSERT_TRUE((gen(pos) == 0) ||
|
||||
(gen(pos) > initial_state.Get(key(pos)))
|
||||
) << "key: " << key(pos)
|
||||
<< "; gen: " << gen(pos)
|
||||
<< "; initgen: "
|
||||
<< initial_state.Get(key(pos));
|
||||
|
||||
// Advance to next key in the valid key space
|
||||
if (key(pos) < key(current)) {
|
||||
pos = MakeKey(key(pos) + 1, 0);
|
||||
} else {
|
||||
pos = MakeKey(key(pos), gen(pos) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (!iter.Valid()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (rnd->Next() % 2) {
|
||||
iter.Next();
|
||||
pos = MakeKey(key(pos), gen(pos) + 1);
|
||||
} else {
|
||||
Key new_target = RandomTarget(rnd);
|
||||
if (new_target > pos) {
|
||||
pos = new_target;
|
||||
iter.Seek(new_target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
const uint32_t ConcurrentTest::K;
|
||||
|
||||
// Simple test that does single-threaded testing of the ConcurrentTest
|
||||
// scaffolding.
|
||||
// Exercises the ConcurrentTest scaffolding from a single thread by
// alternating one read step and one write step.
TEST(SkipTest, ConcurrentWithoutThreads) {
  ConcurrentTest test;
  Random rnd(test::RandomSeed());
  for (int round = 0; round < 10000; ++round) {
    test.ReadStep(&rnd);
    test.WriteStep(&rnd);
  }
}
|
||||
|
||||
class TestState {
|
||||
public:
|
||||
ConcurrentTest t_;
|
||||
int seed_;
|
||||
port::AtomicPointer quit_flag_;
|
||||
|
||||
enum ReaderState {
|
||||
STARTING,
|
||||
RUNNING,
|
||||
DONE
|
||||
};
|
||||
|
||||
explicit TestState(int s)
|
||||
: seed_(s),
|
||||
quit_flag_(NULL),
|
||||
state_(STARTING),
|
||||
state_cv_(&mu_) {}
|
||||
|
||||
void Wait(ReaderState s) {
|
||||
mu_.Lock();
|
||||
while (state_ != s) {
|
||||
state_cv_.Wait();
|
||||
}
|
||||
mu_.Unlock();
|
||||
}
|
||||
|
||||
void Change(ReaderState s) {
|
||||
mu_.Lock();
|
||||
state_ = s;
|
||||
state_cv_.Signal();
|
||||
mu_.Unlock();
|
||||
}
|
||||
|
||||
private:
|
||||
port::Mutex mu_;
|
||||
ReaderState state_;
|
||||
port::CondVar state_cv_;
|
||||
};
|
||||
|
||||
static void ConcurrentReader(void* arg) {
|
||||
TestState* state = reinterpret_cast<TestState*>(arg);
|
||||
Random rnd(state->seed_);
|
||||
int64_t reads = 0;
|
||||
state->Change(TestState::RUNNING);
|
||||
while (!state->quit_flag_.Acquire_Load()) {
|
||||
state->t_.ReadStep(&rnd);
|
||||
++reads;
|
||||
}
|
||||
state->Change(TestState::DONE);
|
||||
}
|
||||
|
||||
static void RunConcurrent(int run) {
|
||||
const int seed = test::RandomSeed() + (run * 100);
|
||||
Random rnd(seed);
|
||||
const int N = 1000;
|
||||
const int kSize = 1000;
|
||||
for (int i = 0; i < N; i++) {
|
||||
if ((i % 100) == 0) {
|
||||
fprintf(stderr, "Run %d of %d\n", i, N);
|
||||
}
|
||||
TestState state(seed + 1);
|
||||
Env::Default()->Schedule(ConcurrentReader, &state);
|
||||
state.Wait(TestState::RUNNING);
|
||||
for (int i = 0; i < kSize; i++) {
|
||||
state.t_.WriteStep(&rnd);
|
||||
}
|
||||
state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do
|
||||
state.Wait(TestState::DONE);
|
||||
}
|
||||
}
|
||||
|
||||
// Five concurrent runs; the argument only changes the random seed.
TEST(SkipTest, Concurrent1) {
  RunConcurrent(1);
}
TEST(SkipTest, Concurrent2) {
  RunConcurrent(2);
}
TEST(SkipTest, Concurrent3) {
  RunConcurrent(3);
}
TEST(SkipTest, Concurrent4) {
  RunConcurrent(4);
}
TEST(SkipTest, Concurrent5) {
  RunConcurrent(5);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
66
src/hyperleveldb/db/snapshot.h
Normal file
66
src/hyperleveldb/db/snapshot.h
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_SNAPSHOT_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_SNAPSHOT_H_
|
||||
|
||||
#include "../hyperleveldb/db.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class SnapshotList;
|
||||
|
||||
// Snapshots are kept in a doubly-linked list in the DB.
|
||||
// Each SnapshotImpl corresponds to a particular sequence number.
|
||||
class SnapshotImpl : public Snapshot {
|
||||
public:
|
||||
SequenceNumber number_; // const after creation
|
||||
|
||||
private:
|
||||
friend class SnapshotList;
|
||||
|
||||
// SnapshotImpl is kept in a doubly-linked circular list
|
||||
SnapshotImpl* prev_;
|
||||
SnapshotImpl* next_;
|
||||
|
||||
SnapshotList* list_; // just for sanity checks
|
||||
};
|
||||
|
||||
class SnapshotList {
|
||||
public:
|
||||
SnapshotList() {
|
||||
list_.prev_ = &list_;
|
||||
list_.next_ = &list_;
|
||||
}
|
||||
|
||||
bool empty() const { return list_.next_ == &list_; }
|
||||
SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; }
|
||||
SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; }
|
||||
|
||||
const SnapshotImpl* New(SequenceNumber seq) {
|
||||
SnapshotImpl* s = new SnapshotImpl;
|
||||
s->number_ = seq;
|
||||
s->list_ = this;
|
||||
s->next_ = &list_;
|
||||
s->prev_ = list_.prev_;
|
||||
s->prev_->next_ = s;
|
||||
s->next_->prev_ = s;
|
||||
return s;
|
||||
}
|
||||
|
||||
void Delete(const SnapshotImpl* s) {
|
||||
assert(s->list_ == this);
|
||||
s->prev_->next_ = s->next_;
|
||||
s->next_->prev_ = s->prev_;
|
||||
delete s;
|
||||
}
|
||||
|
||||
private:
|
||||
// Dummy head of doubly-linked list of snapshots
|
||||
SnapshotImpl list_;
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_SNAPSHOT_H_
|
||||
121
src/hyperleveldb/db/table_cache.cc
Normal file
121
src/hyperleveldb/db/table_cache.cc
Normal file
@@ -0,0 +1,121 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "table_cache.h"
|
||||
|
||||
#include "filename.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../hyperleveldb/table.h"
|
||||
#include "../util/coding.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
struct TableAndFile {
|
||||
RandomAccessFile* file;
|
||||
Table* table;
|
||||
};
|
||||
|
||||
static void DeleteEntry(const Slice& key, void* value) {
|
||||
TableAndFile* tf = reinterpret_cast<TableAndFile*>(value);
|
||||
delete tf->table;
|
||||
delete tf->file;
|
||||
delete tf;
|
||||
}
|
||||
|
||||
static void UnrefEntry(void* arg1, void* arg2) {
|
||||
Cache* cache = reinterpret_cast<Cache*>(arg1);
|
||||
Cache::Handle* h = reinterpret_cast<Cache::Handle*>(arg2);
|
||||
cache->Release(h);
|
||||
}
|
||||
|
||||
// Builds a table cache for database "dbname". The Env is taken from
// "options", and the cache is created by NewLRUCache(entries).
// "options" is stored by pointer, not copied.
TableCache::TableCache(const std::string& dbname,
                       const Options* options,
                       int entries)
    : env_(options->env),
      dbname_(dbname),
      options_(options),
      cache_(NewLRUCache(entries)) {}
|
||||
|
||||
// Destroys the cache object created in the constructor.
TableCache::~TableCache() {
  delete cache_;
}
|
||||
|
||||
// Looks up the cache entry for table file "file_number", opening the file
// and inserting a new entry on a miss.
//
// On success *handle refers to the cache entry. On failure a non-OK status
// is returned and *handle is NULL (the value produced by the failed lookup).
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
                             Cache::Handle** handle) {
  // The cache key is the fixed-width 64-bit encoding of the file number.
  char encoded[sizeof(file_number)];
  EncodeFixed64(encoded, file_number);
  const Slice cache_key(encoded, sizeof(encoded));

  Status status;
  *handle = cache_->Lookup(cache_key);
  if (*handle != NULL) {
    return status;  // already cached
  }

  RandomAccessFile* file = NULL;
  Table* table = NULL;
  const std::string path = TableFileName(dbname_, file_number);
  status = env_->NewRandomAccessFile(path, &file);
  if (status.ok()) {
    status = Table::Open(*options_, file, file_size, &table);
  }

  if (status.ok()) {
    TableAndFile* const entry = new TableAndFile;
    entry->file = file;
    entry->table = table;
    // DeleteEntry frees "entry" and both objects it points at.
    *handle = cache_->Insert(cache_key, entry, 1, &DeleteEntry);
  } else {
    // Failures are deliberately not cached: if the error is transient, or
    // somebody repairs the file, a later call recovers automatically.
    assert(table == NULL);
    delete file;
  }
  return status;
}
|
||||
|
||||
// Returns an iterator over table file "file_number". If the table cannot be
// found or opened, an error iterator carrying the failure status is returned
// instead.
//
// When "tableptr" is non-NULL it receives the underlying Table on success
// and NULL on failure. The cache entry is released by a cleanup callback
// registered on the returned iterator.
Iterator* TableCache::NewIterator(const ReadOptions& options,
                                  uint64_t file_number,
                                  uint64_t file_size,
                                  Table** tableptr) {
  const bool wants_table = (tableptr != NULL);
  if (wants_table) {
    *tableptr = NULL;
  }

  Cache::Handle* handle = NULL;
  const Status status = FindTable(file_number, file_size, &handle);
  if (!status.ok()) {
    return NewErrorIterator(status);
  }

  TableAndFile* const entry =
      reinterpret_cast<TableAndFile*>(cache_->Value(handle));
  Table* const table = entry->table;
  Iterator* const iter = table->NewIterator(options);
  // Hand the handle to the iterator; UnrefEntry releases it later.
  iter->RegisterCleanup(&UnrefEntry, cache_, handle);
  if (wants_table) {
    *tableptr = table;
  }
  return iter;
}
|
||||
|
||||
// Looks up internal key "k" in table file "file_number" by forwarding to
// Table::InternalGet with "arg" and "saver". Returns the FindTable status if
// the table cannot be located, otherwise the status of InternalGet. The
// cache handle is released before returning.
Status TableCache::Get(const ReadOptions& options,
                       uint64_t file_number,
                       uint64_t file_size,
                       const Slice& k,
                       void* arg,
                       void (*saver)(void*, const Slice&, const Slice&)) {
  Cache::Handle* handle = NULL;
  Status status = FindTable(file_number, file_size, &handle);
  if (!status.ok()) {
    return status;
  }

  TableAndFile* const entry =
      reinterpret_cast<TableAndFile*>(cache_->Value(handle));
  status = entry->table->InternalGet(options, k, arg, saver);
  cache_->Release(handle);
  return status;
}
|
||||
|
||||
void TableCache::Evict(uint64_t file_number) {
|
||||
char buf[sizeof(file_number)];
|
||||
EncodeFixed64(buf, file_number);
|
||||
cache_->Erase(Slice(buf, sizeof(buf)));
|
||||
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
61
src/hyperleveldb/db/table_cache.h
Normal file
61
src/hyperleveldb/db/table_cache.h
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Thread-safe (provides internal synchronization)
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_TABLE_CACHE_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_TABLE_CACHE_H_
|
||||
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include "dbformat.h"
|
||||
#include "../hyperleveldb/cache.h"
|
||||
#include "../hyperleveldb/table.h"
|
||||
#include "../port/port.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class Env;
|
||||
|
||||
class TableCache {
|
||||
public:
|
||||
TableCache(const std::string& dbname, const Options* options, int entries);
|
||||
~TableCache();
|
||||
|
||||
// Return an iterator for the specified file number (the corresponding
|
||||
// file length must be exactly "file_size" bytes). If "tableptr" is
|
||||
// non-NULL, also sets "*tableptr" to point to the Table object
|
||||
// underlying the returned iterator, or NULL if no Table object underlies
|
||||
// the returned iterator. The returned "*tableptr" object is owned by
|
||||
// the cache and should not be deleted, and is valid for as long as the
|
||||
// returned iterator is live.
|
||||
Iterator* NewIterator(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
Table** tableptr = NULL);
|
||||
|
||||
// If a seek to internal key "k" in specified file finds an entry,
|
||||
// call (*handle_result)(arg, found_key, found_value).
|
||||
Status Get(const ReadOptions& options,
|
||||
uint64_t file_number,
|
||||
uint64_t file_size,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
void (*handle_result)(void*, const Slice&, const Slice&));
|
||||
|
||||
// Evict any entry for the specified file number
|
||||
void Evict(uint64_t file_number);
|
||||
|
||||
private:
|
||||
Env* const env_;
|
||||
const std::string dbname_;
|
||||
const Options* options_;
|
||||
Cache* cache_;
|
||||
|
||||
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_TABLE_CACHE_H_
|
||||
266
src/hyperleveldb/db/version_edit.cc
Normal file
266
src/hyperleveldb/db/version_edit.cc
Normal file
@@ -0,0 +1,266 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "version_edit.h"
|
||||
|
||||
#include "version_set.h"
|
||||
#include "../util/coding.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Record tags used in the serialized form of a VersionEdit.
// These values are persisted on disk: never renumber or reuse them.
enum Tag {
  kComparator     = 1,  // comparator name
  kLogNumber      = 2,  // log file number
  kNextFileNumber = 3,  // next file number
  kLastSequence   = 4,  // last sequence number
  kCompactPointer = 5,  // (level, internal key) compaction pointer
  kDeletedFile    = 6,  // (level, file number) of a deleted file
  kNewFile        = 7,  // (level, number, size, smallest, largest) of a new file
  // 8 is retired; it was used for large value refs.
  kPrevLogNumber  = 9   // previous log file number
};
|
||||
|
||||
void VersionEdit::Clear() {
|
||||
comparator_.clear();
|
||||
log_number_ = 0;
|
||||
prev_log_number_ = 0;
|
||||
last_sequence_ = 0;
|
||||
next_file_number_ = 0;
|
||||
has_comparator_ = false;
|
||||
has_log_number_ = false;
|
||||
has_prev_log_number_ = false;
|
||||
has_next_file_number_ = false;
|
||||
has_last_sequence_ = false;
|
||||
deleted_files_.clear();
|
||||
new_files_.clear();
|
||||
}
|
||||
|
||||
void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
if (has_comparator_) {
|
||||
PutVarint32(dst, kComparator);
|
||||
PutLengthPrefixedSlice(dst, comparator_);
|
||||
}
|
||||
if (has_log_number_) {
|
||||
PutVarint32(dst, kLogNumber);
|
||||
PutVarint64(dst, log_number_);
|
||||
}
|
||||
if (has_prev_log_number_) {
|
||||
PutVarint32(dst, kPrevLogNumber);
|
||||
PutVarint64(dst, prev_log_number_);
|
||||
}
|
||||
if (has_next_file_number_) {
|
||||
PutVarint32(dst, kNextFileNumber);
|
||||
PutVarint64(dst, next_file_number_);
|
||||
}
|
||||
if (has_last_sequence_) {
|
||||
PutVarint32(dst, kLastSequence);
|
||||
PutVarint64(dst, last_sequence_);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||
PutVarint32(dst, kCompactPointer);
|
||||
PutVarint32(dst, compact_pointers_[i].first); // level
|
||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
||||
}
|
||||
|
||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||
iter != deleted_files_.end();
|
||||
++iter) {
|
||||
PutVarint32(dst, kDeletedFile);
|
||||
PutVarint32(dst, iter->first); // level
|
||||
PutVarint64(dst, iter->second); // file number
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
const FileMetaData& f = new_files_[i].second;
|
||||
PutVarint32(dst, kNewFile);
|
||||
PutVarint32(dst, new_files_[i].first); // level
|
||||
PutVarint64(dst, f.number);
|
||||
PutVarint64(dst, f.file_size);
|
||||
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
||||
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
||||
}
|
||||
}
|
||||
|
||||
// Reads one length-prefixed slice from *input and decodes it into *dst.
// Returns false, leaving *dst untouched, if the slice cannot be read.
static bool GetInternalKey(Slice* input, InternalKey* dst) {
  Slice encoded;
  if (!GetLengthPrefixedSlice(input, &encoded)) {
    return false;
  }
  dst->DecodeFrom(encoded);
  return true;
}
|
||||
|
||||
// Reads a varint32 level number from *input. Succeeds, storing it in
// *level, only if the value is below config::kNumLevels; otherwise returns
// false and leaves *level untouched.
static bool GetLevel(Slice* input, int* level) {
  uint32_t raw;
  // Short-circuit keeps "raw" from being read when the varint is malformed.
  const bool ok = GetVarint32(input, &raw) && raw < config::kNumLevels;
  if (ok) {
    *level = raw;
  }
  return ok;
}
|
||||
|
||||
// Replaces the contents of this edit with the records parsed from "src".
//
// Returns OK when every record parses and the whole input is consumed.
// Otherwise returns Status::Corruption("VersionEdit", <what failed>). In that
// case the edit may hold the records decoded before the failure.
Status VersionEdit::DecodeFrom(const Slice& src) {
  Clear();
  Slice input = src;
  const char* error = NULL;  // names the field that failed to parse
  uint32_t tag;

  // Scratch values reused from record to record.
  int level;
  uint64_t number;
  FileMetaData meta;
  Slice text;
  InternalKey key;

  while (error == NULL && GetVarint32(&input, &tag)) {
    switch (tag) {
      case kComparator:
        if (!GetLengthPrefixedSlice(&input, &text)) {
          error = "comparator name";
        } else {
          comparator_ = text.ToString();
          has_comparator_ = true;
        }
        break;

      case kLogNumber:
        if (!GetVarint64(&input, &log_number_)) {
          error = "log number";
        } else {
          has_log_number_ = true;
        }
        break;

      case kPrevLogNumber:
        if (!GetVarint64(&input, &prev_log_number_)) {
          error = "previous log number";
        } else {
          has_prev_log_number_ = true;
        }
        break;

      case kNextFileNumber:
        if (!GetVarint64(&input, &next_file_number_)) {
          error = "next file number";
        } else {
          has_next_file_number_ = true;
        }
        break;

      case kLastSequence:
        if (!GetVarint64(&input, &last_sequence_)) {
          error = "last sequence number";
        } else {
          has_last_sequence_ = true;
        }
        break;

      case kCompactPointer:
        if (!(GetLevel(&input, &level) &&
              GetInternalKey(&input, &key))) {
          error = "compaction pointer";
        } else {
          compact_pointers_.push_back(std::make_pair(level, key));
        }
        break;

      case kDeletedFile:
        if (!(GetLevel(&input, &level) &&
              GetVarint64(&input, &number))) {
          error = "deleted file";
        } else {
          deleted_files_.insert(std::make_pair(level, number));
        }
        break;

      case kNewFile:
        if (!(GetLevel(&input, &level) &&
              GetVarint64(&input, &meta.number) &&
              GetVarint64(&input, &meta.file_size) &&
              GetInternalKey(&input, &meta.smallest) &&
              GetInternalKey(&input, &meta.largest))) {
          error = "new-file entry";
        } else {
          new_files_.push_back(std::make_pair(level, meta));
        }
        break;

      default:
        error = "unknown tag";
        break;
    }
  }

  // The loop also ends when a tag varint cannot be read; leftover bytes at
  // that point mean the tag itself was malformed.
  if (error == NULL && !input.empty()) {
    error = "invalid tag";
  }

  if (error == NULL) {
    return Status();
  }
  return Status::Corruption("VersionEdit", error);
}
|
||||
|
||||
std::string VersionEdit::DebugString() const {
|
||||
std::string r;
|
||||
r.append("VersionEdit {");
|
||||
if (has_comparator_) {
|
||||
r.append("\n Comparator: ");
|
||||
r.append(comparator_);
|
||||
}
|
||||
if (has_log_number_) {
|
||||
r.append("\n LogNumber: ");
|
||||
AppendNumberTo(&r, log_number_);
|
||||
}
|
||||
if (has_prev_log_number_) {
|
||||
r.append("\n PrevLogNumber: ");
|
||||
AppendNumberTo(&r, prev_log_number_);
|
||||
}
|
||||
if (has_next_file_number_) {
|
||||
r.append("\n NextFile: ");
|
||||
AppendNumberTo(&r, next_file_number_);
|
||||
}
|
||||
if (has_last_sequence_) {
|
||||
r.append("\n LastSeq: ");
|
||||
AppendNumberTo(&r, last_sequence_);
|
||||
}
|
||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||
r.append("\n CompactPointer: ");
|
||||
AppendNumberTo(&r, compact_pointers_[i].first);
|
||||
r.append(" ");
|
||||
r.append(compact_pointers_[i].second.DebugString());
|
||||
}
|
||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||
iter != deleted_files_.end();
|
||||
++iter) {
|
||||
r.append("\n DeleteFile: ");
|
||||
AppendNumberTo(&r, iter->first);
|
||||
r.append(" ");
|
||||
AppendNumberTo(&r, iter->second);
|
||||
}
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
const FileMetaData& f = new_files_[i].second;
|
||||
r.append("\n AddFile: ");
|
||||
AppendNumberTo(&r, new_files_[i].first);
|
||||
r.append(" ");
|
||||
AppendNumberTo(&r, f.number);
|
||||
r.append(" ");
|
||||
AppendNumberTo(&r, f.file_size);
|
||||
r.append(" ");
|
||||
r.append(f.smallest.DebugString());
|
||||
r.append(" .. ");
|
||||
r.append(f.largest.DebugString());
|
||||
}
|
||||
r.append("\n}\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
107
src/hyperleveldb/db/version_edit.h
Normal file
107
src/hyperleveldb/db/version_edit.h
Normal file
@@ -0,0 +1,107 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_VERSION_EDIT_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_VERSION_EDIT_H_
|
||||
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "dbformat.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class VersionSet;
|
||||
|
||||
struct FileMetaData {
|
||||
int refs;
|
||||
int allowed_seeks; // Seeks allowed until compaction
|
||||
uint64_t number;
|
||||
uint64_t file_size; // File size in bytes
|
||||
InternalKey smallest; // Smallest internal key served by table
|
||||
InternalKey largest; // Largest internal key served by table
|
||||
|
||||
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
|
||||
};
|
||||
|
||||
class VersionEdit {
|
||||
public:
|
||||
VersionEdit() { Clear(); }
|
||||
~VersionEdit() { }
|
||||
|
||||
void Clear();
|
||||
|
||||
void SetComparatorName(const Slice& name) {
|
||||
has_comparator_ = true;
|
||||
comparator_ = name.ToString();
|
||||
}
|
||||
void SetLogNumber(uint64_t num) {
|
||||
has_log_number_ = true;
|
||||
log_number_ = num;
|
||||
}
|
||||
void SetPrevLogNumber(uint64_t num) {
|
||||
has_prev_log_number_ = true;
|
||||
prev_log_number_ = num;
|
||||
}
|
||||
void SetNextFile(uint64_t num) {
|
||||
has_next_file_number_ = true;
|
||||
next_file_number_ = num;
|
||||
}
|
||||
void SetLastSequence(SequenceNumber seq) {
|
||||
has_last_sequence_ = true;
|
||||
last_sequence_ = seq;
|
||||
}
|
||||
void SetCompactPointer(int level, const InternalKey& key) {
|
||||
compact_pointers_.push_back(std::make_pair(level, key));
|
||||
}
|
||||
|
||||
// Add the specified file at the specified number.
|
||||
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
|
||||
// REQUIRES: "smallest" and "largest" are smallest and largest keys in file
|
||||
void AddFile(int level, uint64_t file,
|
||||
uint64_t file_size,
|
||||
const InternalKey& smallest,
|
||||
const InternalKey& largest) {
|
||||
FileMetaData f;
|
||||
f.number = file;
|
||||
f.file_size = file_size;
|
||||
f.smallest = smallest;
|
||||
f.largest = largest;
|
||||
new_files_.push_back(std::make_pair(level, f));
|
||||
}
|
||||
|
||||
// Delete the specified "file" from the specified "level".
|
||||
void DeleteFile(int level, uint64_t file) {
|
||||
deleted_files_.insert(std::make_pair(level, file));
|
||||
}
|
||||
|
||||
void EncodeTo(std::string* dst) const;
|
||||
Status DecodeFrom(const Slice& src);
|
||||
|
||||
std::string DebugString() const;
|
||||
|
||||
private:
|
||||
friend class VersionSet;
|
||||
|
||||
typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
|
||||
|
||||
std::string comparator_;
|
||||
uint64_t log_number_;
|
||||
uint64_t prev_log_number_;
|
||||
uint64_t next_file_number_;
|
||||
SequenceNumber last_sequence_;
|
||||
bool has_comparator_;
|
||||
bool has_log_number_;
|
||||
bool has_prev_log_number_;
|
||||
bool has_next_file_number_;
|
||||
bool has_last_sequence_;
|
||||
|
||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
||||
DeletedFileSet deleted_files_;
|
||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_VERSION_EDIT_H_
|
||||
46
src/hyperleveldb/db/version_edit_test.cc
Normal file
46
src/hyperleveldb/db/version_edit_test.cc
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "version_edit.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
static void TestEncodeDecode(const VersionEdit& edit) {
|
||||
std::string encoded, encoded2;
|
||||
edit.EncodeTo(&encoded);
|
||||
VersionEdit parsed;
|
||||
Status s = parsed.DecodeFrom(encoded);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
parsed.EncodeTo(&encoded2);
|
||||
ASSERT_EQ(encoded, encoded2);
|
||||
}
|
||||
|
||||
// Empty fixture type named by the TEST() cases in this file.
class VersionEditTest {
};
|
||||
|
||||
// Grows an edit over four rounds (round-tripping it before each round's
// additions), then sets every scalar field except the previous log number
// and round-trips once more. All numbers are offset by kBig so that they
// need more than 32 bits.
TEST(VersionEditTest, EncodeDecode) {
  static const uint64_t kBig = 1ull << 50;

  VersionEdit edit;
  int round = 0;
  while (round < 4) {
    TestEncodeDecode(edit);
    edit.AddFile(3, kBig + 300 + round, kBig + 400 + round,
                 InternalKey("foo", kBig + 500 + round, kTypeValue),
                 InternalKey("zoo", kBig + 600 + round, kTypeDeletion));
    edit.DeleteFile(4, kBig + 700 + round);
    edit.SetCompactPointer(round,
                           InternalKey("x", kBig + 900 + round, kTypeValue));
    ++round;
  }

  edit.SetComparatorName("foo");
  edit.SetLogNumber(kBig + 100);
  edit.SetNextFile(kBig + 200);
  edit.SetLastSequence(kBig + 1000);
  TestEncodeDecode(edit);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
1502
src/hyperleveldb/db/version_set.cc
Normal file
1502
src/hyperleveldb/db/version_set.cc
Normal file
File diff suppressed because it is too large
Load Diff
390
src/hyperleveldb/db/version_set.h
Normal file
390
src/hyperleveldb/db/version_set.h
Normal file
@@ -0,0 +1,390 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// The representation of a DBImpl consists of a set of Versions. The
|
||||
// newest version is called "current". Older versions may be kept
|
||||
// around to provide a consistent view to live iterators.
|
||||
//
|
||||
// Each Version keeps track of a set of Table files per level. The
|
||||
// entire set of versions is maintained in a VersionSet.
|
||||
//
|
||||
// Version,VersionSet are thread-compatible, but require external
|
||||
// synchronization on all accesses.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_VERSION_SET_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_VERSION_SET_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include "dbformat.h"
|
||||
#include "version_edit.h"
|
||||
#include "../port/port.h"
|
||||
#include "../port/thread_annotations.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
namespace log { class Writer; }
|
||||
|
||||
class Compaction;
|
||||
class CompactionBoundary;
|
||||
class Iterator;
|
||||
class MemTable;
|
||||
class TableBuilder;
|
||||
class TableCache;
|
||||
class Version;
|
||||
class VersionSet;
|
||||
class WritableFile;
|
||||
|
||||
// Return the smallest index i such that files[i]->largest >= key.
|
||||
// Return files.size() if there is no such file.
|
||||
// REQUIRES: "files" contains a sorted list of non-overlapping files.
|
||||
extern int FindFile(const InternalKeyComparator& icmp,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const Slice& key);
|
||||
|
||||
// Returns true iff some file in "files" overlaps the user key range
|
||||
// [*smallest,*largest].
|
||||
// smallest==NULL represents a key smaller than all keys in the DB.
|
||||
// largest==NULL represents a key larger than all keys in the DB.
|
||||
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
|
||||
// in sorted order.
|
||||
extern bool SomeFileOverlapsRange(
|
||||
const InternalKeyComparator& icmp,
|
||||
bool disjoint_sorted_files,
|
||||
const std::vector<FileMetaData*>& files,
|
||||
const Slice* smallest_user_key,
|
||||
const Slice* largest_user_key);
|
||||
|
||||
class Version {
|
||||
public:
|
||||
// Append to *iters a sequence of iterators that will
|
||||
// yield the contents of this Version when merged together.
|
||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||
void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
|
||||
|
||||
// Lookup the value for key. If found, store it in *val and
|
||||
// return OK. Else return a non-OK status. Fills *stats.
|
||||
// REQUIRES: lock is not held
|
||||
struct GetStats {
|
||||
FileMetaData* seek_file;
|
||||
int seek_file_level;
|
||||
};
|
||||
Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
||||
GetStats* stats);
|
||||
|
||||
// Reference count management (so Versions do not disappear out from
|
||||
// under live iterators)
|
||||
void Ref();
|
||||
void Unref();
|
||||
|
||||
void GetOverlappingInputs(
|
||||
int level,
|
||||
const InternalKey* begin, // NULL means before all keys
|
||||
const InternalKey* end, // NULL means after all keys
|
||||
std::vector<FileMetaData*>* inputs);
|
||||
|
||||
// Returns true iff some file in the specified level overlaps
|
||||
// some part of [*smallest_user_key,*largest_user_key].
|
||||
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
|
||||
// largest_user_key==NULL represents a key largest than all keys in the DB.
|
||||
bool OverlapInLevel(int level,
|
||||
const Slice* smallest_user_key,
|
||||
const Slice* largest_user_key);
|
||||
|
||||
// Return the level at which we should place a new memtable compaction
|
||||
// result that covers the range [smallest_user_key,largest_user_key].
|
||||
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
|
||||
const Slice& largest_user_key);
|
||||
|
||||
int NumFiles(int level) const { return files_[level].size(); }
|
||||
|
||||
// Return a human readable string that describes this version's contents.
|
||||
std::string DebugString() const;
|
||||
|
||||
private:
|
||||
friend class Compaction;
|
||||
friend class VersionSet;
|
||||
|
||||
class LevelFileNumIterator;
|
||||
Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
|
||||
|
||||
VersionSet* vset_; // VersionSet to which this Version belongs
|
||||
Version* next_; // Next version in linked list
|
||||
Version* prev_; // Previous version in linked list
|
||||
int refs_; // Number of live refs to this version
|
||||
|
||||
// List of files per level
|
||||
std::vector<FileMetaData*> files_[config::kNumLevels];
|
||||
|
||||
// Level that should be compacted next and its compaction score.
|
||||
// Score < 1 means compaction is not strictly needed. These fields
|
||||
// are initialized by Finalize().
|
||||
double compaction_scores_[config::kNumLevels];
|
||||
|
||||
explicit Version(VersionSet* vset)
|
||||
: vset_(vset), next_(this), prev_(this), refs_(0) {
|
||||
for (int i = 0; i < config::kNumLevels; ++i) {
|
||||
compaction_scores_[i] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
~Version();
|
||||
|
||||
// No copying allowed
|
||||
Version(const Version&);
|
||||
void operator=(const Version&);
|
||||
};
|
||||
|
||||
class VersionSet {
|
||||
public:
|
||||
VersionSet(const std::string& dbname,
|
||||
const Options* options,
|
||||
TableCache* table_cache,
|
||||
const InternalKeyComparator*);
|
||||
~VersionSet();
|
||||
|
||||
// Apply *edit to the current version to form a new descriptor that
|
||||
// is both saved to persistent state and installed as the new
|
||||
// current version. Will release *mu while actually writing to the file.
|
||||
// REQUIRES: *mu is held on entry.
|
||||
// REQUIRES: no other thread concurrently calls LogAndApply()
|
||||
Status LogAndApply(VersionEdit* edit, port::Mutex* mu, port::CondVar* cv, bool* wt)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mu);
|
||||
|
||||
// Recover the last saved descriptor from persistent storage.
|
||||
Status Recover();
|
||||
|
||||
// Return the current version.
|
||||
Version* current() const { return current_; }
|
||||
|
||||
// Return the current manifest file number
|
||||
uint64_t ManifestFileNumber() const { return manifest_file_number_; }
|
||||
|
||||
// Allocate and return a new file number
|
||||
uint64_t NewFileNumber() { return next_file_number_++; }
|
||||
|
||||
// Arrange to reuse "file_number" unless a newer file number has
|
||||
// already been allocated.
|
||||
// REQUIRES: "file_number" was returned by a call to NewFileNumber().
|
||||
void ReuseFileNumber(uint64_t file_number) {
|
||||
if (next_file_number_ == file_number + 1) {
|
||||
next_file_number_ = file_number;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the number of Table files at the specified level.
|
||||
int NumLevelFiles(int level) const;
|
||||
|
||||
// Return the combined file size of all files at the specified level.
|
||||
int64_t NumLevelBytes(int level) const;
|
||||
|
||||
// Return the last sequence number.
|
||||
uint64_t LastSequence() const { return last_sequence_; }
|
||||
|
||||
// Set the last sequence number to s.
|
||||
void SetLastSequence(uint64_t s) {
|
||||
assert(s >= last_sequence_);
|
||||
last_sequence_ = s;
|
||||
}
|
||||
|
||||
// Mark the specified file number as used.
|
||||
void MarkFileNumberUsed(uint64_t number);
|
||||
|
||||
// Return the current log file number.
|
||||
uint64_t LogNumber() const { return log_number_; }
|
||||
|
||||
// Return the log file number for the log file that is currently
|
||||
// being compacted, or zero if there is no such log file.
|
||||
uint64_t PrevLogNumber() const { return prev_log_number_; }
|
||||
|
||||
// Pick level for a new compaction.
|
||||
// Returns kNumLevels if there is no compaction to be done.
|
||||
// Otherwise returns the lowest unlocked level that may compact upwards.
|
||||
int PickCompactionLevel(bool* locked);
|
||||
|
||||
// Pick inputs for a new compaction at the specified level.
|
||||
// Returns NULL if there is no compaction to be done.
|
||||
// Otherwise returns a pointer to a heap-allocated object that
|
||||
// describes the compaction. Caller should delete the result.
|
||||
Compaction* PickCompaction(int level);
|
||||
|
||||
// Return a compaction object for compacting the range [begin,end] in
|
||||
// the specified level. Returns NULL if there is nothing in that
|
||||
// level that overlaps the specified range. Caller should delete
|
||||
// the result.
|
||||
Compaction* CompactRange(
|
||||
int level,
|
||||
const InternalKey* begin,
|
||||
const InternalKey* end);
|
||||
|
||||
// Return the maximum overlapping data (in bytes) at next level for any
|
||||
// file at a level >= 1.
|
||||
int64_t MaxNextLevelOverlappingBytes();
|
||||
|
||||
// Create an iterator that reads over the compaction inputs for "*c".
|
||||
// The caller should delete the iterator when no longer needed.
|
||||
Iterator* MakeInputIterator(Compaction* c);
|
||||
|
||||
// Returns true iff some level needs a compaction.
|
||||
bool NeedsCompaction(bool* levels) const {
|
||||
Version* v = current_;
|
||||
for (int i = 0; i + 1 < config::kNumLevels; ++i) {
|
||||
if (!levels[i] && !levels[i + 1] &&
|
||||
v->compaction_scores_[i] >= 1.0 &&
|
||||
(i + 2 == config::kNumLevels ||
|
||||
v->compaction_scores_[i + 1] < 1.0)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add all files listed in any live version to *live.
|
||||
// May also mutate some internal state.
|
||||
void AddLiveFiles(std::set<uint64_t>* live);
|
||||
|
||||
// Return the approximate offset in the database of the data for
|
||||
// "key" as of version "v".
|
||||
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
||||
|
||||
// Return a human-readable short (single-line) summary of the number
|
||||
// of files per level. Uses *scratch as backing store.
|
||||
struct LevelSummaryStorage {
|
||||
char buffer[100];
|
||||
};
|
||||
const char* LevelSummary(LevelSummaryStorage* scratch) const;
|
||||
|
||||
private:
|
||||
class Builder;
|
||||
|
||||
friend class Compaction;
|
||||
friend class Version;
|
||||
|
||||
void Finalize(Version* v);
|
||||
|
||||
void GetRange(const std::vector<FileMetaData*>& inputs,
|
||||
InternalKey* smallest,
|
||||
InternalKey* largest);
|
||||
|
||||
void GetRange2(const std::vector<FileMetaData*>& inputs1,
|
||||
const std::vector<FileMetaData*>& inputs2,
|
||||
InternalKey* smallest,
|
||||
InternalKey* largest);
|
||||
|
||||
void GetCompactionBoundaries(int level,
|
||||
std::vector<FileMetaData*>* LA,
|
||||
std::vector<FileMetaData*>* LB,
|
||||
std::vector<uint64_t>* LA_sizes,
|
||||
std::vector<uint64_t>* LB_sizes,
|
||||
std::vector<class CompactionBoundary>* boundaries);
|
||||
|
||||
void SetupOtherInputs(Compaction* c);
|
||||
|
||||
// Save current contents to *log
|
||||
Status WriteSnapshot(log::Writer* log);
|
||||
|
||||
void AppendVersion(Version* v);
|
||||
|
||||
bool ManifestContains(const std::string& record) const;
|
||||
|
||||
Env* const env_;
|
||||
const std::string dbname_;
|
||||
const Options* const options_;
|
||||
TableCache* const table_cache_;
|
||||
const InternalKeyComparator icmp_;
|
||||
uint64_t next_file_number_;
|
||||
uint64_t manifest_file_number_;
|
||||
uint64_t last_sequence_;
|
||||
uint64_t log_number_;
|
||||
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
|
||||
|
||||
// Opened lazily
|
||||
WritableFile* descriptor_file_;
|
||||
log::Writer* descriptor_log_;
|
||||
Version dummy_versions_; // Head of circular doubly-linked list of versions.
|
||||
Version* current_; // == dummy_versions_.prev_
|
||||
|
||||
// Per-level key at which the next compaction at that level should start.
|
||||
// Either an empty string, or a valid InternalKey.
|
||||
std::string compact_pointer_[config::kNumLevels];
|
||||
|
||||
// No copying allowed
|
||||
VersionSet(const VersionSet&);
|
||||
void operator=(const VersionSet&);
|
||||
};
|
||||
|
||||
// A Compaction encapsulates information about a compaction.
|
||||
class Compaction {
|
||||
public:
|
||||
~Compaction();
|
||||
|
||||
// Return the level that is being compacted. Inputs from "level"
|
||||
// and "level+1" will be merged to produce a set of "level+1" files.
|
||||
int level() const { return level_; }
|
||||
|
||||
// Return the object that holds the edits to the descriptor done
|
||||
// by this compaction.
|
||||
VersionEdit* edit() { return &edit_; }
|
||||
|
||||
// "which" must be either 0 or 1
|
||||
int num_input_files(int which) const { return inputs_[which].size(); }
|
||||
|
||||
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
|
||||
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
|
||||
|
||||
// Maximum size of files to build during this compaction.
|
||||
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
||||
|
||||
// Is this a trivial compaction that can be implemented by just
|
||||
// moving a single input file to the next level (no merging or splitting)
|
||||
bool IsTrivialMove() const;
|
||||
|
||||
// Add all inputs to this compaction as delete operations to *edit.
|
||||
void AddInputDeletions(VersionEdit* edit);
|
||||
|
||||
// Returns true if the information we have available guarantees that
|
||||
// the compaction is producing data in "level+1" for which no data exists
|
||||
// in levels greater than "level+1".
|
||||
bool IsBaseLevelForKey(const Slice& user_key);
|
||||
|
||||
// Release the input version for the compaction, once the compaction
|
||||
// is successful.
|
||||
void ReleaseInputs();
|
||||
|
||||
// Set and get the ratio of inputs to outputs.
|
||||
// If nonzero, this is the ratio of inputs to outputs. If zero, it indicates
|
||||
// that the compaction was chosen without concern for the ratio of inputs to
|
||||
// outputs.
|
||||
void SetRatio(double ratio) { ratio_ = ratio; }
|
||||
double ratio() { return ratio_; }
|
||||
|
||||
private:
|
||||
friend class Version;
|
||||
friend class VersionSet;
|
||||
|
||||
explicit Compaction(int level);
|
||||
|
||||
int level_;
|
||||
uint64_t max_output_file_size_;
|
||||
Version* input_version_;
|
||||
VersionEdit edit_;
|
||||
|
||||
double ratio_;
|
||||
|
||||
// Each compaction reads inputs from "level_" and "level_+1"
|
||||
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
||||
|
||||
// State for implementing IsBaseLevelForKey
|
||||
|
||||
// level_ptrs_ holds indices into input_version_->levels_: our state
|
||||
// is that we are positioned at one of the file ranges for each
|
||||
// higher level than the ones involved in this compaction (i.e. for
|
||||
// all L >= level_ + 2).
|
||||
size_t level_ptrs_[config::kNumLevels];
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_VERSION_SET_H_
|
||||
179
src/hyperleveldb/db/version_set_test.cc
Normal file
179
src/hyperleveldb/db/version_set_test.cc
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "version_set.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/testharness.h"
|
||||
#include "../util/testutil.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class FindFileTest {
|
||||
public:
|
||||
std::vector<FileMetaData*> files_;
|
||||
bool disjoint_sorted_files_;
|
||||
|
||||
FindFileTest() : disjoint_sorted_files_(true) { }
|
||||
|
||||
~FindFileTest() {
|
||||
for (int i = 0; i < files_.size(); i++) {
|
||||
delete files_[i];
|
||||
}
|
||||
}
|
||||
|
||||
void Add(const char* smallest, const char* largest,
|
||||
SequenceNumber smallest_seq = 100,
|
||||
SequenceNumber largest_seq = 100) {
|
||||
FileMetaData* f = new FileMetaData;
|
||||
f->number = files_.size() + 1;
|
||||
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
|
||||
f->largest = InternalKey(largest, largest_seq, kTypeValue);
|
||||
files_.push_back(f);
|
||||
}
|
||||
|
||||
int Find(const char* key) {
|
||||
InternalKey target(key, 100, kTypeValue);
|
||||
InternalKeyComparator cmp(BytewiseComparator());
|
||||
return FindFile(cmp, files_, target.Encode());
|
||||
}
|
||||
|
||||
bool Overlaps(const char* smallest, const char* largest) {
|
||||
InternalKeyComparator cmp(BytewiseComparator());
|
||||
Slice s(smallest != NULL ? smallest : "");
|
||||
Slice l(largest != NULL ? largest : "");
|
||||
return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_,
|
||||
(smallest != NULL ? &s : NULL),
|
||||
(largest != NULL ? &l : NULL));
|
||||
}
|
||||
};
|
||||
|
||||
// With no files registered, every lookup lands on index 0 and no range
// (bounded or not) overlaps anything.
TEST(FindFileTest, Empty) {
  ASSERT_EQ(0, Find("foo"));

  ASSERT_TRUE(!Overlaps("a", "z"));
  ASSERT_TRUE(!Overlaps(NULL, "z"));
  ASSERT_TRUE(!Overlaps("a", NULL));
  ASSERT_TRUE(!Overlaps(NULL, NULL));
}
|
||||
|
||||
// One file covering ["p", "q"].
TEST(FindFileTest, Single) {
  Add("p", "q");

  // Keys up to and including "q" map to file 0; later keys fall past it.
  ASSERT_EQ(0, Find("a"));
  ASSERT_EQ(0, Find("p"));
  ASSERT_EQ(0, Find("p1"));
  ASSERT_EQ(0, Find("q"));
  ASSERT_EQ(1, Find("q1"));
  ASSERT_EQ(1, Find("z"));

  // Ranges entirely before or entirely after the file.
  ASSERT_TRUE(!Overlaps("a", "b"));
  ASSERT_TRUE(!Overlaps("z1", "z2"));

  // Ranges that touch or intersect the file.
  ASSERT_TRUE(Overlaps("a", "p"));
  ASSERT_TRUE(Overlaps("a", "q"));
  ASSERT_TRUE(Overlaps("a", "z"));
  ASSERT_TRUE(Overlaps("p", "p1"));
  ASSERT_TRUE(Overlaps("p", "q"));
  ASSERT_TRUE(Overlaps("p", "z"));
  ASSERT_TRUE(Overlaps("p1", "p2"));
  ASSERT_TRUE(Overlaps("p1", "z"));
  ASSERT_TRUE(Overlaps("q", "q"));
  ASSERT_TRUE(Overlaps("q", "q1"));

  // Ranges with a NULL bound.
  ASSERT_TRUE(!Overlaps(NULL, "j"));
  ASSERT_TRUE(!Overlaps("r", NULL));
  ASSERT_TRUE(Overlaps(NULL, "p"));
  ASSERT_TRUE(Overlaps(NULL, "p1"));
  ASSERT_TRUE(Overlaps("q", NULL));
  ASSERT_TRUE(Overlaps(NULL, NULL));
}
|
||||
|
||||
|
||||
// Four files, the first two sharing the boundary key "200".
TEST(FindFileTest, Multiple) {
  Add("150", "200");
  Add("200", "250");
  Add("300", "350");
  Add("400", "450");

  // File 0: everything up to and including "200".
  ASSERT_EQ(0, Find("100"));
  ASSERT_EQ(0, Find("150"));
  ASSERT_EQ(0, Find("151"));
  ASSERT_EQ(0, Find("199"));
  ASSERT_EQ(0, Find("200"));

  // File 1.
  ASSERT_EQ(1, Find("201"));
  ASSERT_EQ(1, Find("249"));
  ASSERT_EQ(1, Find("250"));

  // File 2, including the gap in front of it.
  ASSERT_EQ(2, Find("251"));
  ASSERT_EQ(2, Find("299"));
  ASSERT_EQ(2, Find("300"));
  ASSERT_EQ(2, Find("349"));
  ASSERT_EQ(2, Find("350"));

  // File 3, including the gap in front of it.
  ASSERT_EQ(3, Find("351"));
  ASSERT_EQ(3, Find("400"));
  ASSERT_EQ(3, Find("450"));

  // Past the last file.
  ASSERT_EQ(4, Find("451"));

  // Ranges that fall before, after, or in a gap between the files.
  ASSERT_TRUE(!Overlaps("100", "149"));
  ASSERT_TRUE(!Overlaps("251", "299"));
  ASSERT_TRUE(!Overlaps("451", "500"));
  ASSERT_TRUE(!Overlaps("351", "399"));

  // Ranges that touch at least one file.
  ASSERT_TRUE(Overlaps("100", "150"));
  ASSERT_TRUE(Overlaps("100", "200"));
  ASSERT_TRUE(Overlaps("100", "300"));
  ASSERT_TRUE(Overlaps("100", "400"));
  ASSERT_TRUE(Overlaps("100", "500"));
  ASSERT_TRUE(Overlaps("375", "400"));
  ASSERT_TRUE(Overlaps("450", "450"));
  ASSERT_TRUE(Overlaps("450", "500"));
}
|
||||
|
||||
// Same four files as the Multiple test, probed with NULL range bounds.
TEST(FindFileTest, MultipleNullBoundaries) {
  Add("150", "200");
  Add("200", "250");
  Add("300", "350");
  Add("400", "450");

  // Half-open ranges that stop before the first file or start after the last.
  ASSERT_TRUE(!Overlaps(NULL, "149"));
  ASSERT_TRUE(!Overlaps("451", NULL));

  // Both bounds NULL.
  ASSERT_TRUE(Overlaps(NULL, NULL));

  // NULL lower bound.
  ASSERT_TRUE(Overlaps(NULL, "150"));
  ASSERT_TRUE(Overlaps(NULL, "199"));
  ASSERT_TRUE(Overlaps(NULL, "200"));
  ASSERT_TRUE(Overlaps(NULL, "201"));
  ASSERT_TRUE(Overlaps(NULL, "400"));
  ASSERT_TRUE(Overlaps(NULL, "800"));

  // NULL upper bound.
  ASSERT_TRUE(Overlaps("100", NULL));
  ASSERT_TRUE(Overlaps("200", NULL));
  ASSERT_TRUE(Overlaps("449", NULL));
  ASSERT_TRUE(Overlaps("450", NULL));
}
|
||||
|
||||
// A single-key file whose two bounds carry different sequence numbers
// (5000 for the smallest key, 3000 for the largest).
TEST(FindFileTest, OverlapSequenceChecks) {
  Add("200", "200", 5000, 3000);

  // Ranges that miss user key "200".
  ASSERT_TRUE(!Overlaps("199", "199"));
  ASSERT_TRUE(!Overlaps("201", "300"));

  // Ranges that include user key "200".
  ASSERT_TRUE(Overlaps("200", "200"));
  ASSERT_TRUE(Overlaps("190", "200"));
  ASSERT_TRUE(Overlaps("200", "210"));
}
|
||||
|
||||
// Two files whose ranges overlap each other, so the fixture reports the
// file list as not disjoint.
TEST(FindFileTest, OverlappingFiles) {
  Add("150", "600");
  Add("400", "500");
  disjoint_sorted_files_ = false;

  // Ranges outside ["150", "600"].
  ASSERT_TRUE(!Overlaps("100", "149"));
  ASSERT_TRUE(!Overlaps("601", "700"));

  // Ranges that touch at least one of the files.
  ASSERT_TRUE(Overlaps("100", "150"));
  ASSERT_TRUE(Overlaps("100", "200"));
  ASSERT_TRUE(Overlaps("100", "300"));
  ASSERT_TRUE(Overlaps("100", "400"));
  ASSERT_TRUE(Overlaps("100", "500"));
  ASSERT_TRUE(Overlaps("375", "400"));
  ASSERT_TRUE(Overlaps("450", "450"));
  ASSERT_TRUE(Overlaps("450", "500"));
  ASSERT_TRUE(Overlaps("450", "700"));
  ASSERT_TRUE(Overlaps("600", "700"));
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
147
src/hyperleveldb/db/write_batch.cc
Normal file
147
src/hyperleveldb/db/write_batch.cc
Normal file
@@ -0,0 +1,147 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// WriteBatch::rep_ :=
|
||||
// sequence: fixed64
|
||||
// count: fixed32
|
||||
// data: record[count]
|
||||
// record :=
|
||||
// kTypeValue varstring varstring |
|
||||
// kTypeDeletion varstring
|
||||
// varstring :=
|
||||
// len: varint32
|
||||
// data: uint8[len]
|
||||
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
|
||||
#include "../hyperleveldb/db.h"
|
||||
#include "dbformat.h"
|
||||
#include "memtable.h"
|
||||
#include "write_batch_internal.h"
|
||||
#include "../util/coding.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
|
||||
static const size_t kHeader = 8 + 4;  // sequence (fixed64) + count (fixed32)
|
||||
|
||||
// A new batch starts out in the same state Clear() produces.
WriteBatch::WriteBatch() {
  Clear();
}
|
||||
|
||||
// Out-of-line destructor; the body is intentionally empty.
WriteBatch::~WriteBatch() {}
|
||||
|
||||
// Out-of-line definition of the Handler destructor; the body is empty.
WriteBatch::Handler::~Handler() {}
|
||||
|
||||
// Drop every record and leave rep_ holding only a zero-filled header of
// kHeader bytes.
void WriteBatch::Clear() {
  rep_.assign(kHeader, '\0');
}
|
||||
|
||||
// Decode the records stored after the header in rep_ and replay them, in
// order, on *handler (Put for kTypeValue, Delete for kTypeDeletion).
//
// Returns Status::Corruption if rep_ is shorter than the header, a record
// cannot be decoded, a tag is unknown, or the number of decoded records
// differs from the count stored in the header.  Records decoded before the
// problem was detected have already been delivered to the handler.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice remaining(rep_);
  if (remaining.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  remaining.remove_prefix(kHeader);

  int records_seen = 0;
  Slice key, value;
  while (!remaining.empty()) {
    ++records_seen;

    // Each record starts with a one-byte type tag.
    const char tag = remaining[0];
    remaining.remove_prefix(1);

    if (tag == kTypeValue) {
      if (!GetLengthPrefixedSlice(&remaining, &key) ||
          !GetLengthPrefixedSlice(&remaining, &value)) {
        return Status::Corruption("bad WriteBatch Put");
      }
      handler->Put(key, value);
    } else if (tag == kTypeDeletion) {
      if (!GetLengthPrefixedSlice(&remaining, &key)) {
        return Status::Corruption("bad WriteBatch Delete");
      }
      handler->Delete(key);
    } else {
      return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // The header count must agree with what was actually decoded.
  if (records_seen != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}
|
||||
|
||||
int WriteBatchInternal::Count(const WriteBatch* b) {
|
||||
return DecodeFixed32(b->rep_.data() + 8);
|
||||
}
|
||||
|
||||
// Overwrite the entry count: a fixed32 stored at byte offset 8 of the header.
void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
  char* const count_field = &b->rep_[8];
  EncodeFixed32(count_field, n);
}
|
||||
|
||||
// Read the batch's sequence number: a fixed64 stored in the first 8 bytes
// of the header.
SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
  const char* const sequence_field = b->rep_.data();
  return SequenceNumber(DecodeFixed64(sequence_field));
}
|
||||
|
||||
// Overwrite the batch's sequence number: a fixed64 stored in the first
// 8 bytes of the header.
void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
  char* const sequence_field = &b->rep_[0];
  EncodeFixed64(sequence_field, seq);
}
|
||||
|
||||
// Append a "kTypeValue key value" record and bump the header count by one.
void WriteBatch::Put(const Slice& key, const Slice& value) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  rep_.push_back(static_cast<char>(kTypeValue));
  PutLengthPrefixedSlice(&rep_, key);
  PutLengthPrefixedSlice(&rep_, value);
}
|
||||
|
||||
// Append a "kTypeDeletion key" record and bump the header count by one.
void WriteBatch::Delete(const Slice& key) {
  const int updated_count = WriteBatchInternal::Count(this) + 1;
  WriteBatchInternal::SetCount(this, updated_count);

  rep_.push_back(static_cast<char>(kTypeDeletion));
  PutLengthPrefixedSlice(&rep_, key);
}
|
||||
|
||||
namespace {
|
||||
class MemTableInserter : public WriteBatch::Handler {
|
||||
public:
|
||||
SequenceNumber sequence_;
|
||||
MemTable* mem_;
|
||||
|
||||
virtual void Put(const Slice& key, const Slice& value) {
|
||||
mem_->Add(sequence_, kTypeValue, key, value);
|
||||
sequence_++;
|
||||
}
|
||||
virtual void Delete(const Slice& key) {
|
||||
mem_->Add(sequence_, kTypeDeletion, key, Slice());
|
||||
sequence_++;
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Replay every record of *b into *memtable, numbering the records
// consecutively from the batch's stored sequence number.  Returns the
// status produced by WriteBatch::Iterate.
Status WriteBatchInternal::InsertInto(const WriteBatch* b,
                                      MemTable* memtable) {
  MemTableInserter applier;
  applier.mem_ = memtable;
  applier.sequence_ = WriteBatchInternal::Sequence(b);
  return b->Iterate(&applier);
}
|
||||
|
||||
// Replace the whole representation of *b (header included) with a copy of
// "contents", which must be at least kHeader bytes long.
void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
  const size_t length = contents.size();
  assert(length >= kHeader);
  b->rep_.assign(contents.data(), length);
}
|
||||
|
||||
// Append all records of *src to *dst.  dst's count grows by src's count;
// src's header (including its sequence number) is not copied.
void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
  const int combined_count = Count(dst) + Count(src);
  SetCount(dst, combined_count);

  assert(src->rep_.size() >= kHeader);
  const std::string& source_rep = src->rep_;
  dst->rep_.append(source_rep.data() + kHeader, source_rep.size() - kHeader);
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
49
src/hyperleveldb/db/write_batch_internal.h
Normal file
49
src/hyperleveldb/db/write_batch_internal.h
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_HYPERLEVELDB_DB_WRITE_BATCH_INTERNAL_H_
|
||||
#define STORAGE_HYPERLEVELDB_DB_WRITE_BATCH_INTERNAL_H_
|
||||
|
||||
#include "../hyperleveldb/write_batch.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
class MemTable;
|
||||
|
||||
// WriteBatchInternal provides static methods for manipulating a
|
||||
// WriteBatch that we don't want in the public WriteBatch interface.
|
||||
class WriteBatchInternal {
|
||||
public:
|
||||
// Return the number of entries in the batch.
|
||||
static int Count(const WriteBatch* batch);
|
||||
|
||||
// Set the count for the number of entries in the batch.
|
||||
static void SetCount(WriteBatch* batch, int n);
|
||||
|
||||
// Return the seqeunce number for the start of this batch.
|
||||
static SequenceNumber Sequence(const WriteBatch* batch);
|
||||
|
||||
// Store the specified number as the seqeunce number for the start of
|
||||
// this batch.
|
||||
static void SetSequence(WriteBatch* batch, SequenceNumber seq);
|
||||
|
||||
static Slice Contents(const WriteBatch* batch) {
|
||||
return Slice(batch->rep_);
|
||||
}
|
||||
|
||||
static size_t ByteSize(const WriteBatch* batch) {
|
||||
return batch->rep_.size();
|
||||
}
|
||||
|
||||
static void SetContents(WriteBatch* batch, const Slice& contents);
|
||||
|
||||
static Status InsertInto(const WriteBatch* batch, MemTable* memtable);
|
||||
|
||||
static void Append(WriteBatch* dst, const WriteBatch* src);
|
||||
};
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
|
||||
#endif // STORAGE_HYPERLEVELDB_DB_WRITE_BATCH_INTERNAL_H_
|
||||
120
src/hyperleveldb/db/write_batch_test.cc
Normal file
120
src/hyperleveldb/db/write_batch_test.cc
Normal file
@@ -0,0 +1,120 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "hyperleveldb/db.h"
|
||||
|
||||
#include "memtable.h"
|
||||
#include "write_batch_internal.h"
|
||||
#include "../hyperleveldb/env.h"
|
||||
#include "../util/logging.h"
|
||||
#include "../util/testharness.h"
|
||||
|
||||
namespace hyperleveldb {
|
||||
|
||||
// Insert *b into a fresh MemTable and render the table's entries, in
// iteration order, as "Put(key, value)@seq" / "Delete(key)@seq" strings
// concatenated together.  "ParseError()" is appended if the insertion
// reported an error; otherwise "CountMismatch()" is appended if the number
// of rendered entries differs from the batch's stored count.
static std::string PrintContents(WriteBatch* b) {
  InternalKeyComparator comparator(BytewiseComparator());
  MemTable* table = new MemTable(comparator);
  table->Ref();
  const Status insert_status = WriteBatchInternal::InsertInto(b, table);

  std::string rendered;
  int entries = 0;
  Iterator* it = table->NewIterator();
  it->SeekToFirst();
  while (it->Valid()) {
    ParsedInternalKey parsed;
    ASSERT_TRUE(ParseInternalKey(it->key(), &parsed));
    if (parsed.type == kTypeValue) {
      rendered += "Put(";
      rendered += parsed.user_key.ToString();
      rendered += ", ";
      rendered += it->value().ToString();
      rendered += ")";
      ++entries;
    } else if (parsed.type == kTypeDeletion) {
      rendered += "Delete(";
      rendered += parsed.user_key.ToString();
      rendered += ")";
      ++entries;
    }
    rendered += "@";
    rendered += NumberToString(parsed.sequence);
    it->Next();
  }
  delete it;

  if (!insert_status.ok()) {
    rendered += "ParseError()";
  } else if (entries != WriteBatchInternal::Count(b)) {
    rendered += "CountMismatch()";
  }

  table->Unref();
  return rendered;
}
|
||||
|
||||
// Empty fixture type named by the TEST cases in this file.
class WriteBatchTest {};
|
||||
|
||||
// A default-constructed batch renders as nothing and reports zero entries.
TEST(WriteBatchTest, Empty) {
  WriteBatch empty_batch;
  ASSERT_EQ("", PrintContents(&empty_batch));
  ASSERT_EQ(0, WriteBatchInternal::Count(&empty_batch));
}
|
||||
|
||||
// Three records numbered from sequence 100.  The expected string lists
// them in the order the memtable iterator yields them.
TEST(WriteBatchTest, Multiple) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  batch.Put(Slice("baz"), Slice("boo"));
  WriteBatchInternal::SetSequence(&batch, 100);

  ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
  ASSERT_EQ(3, WriteBatchInternal::Count(&batch));
  ASSERT_EQ("Put(baz, boo)@102"
            "Delete(box)@101"
            "Put(foo, bar)@100",
            PrintContents(&batch));
}
|
||||
|
||||
// Chop the final byte off a two-record batch: the first record must still
// be applied and the damaged second one reported as a parse error.
TEST(WriteBatchTest, Corruption) {
  WriteBatch batch;
  batch.Put(Slice("foo"), Slice("bar"));
  batch.Delete(Slice("box"));
  WriteBatchInternal::SetSequence(&batch, 200);

  Slice contents = WriteBatchInternal::Contents(&batch);
  Slice truncated(contents.data(), contents.size() - 1);
  WriteBatchInternal::SetContents(&batch, truncated);

  ASSERT_EQ("Put(foo, bar)@200"
            "ParseError()",
            PrintContents(&batch));
}
|
||||
|
||||
// Repeatedly append b2 onto b1.  Rendered sequence numbers start at b1's
// sequence (200); b2's own sequence (300) never shows up in the output.
TEST(WriteBatchTest, Append) {
  WriteBatch b1, b2;
  WriteBatchInternal::SetSequence(&b1, 200);
  WriteBatchInternal::SetSequence(&b2, 300);

  // Appending an empty batch adds nothing.
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("",
            PrintContents(&b1));

  // Append a single Put.
  b2.Put("a", "va");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200",
            PrintContents(&b1));

  // Reset b2 so only the new Put is appended.
  b2.Clear();
  b2.Put("b", "vb");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200"
            "Put(b, vb)@201",
            PrintContents(&b1));

  // b2 is not cleared here, so its earlier Put is appended a second time
  // together with the new Delete.
  b2.Delete("foo");
  WriteBatchInternal::Append(&b1, &b2);
  ASSERT_EQ("Put(a, va)@200"
            "Put(b, vb)@202"
            "Put(b, vb)@201"
            "Delete(foo)@203",
            PrintContents(&b1));
}
|
||||
|
||||
} // namespace hyperleveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
||||
Reference in New Issue
Block a user