mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
[Rocksdb] Support Merge operation in rocksdb
Summary: This diff introduces a new Merge operation into rocksdb. The purpose of this review is mostly getting feedback from the team (everyone please) on the design. Please focus on the four files under include/leveldb/, as they spell out the client-visible interface change. include/leveldb/db.h include/leveldb/merge_operator.h include/leveldb/options.h include/leveldb/write_batch.h Please go over local/my_test.cc carefully, as it is a concrete use case. Please also review the implementation files to see if the straw man implementation makes sense. Note that the diff does pass all make check and truly supports forward iterator over db and a version of Get that's based on iterator. Future work: - Integration with compaction - A raw Get implementation I am working on a wiki that explains the design and implementation choices, but coding comes just naturally and I think it might be a good idea to share the code earlier. The code is heavily commented. Test Plan: run all local tests Reviewers: dhruba, heyongqiang Reviewed By: dhruba CC: leveldb, zshao, sheki, emayanke, MarkCallaghan Differential Revision: https://reviews.facebook.net/D9651
This commit is contained in:
@@ -83,6 +83,14 @@ class DB {
|
||||
// Note: consider setting options.sync = true.
|
||||
virtual Status Delete(const WriteOptions& options, const Slice& key) = 0;
|
||||
|
||||
// Merge the database entry for "key" with "value". Returns OK on success,
|
||||
// and a non-OK status on error. The semantics of this operation is
|
||||
// determined by the user provided merge_operator when opening DB.
|
||||
// Note: consider setting options.sync = true.
|
||||
virtual Status Merge(const WriteOptions& options,
|
||||
const Slice& key,
|
||||
const Slice& value) = 0;
|
||||
|
||||
// Apply the specified updates to the database.
|
||||
// Returns OK on success, non-OK on failure.
|
||||
// Note: consider setting options.sync = true.
|
||||
@@ -185,7 +193,7 @@ class DB {
|
||||
virtual Status GetLiveFiles(std::vector<std::string>&,
|
||||
uint64_t* manifest_file_size) = 0;
|
||||
|
||||
// The sequence number of the most recent transaction.
|
||||
// The sequence number of the most recent transaction.
|
||||
virtual SequenceNumber GetLatestSequenceNumber() = 0;
|
||||
|
||||
// Returns an iterator for all writes since the sequence number
|
||||
|
||||
74
include/leveldb/merge_operator.h
Normal file
74
include/leveldb/merge_operator.h
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_INCLUDE_MERGE_OPERATOR_H_
|
||||
#define STORAGE_LEVELDB_INCLUDE_MERGE_OPERATOR_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class Slice;
|
||||
class Logger;
|
||||
|
||||
// The Merge Operator interface.
|
||||
// Client needs to provide an object implementing this interface if Merge
|
||||
// operation is accessed.
|
||||
// Essentially, MergeOperator specifies the SEMANTICS of a merge, which only
|
||||
// client knows. It could be numeric addition, list append, string
|
||||
// concatenation, ... , anything.
|
||||
// The library, on the other hand, is concerned with the exercise of this
|
||||
// interface, at the right time (during get, iteration, compaction...)
|
||||
// Note that, even though in principle we don't require any special property
|
||||
// of the merge operator, the current rocksdb compaction order does imply that
|
||||
// an associative operator could be exercised more naturally (and more
|
||||
// efficiently).
|
||||
//
|
||||
// Refer to my_test.cc for an example of implementation
|
||||
//
|
||||
class MergeOperator {
|
||||
public:
|
||||
virtual ~MergeOperator() {}
|
||||
|
||||
// Gives the client a way to express the read -> modify -> write semantics
|
||||
// key: (IN) The key that's associated with this merge operation.
|
||||
// Client could multiplex the merge operator based on it
|
||||
// if the key space is partitioned and different subspaces
|
||||
// refer to different types of data which have different
|
||||
// merge operation semantics
|
||||
// existing_value: (IN) null indicates that the key does not exist before this op
|
||||
// value: (IN) The passed-in merge operand value (when Merge is issued)
|
||||
// new_value:(OUT) Client is responsible for filling the merge result here
|
||||
// logger: (IN) Client could use this to log errors during merge.
|
||||
//
|
||||
// Note: Merge does not return anything to indicate if a merge is successful
|
||||
// or not.
|
||||
// Rationale: If a merge failed due to, say de-serialization error, we still
|
||||
// need to define a consistent merge result. Should we throw away
|
||||
// the existing value? the merge operand? Or reset the merged value
|
||||
// to something? The rocksdb library is not in a position to make the
|
||||
// right choice. On the other hand, client knows exactly what
|
||||
// happened during Merge, thus is able to make the best decision.
|
||||
// Just save the final decision in new_value. logger is passed in,
|
||||
// in case client wants to leave a trace of what went wrong.
|
||||
virtual void Merge(const Slice& key,
|
||||
const Slice* existing_value,
|
||||
const Slice& value,
|
||||
std::string* new_value,
|
||||
Logger* logger) const = 0;
|
||||
|
||||
|
||||
// The name of the MergeOperator. Used to check for MergeOperator
|
||||
// mismatches (i.e., a DB created with one MergeOperator is
|
||||
// accessed using a different MergeOperator)
|
||||
// TODO: the name is currently not stored persistently and thus
|
||||
// no checking is enforced. Client is responsible for providing
|
||||
// consistent MergeOperator between DB opens.
|
||||
virtual const char* Name() const = 0;
|
||||
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
#endif // STORAGE_LEVELDB_INCLUDE_MERGE_OPERATOR_H_
|
||||
@@ -20,6 +20,7 @@ class Comparator;
|
||||
class Env;
|
||||
class FilterPolicy;
|
||||
class Logger;
|
||||
class MergeOperator;
|
||||
class Snapshot;
|
||||
|
||||
using std::shared_ptr;
|
||||
@@ -63,6 +64,18 @@ struct Options {
|
||||
// comparator provided to previous open calls on the same DB.
|
||||
const Comparator* comparator;
|
||||
|
||||
// REQUIRES: The client must provide a merge operator if Merge operation
|
||||
// needs to be accessed. Calling Merge on a DB without a merge operator
|
||||
// would result in Status::NotSupported. The client must ensure that the
|
||||
// merge operator supplied here has the same name and *exactly* the same
|
||||
// semantics as the merge operator provided to previous open calls on
|
||||
// the same DB. The only exception is reserved for upgrade, where a DB
|
||||
// previously without a merge operator is introduced to Merge operation
|
||||
// for the first time. It's necessary to specify a merge operator when
|
||||
// opening the DB in this case.
|
||||
// Default: nullptr
|
||||
const MergeOperator* merge_operator;
|
||||
|
||||
// If true, the database will be created if it is missing.
|
||||
// Default: false
|
||||
bool create_if_missing;
|
||||
|
||||
@@ -47,6 +47,9 @@ class Status {
|
||||
static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) {
|
||||
return Status(kIOError, msg, msg2);
|
||||
}
|
||||
static Status MergeInProgress(const Slice& msg, const Slice& msg2 = Slice()) {
|
||||
return Status(kMergeInProgress, msg, msg2);
|
||||
}
|
||||
|
||||
// Returns true iff the status indicates success.
|
||||
bool ok() const { return (state_ == nullptr); }
|
||||
@@ -66,6 +69,9 @@ class Status {
|
||||
// Returns true iff the status indicates an IOError.
|
||||
bool IsIOError() const { return code() == kIOError; }
|
||||
|
||||
// Returns true iff the status indicates a MergeInProgress.
|
||||
bool IsMergeInProgress() const { return code() == kMergeInProgress; }
|
||||
|
||||
// Return a string representation of this status suitable for printing.
|
||||
// Returns the string "OK" for success.
|
||||
std::string ToString() const;
|
||||
@@ -84,7 +90,8 @@ class Status {
|
||||
kCorruption = 2,
|
||||
kNotSupported = 3,
|
||||
kInvalidArgument = 4,
|
||||
kIOError = 5
|
||||
kIOError = 5,
|
||||
kMergeInProgress = 6
|
||||
};
|
||||
|
||||
Code code() const {
|
||||
|
||||
@@ -36,6 +36,10 @@ class WriteBatch {
|
||||
// Store the mapping "key->value" in the database.
|
||||
void Put(const Slice& key, const Slice& value);
|
||||
|
||||
// Merge "value" with the existing value of "key" in the database.
|
||||
// "key->merge(existing, value)"
|
||||
void Merge(const Slice& key, const Slice& value);
|
||||
|
||||
// If the database contains a mapping for "key", erase it. Else do nothing.
|
||||
void Delete(const Slice& key);
|
||||
|
||||
@@ -47,6 +51,10 @@ class WriteBatch {
|
||||
public:
|
||||
virtual ~Handler();
|
||||
virtual void Put(const Slice& key, const Slice& value) = 0;
|
||||
// Merge is not pure virtual. Otherwise, we would break existing
|
||||
// clients of Handler on a source code level.
|
||||
// The default implementation simply throws a runtime exception.
|
||||
virtual void Merge(const Slice& key, const Slice& value);
|
||||
virtual void Delete(const Slice& key) = 0;
|
||||
};
|
||||
Status Iterate(Handler* handler) const;
|
||||
|
||||
Reference in New Issue
Block a user