[rocksdb] new CompactionFilterV2 API

Summary:
This diff adds a new CompactionFilterV2 API that roll up the
decisions of kv pairs during compactions. These kv pairs must share the
same key prefix. They are buffered inside the db.

    typedef std::vector<Slice> SliceVector;
    virtual std::vector<bool> Filter(int level,
                                 const SliceVector& keys,
                                 const SliceVector& existing_values,
                                 std::vector<std::string>* new_values,
                                 std::vector<bool>* values_changed
                                 ) const = 0;

Application can override the Filter() function to operate
on the buffered kv pairs. More details in the inline documentation.

Test Plan:
make check. Added unit tests to make sure Keep, Delete,
Change all works.

Reviewers: haobo

CCs: leveldb

Differential Revision: https://reviews.facebook.net/D15087
This commit is contained in:
Danny Guo
2014-01-09 17:52:11 -08:00
parent cda4006e87
commit b47812fba6
12 changed files with 850 additions and 94 deletions

View File

@@ -10,26 +10,27 @@
#define STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
#include <string>
#include <vector>
namespace rocksdb {
class Slice;
class SliceTransform;
// Context information of a compaction run
struct CompactionFilterContext {
// Does this compaction run include all data files
bool is_full_compaction;
// Is this compaction requested by the client (true),
// or is it occurring as an automatic compaction process
bool is_manual_compaction;
};
// CompactionFilter allows an application to modify/delete a key-value at
// the time of compaction.
class CompactionFilter {
public:
// Context information of a compaction run
struct Context {
// Does this compaction run include all data files
bool is_full_compaction;
// Is this compaction requested by the client (true),
// or is it occurring as an automatic compaction process
bool is_manual_compaction;
};
virtual ~CompactionFilter() {}
// The compaction process invokes this
@@ -64,14 +65,47 @@ class CompactionFilter {
virtual const char* Name() const = 0;
};
// CompactionFilterV2 that buffers kv pairs sharing the same prefix and let
// application layer to make individual decisions for all the kv pairs in the
// buffer.
class CompactionFilterV2 {
public:
virtual ~CompactionFilterV2() {}
// The compaction process invokes this method for all the kv pairs
// sharing the same prefix. It is a "roll-up" version of CompactionFilter.
//
// Each entry in the return vector indicates if the corresponding kv should
// be preserved in the output of this compaction run. The application can
// inspect the exisitng values of the keys and make decision based on it.
//
// When a value is to be preserved, the application has the option
// to modify the entry in existing_values and pass it back through an entry
// in new_values. A corresponding values_changed entry needs to be set to
// true in this case. Note that the new_values vector contains only changed
// values, i.e. new_values.size() <= values_changed.size().
//
typedef std::vector<Slice> SliceVector;
virtual std::vector<bool> Filter(int level,
const SliceVector& keys,
const SliceVector& existing_values,
std::vector<std::string>* new_values,
std::vector<bool>* values_changed)
const = 0;
// Returns a name that identifies this compaction filter.
// The name will be printed to LOG file on start up for diagnosis.
virtual const char* Name() const = 0;
};
// Each compaction will create a new CompactionFilter allowing the
// application to know about different campactions
class CompactionFilterFactory {
public:
virtual ~CompactionFilterFactory() { };
virtual ~CompactionFilterFactory() { }
virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
const CompactionFilter::Context& context) = 0;
const CompactionFilterContext& context) = 0;
// Returns a name that identifies this compaction filter factory.
virtual const char* Name() const = 0;
@@ -82,7 +116,7 @@ class CompactionFilterFactory {
class DefaultCompactionFilterFactory : public CompactionFilterFactory {
public:
virtual std::unique_ptr<CompactionFilter>
CreateCompactionFilter(const CompactionFilter::Context& context) override {
CreateCompactionFilter(const CompactionFilterContext& context) override {
return std::unique_ptr<CompactionFilter>(nullptr);
}
@@ -91,6 +125,65 @@ class DefaultCompactionFilterFactory : public CompactionFilterFactory {
}
};
// Each compaction will create a new CompactionFilterV2
//
// CompactionFilterFactoryV2 enables application to specify a prefix and use
// CompactionFilterV2 to filter kv-pairs in batches. Each batch contains all
// the kv-pairs sharing the same prefix.
//
// This is useful for applications that require grouping kv-pairs in
// compaction filter to make a purge/no-purge decision. For example, if the
// key prefix is user id and the rest of key represents the type of value.
// This batching filter will come in handy if the application's compaction
// filter requires knowledge of all types of values for any user id.
//
class CompactionFilterFactoryV2 {
public:
explicit CompactionFilterFactoryV2(const SliceTransform* prefix_extractor)
: prefix_extractor_(prefix_extractor) { }
virtual ~CompactionFilterFactoryV2() { }
virtual std::unique_ptr<CompactionFilterV2> CreateCompactionFilterV2(
const CompactionFilterContext& context) = 0;
// Returns a name that identifies this compaction filter factory.
virtual const char* Name() const = 0;
const SliceTransform* GetPrefixExtractor() const {
return prefix_extractor_;
}
void SetPrefixExtractor(const SliceTransform* prefix_extractor) {
prefix_extractor_ = prefix_extractor;
}
private:
// Prefix extractor for compaction filter v2
// Keys sharing the same prefix will be buffered internally.
// Client can implement a Filter callback function to operate on the buffer
const SliceTransform* prefix_extractor_;
};
// Default implementaion of CompactionFilterFactoryV2 which does not
// return any filter
class DefaultCompactionFilterFactoryV2 : public CompactionFilterFactoryV2 {
public:
explicit DefaultCompactionFilterFactoryV2(
const SliceTransform* prefix_extractor)
: CompactionFilterFactoryV2(prefix_extractor) { }
virtual std::unique_ptr<CompactionFilterV2>
CreateCompactionFilterV2(
const CompactionFilterContext& context) override {
return std::unique_ptr<CompactionFilterV2>(nullptr);
}
virtual const char* Name() const override {
return "DefaultCompactionFilterFactoryV2";
}
};
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_

View File

@@ -214,7 +214,7 @@ class Env {
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
// Wait for all threads started by StartThread to terminate.
virtual void WaitForJoin() = 0;
virtual void WaitForJoin() {}
// Get thread pool queue length for specific thrad pool.
virtual unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const {

View File

@@ -22,6 +22,7 @@ namespace rocksdb {
class Cache;
class CompactionFilter;
class CompactionFilterFactory;
class CompactionFilterFactoryV2;
class Comparator;
class Env;
enum InfoLogLevel : unsigned char;
@@ -123,6 +124,10 @@ struct Options {
// Default: a factory that doesn't provide any object
std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
// Version TWO of the compaction_filter_factory
// It supports rolling compaction
std::shared_ptr<CompactionFilterFactoryV2> compaction_filter_factory_v2;
// If true, the database will be created if it is missing.
// Default: false
bool create_if_missing;