Make the block-based table's index pluggable

Summary:
This patch introduced a new table options that allows us to make
block-based table's index pluggable.

To support that new features:

* Code has been refacotred to be more flexible and supports this option well.
* More documentation is added for the existing obsecure functionalities.
* Big surgeon on DataBlockReader(), where the logic was really convoluted.
* Other small code cleanups.

The pluggablility will mostly affect development of internal modules
and won't change frequently, as a result I intentionally avoid
heavy-weight patterns (like factory) and try to make it simple.

Test Plan: make all check

Reviewers: haobo, sdong

Reviewed By: sdong

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16395
This commit is contained in:
kailiu
2014-02-28 18:19:07 -08:00
parent bf86af5174
commit 74939a9e13
7 changed files with 777 additions and 548 deletions

View File

@@ -8,12 +8,14 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <stdint.h>
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include <memory>
#include <utility>
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "table/table_reader.h"
#include "util/coding.h"
@@ -21,14 +23,19 @@ namespace rocksdb {
class Block;
class BlockHandle;
class Cache;
class FilterBlockReader;
class Footer;
struct Options;
class InternalKeyComparator;
class Iterator;
class RandomAccessFile;
struct ReadOptions;
class TableCache;
class TableReader;
class FilterBlockReader;
class WritableFile;
struct BlockBasedTableOptions;
struct EnvOptions;
struct Options;
struct ReadOptions;
using std::unique_ptr;
@@ -91,7 +98,9 @@ class BlockBasedTable : public TableReader {
~BlockBasedTable();
bool TEST_filter_block_preloaded() const;
bool TEST_index_block_preloaded() const;
bool TEST_index_reader_preloaded() const;
// Implementation of IndexReader will be exposed to internal cc file only.
class IndexReader;
private:
template <class TValue>
@@ -101,40 +110,51 @@ class BlockBasedTable : public TableReader {
Rep* rep_;
bool compaction_optimized_;
static Iterator* BlockReader(void*, const ReadOptions&,
const EnvOptions& soptions,
const InternalKeyComparator& icomparator,
const Slice&, bool for_compaction);
static Iterator* DataBlockReader(void*, const ReadOptions&,
const EnvOptions& soptions,
const InternalKeyComparator& icomparator,
const Slice&, bool for_compaction);
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&,
bool* didIO, bool for_compaction = false);
static Iterator* DataBlockReader(void*, const ReadOptions&, const Slice&,
bool* didIO, bool for_compaction = false);
// if `no_io == true`, we will not try to read filter from sst file
// if it is not cached yet.
// For the following two functions:
// if `no_io == true`, we will not try to read filter/index from sst file
// were they not present in cache yet.
CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
Iterator* IndexBlockReader(const ReadOptions& options) const;
// Get the iterator from the index reader.
//
// Note: ErrorIterator with Status::Incomplete shall be returned if all the
// following conditions are met:
// 1. We enabled table_options.cache_index_and_filter_blocks.
// 2. index is not present in block cache.
// 3. We disallowed any io to be performed, that is, read_options ==
// kBlockCacheTier
Iterator* NewIndexIterator(const ReadOptions& read_options) const;
// Read the block, either from sst file or from cache. This method will try
// to read from cache only when block_cache is set or ReadOption doesn't
// explicitly prohibit storage IO.
// Read block cache from block caches (if set): block_cache and
// block_cache_compressed.
// On success, Status::OK with be returned and @block will be populated with
// pointer to the block as well as its block handle.
static Status GetDataBlockFromCache(
const Slice& block_cache_key, const Slice& compressed_block_cache_key,
Cache* block_cache, Cache* block_cache_compressed, Statistics* statistics,
const ReadOptions& read_options,
BlockBasedTable::CachableEntry<Block>* block);
// Put a raw block (maybe compressed) to the corresponding block caches.
// This method will perform decompression against raw_block if needed and then
// populate the block caches.
// On success, Status::OK will be returned; also @block will be populated with
// uncompressed block and its cache handle.
//
// If the block is read from cache, the statistics for cache miss/hit of the
// the given type of block will be updated. User can specify
// `block_cache_miss_ticker` and `block_cache_hit_ticker` for the statistics
// update.
//
// On success, the `result` parameter will be populated, which contains a
// pointer to the block and its cache handle, which will be nullptr if it's
// not read from the cache.
static Status GetBlock(const BlockBasedTable* table,
const BlockHandle& handle,
const ReadOptions& options,
bool for_compaction,
Tickers block_cache_miss_ticker,
Tickers block_cache_hit_ticker,
bool* didIO,
CachableEntry<Block>* result);
// REQUIRES: raw_block is heap-allocated. PutDataBlockToCache() will be
// responsible for releasing its memory if error occurs.
static Status PutDataBlockToCache(
const Slice& block_cache_key, const Slice& compressed_block_cache_key,
Cache* block_cache, Cache* block_cache_compressed,
const ReadOptions& read_options, Statistics* statistics,
CachableEntry<Block>* block, Block* raw_block);
// Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
// after a call to Seek(key), until handle_result returns false.
@@ -144,6 +164,7 @@ class BlockBasedTable : public TableReader {
void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value);
std::pair<Status, IndexReader*> CreateIndexReader() const;
// Read the meta block from sst.
static Status ReadMetaBlock(
@@ -159,10 +180,9 @@ class BlockBasedTable : public TableReader {
static void SetupCacheKeyPrefix(Rep* rep);
explicit BlockBasedTable(Rep* rep) :
compaction_optimized_(false) {
rep_ = rep;
}
explicit BlockBasedTable(Rep* rep)
: rep_(rep), compaction_optimized_(false) {}
// Generate a cache key prefix from the file
static void GenerateCachePrefix(Cache* cc,
RandomAccessFile* file, char* buffer, size_t* size);