Clean PlainTableReader's variables for better data locality

Summary:
Clean PlainTableReader's data structures:
(1) inline bloom_ (in order to do this, change DynamicBloom to allow lazy initialization)
(2) remove from the class some variables that are only used during initialization
(3) move variables that are not used in the normal read code paths to the end of the class, and reference prefix_extractor directly
(4) make Options a reference.

Test Plan: make all check

Reviewers: haobo, ljin

Reviewed By: ljin

Subscribers: igor, yhchiang, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D18891
This commit is contained in:
sdong
2014-06-09 12:30:19 -07:00
parent f43c8262c2
commit 80f409ea37
5 changed files with 130 additions and 99 deletions

View File

@@ -20,6 +20,7 @@
#include "table/table_reader.h"
#include "table/plain_table_factory.h"
#include "util/arena.h"
#include "util/dynamic_bloom.h"
namespace rocksdb {
@@ -31,7 +32,6 @@ class RandomAccessFile;
struct ReadOptions;
class TableCache;
class TableReader;
class DynamicBloom;
class InternalKeyComparator;
using std::unique_ptr;
@@ -73,10 +73,7 @@ class PlainTableReader: public TableReader {
PlainTableReader(const Options& options, unique_ptr<RandomAccessFile>&& file,
const EnvOptions& storage_options,
const InternalKeyComparator& internal_comparator,
uint64_t file_size, int bloom_num_bits,
double hash_table_ratio, size_t index_sparseness,
const TableProperties* table_properties,
size_t huge_page_tlb_size);
uint64_t file_size, const TableProperties* table_properties);
virtual ~PlainTableReader();
protected:
@@ -126,7 +123,9 @@ class PlainTableReader: public TableReader {
// ....
// record N file offset: fixedint32
// <end>
Status PopulateIndex(TableProperties* props);
Status PopulateIndex(TableProperties* props, int bloom_bits_per_key,
double hash_table_ratio, size_t index_sparseness,
size_t huge_page_tlb_size);
private:
struct IndexRecord;
@@ -141,35 +140,17 @@ class PlainTableReader: public TableReader {
uint32_t* index_;
int index_size_ = 0;
char* sub_index_;
Options options_;
const EnvOptions& soptions_;
unique_ptr<RandomAccessFile> file_;
const InternalKeyComparator internal_comparator_;
// represents plain table's current status.
Status status_;
Slice file_data_;
uint32_t file_size_;
const double kHashTableRatio;
const int kBloomBitsPerKey;
// To speed up the search for keys with same prefix, we'll add index key for
// every N keys, where the "N" is determined by
// kIndexIntervalForSamePrefixKeys
const size_t kIndexIntervalForSamePrefixKeys = 16;
// Bloom filter is used to rule out non-existent key
unique_ptr<DynamicBloom> bloom_;
Arena arena_;
std::shared_ptr<const TableProperties> table_properties_;
// data_start_offset_ and data_end_offset_ defines the range of the
// sst file that stores data.
const uint32_t data_start_offset_ = 0;
const uint32_t data_end_offset_;
const size_t user_key_len_;
const size_t huge_page_tlb_size_;
const SliceTransform* prefix_extractor_;
static const size_t kNumInternalBytes = 8;
static const uint32_t kSubIndexMask = 0x80000000;
@@ -177,6 +158,16 @@ class PlainTableReader: public TableReader {
static const uint64_t kMaxFileSize = 1u << 31;
static const size_t kRecordsPerGroup = 256;
// Bloom filter is used to rule out non-existent key
bool enable_bloom_;
DynamicBloom bloom_;
Arena arena_;
const Options& options_;
unique_ptr<RandomAccessFile> file_;
uint32_t file_size_;
std::shared_ptr<const TableProperties> table_properties_;
// Returns true when user_key_len_ is anything other than the
// kPlainTableVariableLength sentinel, i.e. the table was not marked as
// having variable-length user keys.
bool IsFixedLength() const {
return user_key_len_ != kPlainTableVariableLength;
}
@@ -193,10 +184,13 @@ class PlainTableReader: public TableReader {
// If the bloom filter is enabled, all the keys' full-key hash will be added
// to the bloom filter.
Status PopulateIndexRecordList(IndexRecordList* record_list,
int* num_prefixes) const;
int* num_prefixes, int bloom_bits_per_key,
size_t index_sparseness);
// Internal helper function to allocate memory for indexes and bloom filters
void AllocateIndexAndBloom(int num_prefixes);
void AllocateIndexAndBloom(int num_prefixes, int bloom_bits_per_key,
double hash_table_ratio,
size_t huge_page_tlb_size);
// Internal helper function to bucket index record list to hash buckets.
// bucket_header is a vector of size hash_table_size_, with each entry
@@ -214,7 +208,8 @@ class PlainTableReader: public TableReader {
// indexes and counts generated by BucketizeIndexesAndFillBloom().
void FillIndexes(const size_t kSubIndexSize,
const std::vector<IndexRecord*>& bucket_headers,
const std::vector<uint32_t>& entries_per_bucket);
const std::vector<uint32_t>& entries_per_bucket,
size_t huge_page_tlb_size);
// Read a plain table key from the position `start`. The read content
// will be written to `key` and the size of read bytes will be populated
@@ -244,7 +239,7 @@ class PlainTableReader: public TableReader {
Slice GetPrefixFromUserKey(const Slice& user_key) const {
if (!IsTotalOrderMode()) {
return options_.prefix_extractor->Transform(user_key);
return prefix_extractor_->Transform(user_key);
} else {
// Use empty slice as prefix if prefix_extractor is not set. In that case,
// it falls back to pure binary search and total iterator seek is
@@ -253,9 +248,7 @@ class PlainTableReader: public TableReader {
}
}
bool IsTotalOrderMode() const {
return (options_.prefix_extractor.get() == nullptr);
}
// Returns true when no prefix extractor is set (prefix_extractor_ is null).
bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); }
// No copying allowed
explicit PlainTableReader(const TableReader&) = delete;