mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Some refactorings on plain table
Summary: Plain table has been working well and this is just a nit-picking patch, which was generated during my code reading. No real functional changes, only some changes regarding: * Improve some comments from the perspective of a "new" code reader. * Change some magic numbers to constants, which can help us to parameterize them in the future. * Did some style, naming, C++ convention changes. * Fix warnings from new "arc lint" Test Plan: make check Reviewers: sdong, haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D15429
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
@@ -35,7 +36,7 @@ using std::unordered_map;
|
||||
//
|
||||
// The implementation of IndexedTableReader requires that the output file be mmapped
|
||||
class PlainTableReader: public TableReader {
|
||||
public:
|
||||
public:
|
||||
static Status Open(const Options& options, const EnvOptions& soptions,
|
||||
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
||||
unique_ptr<TableReader>* table, const int bloom_num_bits,
|
||||
@@ -65,12 +66,12 @@ public:
|
||||
const TableProperties& table_properties);
|
||||
~PlainTableReader();
|
||||
|
||||
private:
|
||||
private:
|
||||
struct IndexRecord;
|
||||
class IndexRecordList;
|
||||
|
||||
uint32_t* hash_table_ = nullptr;
|
||||
int hash_table_size_;
|
||||
int hash_table_size_ = 0;
|
||||
char* sub_index_ = nullptr;
|
||||
|
||||
Options options_;
|
||||
@@ -82,24 +83,30 @@ private:
|
||||
uint32_t version_;
|
||||
uint32_t file_size_;
|
||||
|
||||
const double hash_table_ratio_;
|
||||
const int bloom_bits_per_key_;
|
||||
DynamicBloom* bloom_;
|
||||
const double kHashTableRatio;
|
||||
const int kBloomBitsPerKey;
|
||||
DynamicBloom* bloom_ = nullptr;
|
||||
|
||||
TableProperties table_properties_;
|
||||
const uint32_t data_start_offset_;
|
||||
const uint32_t data_start_offset_ = 0;
|
||||
const uint32_t data_end_offset_;
|
||||
const size_t user_key_len_;
|
||||
|
||||
static const size_t kNumInternalBytes = 8;
|
||||
static const uint32_t kSubIndexMask = 0x80000000;
|
||||
static const size_t kOffsetLen = sizeof(uint32_t);
|
||||
static const uint64_t kMaxFileSize = 1u << 31;
|
||||
static const size_t kRecordsPerGroup = 256;
|
||||
// To speed up the search for keys with same prefix, we'll add index key for
|
||||
// every N keys, where the "N" is determined by
|
||||
// kIndexIntervalForSamePrefixKeys
|
||||
static const size_t kIndexIntervalForSamePrefixKeys = 16;
|
||||
|
||||
bool IsFixedLength() {
|
||||
bool IsFixedLength() const {
|
||||
return user_key_len_ != PlainTableFactory::kVariableLength;
|
||||
}
|
||||
|
||||
size_t GetFixedInternalKeyLength() {
|
||||
size_t GetFixedInternalKeyLength() const {
|
||||
return user_key_len_ + kNumInternalBytes;
|
||||
}
|
||||
|
||||
@@ -108,32 +115,67 @@ private:
|
||||
|
||||
// Internal helper function to generate an IndexRecordList object from all
|
||||
// the rows, which contains index records as a list.
|
||||
int PopulateIndexRecordList(IndexRecordList& record_list);
|
||||
int PopulateIndexRecordList(IndexRecordList* record_list);
|
||||
|
||||
// Internal helper function to allocate memory for indexes and bloom filters
|
||||
void Allocate(int num_prefixes);
|
||||
void AllocateIndexAndBloom(int num_prefixes);
|
||||
|
||||
// Internal helper function to bucket index record list to hash buckets.
|
||||
// hash2offsets is sized of of hash_table_size_, each contains a linked list
|
||||
// hash_to_offsets is of size hash_table_size_; each entry contains a linked
|
||||
// list
|
||||
// of offsets for the hash, in reversed order.
|
||||
// bucket_count is of size hash_table_size_. Each value is how many index
|
||||
// records are there in hash2offsets for the same bucket.
|
||||
// records are there in hash_to_offsets for the same bucket.
|
||||
size_t BucketizeIndexesAndFillBloom(
|
||||
IndexRecordList& record_list, int num_prefixes,
|
||||
std::vector<IndexRecord*>& hash2offsets,
|
||||
std::vector<uint32_t>& bucket_count);
|
||||
std::vector<IndexRecord*>* hash_to_offsets,
|
||||
std::vector<uint32_t>* bucket_count);
|
||||
|
||||
// Internal helper function to fill the indexes and bloom filters to internal
|
||||
// data structures. hash2offsets and bucket_count are bucketized indexes and
|
||||
// counts generated by BucketizeIndexesAndFillBloom().
|
||||
// data structures. hash_to_offsets and bucket_count are bucketized indexes
|
||||
// and counts generated by BucketizeIndexesAndFillBloom().
|
||||
void FillIndexes(size_t sub_index_size_needed,
|
||||
std::vector<IndexRecord*>& hash2offsets,
|
||||
std::vector<uint32_t>& bucket_count);
|
||||
const std::vector<IndexRecord*>& hash_to_offsets,
|
||||
const std::vector<uint32_t>& bucket_count);
|
||||
|
||||
// Populate the internal indexes. It must be called before
|
||||
// any query to the table.
|
||||
// This query will populate the hash table hash_table_, the second
|
||||
// level of indexes sub_index_ and bloom filter filter_slice_ if enabled.
|
||||
// PopulateIndex() builds index of keys. It must be called before any query
|
||||
// to the table.
|
||||
//
|
||||
// hash_table_ contains buckets size of hash_table_size_, each is a 32-bit
|
||||
// integer. The lower 31 bits contain an offset value (explained below) and
|
||||
// the first bit of the integer indicates type of the offset.
|
||||
//
|
||||
// +--------------+------------------------------------------------------+
|
||||
// | Flag (1 bit) | Offset to binary search buffer or file (31 bits) +
|
||||
// +--------------+------------------------------------------------------+
|
||||
//
|
||||
// Explanation for the "flag bit":
|
||||
//
|
||||
// 0 indicates that the bucket contains only one prefix (no conflict when
|
||||
// hashing this prefix), whose first row starts from this offset of the
|
||||
// file.
|
||||
// 1 indicates that the bucket contains more than one prefix, or there
|
||||
// are too many rows for one prefix so we need a binary search for it. In
|
||||
// this case, the offset indicates the offset of sub_index_ holding the
|
||||
// binary search indexes of keys for those rows. Those binary search indexes
|
||||
// are organized in this way:
|
||||
//
|
||||
// The first 4 bytes indicate how many indexes (N) are stored after it. After
|
||||
// it, there are N 32-bit integers, each pointing to an offset of the file,
|
||||
// which
|
||||
// points to the start of a row. Those offsets need to be guaranteed to be in
|
||||
// ascending order so the keys they are pointing to are also in ascending
|
||||
// order
|
||||
// to make sure we can use them to do binary searches. Below is visual
|
||||
// presentation of a bucket.
|
||||
//
|
||||
// <begin>
|
||||
// number_of_records: varint32
|
||||
// record 1 file offset: fixedint32
|
||||
// record 2 file offset: fixedint32
|
||||
// ....
|
||||
// record N file offset: fixedint32
|
||||
// <end>
|
||||
Status PopulateIndex();
|
||||
|
||||
// Check bloom filter to see whether it might contain this prefix.
|
||||
@@ -163,41 +205,4 @@ private:
|
||||
explicit PlainTableReader(const TableReader&) = delete;
|
||||
void operator=(const TableReader&) = delete;
|
||||
};
|
||||
|
||||
// Iterator to iterate IndexedTable
|
||||
class PlainTableIterator: public Iterator {
|
||||
public:
|
||||
explicit PlainTableIterator(PlainTableReader* table);
|
||||
~PlainTableIterator();
|
||||
|
||||
bool Valid() const;
|
||||
|
||||
void SeekToFirst();
|
||||
|
||||
void SeekToLast();
|
||||
|
||||
void Seek(const Slice& target);
|
||||
|
||||
void Next();
|
||||
|
||||
void Prev();
|
||||
|
||||
Slice key() const;
|
||||
|
||||
Slice value() const;
|
||||
|
||||
Status status() const;
|
||||
|
||||
private:
|
||||
PlainTableReader* table_;
|
||||
uint32_t offset_;
|
||||
uint32_t next_offset_;
|
||||
Slice key_;
|
||||
Slice value_;
|
||||
Status status_;
|
||||
// No copying allowed
|
||||
PlainTableIterator(const PlainTableIterator&) = delete;
|
||||
void operator=(const Iterator&) = delete;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
Reference in New Issue
Block a user