Revert "Allow allocating dynamic bloom, plain table indexes and hash linked list from huge page TLB"

This reverts commit 7dafa3a1d7.
This commit is contained in:
Igor Canadi
2014-05-04 08:37:09 -07:00
parent 41e5cf2392
commit d69dc64be7
18 changed files with 368 additions and 510 deletions

View File

@@ -8,7 +8,6 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "util/arena.h"
#include <sys/mman.h>
#include <algorithm>
namespace rocksdb {
@@ -39,13 +38,6 @@ Arena::~Arena() {
for (const auto& block : blocks_) {
delete[] block;
}
for (const auto& mmap_info : huge_blocks_) {
auto ret = munmap(mmap_info.addr_, mmap_info.length_);
if (ret != 0) {
// TODO(sdong): Better handling
perror("munmap");
}
}
}
char* Arena::AllocateFallback(size_t bytes, bool aligned) {
@@ -71,29 +63,9 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
}
}
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size) {
char* Arena::AllocateAligned(size_t bytes) {
assert((kAlignUnit & (kAlignUnit - 1)) ==
0); // Pointer size should be a power of 2
#ifdef OS_LINUX
if (huge_page_tlb_size > 0 && bytes > 0) {
// Allocate from a huge page TBL table.
size_t reserved_size =
((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size;
assert(reserved_size >= bytes);
void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE),
(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0);
if (addr == MAP_FAILED) {
perror("mmap");
// fail back to malloc
} else {
blocks_memory_ += reserved_size;
huge_blocks_.push_back(MmapInfo(addr, reserved_size));
return reinterpret_cast<char*>(addr);
}
}
#endif
size_t current_mod =
reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);

View File

@@ -34,14 +34,7 @@ class Arena {
char* Allocate(size_t bytes);
// huge_page_tlb_size: if >0, allocate bytes from huge page TLB and the size
// of the huge page TLB. Bytes will be rounded up to multiple and 2MB and
// allocate huge pages through mmap anonymous option with huge page on.
// The extra space allocated will be wasted. To enable it, need to reserve
// huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt for details.
char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0);
char* AllocateAligned(size_t bytes);
// Returns an estimate of the total memory usage of data allocated
// by the arena (exclude the space allocated but not yet used for future
@@ -67,14 +60,6 @@ class Arena {
// Array of new[] allocated memory blocks
typedef std::vector<char*> Blocks;
Blocks blocks_;
struct MmapInfo {
void* addr_;
size_t length_;
MmapInfo(void* addr, size_t length) : addr_(addr), length_(length) {}
};
std::vector<MmapInfo> huge_blocks_;
size_t irregular_block_num = 0;
// Stats for current active block.

View File

@@ -19,19 +19,18 @@ static uint32_t BloomHash(const Slice& key) {
}
}
DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
DynamicBloom::DynamicBloom(uint32_t total_bits,
uint32_t cl_per_block,
uint32_t num_probes,
uint32_t (*hash_func)(const Slice& key),
size_t huge_page_tlb_size)
: kBlocked(cl_per_block > 0),
kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock *
kBitsPerBlock
: total_bits + 7) /
8 * 8),
kNumBlocks(kBlocked ? kTotalBits / kBitsPerBlock : 1),
kNumProbes(num_probes),
hash_func_(hash_func == nullptr ? &BloomHash : hash_func) {
uint32_t (*hash_func)(const Slice& key))
: kBlocked(cl_per_block > 0),
kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock
* kBitsPerBlock :
total_bits + 7) / 8 * 8),
kNumBlocks(kBlocked ? kTotalBits / kBitsPerBlock : 1),
kNumProbes(num_probes),
hash_func_(hash_func == nullptr ? &BloomHash : hash_func) {
assert(kBlocked ? kTotalBits > 0 : kTotalBits >= kBitsPerBlock);
assert(kNumProbes > 0);
@@ -39,9 +38,7 @@ DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
if (kBlocked) {
sz += CACHE_LINE_SIZE - 1;
}
raw_ = reinterpret_cast<unsigned char*>(
arena_.AllocateAligned(sz, huge_page_tlb_size));
memset(raw_, 0, sz);
raw_ = new unsigned char[sz]();
if (kBlocked && (reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE)) {
data_ = raw_ + CACHE_LINE_SIZE -
reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE;

View File

@@ -8,8 +8,6 @@
#include <atomic>
#include <memory>
#include <util/arena.h>
namespace rocksdb {
class Slice;
@@ -21,17 +19,13 @@ class DynamicBloom {
// cl_per_block: block size in cache lines. When this is non-zero, a
// query/set is done within a block to improve cache locality.
// hash_func: customized hash function
// huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
// withi this page size. Need to reserve huge pages for
// it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0,
uint32_t num_probes = 6,
uint32_t (*hash_func)(const Slice& key) = nullptr,
size_t huge_page_tlb_size = 0);
uint32_t num_probes = 6,
uint32_t (*hash_func)(const Slice& key) = nullptr);
~DynamicBloom() {}
~DynamicBloom() {
delete[] raw_;
}
// Assuming single threaded access to this function.
void Add(const Slice& key);
@@ -55,8 +49,6 @@ class DynamicBloom {
uint32_t (*hash_func_)(const Slice& key);
unsigned char* data_;
unsigned char* raw_;
Arena arena_;
};
inline void DynamicBloom::Add(const Slice& key) { AddHash(hash_func_(key)); }

View File

@@ -53,8 +53,7 @@ struct Node {
class HashLinkListRep : public MemTableRep {
public:
HashLinkListRep(const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size,
size_t huge_page_tlb_size);
const SliceTransform* transform, size_t bucket_size);
virtual KeyHandle Allocate(const size_t len, char** buf) override;
@@ -307,13 +306,13 @@ class HashLinkListRep : public MemTableRep {
HashLinkListRep::HashLinkListRep(const MemTableRep::KeyComparator& compare,
Arena* arena, const SliceTransform* transform,
size_t bucket_size, size_t huge_page_tlb_size)
: MemTableRep(arena),
bucket_size_(bucket_size),
transform_(transform),
compare_(compare) {
char* mem = arena_->AllocateAligned(sizeof(port::AtomicPointer) * bucket_size,
huge_page_tlb_size);
size_t bucket_size)
: MemTableRep(arena),
bucket_size_(bucket_size),
transform_(transform),
compare_(compare) {
char* mem = arena_->AllocateAligned(
sizeof(port::AtomicPointer) * bucket_size);
buckets_ = new (mem) port::AtomicPointer[bucket_size];
@@ -470,13 +469,11 @@ Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head,
MemTableRep* HashLinkListRepFactory::CreateMemTableRep(
const MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform) {
return new HashLinkListRep(compare, arena, transform, bucket_count_,
huge_page_tlb_size_);
return new HashLinkListRep(compare, arena, transform, bucket_count_);
}
MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count,
size_t huge_page_tlb_size) {
return new HashLinkListRepFactory(bucket_count, huge_page_tlb_size);
MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count) {
return new HashLinkListRepFactory(bucket_count);
}
} // namespace rocksdb

View File

@@ -15,9 +15,8 @@ namespace rocksdb {
class HashLinkListRepFactory : public MemTableRepFactory {
public:
explicit HashLinkListRepFactory(size_t bucket_count,
size_t huge_page_tlb_size)
: bucket_count_(bucket_count), huge_page_tlb_size_(huge_page_tlb_size) {}
explicit HashLinkListRepFactory(size_t bucket_count)
: bucket_count_(bucket_count) { }
virtual ~HashLinkListRepFactory() {}
@@ -31,7 +30,6 @@ class HashLinkListRepFactory : public MemTableRepFactory {
private:
const size_t bucket_count_;
const size_t huge_page_tlb_size_;
};
}

View File

@@ -34,7 +34,8 @@ ColumnFamilyOptions::ColumnFamilyOptions()
compaction_filter(nullptr),
compaction_filter_factory(std::shared_ptr<CompactionFilterFactory>(
new DefaultCompactionFilterFactory())),
compaction_filter_factory_v2(new DefaultCompactionFilterFactoryV2()),
compaction_filter_factory_v2(
new DefaultCompactionFilterFactoryV2()),
write_buffer_size(4 << 20),
max_write_buffer_number(2),
min_write_buffer_number_to_merge(1),
@@ -80,7 +81,6 @@ ColumnFamilyOptions::ColumnFamilyOptions()
inplace_callback(nullptr),
memtable_prefix_bloom_bits(0),
memtable_prefix_bloom_probes(6),
memtable_prefix_bloom_huge_page_tlb_size(0),
bloom_locality(0),
max_successive_merges(0),
min_partial_merge_operands(2) {
@@ -146,8 +146,6 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
inplace_callback(options.inplace_callback),
memtable_prefix_bloom_bits(options.memtable_prefix_bloom_bits),
memtable_prefix_bloom_probes(options.memtable_prefix_bloom_probes),
memtable_prefix_bloom_huge_page_tlb_size(
options.memtable_prefix_bloom_huge_page_tlb_size),
bloom_locality(options.bloom_locality),
max_successive_merges(options.max_successive_merges),
min_partial_merge_operands(options.min_partial_merge_operands) {
@@ -430,8 +428,6 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
memtable_prefix_bloom_bits);
Log(log, " Options.memtable_prefix_bloom_probes: %d",
memtable_prefix_bloom_probes);
Log(log, " Options.memtable_prefix_bloom_huge_page_tlb_size: %zu",
memtable_prefix_bloom_huge_page_tlb_size);
Log(log, " Options.bloom_locality: %d",
bloom_locality);
Log(log, " Options.max_successive_merges: %zd",