mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Merge branch 'master' into columnfamilies
Conflicts: HISTORY.md db/db_impl.cc db/db_impl.h db/db_iter.cc db/db_test.cc db/dbformat.h db/memtable.cc db/memtable_list.cc db/memtable_list.h db/table_cache.cc db/table_cache.h db/version_edit.h db/version_set.cc db/version_set.h db/write_batch.cc db/write_batch_test.cc include/rocksdb/options.h util/options.cc
This commit is contained in:
@@ -1,45 +0,0 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Arena class defines memory allocation methods. It's used by memtable and
|
||||
// skiplist.
|
||||
|
||||
#ifndef STORAGE_ROCKSDB_INCLUDE_ARENA_H_
|
||||
#define STORAGE_ROCKSDB_INCLUDE_ARENA_H_
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Abstract interface for the memory allocators used by memtable and skiplist.
// Concrete arenas implement the four pure-virtual methods below; this class
// itself holds no state.
class Arena {
 public:
  Arena() = default;
  virtual ~Arena() = default;

  // Return a pointer to a newly allocated memory block of "bytes" bytes.
  virtual char* Allocate(size_t bytes) = 0;

  // Allocate memory with the normal alignment guarantees provided by malloc.
  virtual char* AllocateAligned(size_t bytes) = 0;

  // Returns an estimate of the total memory used by arena.
  // The top-level const on the by-value return type is kept exactly as
  // declared so that existing overrides continue to match this signature.
  virtual const size_t ApproximateMemoryUsage() = 0;

  // Returns the total number of bytes in all blocks allocated so far.
  // (Same note on the const-qualified return type as above.)
  virtual const size_t MemoryAllocatedBytes() = 0;

 private:
  // No copying allowed: both copy operations are explicitly deleted, so any
  // attempted copy is rejected at compile time rather than at link time.
  Arena(const Arena&) = delete;
  void operator=(const Arena&) = delete;
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
#endif // STORAGE_ROCKSDB_INCLUDE_ARENA_H_
|
||||
@@ -102,7 +102,10 @@ class Cache {
|
||||
virtual uint64_t NewId() = 0;
|
||||
|
||||
// returns the maximum configured capacity of the cache
|
||||
virtual size_t GetCapacity() = 0;
|
||||
virtual size_t GetCapacity() const = 0;
|
||||
|
||||
// returns the memory size for the entries residing in the cache.
|
||||
virtual size_t GetUsage() const = 0;
|
||||
|
||||
// Call this on shutdown if you want to speed it up. Cache will disown
|
||||
// any underlying data and will not free it on delete. This call will leak
|
||||
|
||||
@@ -438,7 +438,7 @@ class WritableFile {
|
||||
// This asks the OS to initiate flushing the cached data to disk,
|
||||
// without waiting for completion.
|
||||
// Default implementation does nothing.
|
||||
virtual Status RangeSync(off64_t offset, off64_t nbytes) {
|
||||
virtual Status RangeSync(off_t offset, off_t nbytes) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@@ -33,8 +33,7 @@
|
||||
// iteration over the entire collection is rare since doing so requires all the
|
||||
// keys to be copied into a sorted data structure.
|
||||
|
||||
#ifndef STORAGE_ROCKSDB_DB_MEMTABLEREP_H_
|
||||
#define STORAGE_ROCKSDB_DB_MEMTABLEREP_H_
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -52,7 +51,11 @@ class MemTableRep {
|
||||
public:
|
||||
// Compare a and b. Return a negative value if a is less than b, 0 if they
|
||||
// are equal, and a positive value if a is greater than b
|
||||
virtual int operator()(const char* a, const char* b) const = 0;
|
||||
virtual int operator()(const char* prefix_len_key1,
|
||||
const char* prefix_len_key2) const = 0;
|
||||
|
||||
virtual int operator()(const char* prefix_len_key,
|
||||
const Slice& key) const = 0;
|
||||
|
||||
virtual ~KeyComparator() { }
|
||||
};
|
||||
@@ -100,7 +103,7 @@ class MemTableRep {
|
||||
virtual void Prev() = 0;
|
||||
|
||||
// Advance to the first entry with a key >= target
|
||||
virtual void Seek(const char* target) = 0;
|
||||
virtual void Seek(const Slice& internal_key, const char* memtable_key) = 0;
|
||||
|
||||
// Position at the first entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
@@ -175,26 +178,22 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// HashSkipListRep is backed by hash map of buckets. Each bucket is a skip
|
||||
// list. All the keys with the same prefix will be in the same bucket.
|
||||
// The prefix is determined using user supplied SliceTransform. It has
|
||||
// to match prefix_extractor in options.prefix_extractor.
|
||||
//
|
||||
// Iteration over the entire collection is implemented by dumping all the keys
|
||||
// into a separate skip list. Thus, these data structures are best used when
|
||||
// iteration over the entire collection is rare.
|
||||
//
|
||||
// Parameters:
|
||||
// transform: The prefix extractor that returns prefix when supplied a user
|
||||
// key. Has to match options.prefix_extractor
|
||||
// bucket_count: Number of buckets in a hash_map. Each bucket needs
|
||||
// 8 bytes. By default, we set buckets to one million, which
|
||||
// will take 8MB of memory. If you know the number of keys you'll
|
||||
// keep in hash map, set bucket count to be approximately twice
|
||||
// the number of keys
|
||||
// This class contains a fixed array of buckets, each
|
||||
// pointing to a skiplist (null if the bucket is empty).
|
||||
// bucket_count: number of fixed array buckets
|
||||
// skiplist_height: the max height of the skiplist
|
||||
// skiplist_branching_factor: probabilistic size ratio between adjacent
|
||||
// link lists in the skiplist
|
||||
extern MemTableRepFactory* NewHashSkipListRepFactory(
|
||||
const SliceTransform* transform, size_t bucket_count = 1000000);
|
||||
const SliceTransform* transform, size_t bucket_count = 1000000,
|
||||
int32_t skiplist_height = 4, int32_t skiplist_branching_factor = 4
|
||||
);
|
||||
|
||||
// The factory is to create memtables with a hashed linked list:
|
||||
// it contains a fixed array of buckets, each pointing to a sorted single
|
||||
// linked list (null if the bucket is empty).
|
||||
// bucket_count: number of fixed array buckets
|
||||
extern MemTableRepFactory* NewHashLinkListRepFactory(
|
||||
const SliceTransform* transform, size_t bucket_count = 50000);
|
||||
|
||||
}
|
||||
|
||||
#endif // STORAGE_ROCKSDB_DB_MEMTABLEREP_H_
|
||||
|
||||
@@ -34,6 +34,7 @@ class TablePropertiesCollector;
|
||||
class Slice;
|
||||
class SliceTransform;
|
||||
class Statistics;
|
||||
class InternalKeyComparator;
|
||||
|
||||
using std::shared_ptr;
|
||||
|
||||
@@ -65,6 +66,12 @@ struct CompressionOptions {
|
||||
: window_bits(wbits), level(lev), strategy(strategy) {}
|
||||
};
|
||||
|
||||
// Return status for the inplace update callback (see inplace_callback in
// ColumnFamilyOptions).
enum UpdateStatus {
  // Nothing to update: merging failed or the application did not wish to
  // take any action.
  UPDATE_FAILED = 0,
  // Value updated inplace: the callback modified the existing_value buffer
  // (and its size) directly.
  UPDATED_INPLACE = 1,
  // No inplace update. Merged value set: the callback returned the result
  // via merged_value, and that value will be added to the memtable.
  UPDATED = 2,
};
|
||||
|
||||
struct Options;
|
||||
|
||||
struct ColumnFamilyOptions {
|
||||
@@ -410,13 +417,17 @@ struct ColumnFamilyOptions {
|
||||
// the tables.
|
||||
// Default: empty vector -- no user-defined statistics collection will be
|
||||
// performed.
|
||||
std::vector<std::shared_ptr<TablePropertiesCollector>>
|
||||
table_properties_collectors;
|
||||
typedef std::vector<std::shared_ptr<TablePropertiesCollector>>
|
||||
TablePropertiesCollectors;
|
||||
TablePropertiesCollectors table_properties_collectors;
|
||||
|
||||
// Allows thread-safe inplace updates. Requires Updates iff
|
||||
// * key exists in current memtable
|
||||
// * new sizeof(new_value) <= sizeof(old_value)
|
||||
// * old_value for that key is a put i.e. kTypeValue
|
||||
// Allows thread-safe inplace updates.
|
||||
// If inplace_callback function is not set,
|
||||
// Put(key, new_value) will update inplace the existing_value iff
|
||||
// * key exists in current memtable
|
||||
// * new sizeof(new_value) <= sizeof(existing_value)
|
||||
// * existing_value for that key is a put i.e. kTypeValue
|
||||
// If inplace_callback function is set, check doc for inplace_callback.
|
||||
// Default: false.
|
||||
bool inplace_update_support;
|
||||
|
||||
@@ -424,6 +435,55 @@ struct ColumnFamilyOptions {
|
||||
// Default: 10000, if inplace_update_support = true, else 0.
|
||||
size_t inplace_update_num_locks;
|
||||
|
||||
// existing_value - pointer to previous value (from both memtable and sst).
|
||||
// nullptr if key doesn't exist
|
||||
// existing_value_size - pointer to size of existing_value.
|
||||
// nullptr if key doesn't exist
|
||||
// delta_value - Delta value to be merged with the existing_value.
|
||||
// Stored in transaction logs.
|
||||
// merged_value - Set when delta is applied on the previous value.
|
||||
|
||||
// Applicable only when inplace_update_support is true,
|
||||
// this callback function is called at the time of updating the memtable
|
||||
// as part of a Put operation, let's say Put(key, delta_value). It allows the
|
||||
// 'delta_value' specified as part of the Put operation to be merged with
|
||||
// an 'existing_value' of the key in the database.
|
||||
|
||||
// If the merged value is smaller in size than the 'existing_value',
|
||||
// then this function can update the 'existing_value' buffer inplace and
|
||||
// the corresponding 'existing_value_size' pointer, if it wishes to.
|
||||
// The callback should return UpdateStatus::UPDATED_INPLACE in this
|
||||
// case. (Note that in this case, the snapshot-semantics of the rocksdb
|
||||
// Iterator is not atomic anymore).
|
||||
|
||||
// If the merged value is larger in size than the 'existing_value' or the
|
||||
// application does not wish to modify the 'existing_value' buffer inplace,
|
||||
// then the merged value should be returned via *merged_value. It is set by
|
||||
// merging the 'existing_value' and the Put 'delta_value'. The callback should
|
||||
// return UpdateStatus::UPDATED in this case. This merged value will be added
|
||||
// to the memtable.
|
||||
|
||||
// If merging fails or the application does not wish to take any action,
|
||||
// then the callback should return UpdateStatus::UPDATE_FAILED.
|
||||
|
||||
// Please remember that the original call from the application is Put(key,
|
||||
// delta_value). So the transaction log (if enabled) will still contain (key,
|
||||
// delta_value). The 'merged_value' is not stored in the transaction log.
|
||||
// Hence the inplace_callback function should be consistent across db reopens.
|
||||
|
||||
// Default: nullptr
|
||||
UpdateStatus (*inplace_callback)(char* existing_value,
|
||||
uint32_t* existing_value_size,
|
||||
Slice delta_value,
|
||||
std::string* merged_value);
|
||||
|
||||
// if prefix_extractor is set and bloom_bits is not 0, create prefix bloom
|
||||
// for memtable
|
||||
uint32_t memtable_prefix_bloom_bits;
|
||||
|
||||
// number of hash probes per key
|
||||
uint32_t memtable_prefix_bloom_probes;
|
||||
|
||||
// Maximum number of successive merge operations on a key in the memtable.
|
||||
//
|
||||
// When a merge operation is added to the memtable and the maximum number of
|
||||
@@ -473,9 +533,10 @@ struct DBOptions {
|
||||
shared_ptr<Logger> info_log;
|
||||
|
||||
// Number of open files that can be used by the DB. You may need to
|
||||
// increase this if your database has a large working set (budget
|
||||
// one open file per 2MB of working set).
|
||||
//
|
||||
// increase this if your database has a large working set. Value -1 means
|
||||
// files opened are always kept open. You can estimate number of files based
|
||||
// on target_file_size_base and target_file_size_multiplier for level-based
|
||||
// compaction. For universal-style compaction, you can usually set it to -1.
|
||||
// Default: 1000
|
||||
int max_open_files;
|
||||
|
||||
|
||||
@@ -38,7 +38,27 @@ struct PerfContext {
|
||||
uint64_t internal_key_skipped_count;
|
||||
// total number of deletes skipped over during iteration
|
||||
uint64_t internal_delete_skipped_count;
|
||||
uint64_t wal_write_time; // total time spent on writing to WAL
|
||||
|
||||
uint64_t get_snapshot_time; // total time spent on getting snapshot
|
||||
uint64_t get_from_memtable_time; // total time spent on querying memtables
|
||||
uint64_t get_from_memtable_count; // number of mem tables queried
|
||||
// total time spent after Get() finds a key
|
||||
uint64_t get_post_process_time;
|
||||
uint64_t get_from_output_files_time; // total time reading from output files
|
||||
// total time spent on seeking child iters
|
||||
uint64_t seek_child_seek_time;
|
||||
// number of seek issued in child iterators
|
||||
uint64_t seek_child_seek_count;
|
||||
uint64_t seek_min_heap_time; // total time spent on the merge heap
|
||||
// total time spent on seeking the internal entries
|
||||
uint64_t seek_internal_seek_time;
|
||||
// total time spent on iterating internal entries to find the next user entry
|
||||
uint64_t find_next_user_entry_time;
|
||||
// total time spent on pre or post processing when writing a record
|
||||
uint64_t write_pre_and_post_process_time;
|
||||
uint64_t write_wal_time; // total time spent on writing to WAL
|
||||
// total time spent on writing to mem tables
|
||||
uint64_t write_memtable_time;
|
||||
};
|
||||
|
||||
extern __thread PerfContext perf_context;
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#define STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
@@ -18,10 +17,8 @@ namespace rocksdb {
|
||||
|
||||
/**
|
||||
* Keep adding tickers here.
|
||||
* Any ticker should have a value less than TICKER_ENUM_MAX.
|
||||
* Add a new ticker by assigning it the current value of TICKER_ENUM_MAX
|
||||
* Add a string representation in TickersNameMap below.
|
||||
* And incrementing TICKER_ENUM_MAX.
|
||||
* 1. Any ticker should be added before TICKER_ENUM_MAX.
|
||||
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
|
||||
*/
|
||||
enum Tickers {
|
||||
// total block cache misses
|
||||
@@ -252,7 +249,7 @@ class Statistics {
|
||||
virtual void setTickerCount(Tickers tickerType, uint64_t count) = 0;
|
||||
virtual void measureTime(Histograms histogramType, uint64_t time) = 0;
|
||||
|
||||
virtual void histogramData(Histograms type, HistogramData * const data) = 0;
|
||||
virtual void histogramData(Histograms type, HistogramData* const data) = 0;
|
||||
// String representation of the statistic object.
|
||||
std::string ToString();
|
||||
};
|
||||
|
||||
@@ -1,127 +1,81 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Currently we support two types of tables: plain table and block-based table.
|
||||
// 1. Block-based table: this is the default table type that we inherited from
|
||||
// LevelDB, which was designed for storing data in hard disk or flash
|
||||
// device.
|
||||
// 2. Plain table: it is one of RocksDB's SST file format optimized
|
||||
// for low query latency on pure-memory or really low-latency media.
|
||||
//
|
||||
// A tutorial of rocksdb table formats is available here:
|
||||
// https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
||||
//
|
||||
// Example code is also available
|
||||
// https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats#wiki-examples
|
||||
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/table_properties.h"
|
||||
#include "rocksdb/options.h"
|
||||
#include "rocksdb/status.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
struct Options;
|
||||
// -- Block-based Table
|
||||
class FlushBlockPolicyFactory;
|
||||
class RandomAccessFile;
|
||||
struct ReadOptions;
|
||||
class TableCache;
|
||||
class TableBuilder;
|
||||
class TableReader;
|
||||
class WritableFile;
|
||||
struct EnvOptions;
|
||||
struct Options;
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
// TableBuilder provides the interface used to build a Table
|
||||
// (an immutable and sorted map from keys to values).
|
||||
//
|
||||
// Multiple threads can invoke const methods on a TableBuilder without
|
||||
// external synchronization, but if any of the threads may call a
|
||||
// non-const method, all threads accessing the same TableBuilder must use
|
||||
// external synchronization.
|
||||
class TableBuilder {
|
||||
public:
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
virtual ~TableBuilder() {}
|
||||
// For advanced user only
|
||||
struct BlockBasedTableOptions {
|
||||
// @flush_block_policy_factory creates the instances of flush block policy,
|
||||
// which provides a configurable way to determine when to flush a block in
|
||||
// the block based tables. If not set, table builder will use the default
|
||||
// block flush policy, which cuts blocks by block size (please refer to
|
||||
// `FlushBlockBySizePolicy`).
|
||||
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
|
||||
|
||||
// Add key,value to the table being constructed.
|
||||
// REQUIRES: key is after any previously added key according to comparator.
|
||||
// REQUIRES: Finish(), Abandon() have not been called
|
||||
virtual void Add(const Slice& key, const Slice& value) = 0;
|
||||
|
||||
// Return non-ok iff some error has been detected.
|
||||
virtual Status status() const = 0;
|
||||
|
||||
// Finish building the table.
|
||||
// REQUIRES: Finish(), Abandon() have not been called
|
||||
virtual Status Finish() = 0;
|
||||
|
||||
// Indicate that the contents of this builder should be abandoned.
|
||||
// If the caller is not going to call Finish(), it must call Abandon()
|
||||
// before destroying this builder.
|
||||
// REQUIRES: Finish(), Abandon() have not been called
|
||||
virtual void Abandon() = 0;
|
||||
|
||||
// Number of calls to Add() so far.
|
||||
virtual uint64_t NumEntries() const = 0;
|
||||
|
||||
// Size of the file generated so far. If invoked after a successful
|
||||
// Finish() call, returns the size of the final generated file.
|
||||
virtual uint64_t FileSize() const = 0;
|
||||
// TODO(kailiu) Temporarily disable this feature by making the default value
|
||||
// to be false.
|
||||
//
|
||||
// Indicating if we'd put index/filter blocks to the block cache.
|
||||
// If not specified, each "table reader" object will pre-load index/filter
|
||||
// block during table initialization.
|
||||
bool cache_index_and_filter_blocks = false;
|
||||
};
|
||||
|
||||
// A Table is a sorted map from strings to strings. Tables are
|
||||
// immutable and persistent. A Table may be safely accessed from
|
||||
// multiple threads without external synchronization.
|
||||
class TableReader {
|
||||
public:
|
||||
virtual ~TableReader() {}
|
||||
// Create default block based table factory.
|
||||
extern TableFactory* NewBlockBasedTableFactory(
|
||||
const BlockBasedTableOptions& table_options = BlockBasedTableOptions());
|
||||
|
||||
// Determine whether there is a chance that the current table file
|
||||
// contains a key starting with internal_prefix. The specific
|
||||
// table implementation can use bloom filter and/or other heuristic
|
||||
// to filter out this table as a whole.
|
||||
virtual bool PrefixMayMatch(const Slice& internal_prefix) = 0;
|
||||
// -- Plain Table
|
||||
// @user_key_len: plain table has optimization for fixed-size keys, which can be
|
||||
// specified via user_key_len. Alternatively, you can pass
|
||||
// `kPlainTableVariableLength` if your keys have variable
|
||||
// lengths.
|
||||
// @bloom_bits_per_key: the number of bits used for bloom filter per key. You may
|
||||
// disable it by passing a zero.
|
||||
// @hash_table_ratio: the desired utilization of the hash table used for prefix
|
||||
// hashing. hash_table_ratio = number of prefixes / #buckets
|
||||
// in the hash table
|
||||
const uint32_t kPlainTableVariableLength = 0;
|
||||
extern TableFactory* NewPlainTableFactory(
|
||||
uint32_t user_key_len = kPlainTableVariableLength,
|
||||
int bloom_bits_per_key = 10, double hash_table_ratio = 0.75);
|
||||
|
||||
// Returns a new iterator over the table contents.
|
||||
// The result of NewIterator() is initially invalid (caller must
|
||||
// call one of the Seek methods on the iterator before using it).
|
||||
virtual Iterator* NewIterator(const ReadOptions&) = 0;
|
||||
|
||||
// Given a key, return an approximate byte offset in the file where
|
||||
// the data for that key begins (or would begin if the key were
|
||||
// present in the file). The returned value is in terms of file
|
||||
// bytes, and so includes effects like compression of the underlying data.
|
||||
// E.g., the approximate offset of the last key in the table will
|
||||
// be close to the file length.
|
||||
virtual uint64_t ApproximateOffsetOf(const Slice& key) = 0;
|
||||
|
||||
// Returns true if the block for the specified key is in cache.
|
||||
// REQUIRES: key is in this table.
|
||||
virtual bool TEST_KeyInCache(const ReadOptions& options,
|
||||
const Slice& key) = 0;
|
||||
|
||||
// Set up the table for Compaction. Might change some parameters with
|
||||
// posix_fadvise
|
||||
virtual void SetupForCompaction() = 0;
|
||||
|
||||
virtual TableProperties& GetTableProperties() = 0;
|
||||
|
||||
// Calls (*result_handler)(handle_context, ...) repeatedly, starting with
|
||||
// the entry found after a call to Seek(key), until result_handler returns
|
||||
// false, where k is the actual internal key for a row found and v as the
|
||||
// value of the key. didIO is true if I/O is involved in the operation. May
|
||||
// not make such a call if filter policy says that key is not present.
|
||||
//
|
||||
// mark_key_may_exist_handler needs to be called when it is configured to be
|
||||
// memory only and the key is not found in the block cache, with
|
||||
// the parameter to be handle_context.
|
||||
//
|
||||
// readOptions is the options for the read
|
||||
// key is the key to search for
|
||||
virtual Status Get(
|
||||
const ReadOptions& readOptions,
|
||||
const Slice& key,
|
||||
void* handle_context,
|
||||
bool (*result_handler)(void* handle_context, const Slice& k,
|
||||
const Slice& v, bool didIO),
|
||||
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) = 0;
|
||||
};
|
||||
|
||||
// A base class for table factories
|
||||
// A base class for table factories.
|
||||
class TableFactory {
|
||||
public:
|
||||
virtual ~TableFactory() {}
|
||||
@@ -139,7 +93,7 @@ class TableFactory {
|
||||
// in parameter file. It's the caller's responsibility to make sure
|
||||
// file is in the correct format.
|
||||
//
|
||||
// GetTableReader() is called in two places:
|
||||
// NewTableReader() is called in two places:
|
||||
// (1) TableCache::FindTable() calls the function when table cache miss
|
||||
// and cache the table object returned.
|
||||
// (2) SstFileReader (for SST Dump) opens the table and dumps the table
|
||||
@@ -150,9 +104,10 @@ class TableFactory {
|
||||
// file is a file handler to handle the file for the table
|
||||
// file_size is the physical file size of the file
|
||||
// table_reader is the output table reader
|
||||
virtual Status GetTableReader(
|
||||
virtual Status NewTableReader(
|
||||
const Options& options, const EnvOptions& soptions,
|
||||
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
|
||||
unique_ptr<TableReader>* table_reader) const = 0;
|
||||
|
||||
// Return a table builder to write to a file for this table type.
|
||||
@@ -173,8 +128,9 @@ class TableFactory {
|
||||
// file is a handle of a writable file. It is the caller's responsibility to
|
||||
// keep the file open and close the file after closing the table builder.
|
||||
// compression_type is the compression type to use in this table.
|
||||
virtual TableBuilder* GetTableBuilder(
|
||||
const Options& options, WritableFile* file,
|
||||
CompressionType compression_type) const = 0;
|
||||
virtual TableBuilder* NewTableBuilder(
|
||||
const Options& options, const InternalKeyComparator& internal_comparator,
|
||||
WritableFile* file, CompressionType compression_type) const = 0;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
@@ -1,28 +1,25 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "rocksdb/status.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// -- Table Properties
|
||||
// Other than basic table properties, each table may also have the user
|
||||
// collected properties.
|
||||
// The value of the user-collected properties are encoded as raw bytes --
|
||||
// users have to interpret these values by themselves.
|
||||
typedef std::unordered_map<std::string, std::string> UserCollectedProperties;
|
||||
|
||||
// TableProperties contains a bunch of read-only properties of its associated
|
||||
// table.
|
||||
struct TableProperties {
|
||||
public:
|
||||
// Other than basic table properties, each table may also have the user
|
||||
// collected properties.
|
||||
// The value of the user-collected properties are encoded as raw bytes --
|
||||
// users have to interpret these values by themselves.
|
||||
typedef
|
||||
std::unordered_map<std::string, std::string>
|
||||
UserCollectedProperties;
|
||||
|
||||
// the total size of all data blocks.
|
||||
uint64_t data_size = 0;
|
||||
// the size of index block.
|
||||
@@ -37,6 +34,10 @@ struct TableProperties {
|
||||
uint64_t num_data_blocks = 0;
|
||||
// the number of entries in this table
|
||||
uint64_t num_entries = 0;
|
||||
// format version, reserved for backward compatibility
|
||||
uint64_t format_version = 0;
|
||||
// If 0, key is variable length. Otherwise number of bytes for each key.
|
||||
uint64_t fixed_key_len = 0;
|
||||
|
||||
// The name of the filter policy used in this table.
|
||||
// If no filter policy is used, `filter_policy_name` will be an empty string.
|
||||
@@ -47,17 +48,32 @@ struct TableProperties {
|
||||
|
||||
// convert this object to a human readable form
|
||||
// @prop_delim: delimiter for each property.
|
||||
std::string ToString(
|
||||
const std::string& prop_delim = "; ",
|
||||
const std::string& kv_delim = "=") const;
|
||||
std::string ToString(const std::string& prop_delim = "; ",
|
||||
const std::string& kv_delim = "=") const;
|
||||
};
|
||||
|
||||
// table properties' human-readable names in the property block.
|
||||
// Holder for the string names of the basic table properties as they appear
// in the property block. The members are only declared here; their
// definitions (and therefore the actual name strings) are not visible in
// this header.
// NOTE(review): the constants presumably map one-to-one onto the fields of
// TableProperties (e.g. kDataSize <-> data_size) -- confirm against the
// definitions.
struct TablePropertiesNames {
  static const std::string kDataSize;
  static const std::string kIndexSize;
  static const std::string kFilterSize;
  static const std::string kRawKeySize;
  static const std::string kRawValueSize;
  static const std::string kNumDataBlocks;
  static const std::string kNumEntries;
  static const std::string kFormatVersion;
  static const std::string kFixedKeyLen;
  static const std::string kFilterPolicy;
};
|
||||
|
||||
extern const std::string kPropertiesBlock;
|
||||
|
||||
// `TablePropertiesCollector` provides the mechanism for users to collect
|
||||
// their own interested properties. This class is essentially a collection
|
||||
// of callback functions that will be invoked during table building.
|
||||
class TablePropertiesCollector {
|
||||
public:
|
||||
virtual ~TablePropertiesCollector() { }
|
||||
virtual ~TablePropertiesCollector() {}
|
||||
|
||||
// Add() will be called when a new key/value pair is inserted into the table.
|
||||
// @params key the original key that is inserted into the table.
|
||||
@@ -68,23 +84,20 @@ class TablePropertiesCollector {
|
||||
// for writing the properties block.
|
||||
// @params properties User will add their collected statistics to
|
||||
// `properties`.
|
||||
virtual Status Finish(
|
||||
TableProperties::UserCollectedProperties* properties) = 0;
|
||||
virtual Status Finish(UserCollectedProperties* properties) = 0;
|
||||
|
||||
// The name of the properties collector can be used for debugging purpose.
|
||||
virtual const char* Name() const = 0;
|
||||
|
||||
// Return the human-readable properties, where the key is property name and
|
||||
// the value is the human-readable form of value.
|
||||
virtual TableProperties::UserCollectedProperties
|
||||
GetReadableProperties() const = 0;
|
||||
virtual UserCollectedProperties GetReadableProperties() const = 0;
|
||||
};
|
||||
|
||||
// Extra properties
|
||||
// Below is a list of non-basic properties that are collected by database
|
||||
// itself. Especially some properties regarding the internal keys (which
|
||||
// is unknown to `table`).
|
||||
extern uint64_t GetDeletedKeys(
|
||||
const TableProperties::UserCollectedProperties& props);
|
||||
extern uint64_t GetDeletedKeys(const UserCollectedProperties& props);
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
Reference in New Issue
Block a user