Squashed 'src/rocksdb/' content from commit 224932d

git-subtree-dir: src/rocksdb
git-subtree-split: 224932d4d0b561712107d747c662df181c39644d
This commit is contained in:
Vinnie Falco
2014-08-08 11:57:41 -07:00
commit f86d9fd626
435 changed files with 123706 additions and 0 deletions

788
include/rocksdb/c.h Normal file

@@ -0,0 +1,788 @@
/* Copyright (c) 2013, Facebook, Inc. All rights reserved.
This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree. An additional grant
of patent rights can be found in the PATENTS file in the same directory.
Copyright (c) 2011 The LevelDB Authors. All rights reserved.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file. See the AUTHORS file for names of contributors.
C bindings for rocksdb. May be useful as a stable ABI that can be
used by programs that keep rocksdb in a shared library, or for
a JNI api.
Does not support:
. getters for the option types
. custom comparators that implement key shortening
. capturing post-write-snapshot
. custom iter, db, env, cache implementations using just the C bindings
Some conventions:
(1) We expose just opaque struct pointers and functions to clients.
This allows us to change internal representations without having to
recompile clients.
(2) For simplicity, there is no equivalent to the Slice type. Instead,
the caller has to pass the pointer and length as separate
arguments.
(3) Errors are represented by a null-terminated c string. NULL
means no error. All operations that can raise an error are passed
a "char** errptr" as the last argument. One of the following must
be true on entry:
*errptr == NULL
*errptr points to a malloc()ed null-terminated error message
On success, a rocksdb routine leaves *errptr unchanged.
On failure, rocksdb frees the old value of *errptr and
sets *errptr to a malloc()ed error message.
(4) Bools have the type unsigned char (0 == false; rest == true)
(5) All of the pointer arguments must be non-NULL.
*/
#ifndef STORAGE_ROCKSDB_INCLUDE_C_H_
#define STORAGE_ROCKSDB_INCLUDE_C_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
/* Exported types */
typedef struct rocksdb_t rocksdb_t;
typedef struct rocksdb_cache_t rocksdb_cache_t;
typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t;
typedef struct rocksdb_compactionfiltercontext_t
rocksdb_compactionfiltercontext_t;
typedef struct rocksdb_compactionfilterfactory_t
rocksdb_compactionfilterfactory_t;
typedef struct rocksdb_compactionfilterv2_t
rocksdb_compactionfilterv2_t;
typedef struct rocksdb_compactionfilterfactoryv2_t
rocksdb_compactionfilterfactoryv2_t;
typedef struct rocksdb_comparator_t rocksdb_comparator_t;
typedef struct rocksdb_env_t rocksdb_env_t;
typedef struct rocksdb_fifo_compaction_options_t rocksdb_fifo_compaction_options_t;
typedef struct rocksdb_filelock_t rocksdb_filelock_t;
typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t;
typedef struct rocksdb_flushoptions_t rocksdb_flushoptions_t;
typedef struct rocksdb_iterator_t rocksdb_iterator_t;
typedef struct rocksdb_logger_t rocksdb_logger_t;
typedef struct rocksdb_mergeoperator_t rocksdb_mergeoperator_t;
typedef struct rocksdb_options_t rocksdb_options_t;
typedef struct rocksdb_randomfile_t rocksdb_randomfile_t;
typedef struct rocksdb_readoptions_t rocksdb_readoptions_t;
typedef struct rocksdb_seqfile_t rocksdb_seqfile_t;
typedef struct rocksdb_slicetransform_t rocksdb_slicetransform_t;
typedef struct rocksdb_snapshot_t rocksdb_snapshot_t;
typedef struct rocksdb_writablefile_t rocksdb_writablefile_t;
typedef struct rocksdb_writebatch_t rocksdb_writebatch_t;
typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t;
typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t;
typedef struct rocksdb_livefiles_t rocksdb_livefiles_t;
typedef struct rocksdb_column_family_handle_t rocksdb_column_family_handle_t;
/* DB operations */
extern rocksdb_t* rocksdb_open(
const rocksdb_options_t* options,
const char* name,
char** errptr);
extern rocksdb_t* rocksdb_open_for_read_only(
const rocksdb_options_t* options,
const char* name,
unsigned char error_if_log_file_exist,
char** errptr);
extern rocksdb_t* rocksdb_open_column_families(
const rocksdb_options_t* options,
const char* name,
int num_column_families,
const char** column_family_names,
const rocksdb_options_t** column_family_options,
rocksdb_column_family_handle_t** column_family_handles,
char** errptr);
extern rocksdb_t* rocksdb_open_for_read_only_column_families(
const rocksdb_options_t* options,
const char* name,
int num_column_families,
const char** column_family_names,
const rocksdb_options_t** column_family_options,
rocksdb_column_family_handle_t** column_family_handles,
unsigned char error_if_log_file_exist,
char** errptr);
char** rocksdb_list_column_families(
const rocksdb_options_t* options,
const char* name,
size_t* lencf,
char** errptr);
void rocksdb_list_column_families_destroy(char** list, size_t len);
extern rocksdb_column_family_handle_t* rocksdb_create_column_family(
rocksdb_t* db,
const rocksdb_options_t* column_family_options,
const char* column_family_name,
char** errptr);
extern void rocksdb_drop_column_family(
rocksdb_t* db,
rocksdb_column_family_handle_t* handle,
char** errptr);
extern void rocksdb_column_family_handle_destroy(rocksdb_column_family_handle_t*);
extern void rocksdb_close(rocksdb_t* db);
extern void rocksdb_put(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr);
extern void rocksdb_put_cf(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr);
extern void rocksdb_delete(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
const char* key, size_t keylen,
char** errptr);
void rocksdb_delete_cf(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t keylen,
char** errptr);
extern void rocksdb_merge(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr);
extern void rocksdb_merge_cf(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t keylen,
const char* val, size_t vallen,
char** errptr);
extern void rocksdb_write(
rocksdb_t* db,
const rocksdb_writeoptions_t* options,
rocksdb_writebatch_t* batch,
char** errptr);
/* Returns NULL if not found. A malloc()ed array otherwise.
Stores the length of the array in *vallen. */
extern char* rocksdb_get(
rocksdb_t* db,
const rocksdb_readoptions_t* options,
const char* key, size_t keylen,
size_t* vallen,
char** errptr);
extern char* rocksdb_get_cf(
rocksdb_t* db,
const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t keylen,
size_t* vallen,
char** errptr);
extern rocksdb_iterator_t* rocksdb_create_iterator(
rocksdb_t* db,
const rocksdb_readoptions_t* options);
extern rocksdb_iterator_t* rocksdb_create_iterator_cf(
rocksdb_t* db,
const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family);
extern const rocksdb_snapshot_t* rocksdb_create_snapshot(
rocksdb_t* db);
extern void rocksdb_release_snapshot(
rocksdb_t* db,
const rocksdb_snapshot_t* snapshot);
/* Returns NULL if property name is unknown.
Else returns a pointer to a malloc()-ed null-terminated value. */
extern char* rocksdb_property_value(
rocksdb_t* db,
const char* propname);
extern char* rocksdb_property_value_cf(
rocksdb_t* db,
rocksdb_column_family_handle_t* column_family,
const char* propname);
extern void rocksdb_approximate_sizes(
rocksdb_t* db,
int num_ranges,
const char* const* range_start_key, const size_t* range_start_key_len,
const char* const* range_limit_key, const size_t* range_limit_key_len,
uint64_t* sizes);
extern void rocksdb_approximate_sizes_cf(
rocksdb_t* db,
rocksdb_column_family_handle_t* column_family,
int num_ranges,
const char* const* range_start_key, const size_t* range_start_key_len,
const char* const* range_limit_key, const size_t* range_limit_key_len,
uint64_t* sizes);
extern void rocksdb_compact_range(
rocksdb_t* db,
const char* start_key, size_t start_key_len,
const char* limit_key, size_t limit_key_len);
extern void rocksdb_compact_range_cf(
rocksdb_t* db,
rocksdb_column_family_handle_t* column_family,
const char* start_key, size_t start_key_len,
const char* limit_key, size_t limit_key_len);
extern void rocksdb_delete_file(
rocksdb_t* db,
const char* name);
extern const rocksdb_livefiles_t* rocksdb_livefiles(
rocksdb_t* db);
extern void rocksdb_flush(
rocksdb_t* db,
const rocksdb_flushoptions_t* options,
char** errptr);
extern void rocksdb_disable_file_deletions(
rocksdb_t* db,
char** errptr);
extern void rocksdb_enable_file_deletions(
rocksdb_t* db,
unsigned char force,
char** errptr);
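
As a rough illustration of the error-handling convention described at the top of this header, the following standalone sketch opens a database, writes one key, and reads it back through the C API. The path and the key/value strings are invented for the example, and error handling is kept minimal.

#include <stdio.h>
#include <stdlib.h>
#include "rocksdb/c.h"

int main(void) {
  char* err = NULL;  /* must be NULL (or malloc()ed) on entry to every call */
  rocksdb_options_t* options = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(options, 1);

  rocksdb_t* db = rocksdb_open(options, "/tmp/rocksdb_c_example", &err);
  if (err != NULL) {                /* non-NULL means the call failed */
    fprintf(stderr, "open failed: %s\n", err);
    free(err);                      /* error strings are malloc()ed by the library */
    return 1;
  }

  rocksdb_writeoptions_t* wopts = rocksdb_writeoptions_create();
  rocksdb_put(db, wopts, "key", 3, "value", 5, &err);
  if (err != NULL) { fprintf(stderr, "put failed: %s\n", err); free(err); err = NULL; }

  rocksdb_readoptions_t* ropts = rocksdb_readoptions_create();
  size_t vallen = 0;
  char* val = rocksdb_get(db, ropts, "key", 3, &vallen, &err);
  if (val != NULL) {
    printf("got %.*s\n", (int)vallen, val);
    free(val);                      /* the returned value is malloc()ed */
  }

  rocksdb_readoptions_destroy(ropts);
  rocksdb_writeoptions_destroy(wopts);
  rocksdb_close(db);
  rocksdb_options_destroy(options);
  return 0;
}
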
/* Management operations */
extern void rocksdb_destroy_db(
const rocksdb_options_t* options,
const char* name,
char** errptr);
extern void rocksdb_repair_db(
const rocksdb_options_t* options,
const char* name,
char** errptr);
/* Iterator */
extern void rocksdb_iter_destroy(rocksdb_iterator_t*);
extern unsigned char rocksdb_iter_valid(const rocksdb_iterator_t*);
extern void rocksdb_iter_seek_to_first(rocksdb_iterator_t*);
extern void rocksdb_iter_seek_to_last(rocksdb_iterator_t*);
extern void rocksdb_iter_seek(rocksdb_iterator_t*, const char* k, size_t klen);
extern void rocksdb_iter_next(rocksdb_iterator_t*);
extern void rocksdb_iter_prev(rocksdb_iterator_t*);
extern const char* rocksdb_iter_key(const rocksdb_iterator_t*, size_t* klen);
extern const char* rocksdb_iter_value(const rocksdb_iterator_t*, size_t* vlen);
extern void rocksdb_iter_get_error(const rocksdb_iterator_t*, char** errptr);
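
A minimal scan over the whole database using the iterator functions above might look like the following sketch; scan_all is a made-up helper name and db is assumed to have been opened elsewhere.

#include <stdio.h>
#include <stdlib.h>
#include "rocksdb/c.h"

/* Print every key/value pair in the database (illustrative only). */
static void scan_all(rocksdb_t* db) {
  rocksdb_readoptions_t* ropts = rocksdb_readoptions_create();
  rocksdb_iterator_t* it = rocksdb_create_iterator(db, ropts);

  for (rocksdb_iter_seek_to_first(it); rocksdb_iter_valid(it); rocksdb_iter_next(it)) {
    size_t klen, vlen;
    const char* k = rocksdb_iter_key(it, &klen);   /* not null-terminated */
    const char* v = rocksdb_iter_value(it, &vlen);
    printf("%.*s => %.*s\n", (int)klen, k, (int)vlen, v);
  }

  char* err = NULL;
  rocksdb_iter_get_error(it, &err);                /* check for errors after the loop */
  if (err != NULL) {
    fprintf(stderr, "iteration error: %s\n", err);
    free(err);
  }

  rocksdb_iter_destroy(it);
  rocksdb_readoptions_destroy(ropts);
}
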
/* Write batch */
extern rocksdb_writebatch_t* rocksdb_writebatch_create();
extern rocksdb_writebatch_t* rocksdb_writebatch_create_from(const char* rep,
size_t size);
extern void rocksdb_writebatch_destroy(rocksdb_writebatch_t*);
extern void rocksdb_writebatch_clear(rocksdb_writebatch_t*);
extern int rocksdb_writebatch_count(rocksdb_writebatch_t*);
extern void rocksdb_writebatch_put(
rocksdb_writebatch_t*,
const char* key, size_t klen,
const char* val, size_t vlen);
extern void rocksdb_writebatch_put_cf(
rocksdb_writebatch_t*,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t klen,
const char* val, size_t vlen);
extern void rocksdb_writebatch_merge(
rocksdb_writebatch_t*,
const char* key, size_t klen,
const char* val, size_t vlen);
extern void rocksdb_writebatch_merge_cf(
rocksdb_writebatch_t*,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t klen,
const char* val, size_t vlen);
extern void rocksdb_writebatch_delete(
rocksdb_writebatch_t*,
const char* key, size_t klen);
extern void rocksdb_writebatch_delete_cf(
rocksdb_writebatch_t*,
rocksdb_column_family_handle_t* column_family,
const char* key, size_t klen);
extern void rocksdb_writebatch_iterate(
rocksdb_writebatch_t*,
void* state,
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
void (*deleted)(void*, const char* k, size_t klen));
extern const char* rocksdb_writebatch_data(rocksdb_writebatch_t*, size_t *size);
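
The write-batch functions above collect several updates that are then applied atomically by a single rocksdb_write call. A sketch, with batched_update as an invented helper name and hard-coded keys:

#include <stdlib.h>
#include "rocksdb/c.h"

/* Apply two puts and a delete as one atomic write (illustrative only). */
static void batched_update(rocksdb_t* db) {
  rocksdb_writebatch_t* batch = rocksdb_writebatch_create();
  rocksdb_writebatch_put(batch, "alpha", 5, "1", 1);
  rocksdb_writebatch_put(batch, "beta", 4, "2", 1);
  rocksdb_writebatch_delete(batch, "gamma", 5);

  char* err = NULL;
  rocksdb_writeoptions_t* wopts = rocksdb_writeoptions_create();
  rocksdb_write(db, wopts, batch, &err);   /* all three updates succeed or fail together */
  if (err != NULL) {
    free(err);
  }

  rocksdb_writeoptions_destroy(wopts);
  rocksdb_writebatch_destroy(batch);
}
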
/* Options */
extern rocksdb_options_t* rocksdb_options_create();
extern void rocksdb_options_destroy(rocksdb_options_t*);
extern void rocksdb_options_increase_parallelism(
rocksdb_options_t* opt, int total_threads);
extern void rocksdb_options_optimize_for_point_lookup(
rocksdb_options_t* opt);
extern void rocksdb_options_optimize_level_style_compaction(
rocksdb_options_t* opt, uint64_t memtable_memory_budget);
extern void rocksdb_options_optimize_universal_style_compaction(
rocksdb_options_t* opt, uint64_t memtable_memory_budget);
extern void rocksdb_options_set_compaction_filter(
rocksdb_options_t*,
rocksdb_compactionfilter_t*);
extern void rocksdb_options_set_compaction_filter_factory(
rocksdb_options_t*, rocksdb_compactionfilterfactory_t*);
extern void rocksdb_options_set_compaction_filter_factory_v2(
rocksdb_options_t*,
rocksdb_compactionfilterfactoryv2_t*);
extern void rocksdb_options_set_comparator(
rocksdb_options_t*,
rocksdb_comparator_t*);
extern void rocksdb_options_set_merge_operator(
rocksdb_options_t*,
rocksdb_mergeoperator_t*);
extern void rocksdb_options_set_compression_per_level(
rocksdb_options_t* opt,
int* level_values,
size_t num_levels);
extern void rocksdb_options_set_filter_policy(
rocksdb_options_t*,
rocksdb_filterpolicy_t*);
extern void rocksdb_options_set_create_if_missing(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_create_missing_column_families(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_error_if_exists(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_paranoid_checks(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_env(rocksdb_options_t*, rocksdb_env_t*);
extern void rocksdb_options_set_info_log(rocksdb_options_t*, rocksdb_logger_t*);
extern void rocksdb_options_set_info_log_level(rocksdb_options_t*, int);
extern void rocksdb_options_set_write_buffer_size(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_max_open_files(rocksdb_options_t*, int);
extern void rocksdb_options_set_cache(rocksdb_options_t*, rocksdb_cache_t*);
extern void rocksdb_options_set_cache_compressed(rocksdb_options_t*, rocksdb_cache_t*);
extern void rocksdb_options_set_block_size(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_block_restart_interval(rocksdb_options_t*, int);
extern void rocksdb_options_set_compression_options(
rocksdb_options_t*, int, int, int);
extern void rocksdb_options_set_whole_key_filtering(rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_prefix_extractor(
rocksdb_options_t*, rocksdb_slicetransform_t*);
extern void rocksdb_options_set_num_levels(rocksdb_options_t*, int);
extern void rocksdb_options_set_level0_file_num_compaction_trigger(
rocksdb_options_t*, int);
extern void rocksdb_options_set_level0_slowdown_writes_trigger(
rocksdb_options_t*, int);
extern void rocksdb_options_set_level0_stop_writes_trigger(
rocksdb_options_t*, int);
extern void rocksdb_options_set_max_mem_compaction_level(
rocksdb_options_t*, int);
extern void rocksdb_options_set_target_file_size_base(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_target_file_size_multiplier(
rocksdb_options_t*, int);
extern void rocksdb_options_set_max_bytes_for_level_base(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_max_bytes_for_level_multiplier(
rocksdb_options_t*, int);
extern void rocksdb_options_set_expanded_compaction_factor(
rocksdb_options_t*, int);
extern void rocksdb_options_set_max_grandparent_overlap_factor(
rocksdb_options_t*, int);
extern void rocksdb_options_set_max_bytes_for_level_multiplier_additional(
rocksdb_options_t*, int* level_values, size_t num_levels);
extern void rocksdb_options_enable_statistics(rocksdb_options_t*);
extern void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t*, int);
extern void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int);
extern void rocksdb_options_set_max_background_compactions(rocksdb_options_t*, int);
extern void rocksdb_options_set_max_background_flushes(rocksdb_options_t*, int);
extern void rocksdb_options_set_max_log_file_size(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_keep_log_file_num(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_soft_rate_limit(rocksdb_options_t*, double);
extern void rocksdb_options_set_hard_rate_limit(rocksdb_options_t*, double);
extern void rocksdb_options_set_rate_limit_delay_max_milliseconds(
rocksdb_options_t*, unsigned int);
extern void rocksdb_options_set_max_manifest_file_size(
rocksdb_options_t*, size_t);
extern void rocksdb_options_set_no_block_cache(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_table_cache_numshardbits(
rocksdb_options_t*, int);
extern void rocksdb_options_set_table_cache_remove_scan_count_limit(
rocksdb_options_t*, int);
extern void rocksdb_options_set_arena_block_size(
rocksdb_options_t*, size_t);
extern void rocksdb_options_set_use_fsync(
rocksdb_options_t*, int);
extern void rocksdb_options_set_db_stats_log_interval(
rocksdb_options_t*, int);
extern void rocksdb_options_set_db_log_dir(
rocksdb_options_t*, const char*);
extern void rocksdb_options_set_wal_dir(
rocksdb_options_t*, const char*);
extern void rocksdb_options_set_WAL_ttl_seconds(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_WAL_size_limit_MB(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_manifest_preallocation_size(
rocksdb_options_t*, size_t);
extern void rocksdb_options_set_purge_redundant_kvs_while_flush(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_allow_os_buffer(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_allow_mmap_reads(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_allow_mmap_writes(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_is_fd_close_on_exec(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_skip_log_error_on_recovery(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_stats_dump_period_sec(
rocksdb_options_t*, unsigned int);
extern void rocksdb_options_set_block_size_deviation(
rocksdb_options_t*, int);
extern void rocksdb_options_set_advise_random_on_open(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_access_hint_on_compaction_start(
rocksdb_options_t*, int);
extern void rocksdb_options_set_use_adaptive_mutex(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_bytes_per_sync(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_verify_checksums_in_compaction(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_filter_deletes(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_max_sequential_skip_in_iterations(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_disable_data_sync(rocksdb_options_t*, int);
extern void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t*, int);
extern void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t*, int);
extern void rocksdb_options_set_delete_obsolete_files_period_micros(
rocksdb_options_t*, uint64_t);
extern void rocksdb_options_set_source_compaction_factor(rocksdb_options_t*, int);
extern void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t*);
extern void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t*);
extern void rocksdb_options_set_hash_skip_list_rep(rocksdb_options_t*, size_t, int32_t, int32_t);
extern void rocksdb_options_set_hash_link_list_rep(rocksdb_options_t*, size_t);
extern void rocksdb_options_set_plain_table_factory(rocksdb_options_t*, uint32_t, int, double, size_t);
extern void rocksdb_options_set_max_bytes_for_level_base(rocksdb_options_t* opt, uint64_t n);
extern void rocksdb_options_set_stats_dump_period_sec(rocksdb_options_t* opt, unsigned int sec);
extern void rocksdb_options_set_min_level_to_compress(rocksdb_options_t* opt, int level);
extern void rocksdb_options_set_memtable_prefix_bloom_bits(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_memtable_prefix_bloom_probes(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_max_successive_merges(
rocksdb_options_t*, size_t);
extern void rocksdb_options_set_min_partial_merge_operands(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_bloom_locality(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_allow_thread_local(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_inplace_update_support(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_inplace_update_num_locks(
rocksdb_options_t*, size_t);
enum {
rocksdb_no_compression = 0,
rocksdb_snappy_compression = 1,
rocksdb_zlib_compression = 2,
rocksdb_bz2_compression = 3,
rocksdb_lz4_compression = 4,
rocksdb_lz4hc_compression = 5
};
extern void rocksdb_options_set_compression(rocksdb_options_t*, int);
enum {
rocksdb_level_compaction = 0,
rocksdb_universal_compaction = 1,
rocksdb_fifo_compaction = 2
};
extern void rocksdb_options_set_compaction_style(rocksdb_options_t*, int);
extern void rocksdb_options_set_universal_compaction_options(rocksdb_options_t*, rocksdb_universal_compaction_options_t*);
extern void rocksdb_options_set_fifo_compaction_options(rocksdb_options_t* opt,
rocksdb_fifo_compaction_options_t* fifo);
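
A typical setup sequence using the option setters above might look like the sketch below. make_options is an invented helper and every numeric value is purely illustrative, not a tuning recommendation.

#include "rocksdb/c.h"

/* Build an options object before rocksdb_open (values are illustrative). */
static rocksdb_options_t* make_options(void) {
  rocksdb_options_t* opts = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(opts, 1);
  rocksdb_options_increase_parallelism(opts, 4);           /* background threads */
  rocksdb_options_set_write_buffer_size(opts, 64 << 20);   /* 64 MB memtable */
  rocksdb_options_set_max_open_files(opts, 512);
  rocksdb_options_set_compression(opts, rocksdb_snappy_compression);
  rocksdb_options_set_compaction_style(opts, rocksdb_level_compaction);
  return opts;
}
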
/* Compaction Filter */
extern rocksdb_compactionfilter_t* rocksdb_compactionfilter_create(
void* state,
void (*destructor)(void*),
unsigned char (*filter)(
void*,
int level,
const char* key, size_t key_length,
const char* existing_value, size_t value_length,
char** new_value, size_t *new_value_length,
unsigned char* value_changed),
const char* (*name)(void*));
extern void rocksdb_compactionfilter_destroy(rocksdb_compactionfilter_t*);
/* Compaction Filter Context */
extern unsigned char rocksdb_compactionfiltercontext_is_full_compaction(
rocksdb_compactionfiltercontext_t* context);
extern unsigned char rocksdb_compactionfiltercontext_is_manual_compaction(
rocksdb_compactionfiltercontext_t* context);
/* Compaction Filter Factory */
extern rocksdb_compactionfilterfactory_t*
rocksdb_compactionfilterfactory_create(
void* state, void (*destructor)(void*),
rocksdb_compactionfilter_t* (*create_compaction_filter)(
void*, rocksdb_compactionfiltercontext_t* context),
const char* (*name)(void*));
extern void rocksdb_compactionfilterfactory_destroy(
rocksdb_compactionfilterfactory_t*);
/* Compaction Filter V2 */
extern rocksdb_compactionfilterv2_t* rocksdb_compactionfilterv2_create(
void* state,
void (*destructor)(void*),
// num_keys specifies the number of array entries in every *list parameter.
// New values added to the new_values_list should be malloc'd and will be
// freed by the caller. Specify true in the to_delete_list to remove an
// entry during compaction; false to keep it.
void (*filter)(
void*, int level, size_t num_keys,
const char* const* keys_list, const size_t* keys_list_sizes,
const char* const* existing_values_list, const size_t* existing_values_list_sizes,
char** new_values_list, size_t* new_values_list_sizes,
unsigned char* to_delete_list),
const char* (*name)(void*));
extern void rocksdb_compactionfilterv2_destroy(rocksdb_compactionfilterv2_t*);
/* Compaction Filter Factory V2 */
extern rocksdb_compactionfilterfactoryv2_t* rocksdb_compactionfilterfactoryv2_create(
void* state,
rocksdb_slicetransform_t* prefix_extractor,
void (*destructor)(void*),
rocksdb_compactionfilterv2_t* (*create_compaction_filter_v2)(
void*, const rocksdb_compactionfiltercontext_t* context),
const char* (*name)(void*));
extern void rocksdb_compactionfilterfactoryv2_destroy(rocksdb_compactionfilterfactoryv2_t*);
/* Comparator */
extern rocksdb_comparator_t* rocksdb_comparator_create(
void* state,
void (*destructor)(void*),
int (*compare)(
void*,
const char* a, size_t alen,
const char* b, size_t blen),
const char* (*name)(void*));
extern void rocksdb_comparator_destroy(rocksdb_comparator_t*);
/* Filter policy */
extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create(
void* state,
void (*destructor)(void*),
char* (*create_filter)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length),
unsigned char (*key_may_match)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length),
void (*delete_filter)(
void*,
const char* filter, size_t filter_length),
const char* (*name)(void*));
extern void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t*);
extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(
int bits_per_key);
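
To enable the built-in bloom filter from the C API, create the policy and hand it to the options, as in this sketch. enable_bloom is an invented helper, 10 bits per key is only a common starting point, and the lifetime of the policy object is not addressed here.

#include "rocksdb/c.h"

/* Attach a bloom filter policy to the options (illustrative only). */
static void enable_bloom(rocksdb_options_t* opts) {
  rocksdb_filterpolicy_t* bloom = rocksdb_filterpolicy_create_bloom(10);
  rocksdb_options_set_filter_policy(opts, bloom);
}
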
/* Merge Operator */
extern rocksdb_mergeoperator_t* rocksdb_mergeoperator_create(
void* state,
void (*destructor)(void*),
char* (*full_merge)(
void*,
const char* key, size_t key_length,
const char* existing_value, size_t existing_value_length,
const char* const* operands_list, const size_t* operands_list_length,
int num_operands,
unsigned char* success, size_t* new_value_length),
char* (*partial_merge)(
void*,
const char* key, size_t key_length,
const char* const* operands_list, const size_t* operands_list_length,
int num_operands,
unsigned char* success, size_t* new_value_length),
void (*delete_value)(
void*,
const char* value, size_t value_length),
const char* (*name)(void*));
extern void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t*);
/* Read options */
extern rocksdb_readoptions_t* rocksdb_readoptions_create();
extern void rocksdb_readoptions_destroy(rocksdb_readoptions_t*);
extern void rocksdb_readoptions_set_verify_checksums(
rocksdb_readoptions_t*,
unsigned char);
extern void rocksdb_readoptions_set_fill_cache(
rocksdb_readoptions_t*, unsigned char);
extern void rocksdb_readoptions_set_snapshot(
rocksdb_readoptions_t*,
const rocksdb_snapshot_t*);
extern void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t*, int);
extern void rocksdb_readoptions_set_tailing(
rocksdb_readoptions_t*, unsigned char);
/* Write options */
extern rocksdb_writeoptions_t* rocksdb_writeoptions_create();
extern void rocksdb_writeoptions_destroy(rocksdb_writeoptions_t*);
extern void rocksdb_writeoptions_set_sync(
rocksdb_writeoptions_t*, unsigned char);
extern void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable);
/* Flush options */
extern rocksdb_flushoptions_t* rocksdb_flushoptions_create();
extern void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t*);
extern void rocksdb_flushoptions_set_wait(
rocksdb_flushoptions_t*, unsigned char);
/* Cache */
extern rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity);
extern void rocksdb_cache_destroy(rocksdb_cache_t* cache);
/* Env */
extern rocksdb_env_t* rocksdb_create_default_env();
extern void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n);
extern void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n);
extern void rocksdb_env_destroy(rocksdb_env_t*);
/* SliceTransform */
extern rocksdb_slicetransform_t* rocksdb_slicetransform_create(
void* state,
void (*destructor)(void*),
char* (*transform)(
void*,
const char* key, size_t length,
size_t* dst_length),
unsigned char (*in_domain)(
void*,
const char* key, size_t length),
unsigned char (*in_range)(
void*,
const char* key, size_t length),
const char* (*name)(void*));
extern rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t);
extern void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t*);
/* Universal Compaction options */
enum {
rocksdb_similar_size_compaction_stop_style = 0,
rocksdb_total_size_compaction_stop_style = 1
};
extern rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create();
extern void rocksdb_universal_compaction_options_set_size_ratio(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_set_min_merge_width(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_set_max_merge_width(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_set_max_size_amplification_percent(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_set_compression_size_percent(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_set_stop_style(
rocksdb_universal_compaction_options_t*, int);
extern void rocksdb_universal_compaction_options_destroy(
rocksdb_universal_compaction_options_t*);
extern rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create();
extern void rocksdb_fifo_compaction_options_set_max_table_files_size(
rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size);
extern void rocksdb_fifo_compaction_options_destroy(
rocksdb_fifo_compaction_options_t* fifo_opts);
extern int rocksdb_livefiles_count(
const rocksdb_livefiles_t*);
extern const char* rocksdb_livefiles_name(
const rocksdb_livefiles_t*,
int index);
extern int rocksdb_livefiles_level(
const rocksdb_livefiles_t*,
int index);
extern size_t rocksdb_livefiles_size(
const rocksdb_livefiles_t*,
int index);
extern const char* rocksdb_livefiles_smallestkey(
const rocksdb_livefiles_t*,
int index,
size_t* size);
extern const char* rocksdb_livefiles_largestkey(
const rocksdb_livefiles_t*,
int index,
size_t* size);
extern void rocksdb_livefiles_destroy(
const rocksdb_livefiles_t*);
#ifdef __cplusplus
} /* end extern "C" */
#endif
#endif /* STORAGE_ROCKSDB_INCLUDE_C_H_ */
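
Putting the column-family functions together, one way to open a database with an extra column family and write into it is sketched below. The path and the "users" family name are invented, and create_missing_column_families is set so the extra family is created on first open.

#include <stdio.h>
#include <stdlib.h>
#include "rocksdb/c.h"

int main(void) {
  char* err = NULL;
  rocksdb_options_t* db_opts = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(db_opts, 1);
  rocksdb_options_set_create_missing_column_families(db_opts, 1);

  /* Every open must list the "default" column family; "users" is made up. */
  const char* cf_names[2] = {"default", "users"};
  const rocksdb_options_t* cf_opts[2] = {db_opts, db_opts};
  rocksdb_column_family_handle_t* cf_handles[2] = {NULL, NULL};

  rocksdb_t* db = rocksdb_open_column_families(
      db_opts, "/tmp/rocksdb_cf_example", 2, cf_names, cf_opts, cf_handles, &err);
  if (err != NULL) {
    fprintf(stderr, "open failed: %s\n", err);
    free(err);
    return 1;
  }

  rocksdb_writeoptions_t* wopts = rocksdb_writeoptions_create();
  rocksdb_put_cf(db, wopts, cf_handles[1], "id42", 4, "alice", 5, &err);
  if (err != NULL) { free(err); }

  rocksdb_writeoptions_destroy(wopts);
  rocksdb_column_family_handle_destroy(cf_handles[0]);
  rocksdb_column_family_handle_destroy(cf_handles[1]);
  rocksdb_close(db);
  rocksdb_options_destroy(db_opts);
  return 0;
}
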

140
include/rocksdb/cache.h Normal file

@@ -0,0 +1,140 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A Cache is an interface that maps keys to values. It has internal
// synchronization and may be safely accessed concurrently from
// multiple threads. It may automatically evict entries to make room
// for new entries. Values have a specified charge against the cache
// capacity. For example, a cache where the values are variable
// length strings, may use the length of the string as the charge for
// the string.
//
// A builtin cache implementation with a least-recently-used eviction
// policy is provided. Clients may use their own implementations if
// they want something more sophisticated (like scan-resistance, a
// custom eviction policy, variable cache sizing, etc.)
#ifndef STORAGE_ROCKSDB_INCLUDE_CACHE_H_
#define STORAGE_ROCKSDB_INCLUDE_CACHE_H_
#include <memory>
#include <stdint.h>
#include "rocksdb/slice.h"
namespace rocksdb {
using std::shared_ptr;
class Cache;
// Create a new cache with a fixed size capacity. The cache is sharded
// to 2^numShardBits shards, by hash of the key. The total capacity
// is divided and evenly assigned to each shard. Inside each shard,
// the eviction is done in two passes: first try to free space by
// evicting, in least-recently-used order, entries that are among the
// removeScanCountLimit least recently used entries and are not referenced
// by anything other than the cache itself. If not enough space is freed,
// further free entries in least-recently-used order.
//
// The functions without parameter numShardBits and/or removeScanCountLimit
// use default values. removeScanCountLimit's default value is 0, which
// means a strict LRU order inside each shard.
extern shared_ptr<Cache> NewLRUCache(size_t capacity);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits);
extern shared_ptr<Cache> NewLRUCache(size_t capacity, int numShardBits,
int removeScanCountLimit);
class Cache {
public:
Cache() { }
// Destroys all existing entries by calling the "deleter"
// function that was passed to the constructor.
virtual ~Cache();
// Opaque handle to an entry stored in the cache.
struct Handle { };
// Insert a mapping from key->value into the cache and assign it
// the specified charge against the total cache capacity.
//
// Returns a handle that corresponds to the mapping. The caller
// must call this->Release(handle) when the returned mapping is no
// longer needed.
//
// When the inserted entry is no longer needed, the key and
// value will be passed to "deleter".
virtual Handle* Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value)) = 0;
// If the cache has no mapping for "key", returns nullptr.
//
// Else return a handle that corresponds to the mapping. The caller
// must call this->Release(handle) when the returned mapping is no
// longer needed.
virtual Handle* Lookup(const Slice& key) = 0;
// Release a mapping returned by a previous Lookup().
// REQUIRES: handle must not have been released yet.
// REQUIRES: handle must have been returned by a method on *this.
virtual void Release(Handle* handle) = 0;
// Return the value encapsulated in a handle returned by a
// successful Lookup().
// REQUIRES: handle must not have been released yet.
// REQUIRES: handle must have been returned by a method on *this.
virtual void* Value(Handle* handle) = 0;
// If the cache contains entry for key, erase it. Note that the
// underlying entry will be kept around until all existing handles
// to it have been released.
virtual void Erase(const Slice& key) = 0;
// Return a new numeric id. May be used by multiple clients who are
// sharing the same cache to partition the key space. Typically the
// client will allocate a new id at startup and prepend the id to
// its cache keys.
virtual uint64_t NewId() = 0;
// returns the maximum configured capacity of the cache
virtual size_t GetCapacity() const = 0;
// returns the memory size for the entries residing in the cache.
virtual size_t GetUsage() const = 0;
// Call this on shutdown if you want to speed it up. Cache will disown
// any underlying data and will not free it on delete. This call will leak
// memory - call this only if you're shutting down the process.
// Any attempt to use the cache after this call will fail terribly.
// Always delete the DB object before calling this method!
virtual void DisownData() {
// default implementation is noop
};
// Apply callback to all entries in the cache
// If thread_safe is true, it will also lock the accesses. Otherwise, it will
// access the cache without the lock held
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) = 0;
private:
void LRU_Remove(Handle* e);
void LRU_Append(Handle* e);
void Unref(Handle* e);
struct Rep;
Rep* rep_;
// No copying allowed
Cache(const Cache&);
void operator=(const Cache&);
};
} // namespace rocksdb
#endif  // STORAGE_ROCKSDB_INCLUDE_CACHE_H_
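
Most clients simply hand the result of NewLRUCache to the DB options, but the Cache interface above can also be used directly. A sketch under that assumption, with a std::string payload, a matching deleter, and invented names throughout:

#include <cassert>
#include <memory>
#include <string>
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"

using rocksdb::Cache;
using rocksdb::NewLRUCache;
using rocksdb::Slice;

// Deleter invoked by the cache when the entry is evicted or erased.
static void DeleteString(const Slice& /*key*/, void* value) {
  delete static_cast<std::string*>(value);
}

int main() {
  std::shared_ptr<Cache> cache = NewLRUCache(8 << 20 /* 8 MB capacity */);

  std::string* value = new std::string("cached payload");
  // Charge the entry by its payload size against the cache capacity.
  Cache::Handle* h =
      cache->Insert(Slice("a-key"), value, value->size(), DeleteString);
  cache->Release(h);  // release the handle returned by Insert

  Cache::Handle* found = cache->Lookup(Slice("a-key"));
  if (found != nullptr) {
    std::string* v = static_cast<std::string*>(cache->Value(found));
    assert(*v == "cached payload");
    cache->Release(found);  // every successful Lookup must be released
  }
  return 0;
}
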

198
include/rocksdb/compaction_filter.h Normal file

@@ -0,0 +1,198 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
#define STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
#include <string>
#include <vector>
namespace rocksdb {
class Slice;
class SliceTransform;
// Context information of a compaction run
struct CompactionFilterContext {
// Does this compaction run include all data files
bool is_full_compaction;
// Is this compaction requested by the client (true),
// or is it occurring as an automatic compaction process
bool is_manual_compaction;
};
// CompactionFilter allows an application to modify/delete a key-value at
// the time of compaction.
class CompactionFilter {
public:
// Context information of a compaction run
struct Context {
// Does this compaction run include all data files
bool is_full_compaction;
// Is this compaction requested by the client (true),
// or is it occurring as an automatic compaction process
bool is_manual_compaction;
};
virtual ~CompactionFilter() {}
// The compaction process invokes this
// method for each kv pair that is being compacted. A return value
// of false indicates that the kv pair should be preserved in the
// output of this compaction run and a return value of true
// indicates that this key-value pair should be removed from the
// output of the compaction. The application can inspect
// the existing value of the key and make a decision based on it.
//
// When the value is to be preserved, the application has the option
// to modify the existing_value and pass it back through new_value.
// value_changed needs to be set to true in this case.
//
// If multithreaded compaction is being used *and* a single CompactionFilter
// instance was supplied via Options::compaction_filter, this method may be
// called from different threads concurrently. The application must ensure
// that the call is thread-safe.
//
// If the CompactionFilter was created by a factory, then it will only ever
// be used by a single thread that is doing the compaction run, and this
// call does not need to be thread-safe. However, multiple filters may be
// in existence and operating concurrently.
virtual bool Filter(int level,
const Slice& key,
const Slice& existing_value,
std::string* new_value,
bool* value_changed) const = 0;
// Returns a name that identifies this compaction filter.
// The name will be printed to LOG file on start up for diagnosis.
virtual const char* Name() const = 0;
};
// CompactionFilterV2 buffers kv pairs sharing the same prefix and lets the
// application layer make individual decisions for all the kv pairs in the
// buffer.
class CompactionFilterV2 {
public:
virtual ~CompactionFilterV2() {}
// The compaction process invokes this method for all the kv pairs
// sharing the same prefix. It is a "roll-up" version of CompactionFilter.
//
// Each entry in the return vector indicates if the corresponding kv should
// be preserved in the output of this compaction run. The application can
// inspect the existing values of the keys and make decisions based on them.
//
// When a value is to be preserved, the application has the option
// to modify the entry in existing_values and pass it back through an entry
// in new_values. A corresponding values_changed entry needs to be set to
// true in this case. Note that the new_values vector contains only changed
// values, i.e. new_values.size() <= values_changed.size().
//
typedef std::vector<Slice> SliceVector;
virtual std::vector<bool> Filter(int level,
const SliceVector& keys,
const SliceVector& existing_values,
std::vector<std::string>* new_values,
std::vector<bool>* values_changed)
const = 0;
// Returns a name that identifies this compaction filter.
// The name will be printed to LOG file on start up for diagnosis.
virtual const char* Name() const = 0;
};
// Each compaction will create a new CompactionFilter allowing the
// application to know about different compactions
class CompactionFilterFactory {
public:
virtual ~CompactionFilterFactory() { }
virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
const CompactionFilter::Context& context) = 0;
// Returns a name that identifies this compaction filter factory.
virtual const char* Name() const = 0;
};
// Default implementation of CompactionFilterFactory which does not
// return any filter
class DefaultCompactionFilterFactory : public CompactionFilterFactory {
public:
virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
const CompactionFilter::Context& context) override {
return std::unique_ptr<CompactionFilter>(nullptr);
}
virtual const char* Name() const override {
return "DefaultCompactionFilterFactory";
}
};
// Each compaction will create a new CompactionFilterV2
//
// CompactionFilterFactoryV2 enables application to specify a prefix and use
// CompactionFilterV2 to filter kv-pairs in batches. Each batch contains all
// the kv-pairs sharing the same prefix.
//
// This is useful for applications that need to group kv-pairs in the
// compaction filter to make a purge/no-purge decision. For example, suppose
// the key prefix is a user id and the rest of the key represents the type of
// value. This batching filter comes in handy if the application's compaction
// filter requires knowledge of all types of values for any user id.
//
class CompactionFilterFactoryV2 {
public:
// NOTE: CompactionFilterFactoryV2 will not delete prefix_extractor
explicit CompactionFilterFactoryV2(const SliceTransform* prefix_extractor)
: prefix_extractor_(prefix_extractor) { }
virtual ~CompactionFilterFactoryV2() { }
virtual std::unique_ptr<CompactionFilterV2> CreateCompactionFilterV2(
const CompactionFilterContext& context) = 0;
// Returns a name that identifies this compaction filter factory.
virtual const char* Name() const = 0;
const SliceTransform* GetPrefixExtractor() const {
return prefix_extractor_;
}
void SetPrefixExtractor(const SliceTransform* prefix_extractor) {
prefix_extractor_ = prefix_extractor;
}
private:
// Prefix extractor for compaction filter v2
// Keys sharing the same prefix will be buffered internally.
// Client can implement a Filter callback function to operate on the buffer
const SliceTransform* prefix_extractor_;
};
// Default implementation of CompactionFilterFactoryV2 which does not
// return any filter
class DefaultCompactionFilterFactoryV2 : public CompactionFilterFactoryV2 {
public:
explicit DefaultCompactionFilterFactoryV2()
: CompactionFilterFactoryV2(nullptr) { }
virtual std::unique_ptr<CompactionFilterV2>
CreateCompactionFilterV2(
const CompactionFilterContext& context) override {
return std::unique_ptr<CompactionFilterV2>(nullptr);
}
virtual const char* Name() const override {
return "DefaultCompactionFilterFactoryV2";
}
};
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_
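
As an illustration of the single-key interface, the sketch below defines a compaction filter that drops entries whose existing value is empty, plus a factory that hands one fresh filter to each compaction run. Both class names are invented, and wiring the factory into the DB options is left out.

#include <memory>
#include <string>
#include "rocksdb/compaction_filter.h"
#include "rocksdb/slice.h"

namespace example {

// Removes any key whose existing value is empty; everything else is kept as-is.
class DropEmptyValueFilter : public rocksdb::CompactionFilter {
 public:
  virtual bool Filter(int /*level*/, const rocksdb::Slice& /*key*/,
                      const rocksdb::Slice& existing_value,
                      std::string* /*new_value*/,
                      bool* value_changed) const override {
    *value_changed = false;          // the value is never rewritten
    return existing_value.empty();   // true == drop from the compaction output
  }
  virtual const char* Name() const override { return "DropEmptyValueFilter"; }
};

// Hands one fresh filter to every compaction run.
class DropEmptyValueFilterFactory : public rocksdb::CompactionFilterFactory {
 public:
  virtual std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter(
      const rocksdb::CompactionFilter::Context& /*context*/) override {
    return std::unique_ptr<rocksdb::CompactionFilter>(new DropEmptyValueFilter);
  }
  virtual const char* Name() const override {
    return "DropEmptyValueFilterFactory";
  }
};

}  // namespace example
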

67
include/rocksdb/comparator.h Normal file

@@ -0,0 +1,67 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_
#define STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_
#include <string>
namespace rocksdb {
class Slice;
// A Comparator object provides a total order across slices that are
// used as keys in an sstable or a database. A Comparator implementation
// must be thread-safe since rocksdb may invoke its methods concurrently
// from multiple threads.
class Comparator {
public:
virtual ~Comparator();
// Three-way comparison. Returns value:
// < 0 iff "a" < "b",
// == 0 iff "a" == "b",
// > 0 iff "a" > "b"
virtual int Compare(const Slice& a, const Slice& b) const = 0;
// The name of the comparator. Used to check for comparator
// mismatches (i.e., a DB created with one comparator is
// accessed using a different comparator).
//
// The client of this package should switch to a new name whenever
// the comparator implementation changes in a way that will cause
// the relative ordering of any two keys to change.
//
// Names starting with "rocksdb." are reserved and should not be used
// by any clients of this package.
virtual const char* Name() const = 0;
// Advanced functions: these are used to reduce the space requirements
// for internal data structures like index blocks.
// If *start < limit, changes *start to a short string in [start,limit).
// Simple comparator implementations may return with *start unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortestSeparator(
std::string* start,
const Slice& limit) const = 0;
// Changes *key to a short string >= *key.
// Simple comparator implementations may return with *key unchanged,
// i.e., an implementation of this method that does nothing is correct.
virtual void FindShortSuccessor(std::string* key) const = 0;
};
// Return a builtin comparator that uses lexicographic byte-wise
// ordering. The result remains the property of this module and
// must not be deleted.
extern const Comparator* BytewiseComparator();
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_
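
A custom comparator only needs the four virtual methods above. The sketch below reverses the built-in byte-wise order and leaves the two space-saving hooks as no-ops, which the comments above say is always correct; the class name is invented, and it would be attached through the comparator field of the DB options.

#include <string>
#include "rocksdb/comparator.h"
#include "rocksdb/slice.h"

namespace example {

// Orders keys in reverse byte-wise order by negating the built-in comparison.
class ReverseBytewiseComparator : public rocksdb::Comparator {
 public:
  virtual int Compare(const rocksdb::Slice& a,
                      const rocksdb::Slice& b) const override {
    return -rocksdb::BytewiseComparator()->Compare(a, b);
  }
  // A distinct name, because the key ordering differs from the default.
  virtual const char* Name() const override {
    return "example.ReverseBytewiseComparator";
  }
  // Doing nothing here is allowed; it only costs some extra index space.
  virtual void FindShortestSeparator(std::string* /*start*/,
                                     const rocksdb::Slice& /*limit*/) const override {}
  virtual void FindShortSuccessor(std::string* /*key*/) const override {}
};

}  // namespace example
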

507
include/rocksdb/db.h Normal file

@@ -0,0 +1,507 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
#define STORAGE_ROCKSDB_INCLUDE_DB_H_
#include <stdint.h>
#include <stdio.h>
#include <memory>
#include <vector>
#include <string>
#include <unordered_map>
#include "rocksdb/version.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/types.h"
#include "rocksdb/transaction_log.h"
namespace rocksdb {
using std::unique_ptr;
class ColumnFamilyHandle {
public:
virtual ~ColumnFamilyHandle() {}
};
extern const std::string kDefaultColumnFamilyName;
struct ColumnFamilyDescriptor {
std::string name;
ColumnFamilyOptions options;
ColumnFamilyDescriptor()
: name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
ColumnFamilyDescriptor(const std::string& _name,
const ColumnFamilyOptions& _options)
: name(_name), options(_options) {}
};
static const int kMajorVersion = __ROCKSDB_MAJOR__;
static const int kMinorVersion = __ROCKSDB_MINOR__;
struct Options;
struct ReadOptions;
struct WriteOptions;
struct FlushOptions;
struct TableProperties;
class WriteBatch;
class Env;
// Metadata associated with each SST file.
struct LiveFileMetaData {
std::string column_family_name; // Name of the column family
std::string db_path;
std::string name; // Name of the file
int level; // Level at which this file resides.
size_t size; // File size in bytes.
std::string smallestkey; // Smallest user defined key in the file.
std::string largestkey; // Largest user defined key in the file.
SequenceNumber smallest_seqno; // smallest seqno in file
SequenceNumber largest_seqno; // largest seqno in file
};
// Abstract handle to particular state of a DB.
// A Snapshot is an immutable object and can therefore be safely
// accessed from multiple threads without any external synchronization.
class Snapshot {
protected:
virtual ~Snapshot();
};
// A range of keys
struct Range {
Slice start; // Included in the range
Slice limit; // Not included in the range
Range() { }
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
};
// A collection of table properties objects, where
// key: is the table's file name.
// value: the table properties object of the given table.
typedef std::unordered_map<std::string, std::shared_ptr<const TableProperties>>
TablePropertiesCollection;
// A DB is a persistent ordered map from keys to values.
// A DB is safe for concurrent access from multiple threads without
// any external synchronization.
class DB {
public:
// Open the database with the specified "name".
// Stores a pointer to a heap-allocated database in *dbptr and returns
// OK on success.
// Stores nullptr in *dbptr and returns a non-OK status on error.
// Caller should delete *dbptr when it is no longer needed.
static Status Open(const Options& options,
const std::string& name,
DB** dbptr);
// Open the database for read only. All DB interfaces
// that modify data, like put/delete, will return error.
// If the db is opened in read only mode, then no compactions
// will happen.
static Status OpenForReadOnly(const Options& options,
const std::string& name, DB** dbptr,
bool error_if_log_file_exist = false);
// Open the database for read only with column families. When opening the DB
// read-only, you can specify only a subset of the column families in the
// database that should be opened. However, you always need to specify the
// default column family. The default column family name is 'default' and it is
// stored in rocksdb::kDefaultColumnFamilyName.
static Status OpenForReadOnly(
const DBOptions& db_options, const std::string& name,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
bool error_if_log_file_exist = false);
// Open DB with column families.
// db_options specify database specific options
// column_families is the vector of all column families in the database,
// containing column family name and options. You need to open ALL column
// families in the database. To get the list of column families, you can use
// ListColumnFamilies(). Also, you can open only a subset of column families
// for read-only access.
// The default column family name is 'default' and it's stored
// in rocksdb::kDefaultColumnFamilyName.
// If everything is OK, handles will on return be the same size
// as column_families --- handles[i] will be a handle that you
// will use to operate on column family column_family[i]
static Status Open(const DBOptions& db_options, const std::string& name,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
// ListColumnFamilies will open the DB specified by argument name
// and return the list of all column families in that DB
// through column_families argument. The ordering of
// column families in column_families is unspecified.
static Status ListColumnFamilies(const DBOptions& db_options,
const std::string& name,
std::vector<std::string>* column_families);
DB() { }
virtual ~DB();
// Create a column_family and return the handle of column family
// through the argument handle.
virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
const std::string& column_family_name,
ColumnFamilyHandle** handle);
// Drop a column family specified by column_family handle. This call
// only records a drop record in the manifest and prevents the column
// family from flushing and compacting.
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
// Set the database entry for "key" to "value".
// If "key" already exists, it will be overwritten.
// Returns OK on success, and a non-OK status on error.
// Note: consider setting options.sync = true.
virtual Status Put(const WriteOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) = 0;
virtual Status Put(const WriteOptions& options, const Slice& key,
const Slice& value) {
return Put(options, DefaultColumnFamily(), key, value);
}
// Remove the database entry (if any) for "key". Returns OK on
// success, and a non-OK status on error. It is not an error if "key"
// did not exist in the database.
// Note: consider setting options.sync = true.
virtual Status Delete(const WriteOptions& options,
ColumnFamilyHandle* column_family,
const Slice& key) = 0;
virtual Status Delete(const WriteOptions& options, const Slice& key) {
return Delete(options, DefaultColumnFamily(), key);
}
// Merge the database entry for "key" with "value". Returns OK on success,
// and a non-OK status on error. The semantics of this operation is
// determined by the user provided merge_operator when opening DB.
// Note: consider setting options.sync = true.
virtual Status Merge(const WriteOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) = 0;
virtual Status Merge(const WriteOptions& options, const Slice& key,
const Slice& value) {
return Merge(options, DefaultColumnFamily(), key, value);
}
// Apply the specified updates to the database.
// Returns OK on success, non-OK on failure.
// Note: consider setting options.sync = true.
virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
// If the database contains an entry for "key" store the
// corresponding value in *value and return OK.
//
// If there is no entry for "key" leave *value unchanged and return
// a status for which Status::IsNotFound() returns true.
//
// May return some other Status on an error.
virtual Status Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value) = 0;
virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) {
return Get(options, DefaultColumnFamily(), key, value);
}
// If keys[i] does not exist in the database, then the i'th returned
// status will be one for which Status::IsNotFound() is true, and
// (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
// the i'th returned status will have Status::ok() true, and (*values)[i]
// will store the value associated with keys[i].
//
// (*values) will always be resized to be the same size as (keys).
// Similarly, the number of returned statuses will be the number of keys.
// Note: keys will not be "de-duplicated". Duplicate keys will return
// duplicate values in order.
virtual std::vector<Status> MultiGet(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_family,
const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
virtual std::vector<Status> MultiGet(const ReadOptions& options,
const std::vector<Slice>& keys,
std::vector<std::string>* values) {
return MultiGet(options, std::vector<ColumnFamilyHandle*>(
keys.size(), DefaultColumnFamily()),
keys, values);
}
// If the key definitely does not exist in the database, then this method
// returns false, else true. If the caller wants to obtain value when the key
// is found in memory, a bool for 'value_found' must be passed. 'value_found'
// will be true on return if value has been set properly.
// This check is potentially lighter-weight than invoking DB::Get(). One way
// to make this lighter weight is to avoid doing any IOs.
// Default implementation here returns true and sets 'value_found' to false
virtual bool KeyMayExist(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value, bool* value_found = nullptr) {
if (value_found != nullptr) {
*value_found = false;
}
return true;
}
virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
std::string* value, bool* value_found = nullptr) {
return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
}
// Return a heap-allocated iterator over the contents of the database.
// The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it).
//
// Caller should delete the iterator when it is no longer needed.
// The returned iterator should be deleted before this db is deleted.
virtual Iterator* NewIterator(const ReadOptions& options,
ColumnFamilyHandle* column_family) = 0;
virtual Iterator* NewIterator(const ReadOptions& options) {
return NewIterator(options, DefaultColumnFamily());
}
// Returns iterators from a consistent database state across multiple
// column families. Iterators are heap allocated and need to be deleted
// before the db is deleted
virtual Status NewIterators(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_families,
std::vector<Iterator*>* iterators) = 0;
// Return a handle to the current DB state. Iterators created with
// this handle will all observe a stable snapshot of the current DB
// state. The caller must call ReleaseSnapshot(result) when the
// snapshot is no longer needed.
//
// nullptr will be returned if the DB fails to take a snapshot or does
// not support snapshot.
virtual const Snapshot* GetSnapshot() = 0;
// Release a previously acquired snapshot. The caller must not
// use "snapshot" after this call.
virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
// DB implementations can export properties about their state
// via this method. If "property" is a valid property understood by this
// DB implementation, fills "*value" with its current value and returns
// true. Otherwise returns false.
//
//
// Valid property names include:
//
// "rocksdb.num-files-at-level<N>" - return the number of files at level <N>,
// where <N> is an ASCII representation of a level number (e.g. "0").
// "rocksdb.stats" - returns a multi-line string that describes statistics
// about the internal operation of the DB.
// "rocksdb.sstables" - returns a multi-line string that describes all
// of the sstables that make up the db contents.
virtual bool GetProperty(ColumnFamilyHandle* column_family,
const Slice& property, std::string* value) = 0;
virtual bool GetProperty(const Slice& property, std::string* value) {
return GetProperty(DefaultColumnFamily(), property, value);
}
// Similar to GetProperty(), but only works for a subset of properties whose
// return value is an integer. Return the value by integer.
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
const Slice& property, uint64_t* value) = 0;
virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
return GetIntProperty(DefaultColumnFamily(), property, value);
}
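  //
  // Example (illustrative sketch; assumes a DB* named db):
  //
  //   std::string stats;
  //   if (db->GetProperty("rocksdb.stats", &stats)) {
  //     // 'stats' now holds a multi-line statistics dump.
  //   }
  //   std::string num_files;
  //   db->GetProperty("rocksdb.num-files-at-level0", &num_files);
  //   // GetIntProperty() works the same way for integer-valued properties.
  //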
  // For each i in [0,n-1], store in "sizes[i]" the approximate
// file system space used by keys in "[range[i].start .. range[i].limit)".
//
// Note that the returned sizes measure file system space usage, so
// if the user data compresses by a factor of ten, the returned
// sizes will be one-tenth the size of the corresponding user data size.
//
// The results may not include the sizes of recently written data.
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
const Range* range, int n,
uint64_t* sizes) = 0;
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) {
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes);
}
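  //
  // Example (illustrative sketch; assumes a DB* named db and the Range struct
  // declared earlier in this header):
  //
  //   rocksdb::Range ranges[2] = {rocksdb::Range("a", "c"),
  //                               rocksdb::Range("x", "z")};
  //   uint64_t sizes[2];
  //   db->GetApproximateSizes(ranges, 2, sizes);
  //   // sizes[0] and sizes[1] now hold approximate on-disk sizes in bytes.
  //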
// Compact the underlying storage for the key range [*begin,*end].
  // The actual compaction interval might be a superset of [*begin, *end].
// In particular, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// begin==nullptr is treated as a key before all keys in the database.
// end==nullptr is treated as a key after all keys in the database.
// Therefore the following call will compact the entire database:
// db->CompactRange(nullptr, nullptr);
// Note that after the entire database is compacted, all data are pushed
// down to the last level containing any data. If the total data size
// after compaction is reduced, that level might not be appropriate for
  // hosting all the files. In this case, the client can set reduce_level
  // to true to move the files back to the minimum level capable of holding
  // the data set, or to a given level (specified by a non-negative
  // target_level).
// Compaction outputs should be placed in options.db_paths[target_path_id].
// Behavior is undefined if target_path_id is out of range.
virtual Status CompactRange(ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end,
bool reduce_level = false, int target_level = -1,
uint32_t target_path_id = 0) = 0;
virtual Status CompactRange(const Slice* begin, const Slice* end,
bool reduce_level = false, int target_level = -1,
uint32_t target_path_id = 0) {
return CompactRange(DefaultColumnFamily(), begin, end, reduce_level,
target_level, target_path_id);
}
// Number of levels used for this DB.
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
// Maximum level to which a new compacted memtable is pushed if it
// does not create overlap.
virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
virtual int MaxMemCompactionLevel() {
return MaxMemCompactionLevel(DefaultColumnFamily());
}
// Number of files in level-0 that would stop writes.
virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
virtual int Level0StopWriteTrigger() {
return Level0StopWriteTrigger(DefaultColumnFamily());
}
// Get DB name -- the exact same name that was provided as an argument to
// DB::Open()
virtual const std::string& GetName() const = 0;
// Get Env object from the DB
virtual Env* GetEnv() const = 0;
// Get DB Options that we use
virtual const Options& GetOptions(ColumnFamilyHandle* column_family)
const = 0;
virtual const Options& GetOptions() const {
return GetOptions(DefaultColumnFamily());
}
// Flush all mem-table data.
virtual Status Flush(const FlushOptions& options,
ColumnFamilyHandle* column_family) = 0;
virtual Status Flush(const FlushOptions& options) {
return Flush(options, DefaultColumnFamily());
}
// The sequence number of the most recent transaction.
virtual SequenceNumber GetLatestSequenceNumber() const = 0;
#ifndef ROCKSDB_LITE
// Prevent file deletions. Compactions will continue to occur,
// but no obsolete files will be deleted. Calling this multiple
  // times has the same effect as calling it once.
virtual Status DisableFileDeletions() = 0;
// Allow compactions to delete obsolete files.
// If force == true, the call to EnableFileDeletions() will guarantee that
// file deletions are enabled after the call, even if DisableFileDeletions()
// was called multiple times before.
// If force == false, EnableFileDeletions will only enable file deletion
// after it's been called at least as many times as DisableFileDeletions(),
// enabling the two methods to be called by two threads concurrently without
// synchronization -- i.e., file deletions will be enabled only after both
// threads call EnableFileDeletions()
virtual Status EnableFileDeletions(bool force = true) = 0;
  // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup.
  // THIS METHOD IS DEPRECATED. Use GetLiveFilesMetaData to get more
  // detailed information on the live files.
// Retrieve the list of all files in the database. The files are
// relative to the dbname and are not absolute paths. The valid size of the
// manifest file is returned in manifest_file_size. The manifest file is an
// ever growing file, but only the portion specified by manifest_file_size is
// valid for this snapshot.
// Setting flush_memtable to true does Flush before recording the live files.
  // Setting flush_memtable to false is useful when we don't want to wait for
  // the flush, which may in turn have to wait for compaction to complete,
  // taking an indeterminate amount of time.
  //
  // In case you have multiple column families, even if flush_memtable is true,
  // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
  // for new data that arrived in already-flushed column families while other
  // column families were flushing.
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
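  //
  // Example (illustrative sketch of the backup workflow described above;
  // assumes a DB* named db and the VectorLogPtr type declared in
  // rocksdb/transaction_log.h):
  //
  //   db->DisableFileDeletions();
  //   std::vector<std::string> live_files;
  //   uint64_t manifest_size = 0;
  //   db->GetLiveFiles(live_files, &manifest_size, true /* flush_memtable */);
  //   rocksdb::VectorLogPtr wal_files;
  //   db->GetSortedWalFiles(wal_files);
  //   // ... copy live_files (only manifest_size bytes of the manifest) and
  //   // the wal_files to the backup location ...
  //   db->EnableFileDeletions(false /* force */);
  //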
// Sets iter to an iterator that is positioned at a write-batch containing
  // seq_number. If the sequence number is non-existent, it returns an iterator
  // at the first available seq_no after the requested seq_no.
  // Returns Status::OK if the iterator is valid.
  // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
  // use this API, else the WAL files will get
  // cleared aggressively and the iterator might become invalid before
  // an update is read.
virtual Status GetUpdatesSince(
SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
const TransactionLogIterator::ReadOptions&
read_options = TransactionLogIterator::ReadOptions()) = 0;
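  //
  // Example (illustrative sketch; assumes a DB* named db and the
  // TransactionLogIterator / BatchResult interfaces declared in
  // rocksdb/transaction_log.h):
  //
  //   std::unique_ptr<rocksdb::TransactionLogIterator> iter;
  //   rocksdb::Status s = db->GetUpdatesSince(100 /* seq_number */, &iter);
  //   while (s.ok() && iter->Valid()) {
  //     rocksdb::BatchResult batch = iter->GetBatch();
  //     // batch.sequence is the first sequence number of the write batch
  //     // held in batch.writeBatchPtr.
  //     iter->Next();
  //   }
  //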
// Delete the file name from the db directory and update the internal state to
  // reflect that. Supports deletion of sst and log files only. 'name' must be
  // a path relative to the db directory, e.g. 000001.sst, /archive/000003.log.
virtual Status DeleteFile(std::string name) = 0;
// Returns a list of all table files with their level, start key
// and end key
virtual void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {}
#endif // ROCKSDB_LITE
  // Stores in 'identity' the globally unique ID created at database creation
  // time by invoking Env::GenerateUniqueId(). Returns Status::OK if the
  // identity could be set properly.
virtual Status GetDbIdentity(std::string& identity) = 0;
// Returns default column family handle
virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;
#ifndef ROCKSDB_LITE
virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
TablePropertiesCollection* props) = 0;
virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
}
#endif // ROCKSDB_LITE
private:
// No copying allowed
DB(const DB&);
void operator=(const DB&);
};
// Destroy the contents of the specified database.
// Be very careful using this method.
Status DestroyDB(const std::string& name, const Options& options);
#ifndef ROCKSDB_LITE
// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
Status RepairDB(const std::string& dbname, const Options& options);
#endif
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_DB_H_

798
include/rocksdb/env.h Normal file
View File

@@ -0,0 +1,798 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// An Env is an interface used by the rocksdb implementation to access
// operating system functionality like the filesystem etc. Callers
// may wish to provide a custom Env object when opening a database to
// get fine-grained control; e.g., to rate limit file system operations.
//
// All Env implementations are safe for concurrent access from
// multiple threads without any external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_
#define STORAGE_ROCKSDB_INCLUDE_ENV_H_
#include <cstdarg>
#include <string>
#include <memory>
#include <vector>
#include <stdint.h>
#include "rocksdb/status.h"
namespace rocksdb {
class FileLock;
class Logger;
class RandomAccessFile;
class SequentialFile;
class Slice;
class WritableFile;
class RandomRWFile;
class Directory;
struct DBOptions;
class RateLimiter;
using std::unique_ptr;
using std::shared_ptr;
// Options while opening a file to read/write
struct EnvOptions {
// construct with default Options
EnvOptions();
// construct from Options
explicit EnvOptions(const DBOptions& options);
// If true, then allow caching of data in environment buffers
bool use_os_buffer = true;
// If true, then use mmap to read data
bool use_mmap_reads = false;
// If true, then use mmap to write data
bool use_mmap_writes = true;
// If true, set the FD_CLOEXEC on open fd.
bool set_fd_cloexec = true;
// Allows OS to incrementally sync files to disk while they are being
// written, in the background. Issue one request for every bytes_per_sync
// written. 0 turns it off.
// Default: 0
uint64_t bytes_per_sync = 0;
// If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which
// means that file size won't change as part of preallocation.
// If false, preallocation will also change the file size. This option will
// improve the performance in workloads where you sync the data on every
// write. By default, we set it to true for MANIFEST writes and false for
// WAL writes
bool fallocate_with_keep_size = true;
// If not nullptr, write rate limiting is enabled for flush and compaction
RateLimiter* rate_limiter = nullptr;
};
class Env {
public:
Env() { }
virtual ~Env();
// Return a default environment suitable for the current operating
// system. Sophisticated users may wish to provide their own Env
// implementation instead of relying on this default environment.
//
// The result of Default() belongs to rocksdb and must never be deleted.
static Env* Default();
// Create a brand new sequentially-readable file with the specified name.
// On success, stores a pointer to the new file in *result and returns OK.
// On failure stores nullptr in *result and returns non-OK. If the file does
// not exist, returns a non-OK status.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewSequentialFile(const std::string& fname,
unique_ptr<SequentialFile>* result,
const EnvOptions& options)
= 0;
// Create a brand new random access read-only file with the
// specified name. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK. If the file does not exist, returns a non-OK
// status.
//
// The returned file may be concurrently accessed by multiple threads.
virtual Status NewRandomAccessFile(const std::string& fname,
unique_ptr<RandomAccessFile>* result,
const EnvOptions& options)
= 0;
// Create an object that writes to a new file with the specified
// name. Deletes any existing file with the same name and creates a
// new file. On success, stores a pointer to the new file in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
//
// The returned file will only be accessed by one thread at a time.
virtual Status NewWritableFile(const std::string& fname,
unique_ptr<WritableFile>* result,
const EnvOptions& options) = 0;
// Create an object that both reads and writes to a file on
// specified offsets (random access). If file already exists,
// does not overwrite it. On success, stores a pointer to the
// new file in *result and returns OK. On failure stores nullptr
// in *result and returns non-OK.
virtual Status NewRandomRWFile(const std::string& fname,
unique_ptr<RandomRWFile>* result,
const EnvOptions& options) = 0;
// Create an object that represents a directory. Will fail if directory
// doesn't exist. If the directory exists, it will open the directory
// and create a new Directory object.
//
// On success, stores a pointer to the new Directory in
// *result and returns OK. On failure stores nullptr in *result and
// returns non-OK.
virtual Status NewDirectory(const std::string& name,
unique_ptr<Directory>* result) = 0;
// Returns true iff the named file exists.
virtual bool FileExists(const std::string& fname) = 0;
// Store in *result the names of the children of the specified directory.
// The names are relative to "dir".
  // Original contents of *result are dropped.
virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0;
// Delete the named file.
virtual Status DeleteFile(const std::string& fname) = 0;
// Create the specified directory. Returns error if directory exists.
virtual Status CreateDir(const std::string& dirname) = 0;
  // Creates a directory if it is missing. Returns OK if the directory exists
  // or was successfully created.
virtual Status CreateDirIfMissing(const std::string& dirname) = 0;
// Delete the specified directory.
virtual Status DeleteDir(const std::string& dirname) = 0;
// Store the size of fname in *file_size.
virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
// Store the last modification time of fname in *file_mtime.
virtual Status GetFileModificationTime(const std::string& fname,
uint64_t* file_mtime) = 0;
// Rename file src to target.
virtual Status RenameFile(const std::string& src,
const std::string& target) = 0;
// Lock the specified file. Used to prevent concurrent access to
// the same db by multiple processes. On failure, stores nullptr in
// *lock and returns non-OK.
//
// On success, stores a pointer to the object that represents the
// acquired lock in *lock and returns OK. The caller should call
// UnlockFile(*lock) to release the lock. If the process exits,
// the lock will be automatically released.
//
// If somebody else already holds the lock, finishes immediately
// with a failure. I.e., this call does not wait for existing locks
// to go away.
//
// May create the named file if it does not already exist.
virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;
// Release the lock acquired by a previous successful call to LockFile.
// REQUIRES: lock was returned by a successful LockFile() call
// REQUIRES: lock has not already been unlocked.
virtual Status UnlockFile(FileLock* lock) = 0;
// Priority for scheduling job in thread pool
enum Priority { LOW, HIGH, TOTAL };
// Priority for scheduling job in thread pool
enum IOPriority {
IO_LOW = 0,
IO_HIGH = 1,
IO_TOTAL = 2
};
// Arrange to run "(*function)(arg)" once in a background thread, in
// the thread pool specified by pri. By default, jobs go to the 'LOW'
// priority thread pool.
// "function" may run in an unspecified thread. Multiple functions
// added to the same Env may run concurrently in different threads.
// I.e., the caller may not assume that background work items are
// serialized.
virtual void Schedule(
void (*function)(void* arg),
void* arg,
Priority pri = LOW) = 0;
// Start a new thread, invoking "function(arg)" within the new thread.
// When "function(arg)" returns, the thread will be destroyed.
virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
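  //
  // Example (illustrative sketch; 'BackgroundWork', 'ctx', and 'env' are
  // hypothetical names used only for illustration):
  //
  //   static void BackgroundWork(void* arg) {
  //     // ... do work on the state pointed to by arg ...
  //   }
  //   ...
  //   env->Schedule(&BackgroundWork, &ctx, Env::HIGH);  // run in a pool thread
  //   env->StartThread(&BackgroundWork, &ctx);          // run in a new thread
  //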
// Wait for all threads started by StartThread to terminate.
virtual void WaitForJoin() {}
  // Get thread pool queue length for a specific thread pool.
virtual unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const {
return 0;
}
// *path is set to a temporary directory that can be used for testing. It may
  // or may not have just been created. The directory may or may not differ
// between runs of the same process, but subsequent calls will return the
// same directory.
virtual Status GetTestDirectory(std::string* path) = 0;
// Create and return a log file for storing informational messages.
virtual Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result) = 0;
// Returns the number of micro-seconds since some fixed point in time. Only
// useful for computing deltas of time.
virtual uint64_t NowMicros() = 0;
// Returns the number of nano-seconds since some fixed point in time. Only
// useful for computing deltas of time in one run.
// Default implementation simply relies on NowMicros
virtual uint64_t NowNanos() {
return NowMicros() * 1000;
}
  // Sleep/delay the thread for the prescribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
// Get the current host name.
virtual Status GetHostName(char* name, uint64_t len) = 0;
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
virtual Status GetCurrentTime(int64_t* unix_time) = 0;
// Get full directory name for this db.
virtual Status GetAbsolutePath(const std::string& db_path,
std::string* output_path) = 0;
// The number of background worker threads of a specific thread pool
// for this environment. 'LOW' is the default pool.
// default number: 1
virtual void SetBackgroundThreads(int number, Priority pri = LOW) = 0;
// Converts seconds-since-Jan-01-1970 to a printable string
virtual std::string TimeToString(uint64_t time) = 0;
// Generates a unique id that can be used to identify a db
virtual std::string GenerateUniqueId();
// OptimizeForLogWrite will create a new EnvOptions object that is a copy of
// the EnvOptions in the parameters, but is optimized for writing log files.
  // Default implementation returns a copy of the same object.
virtual EnvOptions OptimizeForLogWrite(const EnvOptions& env_options) const;
// OptimizeForManifestWrite will create a new EnvOptions object that is a copy
// of the EnvOptions in the parameters, but is optimized for writing manifest
  // files. Default implementation returns a copy of the same object.
virtual EnvOptions OptimizeForManifestWrite(const EnvOptions& env_options)
const;
private:
// No copying allowed
Env(const Env&);
void operator=(const Env&);
};
// A file abstraction for reading sequentially through a file
class SequentialFile {
public:
SequentialFile() { }
virtual ~SequentialFile();
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be
// written by this routine. Sets "*result" to the data that was
// read (including if fewer than "n" bytes were successfully read).
// May set "*result" to point at data in "scratch[0..n-1]", so
// "scratch[0..n-1]" must be live when "*result" is used.
// If an error was encountered, returns a non-OK status.
//
// REQUIRES: External synchronization
virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
// Skip "n" bytes from the file. This is guaranteed to be no
  // slower than reading the same data, but may be faster.
//
// If end of file is reached, skipping will stop at the end of the
// file, and Skip will return OK.
//
// REQUIRES: External synchronization
virtual Status Skip(uint64_t n) = 0;
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
};
// A file abstraction for randomly reading the contents of a file.
class RandomAccessFile {
public:
RandomAccessFile() { }
virtual ~RandomAccessFile();
// Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were
// successfully read). May set "*result" to point at data in
// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
// "*result" is used. If an error was encountered, returns a non-OK
// status.
//
// Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
  // Tries to get a unique ID for this file that will be the same each time
// the file is opened (and will stay the same while the file is open).
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
// ID can be created this function returns the length of the ID and places it
// in "id"; otherwise, this function returns 0, in which case "id"
// may not have been modified.
//
// This function guarantees, for IDs from a given environment, two unique ids
  // cannot be made equal to each other by adding arbitrary bytes to one of
// them. That is, no unique ID is the prefix of another.
//
// This function guarantees that the returned ID will not be interpretable as
// a single varint.
//
// Note: these IDs are only valid for the duration of the process.
virtual size_t GetUniqueId(char* id, size_t max_size) const {
return 0; // Default implementation to prevent issues with backwards
// compatibility.
};
enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };
virtual void Hint(AccessPattern pattern) {}
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
};
// A file abstraction for sequential writing. The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class WritableFile {
public:
WritableFile()
: last_preallocated_block_(0),
preallocation_block_size_(0),
io_priority_(Env::IO_TOTAL) {
}
virtual ~WritableFile();
virtual Status Append(const Slice& data) = 0;
virtual Status Close() = 0;
virtual Status Flush() = 0;
virtual Status Sync() = 0; // sync data
/*
* Sync data and/or metadata as well.
* By default, sync only data.
* Override this method for environments where we need to sync
* metadata as well.
*/
virtual Status Fsync() {
return Sync();
}
/*
* Change the priority in rate limiter if rate limiting is enabled.
* If rate limiting is not enabled, this call has no effect.
*/
virtual void SetIOPriority(Env::IOPriority pri) {
io_priority_ = pri;
}
/*
* Get the size of valid data in the file.
*/
virtual uint64_t GetFileSize() {
return 0;
}
/*
* Get and set the default pre-allocation block size for writes to
* this file. If non-zero, then Allocate will be used to extend the
* underlying storage of a file (generally via fallocate) if the Env
* instance supports it.
*/
void SetPreallocationBlockSize(size_t size) {
preallocation_block_size_ = size;
}
virtual void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) {
*last_allocated_block = last_preallocated_block_;
*block_size = preallocation_block_size_;
}
// For documentation, refer to RandomAccessFile::GetUniqueId()
virtual size_t GetUniqueId(char* id, size_t max_size) const {
    return 0; // Default implementation to prevent issues with backwards
              // compatibility.
  }
// Remove any kind of caching of data from the offset to offset+length
// of this file. If the length is 0, then it refers to the end of file.
// If the system is not caching the file contents, then this is a noop.
// This call has no effect on dirty pages in the cache.
virtual Status InvalidateCache(size_t offset, size_t length) {
return Status::NotSupported("InvalidateCache not supported.");
}
protected:
// PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation
// of space on devices where it can result in less file
// fragmentation and/or less waste from over-zealous filesystem
// pre-allocation.
void PrepareWrite(size_t offset, size_t len) {
if (preallocation_block_size_ == 0) {
return;
}
// If this write would cross one or more preallocation blocks,
    // determine what the last preallocation block necessary to
// cover this write would be and Allocate to that point.
const auto block_size = preallocation_block_size_;
size_t new_last_preallocated_block =
(offset + len + block_size - 1) / block_size;
if (new_last_preallocated_block > last_preallocated_block_) {
size_t num_spanned_blocks =
new_last_preallocated_block - last_preallocated_block_;
Allocate(block_size * last_preallocated_block_,
block_size * num_spanned_blocks);
last_preallocated_block_ = new_last_preallocated_block;
}
}
/*
* Pre-allocate space for a file.
*/
virtual Status Allocate(off_t offset, off_t len) {
return Status::OK();
}
// Sync a file range with disk.
// offset is the starting byte of the file range to be synchronized.
// nbytes specifies the length of the range to be synchronized.
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) {
return Status::OK();
}
private:
size_t last_preallocated_block_;
size_t preallocation_block_size_;
// No copying allowed
WritableFile(const WritableFile&);
void operator=(const WritableFile&);
protected:
Env::IOPriority io_priority_;
};
// A file abstraction for random reading and writing.
class RandomRWFile {
public:
RandomRWFile() {}
virtual ~RandomRWFile() {}
// Write data from Slice data to file starting from offset
// Returns IOError on failure, but does not guarantee
// atomicity of a write. Returns OK status on success.
//
// Safe for concurrent use.
virtual Status Write(uint64_t offset, const Slice& data) = 0;
// Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were
// successfully read). May set "*result" to point at data in
// "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
// "*result" is used. If an error was encountered, returns a non-OK
// status.
//
// Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
virtual Status Close() = 0; // closes the file
virtual Status Sync() = 0; // sync data
/*
* Sync data and/or metadata as well.
* By default, sync only data.
* Override this method for environments where we need to sync
* metadata as well.
*/
virtual Status Fsync() {
return Sync();
}
/*
* Pre-allocate space for a file.
*/
virtual Status Allocate(off_t offset, off_t len) {
return Status::OK();
}
private:
// No copying allowed
RandomRWFile(const RandomRWFile&);
void operator=(const RandomRWFile&);
};
// A Directory object represents a collection of files and implements
// filesystem operations that can be executed on directories.
class Directory {
public:
virtual ~Directory() {}
// Fsync directory
virtual Status Fsync() = 0;
};
enum InfoLogLevel : unsigned char {
DEBUG_LEVEL = 0,
INFO_LEVEL,
WARN_LEVEL,
ERROR_LEVEL,
FATAL_LEVEL,
NUM_INFO_LOG_LEVELS,
};
// An interface for writing log messages.
class Logger {
public:
enum { DO_NOT_SUPPORT_GET_LOG_FILE_SIZE = -1 };
explicit Logger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL)
: log_level_(log_level) {}
virtual ~Logger();
// Write an entry to the log file with the specified format.
virtual void Logv(const char* format, va_list ap) = 0;
// Write an entry to the log file with the specified log level
// and format. Any log with level under the internal log level
// of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
// printed.
void Logv(const InfoLogLevel log_level, const char* format, va_list ap) {
static const char* kInfoLogLevelNames[5] = {"DEBUG", "INFO", "WARN",
"ERROR", "FATAL"};
if (log_level < log_level_) {
return;
}
if (log_level == InfoLogLevel::INFO_LEVEL) {
      // Don't print the log level if it is INFO level.
      // This is to avoid an unexpected performance regression after the log
      // level feature was added. All logs written before the feature existed
      // are INFO level, and we don't want to add extra cost to that existing
      // logging.
Logv(format, ap);
} else {
char new_format[500];
snprintf(new_format, sizeof(new_format) - 1, "[%s] %s",
kInfoLogLevelNames[log_level], format);
Logv(new_format, ap);
}
}
virtual size_t GetLogFileSize() const {
return DO_NOT_SUPPORT_GET_LOG_FILE_SIZE;
}
// Flush to the OS buffers
virtual void Flush() {}
virtual InfoLogLevel GetInfoLogLevel() const { return log_level_; }
virtual void SetInfoLogLevel(const InfoLogLevel log_level) {
log_level_ = log_level;
}
private:
// No copying allowed
Logger(const Logger&);
void operator=(const Logger&);
InfoLogLevel log_level_;
};
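//
// Example (illustrative sketch of a minimal Logger that writes everything to
// stderr; 'StderrLogger' is a hypothetical name, and the implementation file
// would need <cstdio>):
//
//   class StderrLogger : public Logger {
//    public:
//     virtual void Logv(const char* format, va_list ap) {
//       vfprintf(stderr, format, ap);
//       fprintf(stderr, "\n");
//     }
//   };
//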
// Identifies a locked file.
class FileLock {
public:
FileLock() { }
virtual ~FileLock();
private:
// No copying allowed
FileLock(const FileLock&);
void operator=(const FileLock&);
};
extern void LogFlush(const shared_ptr<Logger>& info_log);
extern void Log(const InfoLogLevel log_level,
const shared_ptr<Logger>& info_log, const char* format, ...);
// a set of log functions with different log levels.
extern void Debug(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Info(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Warn(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Error(const shared_ptr<Logger>& info_log, const char* format, ...);
extern void Fatal(const shared_ptr<Logger>& info_log, const char* format, ...);
// Log the specified data to *info_log if info_log is non-nullptr.
// The default info log level is InfoLogLevel::ERROR.
extern void Log(const shared_ptr<Logger>& info_log, const char* format, ...)
# if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__ (__printf__, 2, 3)))
# endif
;
extern void LogFlush(Logger *info_log);
extern void Log(const InfoLogLevel log_level, Logger* info_log,
const char* format, ...);
// The default info log level is InfoLogLevel::ERROR.
extern void Log(Logger* info_log, const char* format, ...)
# if defined(__GNUC__) || defined(__clang__)
__attribute__((__format__ (__printf__, 2, 3)))
# endif
;
// a set of log functions with different log levels.
extern void Debug(Logger* info_log, const char* format, ...);
extern void Info(Logger* info_log, const char* format, ...);
extern void Warn(Logger* info_log, const char* format, ...);
extern void Error(Logger* info_log, const char* format, ...);
extern void Fatal(Logger* info_log, const char* format, ...);
// A utility routine: write "data" to the named file.
extern Status WriteStringToFile(Env* env, const Slice& data,
const std::string& fname,
bool should_sync = false);
// A utility routine: read contents of named file into *data
extern Status ReadFileToString(Env* env, const std::string& fname,
std::string* data);
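//
// Example (illustrative sketch; the file path is only for illustration):
//
//   rocksdb::Env* env = rocksdb::Env::Default();
//   rocksdb::Status s =
//       rocksdb::WriteStringToFile(env, "hello", "/tmp/example_file", true);
//   std::string data;
//   if (s.ok()) {
//     s = rocksdb::ReadFileToString(env, "/tmp/example_file", &data);
//   }
//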
// An implementation of Env that forwards all calls to another Env.
// May be useful to clients who wish to override just part of the
// functionality of another Env.
class EnvWrapper : public Env {
public:
// Initialize an EnvWrapper that delegates all calls to *t
explicit EnvWrapper(Env* t) : target_(t) { }
virtual ~EnvWrapper();
// Return the target to which this Env forwards all calls
Env* target() const { return target_; }
// The following text is boilerplate that forwards all methods to target()
Status NewSequentialFile(const std::string& f,
unique_ptr<SequentialFile>* r,
const EnvOptions& options) {
return target_->NewSequentialFile(f, r, options);
}
Status NewRandomAccessFile(const std::string& f,
unique_ptr<RandomAccessFile>* r,
const EnvOptions& options) {
return target_->NewRandomAccessFile(f, r, options);
}
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
const EnvOptions& options) {
return target_->NewWritableFile(f, r, options);
}
Status NewRandomRWFile(const std::string& f, unique_ptr<RandomRWFile>* r,
const EnvOptions& options) {
return target_->NewRandomRWFile(f, r, options);
}
virtual Status NewDirectory(const std::string& name,
unique_ptr<Directory>* result) {
return target_->NewDirectory(name, result);
}
bool FileExists(const std::string& f) { return target_->FileExists(f); }
Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
return target_->GetChildren(dir, r);
}
Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); }
Status CreateDir(const std::string& d) { return target_->CreateDir(d); }
Status CreateDirIfMissing(const std::string& d) {
return target_->CreateDirIfMissing(d);
}
Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); }
Status GetFileSize(const std::string& f, uint64_t* s) {
return target_->GetFileSize(f, s);
}
Status GetFileModificationTime(const std::string& fname,
uint64_t* file_mtime) {
return target_->GetFileModificationTime(fname, file_mtime);
}
Status RenameFile(const std::string& s, const std::string& t) {
return target_->RenameFile(s, t);
}
Status LockFile(const std::string& f, FileLock** l) {
return target_->LockFile(f, l);
}
Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); }
void Schedule(void (*f)(void*), void* a, Priority pri) {
return target_->Schedule(f, a, pri);
}
void StartThread(void (*f)(void*), void* a) {
return target_->StartThread(f, a);
}
void WaitForJoin() { return target_->WaitForJoin(); }
virtual unsigned int GetThreadPoolQueueLen(Priority pri = LOW) const {
return target_->GetThreadPoolQueueLen(pri);
}
virtual Status GetTestDirectory(std::string* path) {
return target_->GetTestDirectory(path);
}
virtual Status NewLogger(const std::string& fname,
shared_ptr<Logger>* result) {
return target_->NewLogger(fname, result);
}
uint64_t NowMicros() {
return target_->NowMicros();
}
void SleepForMicroseconds(int micros) {
target_->SleepForMicroseconds(micros);
}
Status GetHostName(char* name, uint64_t len) {
return target_->GetHostName(name, len);
}
Status GetCurrentTime(int64_t* unix_time) {
return target_->GetCurrentTime(unix_time);
}
Status GetAbsolutePath(const std::string& db_path,
std::string* output_path) {
return target_->GetAbsolutePath(db_path, output_path);
}
void SetBackgroundThreads(int num, Priority pri) {
return target_->SetBackgroundThreads(num, pri);
}
std::string TimeToString(uint64_t time) {
return target_->TimeToString(time);
}
private:
Env* target_;
};
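//
// Example (illustrative sketch of overriding a single Env method;
// 'CountingEnv' is a hypothetical name):
//
//   class CountingEnv : public EnvWrapper {
//    public:
//     explicit CountingEnv(Env* base) : EnvWrapper(base), num_writable_(0) {}
//     Status NewWritableFile(const std::string& f,
//                            unique_ptr<WritableFile>* r,
//                            const EnvOptions& options) {
//       ++num_writable_;  // count file creations, then forward to the target
//       return EnvWrapper::NewWritableFile(f, r, options);
//     }
//     int num_writable_;
//   };
//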
// Returns a new environment that stores its data in memory and delegates
// all non-file-storage tasks to base_env. The caller must delete the result
// when it is no longer needed.
// *base_env must remain live while the result is in use.
Env* NewMemEnv(Env* base_env);
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_ENV_H_

74
include/rocksdb/filter_policy.h Normal file
View File

@@ -0,0 +1,74 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A database can be configured with a custom FilterPolicy object.
// This object is responsible for creating a small filter from a set
// of keys. These filters are stored in rocksdb and are consulted
// automatically by rocksdb to decide whether or not to read some
// information from disk. In many cases, a filter can cut down the
// number of disk seeks from a handful to a single disk seek per
// DB::Get() call.
//
// Most people will want to use the builtin bloom filter support (see
// NewBloomFilterPolicy() below).
#ifndef STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
#include <string>
namespace rocksdb {
class Slice;
class FilterPolicy {
public:
virtual ~FilterPolicy();
// Return the name of this policy. Note that if the filter encoding
// changes in an incompatible way, the name returned by this method
// must be changed. Otherwise, old incompatible filters may be
// passed to methods of this type.
virtual const char* Name() const = 0;
// keys[0,n-1] contains a list of keys (potentially with duplicates)
// that are ordered according to the user supplied comparator.
// Append a filter that summarizes keys[0,n-1] to *dst.
//
// Warning: do not change the initial contents of *dst. Instead,
// append the newly constructed filter to *dst.
virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
const = 0;
// "filter" contains the data appended by a preceding call to
// CreateFilter() on this class. This method must return true if
// the key was in the list of keys passed to CreateFilter().
// This method may return true or false if the key was not on the
// list, but it should aim to return false with a high probability.
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
};
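//
// Example (illustrative sketch of a degenerate policy that stores nothing and
// never rules a key out; 'AlwaysMatchPolicy' is a hypothetical name -- most
// users should simply use NewBloomFilterPolicy() declared below):
//
//   class AlwaysMatchPolicy : public FilterPolicy {
//    public:
//     virtual const char* Name() const { return "AlwaysMatchPolicy"; }
//     virtual void CreateFilter(const Slice* keys, int n,
//                               std::string* dst) const {
//       // Append nothing: the empty filter encodes no information.
//     }
//     virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
//       return true;  // never claims a key is definitely absent
//     }
//   };
//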
// Return a new filter policy that uses a bloom filter with approximately
// the specified number of bits per key. A good value for bits_per_key
// is 10, which yields a filter with ~ 1% false positive rate.
//
// Callers must delete the result after any database that is using the
// result has been closed.
//
// Note: if you are using a custom comparator that ignores some parts
// of the keys being compared, you must not use NewBloomFilterPolicy()
// and must provide your own FilterPolicy that also ignores the
// corresponding parts of the keys. For example, if the comparator
// ignores trailing spaces, it would be incorrect to use a
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
}
#endif // STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_

58
include/rocksdb/flush_block_policy.h Normal file
View File

@@ -0,0 +1,58 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <string>
namespace rocksdb {
class Slice;
class BlockBuilder;
struct Options;
// FlushBlockPolicy provides a configurable way to determine when to flush a
// block in block-based tables.
class FlushBlockPolicy {
public:
  // Keep track of the key/value sequence and return true if the table builder
  // should flush the current data block.
virtual bool Update(const Slice& key,
const Slice& value) = 0;
virtual ~FlushBlockPolicy() { }
};
class FlushBlockPolicyFactory {
public:
// Return the name of the flush block policy.
virtual const char* Name() const = 0;
// Return a new block flush policy that flushes data blocks by data size.
// FlushBlockPolicy may need to access the metadata of the data block
// builder to determine when to flush the blocks.
//
// Callers must delete the result after any database that is using the
// result has been closed.
virtual FlushBlockPolicy* NewFlushBlockPolicy(
const Options& options, const BlockBuilder& data_block_builder) const = 0;
virtual ~FlushBlockPolicyFactory() { }
};
class FlushBlockBySizePolicyFactory : public FlushBlockPolicyFactory {
public:
FlushBlockBySizePolicyFactory() {}
virtual const char* Name() const override {
return "FlushBlockBySizePolicyFactory";
}
virtual FlushBlockPolicy* NewFlushBlockPolicy(
const Options& options,
const BlockBuilder& data_block_builder) const override;
};
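//
// Example (illustrative sketch of a policy that cuts a block after a fixed
// number of entries; 'FlushEveryN' is a hypothetical name; a corresponding
// FlushBlockPolicyFactory would return new FlushEveryN(...) from
// NewFlushBlockPolicy()):
//
//   class FlushEveryN : public FlushBlockPolicy {
//    public:
//     explicit FlushEveryN(size_t n) : n_(n), count_(0) {}
//     virtual bool Update(const Slice& key, const Slice& value) {
//       if (++count_ >= n_) {
//         count_ = 0;
//         return true;   // ask the table builder to flush the current block
//       }
//       return false;
//     }
//    private:
//     const size_t n_;
//     size_t count_;
//   };
//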
}  // namespace rocksdb

34
include/rocksdb/iostats_context.h Normal file
View File

@@ -0,0 +1,34 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef INCLUDE_ROCKSDB_IOSTATS_CONTEXT_H_
#define INCLUDE_ROCKSDB_IOSTATS_CONTEXT_H_
#include <stdint.h>
#include <string>
// A thread local context for gathering io-stats efficiently and transparently.
namespace rocksdb {
struct IOStatsContext {
  // reset all io-stats counters to zero
void Reset();
std::string ToString() const;
// the thread pool id
uint64_t thread_pool_id;
  // number of bytes that have been written.
  uint64_t bytes_written;
  // number of bytes that have been read.
  uint64_t bytes_read;
};
extern __thread IOStatsContext iostats_context;
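//
// Example (illustrative sketch; assumes the per-thread counters are enabled
// in this build):
//
//   rocksdb::iostats_context.Reset();
//   // ... perform reads / writes on this thread ...
//   uint64_t written = rocksdb::iostats_context.bytes_written;
//   uint64_t read = rocksdb::iostats_context.bytes_read;
//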
} // namespace rocksdb
#endif // INCLUDE_ROCKSDB_IOSTATS_CONTEXT_H_

106
include/rocksdb/iterator.h Normal file
View File

@@ -0,0 +1,106 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// An iterator yields a sequence of key/value pairs from a source.
// The following class defines the interface. Multiple implementations
// are provided by this library. In particular, iterators are provided
// to access the contents of a Table or a DB.
//
// Multiple threads can invoke const methods on an Iterator without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Iterator must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_
#define STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
namespace rocksdb {
class Iterator {
public:
Iterator();
virtual ~Iterator();
// An iterator is either positioned at a key/value pair, or
// not valid. This method returns true iff the iterator is valid.
virtual bool Valid() const = 0;
// Position at the first key in the source. The iterator is Valid()
// after this call iff the source is not empty.
virtual void SeekToFirst() = 0;
// Position at the last key in the source. The iterator is
// Valid() after this call iff the source is not empty.
virtual void SeekToLast() = 0;
  // Position at the first key in the source that is at or past target.
// The iterator is Valid() after this call iff the source contains
// an entry that comes at or past target.
virtual void Seek(const Slice& target) = 0;
// Moves to the next entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the last entry in the source.
// REQUIRES: Valid()
virtual void Next() = 0;
// Moves to the previous entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the first entry in source.
// REQUIRES: Valid()
virtual void Prev() = 0;
// Return the key for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
// REQUIRES: Valid()
virtual Slice key() const = 0;
// Return the value for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
  // REQUIRES: Valid()
virtual Slice value() const = 0;
// If an error has occurred, return it. Else return an ok status.
// If non-blocking IO is requested and this operation cannot be
// satisfied without doing some IO, then this returns Status::Incomplete().
virtual Status status() const = 0;
// Clients are allowed to register function/arg1/arg2 triples that
// will be invoked when this iterator is destroyed.
//
// Note that unlike all of the preceding methods, this method is
// not abstract and therefore clients should not override it.
typedef void (*CleanupFunction)(void* arg1, void* arg2);
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
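  //
  // Example (illustrative sketch; 'DeleteState' and 'state' are hypothetical
  // names):
  //
  //   static void DeleteState(void* arg1, void* /*arg2*/) {
  //     delete static_cast<std::string*>(arg1);
  //   }
  //   ...
  //   iter->RegisterCleanup(&DeleteState, state /* a std::string* */, nullptr);
  //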
private:
struct Cleanup {
CleanupFunction function;
void* arg1;
void* arg2;
Cleanup* next;
};
Cleanup cleanup_;
// No copying allowed
Iterator(const Iterator&);
void operator=(const Iterator&);
};
// Return an empty iterator (yields nothing).
extern Iterator* NewEmptyIterator();
// Return an empty iterator with the specified status.
extern Iterator* NewErrorIterator(const Status& status);
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_

37
include/rocksdb/ldb_tool.h Normal file
View File

@@ -0,0 +1,37 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef ROCKSDB_LITE
#pragma once
#include <string>
#include "rocksdb/options.h"
namespace rocksdb {
// An interface for converting a slice to a readable string
class SliceFormatter {
public:
virtual ~SliceFormatter() {}
virtual std::string Format(const Slice& s) const = 0;
};
// Options for customizing ldb tool (beyond the DB Options)
struct LDBOptions {
// Create LDBOptions with default values for all fields
LDBOptions();
// Key formatter that converts a slice to a readable string.
// Default: Slice::ToString()
std::shared_ptr<SliceFormatter> key_formatter;
};
class LDBTool {
public:
  void Run(int argc, char** argv, Options db_options = Options(),
const LDBOptions& ldb_options = LDBOptions());
};
} // namespace rocksdb
#endif // ROCKSDB_LITE

287
include/rocksdb/memtablerep.h Normal file
View File

@@ -0,0 +1,287 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// This file contains the interface that must be implemented by any collection
// to be used as the backing store for a MemTable. Such a collection must
// satisfy the following properties:
// (1) It does not store duplicate items.
// (2) It uses MemTableRep::KeyComparator to compare items for iteration and
// equality.
// (3) It can be accessed concurrently by multiple readers and can support
//     writes during reads. However, it needn't support multiple concurrent
//     writes.
// (4) Items are never deleted.
// The liberal use of assertions is encouraged to enforce (1).
//
// The factory will be passed an Arena object when a new MemTableRep is
// requested. The API for this object is in rocksdb/arena.h.
//
// Users can implement their own memtable representations. We include three
// types built in:
// - SkipListRep: This is the default; it is backed by a skip list.
// - HashSkipListRep: The memtable rep that is best used for keys that are
// structured like "prefix:suffix" where iteration within a prefix is
// common and iteration across different prefixes is rare. It is backed by
// a hash map where each bucket is a skip list.
// - VectorRep: This is backed by an unordered std::vector. On iteration, the
// vector is sorted. It is intelligent about sorting; once the MarkReadOnly()
// has been called, the vector will only be sorted once. It is optimized for
// random-write-heavy workloads.
//
// The last two implementations are designed for situations in which
// iteration over the entire collection is rare since doing so requires all the
// keys to be copied into a sorted data structure.
#pragma once
#include <memory>
#include <stdint.h>
namespace rocksdb {
class Arena;
class LookupKey;
class Slice;
class SliceTransform;
class Logger;
typedef void* KeyHandle;
class MemTableRep {
public:
// KeyComparator provides a means to compare keys, which are internal keys
// concatenated with values.
class KeyComparator {
public:
// Compare a and b. Return a negative value if a is less than b, 0 if they
// are equal, and a positive value if a is greater than b
virtual int operator()(const char* prefix_len_key1,
const char* prefix_len_key2) const = 0;
virtual int operator()(const char* prefix_len_key,
const Slice& key) const = 0;
virtual ~KeyComparator() { }
};
explicit MemTableRep(Arena* arena) : arena_(arena) {}
  // Allocate a buffer of len bytes for storing the key. The idea is that a
  // specific memtable representation knows its underlying data structure
  // better. By allowing it to allocate memory, it can possibly put correlated
  // data in consecutive memory areas to make processor prefetching more
  // efficient.
virtual KeyHandle Allocate(const size_t len, char** buf);
// Insert key into the collection. (The caller will pack key and value into a
// single buffer and pass that in as the parameter to Insert).
// REQUIRES: nothing that compares equal to key is currently in the
// collection.
virtual void Insert(KeyHandle handle) = 0;
// Returns true iff an entry that compares equal to key is in the collection.
virtual bool Contains(const char* key) const = 0;
// Notify this table rep that it will no longer be added to. By default, does
// nothing.
virtual void MarkReadOnly() { }
  // Look up the key in the mem table: starting from the first key in the mem
  // table whose user_key matches the one in k, call callback_func() with
  // callback_args directly forwarded as the first parameter and the mem table
  // key as the second parameter. If the callback returns false, terminate;
  // otherwise, continue with the next key.
  // It is safe for Get() to terminate after having visited all the potential
  // keys for k.user_key(); it is also safe not to.
  //
  // Default:
  // The default implementation dynamically constructs an iterator, seeks to
  // the key, and invokes the callback function.
virtual void Get(const LookupKey& k, void* callback_args,
bool (*callback_func)(void* arg, const char* entry));
// Report an approximation of how much memory has been used other than memory
// that was allocated through the arena.
virtual size_t ApproximateMemoryUsage() = 0;
virtual ~MemTableRep() { }
// Iteration over the contents of a skip collection
class Iterator {
public:
// Initialize an iterator over the specified collection.
// The returned iterator is not valid.
// explicit Iterator(const MemTableRep* collection);
virtual ~Iterator() {}
// Returns true iff the iterator is positioned at a valid node.
virtual bool Valid() const = 0;
// Returns the key at the current position.
// REQUIRES: Valid()
virtual const char* key() const = 0;
// Advances to the next position.
// REQUIRES: Valid()
virtual void Next() = 0;
// Advances to the previous position.
// REQUIRES: Valid()
virtual void Prev() = 0;
// Advance to the first entry with a key >= target
virtual void Seek(const Slice& internal_key, const char* memtable_key) = 0;
// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToFirst() = 0;
// Position at the last entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
virtual void SeekToLast() = 0;
};
// Return an iterator over the keys in this representation.
// arena: If not null, the arena needs to be used to allocate the Iterator.
// When destroying the iterator, the caller will not call "delete"
// but Iterator::~Iterator() directly. The destructor needs to destroy
// all the states but those allocated in arena.
virtual Iterator* GetIterator(Arena* arena = nullptr) = 0;
// Return an iterator that has a special Seek semantics. The result of
// a Seek might only include keys with the same prefix as the target key.
// arena: If not null, the arena needs to be used to allocate the Iterator.
// When destroying the iterator, the caller will not call "delete"
// but Iterator::~Iterator() directly. The destructor needs to destroy
// all the states but those allocated in arena.
virtual Iterator* GetDynamicPrefixIterator(Arena* arena = nullptr) {
return GetIterator(arena);
}
// Return true if the current MemTableRep supports merge operator.
// Default: true
virtual bool IsMergeOperatorSupported() const { return true; }
// Return true if the current MemTableRep supports snapshot
// Default: true
virtual bool IsSnapshotSupported() const { return true; }
protected:
// When *key is an internal key concatenated with the value, returns the
// user key.
virtual Slice UserKey(const char* key) const;
Arena* arena_;
};
// This is the base class for all factories that are used by RocksDB to create
// new MemTableRep objects
class MemTableRepFactory {
public:
virtual ~MemTableRepFactory() {}
virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
Arena*, const SliceTransform*,
Logger* logger) = 0;
virtual const char* Name() const = 0;
};
// This uses a skip list to store keys. It is the default.
class SkipListFactory : public MemTableRepFactory {
public:
virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
Arena*, const SliceTransform*,
Logger* logger) override;
virtual const char* Name() const override { return "SkipListFactory"; }
};
#ifndef ROCKSDB_LITE
// This creates MemTableReps that are backed by an std::vector. On iteration,
// the vector is sorted. This is useful for workloads where iteration is very
// rare and writes are generally not issued after reads begin.
//
// Parameters:
// count: Passed to the constructor of the underlying std::vector of each
//   VectorRep. On initialization, the underlying array will have at least
//   count bytes reserved for usage.
class VectorRepFactory : public MemTableRepFactory {
const size_t count_;
public:
explicit VectorRepFactory(size_t count = 0) : count_(count) { }
virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
Arena*, const SliceTransform*,
Logger* logger) override;
virtual const char* Name() const override {
return "VectorRepFactory";
}
};
// This class contains a fixed array of buckets, each
// pointing to a skiplist (null if the bucket is empty).
// bucket_count: number of fixed array buckets
// skiplist_height: the max height of the skiplist
// skiplist_branching_factor: probabilistic size ratio between adjacent
// link lists in the skiplist
extern MemTableRepFactory* NewHashSkipListRepFactory(
size_t bucket_count = 1000000, int32_t skiplist_height = 4,
int32_t skiplist_branching_factor = 4
);
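//
// Example (illustrative sketch; assumes the memtable_factory and
// prefix_extractor fields declared on Options in rocksdb/options.h, and
// NewFixedPrefixTransform() declared in rocksdb/slice_transform.h):
//
//   rocksdb::Options options;
//   options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
//   options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory());
//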
// This factory creates memtables based on a hash table:
// it contains a fixed array of buckets, each pointing to either a linked list
// or a skip list if number of entries inside the bucket exceeds
// threshold_use_skiplist.
// @bucket_count: number of fixed array buckets
// @huge_page_tlb_size: if <=0, allocate the hash table bytes from malloc.
// Otherwise from huge page TLB. The user needs to reserve
// huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
// @bucket_entries_logging_threshold: if number of entries in one bucket
// exceeds this number, log about it.
// @if_log_bucket_dist_when_flash: if true, log distribution of number of
// entries when flushing.
// @threshold_use_skiplist: a bucket switches to skip list if number of
// entries exceed this parameter.
extern MemTableRepFactory* NewHashLinkListRepFactory(
size_t bucket_count = 50000, size_t huge_page_tlb_size = 0,
int bucket_entries_logging_threshold = 4096,
bool if_log_bucket_dist_when_flash = true,
uint32_t threshold_use_skiplist = 256);
// This factory creates a cuckoo-hashing based mem-table representation.
// Cuckoo-hash is a closed-hash strategy, in which all key/value pairs
// are stored in the bucket array itself intead of in some data structures
// external to the bucket array. In addition, each key in cuckoo hash
// has a constant number of possible buckets in the bucket array. These
// two properties together makes cuckoo hash more memory efficient and
// a constant worst-case read time. Cuckoo hash is best suitable for
// point-lookup workload.
//
// When inserting a key / value, it first checks whether one of its possible
// buckets is empty. If so, the key / value will be inserted to that vacant
// bucket. Otherwise, one of the keys originally stored in one of these
// possible buckets will be "kicked out" and moved to one of its possible
// buckets (possibly kicking out another victim). In the current
// implementation, such "kick-out" paths are bounded. If it cannot find a
// "kick-out" path for a specific key, this key will be stored in a backup
// structure, and the current memtable will be forced to become immutable.
//
// Note that currently this mem-table representation does not support
// snapshot (i.e., it only queries latest state) and iterators. In addition,
// MultiGet operation might also lose its atomicity due to the lack of
// snapshot support.
//
// Parameters:
// write_buffer_size: the write buffer size in bytes.
// average_data_size: the average size of key + value in bytes. This value
// together with write_buffer_size will be used to compute the number
// of buckets.
// hash_function_count: the number of hash functions that will be used by
//     the cuckoo-hash. The number also equals the number of possible
//     buckets each key will have.
extern MemTableRepFactory* NewHashCuckooRepFactory(
size_t write_buffer_size, size_t average_data_size = 64,
unsigned int hash_function_count = 4);
#endif // ROCKSDB_LITE
} // namespace rocksdb


@@ -0,0 +1,182 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_
#define STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_
#include <memory>
#include <string>
#include <deque>
#include "rocksdb/slice.h"
namespace rocksdb {
class Slice;
class Logger;
// The Merge Operator
//
// Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only
// the client knows. It could be numeric addition, list append, string
// concatenation, edit data structure, ... , anything.
// The library, on the other hand, is concerned with the exercise of this
// interface, at the right time (during get, iteration, compaction...)
//
// To use merge, the client needs to provide an object implementing one of
// the following interfaces:
// a) AssociativeMergeOperator - for most simple semantics (always take
// two values, and merge them into one value, which is then put back
// into rocksdb); numeric addition and string concatenation are examples;
//
// b) MergeOperator - the generic class for all the more abstract / complex
// operations; one method (FullMerge) to merge a Put/Delete value with a
// merge operand; and another method (PartialMerge) that merges multiple
//  operands together. This is especially useful if your key values have
// complex structures but you would still like to support client-specific
// incremental updates.
//
// AssociativeMergeOperator is simpler to implement. MergeOperator is simply
// more powerful.
//
// Refer to rocksdb-merge wiki for more details and example implementations.
//
class MergeOperator {
public:
virtual ~MergeOperator() {}
// Gives the client a way to express the read -> modify -> write semantics
// key: (IN) The key that's associated with this merge operation.
// Client could multiplex the merge operator based on it
// if the key space is partitioned and different subspaces
// refer to different types of data which have different
// merge operation semantics
// existing: (IN) null indicates that the key does not exist before this op
// operand_list:(IN) the sequence of merge operations to apply, front() first.
// new_value:(OUT) Client is responsible for filling the merge result here
// logger: (IN) Client could use this to log errors during merge.
//
// Return true on success.
// All values passed in will be client-specific values. So if this method
// returns false, it is because client specified bad data or there was
// internal corruption. This will be treated as an error by the library.
//
// Also make use of the *logger for error messages.
virtual bool FullMerge(const Slice& key,
const Slice* existing_value,
const std::deque<std::string>& operand_list,
std::string* new_value,
Logger* logger) const = 0;
// This function performs merge(left_op, right_op)
// when both the operands are themselves merge operation types
// that you would have passed to a DB::Merge() call in the same order
// (i.e.: DB::Merge(key,left_op), followed by DB::Merge(key,right_op)).
//
// PartialMerge should combine them into a single merge operation that is
// saved into *new_value, and then it should return true.
// *new_value should be constructed such that a call to
// DB::Merge(key, *new_value) would yield the same result as a call
// to DB::Merge(key, left_op) followed by DB::Merge(key, right_op).
//
// The default implementation of PartialMergeMulti will use this function
// as a helper, for backward compatibility. Any successor class of
// MergeOperator should either implement PartialMerge or PartialMergeMulti,
// although implementing PartialMergeMulti is suggested as it is in general
// more effective to merge multiple operands at a time instead of two
// operands at a time.
//
// If it is impossible or infeasible to combine the two operations,
// leave new_value unchanged and return false. The library will
// internally keep track of the operations, and apply them in the
// correct order once a base-value (a Put/Delete/End-of-Database) is seen.
//
// TODO: Presently there is no way to differentiate between error/corruption
// and simply "return false". For now, the client should simply return
// false in any case it cannot perform partial-merge, regardless of reason.
// If there is corruption in the data, handle it in the FullMerge() function,
// and return false there. The default implementation of PartialMerge will
// always return false.
virtual bool PartialMerge(const Slice& key, const Slice& left_operand,
const Slice& right_operand, std::string* new_value,
Logger* logger) const {
return false;
}
// This function performs merge when all the operands are themselves merge
// operation types that you would have passed to a DB::Merge() call in the
// same order (front() first)
// (i.e. DB::Merge(key, operand_list[0]), followed by
// DB::Merge(key, operand_list[1]), ...)
//
// PartialMergeMulti should combine them into a single merge operation that is
// saved into *new_value, and then it should return true. *new_value should
// be constructed such that a call to DB::Merge(key, *new_value) would yield
// the same result as sequential individual calls to DB::Merge(key, operand)
// for each operand in operand_list from front() to back().
//
// The PartialMergeMulti function will be called only when the list of
// operands is long enough. The minimum number of operands that will be
// passed to the function is specified by the "min_partial_merge_operands"
// option.
//
// In the default implementation, PartialMergeMulti will invoke PartialMerge
// multiple times, where each time it only merges two operands. Developers
// should either implement PartialMergeMulti, or implement PartialMerge which
// serves as the helper function of the default PartialMergeMulti.
virtual bool PartialMergeMulti(const Slice& key,
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const;
// The name of the MergeOperator. Used to check for MergeOperator
// mismatches (i.e., a DB created with one MergeOperator is
// accessed using a different MergeOperator)
// TODO: the name is currently not stored persistently and thus
// no checking is enforced. Client is responsible for providing
// consistent MergeOperator between DB opens.
virtual const char* Name() const = 0;
};
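// Illustrative sketch (not part of this header): a MergeOperator with
// list-append semantics, where every operand is appended to a comma-separated
// list stored in the value. The class name ExampleStringListOperator is
// hypothetical; see the rocksdb-merge wiki for production implementations.
class ExampleStringListOperator : public MergeOperator {
 public:
  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
                         const std::deque<std::string>& operand_list,
                         std::string* new_value,
                         Logger* logger) const override {
    new_value->clear();
    if (existing_value != nullptr) {
      new_value->assign(existing_value->data(), existing_value->size());
    }
    for (const std::string& operand : operand_list) {
      if (!new_value->empty()) {
        new_value->append(",");
      }
      new_value->append(operand);
    }
    return true;
  }
  virtual bool PartialMerge(const Slice& key, const Slice& left_operand,
                            const Slice& right_operand, std::string* new_value,
                            Logger* logger) const override {
    // Two adjacent operands can always be collapsed into one under these
    // semantics, so partial merge never has to give up and return false.
    new_value->assign(left_operand.data(), left_operand.size());
    new_value->append(",");
    new_value->append(right_operand.data(), right_operand.size());
    return true;
  }
  virtual const char* Name() const override {
    return "ExampleStringListOperator";
  }
};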
// The simpler, associative merge operator.
class AssociativeMergeOperator : public MergeOperator {
public:
virtual ~AssociativeMergeOperator() {}
// Gives the client a way to express the read -> modify -> write semantics
// key: (IN) The key that's associated with this merge operation.
// existing_value:(IN) null indicates the key does not exist before this op
// value: (IN) the value to update/merge the existing_value with
// new_value: (OUT) Client is responsible for filling the merge result here
// logger: (IN) Client could use this to log errors during merge.
//
// Return true on success.
// All values passed in will be client-specific values. So if this method
// returns false, it is because client specified bad data or there was
// internal corruption. The client should assume that this will be treated
// as an error by the library.
virtual bool Merge(const Slice& key,
const Slice* existing_value,
const Slice& value,
std::string* new_value,
Logger* logger) const = 0;
private:
// Default implementations of the MergeOperator functions
virtual bool FullMerge(const Slice& key,
const Slice* existing_value,
const std::deque<std::string>& operand_list,
std::string* new_value,
Logger* logger) const override;
virtual bool PartialMerge(const Slice& key,
const Slice& left_operand,
const Slice& right_operand,
std::string* new_value,
Logger* logger) const override;
};
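// Illustrative sketch (not part of this header): the "numeric addition"
// associative operator mentioned above. Values are interpreted as 8-byte
// little-endian unsigned integers; the helper functions and the class name
// are hypothetical and kept dependency-free on purpose.
inline unsigned long long ExampleDecodeU64(const char* p) {
  unsigned long long v = 0;
  for (int i = 7; i >= 0; --i) {
    v = (v << 8) | static_cast<unsigned char>(p[i]);
  }
  return v;
}
inline void ExampleEncodeU64(unsigned long long v, std::string* out) {
  out->resize(8);
  for (int i = 0; i < 8; ++i) {
    (*out)[i] = static_cast<char>(v >> (8 * i));
  }
}
class ExampleUint64AddOperator : public AssociativeMergeOperator {
 public:
  virtual bool Merge(const Slice& key, const Slice* existing_value,
                     const Slice& value, std::string* new_value,
                     Logger* logger) const override {
    if (value.size() != 8 ||
        (existing_value != nullptr && existing_value->size() != 8)) {
      return false;  // malformed value; the library treats this as an error
    }
    unsigned long long base =
        existing_value != nullptr ? ExampleDecodeU64(existing_value->data()) : 0;
    ExampleEncodeU64(base + ExampleDecodeU64(value.data()), new_value);
    return true;
  }
  virtual const char* Name() const override {
    return "ExampleUint64AddOperator";
  }
};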
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_

1043
include/rocksdb/options.h Normal file

File diff suppressed because it is too large


@@ -0,0 +1,78 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H
#define STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H
#include <stdint.h>
#include <string>
namespace rocksdb {
enum PerfLevel {
kDisable = 0, // disable perf stats
kEnableCount = 1, // enable only count stats
kEnableTime = 2 // enable time stats too
};
// set the perf stats level
void SetPerfLevel(PerfLevel level);
// get current perf stats level
PerfLevel GetPerfLevel();
// A thread local context for gathering performance counters efficiently
// and transparently.
struct PerfContext {
void Reset(); // reset all performance counters to zero
std::string ToString() const;
uint64_t user_key_comparison_count; // total number of user key comparisons
uint64_t block_cache_hit_count; // total number of block cache hits
uint64_t block_read_count; // total number of block reads (with IO)
uint64_t block_read_byte; // total number of bytes from block reads
uint64_t block_read_time; // total time spent on block reads
uint64_t block_checksum_time; // total time spent on block checksum
uint64_t block_decompress_time; // total time spent on block decompression
// total number of internal keys skipped over during iteration (overwritten or
// deleted, to be more specific, hidden by a put or delete of the same key)
uint64_t internal_key_skipped_count;
// total number of deletes skipped over during iteration
uint64_t internal_delete_skipped_count;
uint64_t get_snapshot_time; // total time spent on getting snapshot
uint64_t get_from_memtable_time; // total time spent on querying memtables
uint64_t get_from_memtable_count; // number of mem tables queried
// total time spent after Get() finds a key
uint64_t get_post_process_time;
uint64_t get_from_output_files_time; // total time reading from output files
// total time spent on seeking child iters
uint64_t seek_child_seek_time;
// number of seek issued in child iterators
uint64_t seek_child_seek_count;
uint64_t seek_min_heap_time; // total time spent on the merge heap
// total time spent on seeking the internal entries
uint64_t seek_internal_seek_time;
// total time spent on iterating internal entries to find the next user entry
uint64_t find_next_user_entry_time;
// total time spent on pre or post processing when writing a record
uint64_t write_pre_and_post_process_time;
uint64_t write_wal_time; // total time spent on writing to WAL
// total time spent on writing to mem tables
uint64_t write_memtable_time;
};
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
extern PerfContext perf_context;
#else
extern __thread PerfContext perf_context;
#endif
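// Illustrative sketch (not part of this header): bracketing a read path with
// the thread-local counters above. The actual Get()/Seek() calls are elided;
// the function name is hypothetical.
inline std::string ExampleCollectReadPerfStats() {
  SetPerfLevel(kEnableTime);   // enable both count and time stats
  perf_context.Reset();        // clear this thread's counters
  // ... issue one or more Get()/Seek() calls on this thread ...
  std::string report = perf_context.ToString();
  SetPerfLevel(kDisable);      // stop paying the bookkeeping cost
  return report;
}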
}
#endif


@@ -0,0 +1,60 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/env.h"
namespace rocksdb {
class RateLimiter {
public:
virtual ~RateLimiter() {}
  // Request tokens to write bytes. If this request cannot be satisfied,
  // the call is blocked. The caller is responsible for making sure
  // bytes < GetSingleBurstBytes()
virtual void Request(const int64_t bytes, const Env::IOPriority pri) = 0;
// Max bytes can be granted in a single burst
virtual int64_t GetSingleBurstBytes() const = 0;
  // Total bytes that go through rate limiter
virtual int64_t GetTotalBytesThrough(
const Env::IOPriority pri = Env::IO_TOTAL) const = 0;
  // Total # of requests that go through rate limiter
virtual int64_t GetTotalRequests(
const Env::IOPriority pri = Env::IO_TOTAL) const = 0;
};
// Create a RateLimiter object, which can be shared among RocksDB instances to
// control write rate of flush and compaction.
// @rate_bytes_per_sec: this is the only parameter you want to set most of the
// time. It controls the total write rate of compaction and flush in bytes per
// second. Currently, RocksDB does not enforce rate limit for anything other
// than flush and compaction, e.g. write to WAL.
// @refill_period_us: this controls how often tokens are refilled. For example,
// when rate_bytes_per_sec is set to 10MB/s and refill_period_us is set to
// 100ms, then 1MB is refilled every 100ms internally. Larger value can lead to
// burstier writes while smaller value introduces more CPU overhead.
// The default should work for most cases.
// @fairness: RateLimiter accepts high-pri requests and low-pri requests.
// A low-pri request is usually blocked in favor of a high-pri request.
// Currently, RocksDB assigns low-pri to requests from compaction and high-pri
// to requests from flush. Low-pri requests can get blocked if flush requests
// come in continuously. This fairness parameter grants low-pri requests
// permission with 1/fairness probability even when high-pri requests exist,
// to avoid starvation. You should be fine leaving it at the default of 10.
extern RateLimiter* NewGenericRateLimiter(
int64_t rate_bytes_per_sec,
int64_t refill_period_us = 100 * 1000,
int32_t fairness = 10);
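// Illustrative usage sketch (commented out, not part of this header). It
// assumes "rocksdb/options.h" is also included and that Options exposes a
// std::shared_ptr<RateLimiter> rate_limiter member, as in this release. One
// limiter can be shared so several DBs draw flush/compaction writes from the
// same budget:
//
//   std::shared_ptr<RateLimiter> limiter(
//       NewGenericRateLimiter(10 * 1024 * 1024 /* 10MB/s */));
//   Options options_a, options_b;
//   options_a.rate_limiter = limiter;
//   options_b.rate_limiter = limiter;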
} // namespace rocksdb

137
include/rocksdb/slice.h Normal file

@@ -0,0 +1,137 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Slice is a simple structure containing a pointer into some external
// storage and a size. The user of a Slice must ensure that the slice
// is not used after the corresponding external storage has been
// deallocated.
//
// Multiple threads can invoke const methods on a Slice without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Slice must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_SLICE_H_
#define STORAGE_ROCKSDB_INCLUDE_SLICE_H_
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <string>
namespace rocksdb {
class Slice {
public:
// Create an empty slice.
Slice() : data_(""), size_(0) { }
// Create a slice that refers to d[0,n-1].
Slice(const char* d, size_t n) : data_(d), size_(n) { }
// Create a slice that refers to the contents of "s"
/* implicit */
Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }
// Create a slice that refers to s[0,strlen(s)-1]
/* implicit */
Slice(const char* s) : data_(s), size_(strlen(s)) { }
// Return a pointer to the beginning of the referenced data
const char* data() const { return data_; }
// Return the length (in bytes) of the referenced data
size_t size() const { return size_; }
// Return true iff the length of the referenced data is zero
bool empty() const { return size_ == 0; }
// Return the ith byte in the referenced data.
// REQUIRES: n < size()
char operator[](size_t n) const {
assert(n < size());
return data_[n];
}
// Change this slice to refer to an empty array
void clear() { data_ = ""; size_ = 0; }
// Drop the first "n" bytes from this slice.
void remove_prefix(size_t n) {
assert(n <= size());
data_ += n;
size_ -= n;
}
  // Return a string that contains a copy of the referenced data.
std::string ToString(bool hex = false) const {
if (hex) {
std::string result;
char buf[10];
for (size_t i = 0; i < size_; i++) {
snprintf(buf, 10, "%02X", (unsigned char)data_[i]);
result += buf;
}
return result;
} else {
return std::string(data_, size_);
}
}
// Three-way comparison. Returns value:
// < 0 iff "*this" < "b",
// == 0 iff "*this" == "b",
// > 0 iff "*this" > "b"
int compare(const Slice& b) const;
// Return true iff "x" is a prefix of "*this"
bool starts_with(const Slice& x) const {
return ((size_ >= x.size_) &&
(memcmp(data_, x.data_, x.size_) == 0));
}
// private: make these public for rocksdbjni access
const char* data_;
size_t size_;
// Intentionally copyable
};
// A set of Slices that are virtually concatenated together. 'parts' points
// to an array of Slices. The number of elements in the array is 'num_parts'.
struct SliceParts {
SliceParts(const Slice* _parts, int _num_parts) :
parts(_parts), num_parts(_num_parts) { }
SliceParts() : parts(nullptr), num_parts(0) {}
const Slice* parts;
int num_parts;
};
inline bool operator==(const Slice& x, const Slice& y) {
return ((x.size() == y.size()) &&
(memcmp(x.data(), y.data(), x.size()) == 0));
}
inline bool operator!=(const Slice& x, const Slice& y) {
return !(x == y);
}
inline int Slice::compare(const Slice& b) const {
const int min_len = (size_ < b.size_) ? size_ : b.size_;
int r = memcmp(data_, b.data_, min_len);
if (r == 0) {
if (size_ < b.size_) r = -1;
else if (size_ > b.size_) r = +1;
}
return r;
}
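// Illustrative sketch (not part of this header): typical Slice handling. A
// Slice never owns memory, so the backing std::string must outlive it. The
// function name and the "rocksdb." prefix are hypothetical example values.
inline std::string ExampleDescribeSlice(const std::string& backing) {
  Slice s(backing);                    // points into `backing`, copies nothing
  if (s.starts_with(Slice("rocksdb."))) {
    s.remove_prefix(8);                // drop the 8-byte "rocksdb." prefix
  }
  return s.ToString(true /* hex */);   // copy the remainder out as hex
}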
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_SLICE_H_


@@ -0,0 +1,47 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Class for specifying user-defined functions which perform a
// transformation on a slice. It is not required that every slice
// belong to the domain and/or range of a function. Subclasses should
// define InDomain and InRange to determine which slices are in either
// of these sets respectively.
#ifndef STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_
#define STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_
#include <string>
namespace rocksdb {
class Slice;
class SliceTransform {
public:
virtual ~SliceTransform() {};
// Return the name of this transformation.
virtual const char* Name() const = 0;
// transform a src in domain to a dst in the range
virtual Slice Transform(const Slice& src) const = 0;
  // determine whether src is a valid input to which Transform() can be applied
virtual bool InDomain(const Slice& src) const = 0;
// determine whether dst=Transform(src) for some src
virtual bool InRange(const Slice& dst) const = 0;
};
extern const SliceTransform* NewFixedPrefixTransform(size_t prefix_len);
extern const SliceTransform* NewNoopTransform();
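// Illustrative usage sketch (commented out, not part of this header). It
// assumes "rocksdb/options.h" is also included; a SliceTransform is most
// commonly installed as the prefix extractor, which enables prefix bloom
// filters and the hash-based memtables and indexes:
//
//   Options options;
//   options.prefix_extractor.reset(NewFixedPrefixTransform(8));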
}
#endif // STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_


@@ -0,0 +1,281 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
#define STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>
#include <memory>
#include <vector>
namespace rocksdb {
/**
 * Keep adding tickers here.
* 1. Any ticker should be added before TICKER_ENUM_MAX.
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
*/
enum Tickers : uint32_t {
// total block cache misses
// REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS +
// BLOCK_CACHE_FILTER_MISS +
// BLOCK_CACHE_DATA_MISS;
BLOCK_CACHE_MISS = 0,
// total block cache hit
// REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT +
// BLOCK_CACHE_FILTER_HIT +
// BLOCK_CACHE_DATA_HIT;
BLOCK_CACHE_HIT,
// # of blocks added to block cache.
BLOCK_CACHE_ADD,
// # of times cache miss when accessing index block from block cache.
BLOCK_CACHE_INDEX_MISS,
// # of times cache hit when accessing index block from block cache.
BLOCK_CACHE_INDEX_HIT,
// # of times cache miss when accessing filter block from block cache.
BLOCK_CACHE_FILTER_MISS,
// # of times cache hit when accessing filter block from block cache.
BLOCK_CACHE_FILTER_HIT,
// # of times cache miss when accessing data block from block cache.
BLOCK_CACHE_DATA_MISS,
// # of times cache hit when accessing data block from block cache.
BLOCK_CACHE_DATA_HIT,
// # of times bloom filter has avoided file reads.
BLOOM_FILTER_USEFUL,
// # of memtable hits.
MEMTABLE_HIT,
// # of memtable misses.
MEMTABLE_MISS,
/**
* COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
* There are 3 reasons currently.
*/
COMPACTION_KEY_DROP_NEWER_ENTRY, // key was written with a newer value.
COMPACTION_KEY_DROP_OBSOLETE, // The key is obsolete.
COMPACTION_KEY_DROP_USER, // user compaction function has dropped the key.
  // Number of keys written to the database via the Put and Write calls
  NUMBER_KEYS_WRITTEN,
  // Number of keys read
  NUMBER_KEYS_READ,
  // Number of keys updated, if in-place update is enabled
  NUMBER_KEYS_UPDATED,
// Bytes written / read
BYTES_WRITTEN,
BYTES_READ,
NO_FILE_CLOSES,
NO_FILE_OPENS,
NO_FILE_ERRORS,
  // Time system had to wait to do L0-L1 compactions
STALL_L0_SLOWDOWN_MICROS,
// Time system had to wait to move memtable to L1.
STALL_MEMTABLE_COMPACTION_MICROS,
// write throttle because of too many files in L0
STALL_L0_NUM_FILES_MICROS,
RATE_LIMIT_DELAY_MILLIS,
NO_ITERATORS, // number of iterators currently open
// Number of MultiGet calls, keys read, and bytes read
NUMBER_MULTIGET_CALLS,
NUMBER_MULTIGET_KEYS_READ,
NUMBER_MULTIGET_BYTES_READ,
  // Number of delete records that were not required to be
  // written to storage because the key did not exist
NUMBER_FILTERED_DELETES,
NUMBER_MERGE_FAILURES,
SEQUENCE_NUMBER,
// number of times bloom was checked before creating iterator on a
// file, and the number of times the check was useful in avoiding
// iterator creation (and thus likely IOPs).
BLOOM_FILTER_PREFIX_CHECKED,
BLOOM_FILTER_PREFIX_USEFUL,
// Number of times we had to reseek inside an iteration to skip
// over large number of keys with same userkey.
NUMBER_OF_RESEEKS_IN_ITERATION,
  // Record the number of calls to GetUpdatesSince. Useful to keep track of
// transaction log iterator refreshes
GET_UPDATES_SINCE_CALLS,
BLOCK_CACHE_COMPRESSED_MISS, // miss in the compressed block cache
BLOCK_CACHE_COMPRESSED_HIT, // hit in the compressed block cache
WAL_FILE_SYNCED, // Number of times WAL sync is done
WAL_FILE_BYTES, // Number of bytes written to WAL
// Writes can be processed by requesting thread or by the thread at the
// head of the writers queue.
WRITE_DONE_BY_SELF,
WRITE_DONE_BY_OTHER,
  WRITE_TIMEDOUT, // Number of writes that timed out.
WRITE_WITH_WAL, // Number of Write calls that request WAL
COMPACT_READ_BYTES, // Bytes read during compaction
COMPACT_WRITE_BYTES, // Bytes written during compaction
FLUSH_WRITE_BYTES, // Bytes written during flush
  // Number of tables' properties loaded directly from file, without creating
// table reader object.
NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
NUMBER_SUPERVERSION_ACQUIRES,
NUMBER_SUPERVERSION_RELEASES,
NUMBER_SUPERVERSION_CLEANUPS,
NUMBER_BLOCK_NOT_COMPRESSED,
TICKER_ENUM_MAX
};
// The order of items listed in Tickers should be the same as
// the order listed in TickersNameMap
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"},
{BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"},
{BLOCK_CACHE_ADD, "rocksdb.block.cache.add"},
{BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"},
{BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"},
{BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"},
{BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"},
{BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"},
{BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"},
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
{MEMTABLE_HIT, "rocksdb.memtable.hit"},
{MEMTABLE_MISS, "rocksdb.memtable.miss"},
{COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"},
{COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"},
{COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"},
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},
{BYTES_WRITTEN, "rocksdb.bytes.written"},
{BYTES_READ, "rocksdb.bytes.read"},
{NO_FILE_CLOSES, "rocksdb.no.file.closes"},
{NO_FILE_OPENS, "rocksdb.no.file.opens"},
{NO_FILE_ERRORS, "rocksdb.no.file.errors"},
{STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"},
{STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"},
{STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"},
{RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"},
{NO_ITERATORS, "rocksdb.num.iterators"},
{NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"},
{NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"},
{NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read"},
{NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered"},
{NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"},
{SEQUENCE_NUMBER, "rocksdb.sequence.number"},
{BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"},
{BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"},
{NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"},
{GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"},
{BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"},
{BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"},
{WAL_FILE_SYNCED, "rocksdb.wal.synced"},
{WAL_FILE_BYTES, "rocksdb.wal.bytes"},
{WRITE_DONE_BY_SELF, "rocksdb.write.self"},
{WRITE_DONE_BY_OTHER, "rocksdb.write.other"},
{WRITE_TIMEDOUT, "rocksdb.write.timedout"},
{WRITE_WITH_WAL, "rocksdb.write.wal"},
{FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"},
{COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"},
{COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"},
{NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
"rocksdb.number.direct.load.table.properties"},
{NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"},
{NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"},
{NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"},
{NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"},
};
/**
 * Keep adding histograms here.
 * Any histogram should have a value less than HISTOGRAM_ENUM_MAX.
* Add a new Histogram by assigning it the current value of HISTOGRAM_ENUM_MAX
* Add a string representation in HistogramsNameMap below
* And increment HISTOGRAM_ENUM_MAX
*/
enum Histograms : uint32_t {
DB_GET = 0,
DB_WRITE,
COMPACTION_TIME,
TABLE_SYNC_MICROS,
COMPACTION_OUTFILE_SYNC_MICROS,
WAL_FILE_SYNC_MICROS,
MANIFEST_FILE_SYNC_MICROS,
// TIME SPENT IN IO DURING TABLE OPEN
TABLE_OPEN_IO_MICROS,
DB_MULTIGET,
READ_BLOCK_COMPACTION_MICROS,
READ_BLOCK_GET_MICROS,
WRITE_RAW_BLOCK_MICROS,
STALL_L0_SLOWDOWN_COUNT,
STALL_MEMTABLE_COMPACTION_COUNT,
STALL_L0_NUM_FILES_COUNT,
HARD_RATE_LIMIT_DELAY_COUNT,
SOFT_RATE_LIMIT_DELAY_COUNT,
NUM_FILES_IN_SINGLE_COMPACTION,
HISTOGRAM_ENUM_MAX,
};
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{ DB_GET, "rocksdb.db.get.micros" },
{ DB_WRITE, "rocksdb.db.write.micros" },
{ COMPACTION_TIME, "rocksdb.compaction.times.micros" },
{ TABLE_SYNC_MICROS, "rocksdb.table.sync.micros" },
{ COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros" },
{ WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros" },
{ MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros" },
{ TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros" },
{ DB_MULTIGET, "rocksdb.db.multiget.micros" },
{ READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros" },
{ READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros" },
{ WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros" },
{ STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"},
{ STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"},
{ STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"},
{ HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"},
{ SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"},
{ NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction" },
};
struct HistogramData {
double median;
double percentile95;
double percentile99;
double average;
double standard_deviation;
};
// Analyze the performance of a db
class Statistics {
public:
virtual ~Statistics() {}
virtual uint64_t getTickerCount(uint32_t tickerType) const = 0;
virtual void histogramData(uint32_t type,
HistogramData* const data) const = 0;
virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
virtual void measureTime(uint32_t histogramType, uint64_t time) = 0;
// String representation of the statistic object.
virtual std::string ToString() const = 0;
// Override this function to disable particular histogram collection
virtual bool HistEnabledForType(uint32_t type) const {
return type < HISTOGRAM_ENUM_MAX;
}
};
// Create a concrete DBStatistics object
std::shared_ptr<Statistics> CreateDBStatistics();
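// Illustrative sketch (not part of this header): the statistics object is
// normally installed as Options::statistics (e.g.
// "options.statistics = CreateDBStatistics();") so the DB populates it; the
// hypothetical helper below then reads one ticker and one histogram back out.
inline std::string ExampleSummarizeStats(
    const std::shared_ptr<Statistics>& stats) {
  if (!stats) {
    return "no statistics configured";
  }
  uint64_t cache_misses = stats->getTickerCount(BLOCK_CACHE_MISS);
  HistogramData get_latency;
  stats->histogramData(DB_GET, &get_latency);
  return "block cache misses: " + std::to_string(cache_misses) +
         ", median Get() micros: " + std::to_string(get_latency.median);
}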
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_

154
include/rocksdb/status.h Normal file

@@ -0,0 +1,154 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A Status encapsulates the result of an operation. It may indicate success,
// or it may indicate an error with an associated error message.
//
// Multiple threads can invoke const methods on a Status without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Status must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_STATUS_H_
#define STORAGE_ROCKSDB_INCLUDE_STATUS_H_
#include <string>
#include "rocksdb/slice.h"
namespace rocksdb {
class Status {
public:
// Create a success status.
Status() : code_(kOk), state_(nullptr) { }
~Status() { delete[] state_; }
// Copy the specified status.
Status(const Status& s);
void operator=(const Status& s);
// Return a success status.
static Status OK() { return Status(); }
// Return error status of an appropriate type.
static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kNotFound, msg, msg2);
}
// Fast path for not found without malloc;
static Status NotFound() {
return Status(kNotFound);
}
static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kCorruption, msg, msg2);
}
static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kNotSupported, msg, msg2);
}
static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kInvalidArgument, msg, msg2);
}
static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kIOError, msg, msg2);
}
static Status MergeInProgress(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kMergeInProgress, msg, msg2);
}
static Status Incomplete(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kIncomplete, msg, msg2);
}
static Status ShutdownInProgress(const Slice& msg,
const Slice& msg2 = Slice()) {
return Status(kShutdownInProgress, msg, msg2);
}
static Status TimedOut() {
return Status(kTimedOut);
}
static Status TimedOut(const Slice& msg, const Slice& msg2 = Slice()) {
return Status(kTimedOut, msg, msg2);
}
// Returns true iff the status indicates success.
bool ok() const { return code() == kOk; }
// Returns true iff the status indicates a NotFound error.
bool IsNotFound() const { return code() == kNotFound; }
// Returns true iff the status indicates a Corruption error.
bool IsCorruption() const { return code() == kCorruption; }
// Returns true iff the status indicates a NotSupported error.
bool IsNotSupported() const { return code() == kNotSupported; }
// Returns true iff the status indicates an InvalidArgument error.
bool IsInvalidArgument() const { return code() == kInvalidArgument; }
// Returns true iff the status indicates an IOError.
bool IsIOError() const { return code() == kIOError; }
// Returns true iff the status indicates an MergeInProgress.
bool IsMergeInProgress() const { return code() == kMergeInProgress; }
// Returns true iff the status indicates Incomplete
bool IsIncomplete() const { return code() == kIncomplete; }
  // Returns true iff the status indicates ShutdownInProgress
bool IsShutdownInProgress() const { return code() == kShutdownInProgress; }
bool IsTimedOut() const { return code() == kTimedOut; }
// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
std::string ToString() const;
enum Code {
kOk = 0,
kNotFound = 1,
kCorruption = 2,
kNotSupported = 3,
kInvalidArgument = 4,
kIOError = 5,
kMergeInProgress = 6,
kIncomplete = 7,
kShutdownInProgress = 8,
kTimedOut = 9
};
Code code() const {
return code_;
}
private:
// A nullptr state_ (which is always the case for OK) means the message
  // is empty. Otherwise, state_ is a new[] array of the following form:
// state_[0..3] == length of message
// state_[4..] == message
Code code_;
const char* state_;
explicit Status(Code code) : code_(code), state_(nullptr) { }
Status(Code code, const Slice& msg, const Slice& msg2);
static const char* CopyState(const char* s);
};
inline Status::Status(const Status& s) {
code_ = s.code_;
state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_);
}
inline void Status::operator=(const Status& s) {
// The following condition catches both aliasing (when this == &s),
// and the common case where both s and *this are ok.
code_ = s.code_;
if (state_ != s.state_) {
delete[] state_;
state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_);
}
}
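// Illustrative sketch (not part of this header): the usual calling pattern,
// branching on the error class before falling back to the printable form.
// The function name is hypothetical.
inline std::string ExampleDescribeStatus(const Status& s) {
  if (s.ok()) {
    return "OK";
  } else if (s.IsNotFound()) {
    return "key does not exist";
  } else if (s.IsCorruption() || s.IsIOError()) {
    return "persistent failure: " + s.ToString();
  }
  return s.ToString();
}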
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_STATUS_H_

270
include/rocksdb/table.h Normal file

@@ -0,0 +1,270 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Currently we support two types of tables: plain table and block-based table.
// 1. Block-based table: this is the default table type that we inherited from
//    LevelDB, which was designed for storing data on hard disk or flash
//    devices.
// 2. Plain table: one of RocksDB's SST file formats, optimized
// for low query latency on pure-memory or really low-latency media.
//
// A tutorial of rocksdb table formats is available here:
// https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
//
// Example code is also available
// https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats#wiki-examples
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"
namespace rocksdb {
// -- Block-based Table
class FlushBlockPolicyFactory;
class RandomAccessFile;
class TableBuilder;
class TableReader;
class WritableFile;
struct EnvOptions;
struct Options;
using std::unique_ptr;
enum ChecksumType : char {
kNoChecksum = 0x0, // not yet supported. Will fail
kCRC32c = 0x1,
kxxHash = 0x2,
};
// For advanced user only
struct BlockBasedTableOptions {
  // @flush_block_policy_factory creates the instances of flush block policy,
  // which provides a configurable way to determine when to flush a block in
  // the block based tables. If not set, table builder will use the default
  // block flush policy, which cuts blocks by block size (please refer to
  // `FlushBlockBySizePolicy`).
std::shared_ptr<FlushBlockPolicyFactory> flush_block_policy_factory;
// TODO(kailiu) Temporarily disable this feature by making the default value
// to be false.
//
// Indicating if we'd put index/filter blocks to the block cache.
// If not specified, each "table reader" object will pre-load index/filter
// block during table initialization.
bool cache_index_and_filter_blocks = false;
// The index type that will be used for this table.
enum IndexType : char {
// A space efficient index block that is optimized for
// binary-search-based index.
kBinarySearch,
// The hash index, if enabled, will do the hash lookup when
// `Options.prefix_extractor` is provided.
kHashSearch,
};
IndexType index_type = kBinarySearch;
// Influence the behavior when kHashSearch is used.
// if false, stores a precise prefix to block range mapping
// if true, does not store prefix and allows prefix hash collision
// (less memory consumption)
bool hash_index_allow_collision = true;
// Use the specified checksum type. Newly created table files will be
// protected with this checksum type. Old table files will still be readable,
  // even though they have a different checksum type.
ChecksumType checksum = kCRC32c;
};
// Table properties that are specific to block-based tables.
struct BlockBasedTablePropertyNames {
  // The value of this property is a fixed int32 number.
static const std::string kIndexType;
};
// Create default block based table factory.
extern TableFactory* NewBlockBasedTableFactory(
const BlockBasedTableOptions& table_options = BlockBasedTableOptions());
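// Illustrative sketch (not part of this header): tuning the block-based
// format. kHashSearch additionally requires Options::prefix_extractor to be
// set on the DB that uses this factory; the function name is hypothetical.
inline TableFactory* ExampleNewTunedBlockBasedFactory() {
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;  // charge them to block cache
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  table_options.checksum = kxxHash;                    // instead of the default kCRC32c
  return NewBlockBasedTableFactory(table_options);
}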
#ifndef ROCKSDB_LITE
enum EncodingType : char {
// Always write full keys without any special encoding.
kPlain,
// Find opportunity to write the same prefix once for multiple rows.
// In some cases, when a key follows a previous key with the same prefix,
// instead of writing out the full key, it just writes out the size of the
// shared prefix, as well as other bytes, to save some bytes.
//
// When using this option, the user is required to use the same prefix
// extractor to make sure the same prefix will be extracted from the same key.
// The Name() value of the prefix extractor will be stored in the file. When
// reopening the file, the name of the options.prefix_extractor given will be
// bitwise compared to the prefix extractors stored in the file. An error
// will be returned if the two don't match.
kPrefix,
};
// Table properties that are specific to plain tables.
struct PlainTablePropertyNames {
static const std::string kPrefixExtractorName;
static const std::string kEncodingType;
static const std::string kBloomVersion;
static const std::string kNumBloomBlocks;
};
const uint32_t kPlainTableVariableLength = 0;
struct PlainTableOptions {
  // @user_key_len: plain table has optimization for fixed-size keys, which can be
// specified via user_key_len. Alternatively, you can pass
// `kPlainTableVariableLength` if your keys have variable
// lengths.
uint32_t user_key_len = kPlainTableVariableLength;
  // @bloom_bits_per_key: the number of bits used for bloom filter per prefix. You
// may disable it by passing a zero.
int bloom_bits_per_key = 10;
// @hash_table_ratio: the desired utilization of the hash table used for prefix
// hashing. hash_table_ratio = number of prefixes / #buckets
// in the hash table
double hash_table_ratio = 0.75;
  // @index_sparseness: inside each prefix, one index record is built for every
  //                    index_sparseness keys, for binary search inside each
  //                    hash bucket.
// For encoding type kPrefix, the value will be used when
// writing to determine an interval to rewrite the full key.
// It will also be used as a suggestion and satisfied when
// possible.
size_t index_sparseness = 16;
// @huge_page_tlb_size: if <=0, allocate hash indexes and blooms from malloc.
// Otherwise from huge page TLB. The user needs to reserve
// huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
size_t huge_page_tlb_size = 0;
// @encoding_type: how to encode the keys. See enum EncodingType above for
// the choices. The value will determine how to encode keys
// when writing to a new SST file. This value will be stored
// inside the SST file which will be used when reading from the
// file, which makes it possible for users to choose different
// encoding type when reopening a DB. Files with different
// encoding types can co-exist in the same DB and can be read.
EncodingType encoding_type = kPlain;
  // @full_scan_mode: mode for reading the whole file record by record without
  //                  using the index.
bool full_scan_mode = false;
// @store_index_in_file: compute plain table index and bloom filter during
// file building and store it in file. When reading
// file, index will be mmaped instead of recomputation.
bool store_index_in_file = false;
};
// -- Plain Table with prefix-only seek
// For this factory, you need to set Options.prefix_extractor properly to make it
// work. Lookup starts with a prefix hash lookup for the key prefix. Inside the
// hash bucket found, a binary search is executed for hash conflicts. Finally,
// a linear search is used.
extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions());
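// Illustrative sketch (not part of this header): a plain-table configuration
// for fixed-size keys. It must be paired with Options::prefix_extractor on
// the DB side; the function name and the 16-byte key length are hypothetical
// example values.
inline TableFactory* ExampleNewFixedKeyPlainTableFactory() {
  PlainTableOptions plain_options;
  plain_options.user_key_len = 16;         // every user key is exactly 16 bytes
  plain_options.bloom_bits_per_key = 10;   // keep the per-prefix bloom filter
  plain_options.encoding_type = kPrefix;   // delta-encode shared key prefixes
  plain_options.store_index_in_file = true;
  return NewPlainTableFactory(plain_options);
}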
struct CuckooTablePropertyNames {
static const std::string kEmptyKey;
static const std::string kValueLength;
static const std::string kNumHashTable;
static const std::string kMaxNumBuckets;
static const std::string kIsLastLevel;
};
#endif // ROCKSDB_LITE
// A base class for table factories.
class TableFactory {
public:
virtual ~TableFactory() {}
// The type of the table.
//
// The client of this package should switch to a new name whenever
// the table format implementation changes.
//
// Names starting with "rocksdb." are reserved and should not be used
// by any clients of this package.
virtual const char* Name() const = 0;
  // Returns a table reader object that can fetch data from the file specified
  // in parameter file. It's the caller's responsibility to make sure
  // the file is in the correct format.
  //
  // NewTableReader() is called in two places:
  // (1) TableCache::FindTable() calls the function on a table cache miss
  //     and caches the table object returned.
  // (2) SstFileReader (for SST Dump) opens the table and dumps the table
  //     contents using the iterator of the table.
  // options and soptions are options. options is the general options.
  // Multiple configured objects can be accessed from there, including but not
  // limited to block cache and key comparators.
// file is a file handler to handle the file for the table
// file_size is the physical file size of the file
// table_reader is the output table reader
virtual Status NewTableReader(
const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0;
// Return a table builder to write to a file for this table type.
//
// It is called in several places:
// (1) When flushing memtable to a level-0 output file, it creates a table
// builder (In DBImpl::WriteLevel0Table(), by calling BuildTable())
// (2) During compaction, it gets the builder for writing compaction output
// files in DBImpl::OpenCompactionOutputFile().
// (3) When recovering from transaction logs, it creates a table builder to
// write to a level-0 output file (In DBImpl::WriteLevel0TableForRecovery,
// by calling BuildTable())
// (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
//
  // options is the general options. Multiple configured objects can be
  // accessed from there, including but not limited to compression options.
// file is a handle of a writable file. It is the caller's responsibility to
// keep the file open and close the file after closing the table builder.
// compression_type is the compression type to use in this table.
virtual TableBuilder* NewTableBuilder(
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const = 0;
};
#ifndef ROCKSDB_LITE
// Create a special table factory that can open both the block based table
// format and the plain table format, based on the setting inside the SST
// files. It should be used to convert a DB from one table format to another.
// @table_factory_to_write: the table factory used when writing to new files.
// @block_based_table_factory: block based table factory to use. If NULL, use
// a default one.
// @plain_table_factory: plain table factory to use. If NULL, use a default one.
extern TableFactory* NewAdaptiveTableFactory(
std::shared_ptr<TableFactory> table_factory_to_write = nullptr,
std::shared_ptr<TableFactory> block_based_table_factory = nullptr,
std::shared_ptr<TableFactory> plain_table_factory = nullptr);
#endif // ROCKSDB_LITE
} // namespace rocksdb


@@ -0,0 +1,127 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <string>
#include <map>
#include "rocksdb/status.h"
namespace rocksdb {
// -- Table Properties
// Other than basic table properties, each table may also have the user
// collected properties.
// The values of the user-collected properties are encoded as raw bytes --
// users have to interpret these values by themselves.
// Note: To do prefix seek/scan in `UserCollectedProperties`, you can do
// something similar to:
//
// UserCollectedProperties props = ...;
// for (auto pos = props.lower_bound(prefix);
// pos != props.end() && pos->first.compare(0, prefix.size(), prefix) == 0;
// ++pos) {
// ...
// }
typedef std::map<const std::string, std::string> UserCollectedProperties;
// TableProperties contains a bunch of read-only properties of its associated
// table.
struct TableProperties {
public:
// the total size of all data blocks.
uint64_t data_size = 0;
// the size of index block.
uint64_t index_size = 0;
// the size of filter block.
uint64_t filter_size = 0;
// total raw key size
uint64_t raw_key_size = 0;
// total raw value size
uint64_t raw_value_size = 0;
// the number of blocks in this table
uint64_t num_data_blocks = 0;
// the number of entries in this table
uint64_t num_entries = 0;
// format version, reserved for backward compatibility
uint64_t format_version = 0;
// If 0, key is variable length. Otherwise number of bytes for each key.
uint64_t fixed_key_len = 0;
// The name of the filter policy used in this table.
// If no filter policy is used, `filter_policy_name` will be an empty string.
std::string filter_policy_name;
// user collected properties
UserCollectedProperties user_collected_properties;
// convert this object to a human readable form
// @prop_delim: delimiter for each property.
std::string ToString(const std::string& prop_delim = "; ",
const std::string& kv_delim = "=") const;
};
// table properties' human-readable names in the property block.
struct TablePropertiesNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kFilterSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFormatVersion;
static const std::string kFixedKeyLen;
static const std::string kFilterPolicy;
};
extern const std::string kPropertiesBlock;
// `TablePropertiesCollector` provides the mechanism for users to collect
// their own interested properties. This class is essentially a collection
// of callback functions that will be invoked during table building.
// It is constructed with TablePropertiesCollectorFactory. The methods don't
// need to be thread-safe, as we will create exactly one
// TablePropertiesCollector object per table and then call it sequentially.
class TablePropertiesCollector {
public:
virtual ~TablePropertiesCollector() {}
// Add() will be called when a new key/value pair is inserted into the table.
// @params key the original key that is inserted into the table.
// @params value the original value that is inserted into the table.
virtual Status Add(const Slice& key, const Slice& value) = 0;
// Finish() will be called when a table has already been built and is ready
// for writing the properties block.
// @params properties User will add their collected statistics to
// `properties`.
virtual Status Finish(UserCollectedProperties* properties) = 0;
// Return the human-readable properties, where the key is property name and
// the value is the human-readable form of value.
virtual UserCollectedProperties GetReadableProperties() const = 0;
// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
};
// Constructs TablePropertiesCollector instances. Internally, a new
// TablePropertiesCollector is created for each new table.
class TablePropertiesCollectorFactory {
public:
virtual ~TablePropertiesCollectorFactory() {}
// has to be thread-safe
virtual TablePropertiesCollector* CreateTablePropertiesCollector() = 0;
// The name of the properties collector can be used for debugging purpose.
virtual const char* Name() const = 0;
};
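// Illustrative sketch (not part of this header): a minimal collector that
// records how many entries and how many raw value bytes went into each table.
// The class names are hypothetical.
class ExampleCountingCollector : public TablePropertiesCollector {
 public:
  virtual Status Add(const Slice& key, const Slice& value) override {
    ++num_entries_;
    value_bytes_ += value.size();
    return Status::OK();
  }
  virtual Status Finish(UserCollectedProperties* properties) override {
    (*properties)["example.num.entries"] = std::to_string(num_entries_);
    (*properties)["example.value.bytes"] = std::to_string(value_bytes_);
    return Status::OK();
  }
  virtual UserCollectedProperties GetReadableProperties() const override {
    UserCollectedProperties readable;
    readable["example.num.entries"] = std::to_string(num_entries_);
    readable["example.value.bytes"] = std::to_string(value_bytes_);
    return readable;
  }
  virtual const char* Name() const override {
    return "ExampleCountingCollector";
  }

 private:
  uint64_t num_entries_ = 0;
  uint64_t value_bytes_ = 0;
};
class ExampleCountingCollectorFactory : public TablePropertiesCollectorFactory {
 public:
  virtual TablePropertiesCollector* CreateTablePropertiesCollector() override {
    return new ExampleCountingCollector;
  }
  virtual const char* Name() const override {
    return "ExampleCountingCollectorFactory";
  }
};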
// Extra properties
// Below is a list of non-basic properties that are collected by the database
// itself, especially some properties regarding the internal keys (which are
// unknown to `table`).
extern uint64_t GetDeletedKeys(const UserCollectedProperties& props);
} // namespace rocksdb


@@ -0,0 +1,104 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_
#define STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/write_batch.h"
#include <memory>
#include <vector>
namespace rocksdb {
class LogFile;
typedef std::vector<std::unique_ptr<LogFile>> VectorLogPtr;
enum WalFileType {
/* Indicates that WAL file is in archive directory. WAL files are moved from
* the main db directory to archive directory once they are not live and stay
* there until cleaned up. Files are cleaned depending on archive size
* (Options::WAL_size_limit_MB) and time since last cleaning
* (Options::WAL_ttl_seconds).
*/
kArchivedLogFile = 0,
/* Indicates that WAL file is live and resides in the main db directory */
kAliveLogFile = 1
} ;
class LogFile {
public:
LogFile() {}
virtual ~LogFile() {}
// Returns log file's pathname relative to the main db dir
// Eg. For a live-log-file = /000003.log
// For an archived-log-file = /archive/000003.log
virtual std::string PathName() const = 0;
// Primary identifier for log file.
// This is directly proportional to creation time of the log file
virtual uint64_t LogNumber() const = 0;
// Log file can be either alive or archived
virtual WalFileType Type() const = 0;
// Starting sequence number of writebatch written in this log file
virtual SequenceNumber StartSequence() const = 0;
// Size of log file on disk in Bytes
virtual uint64_t SizeFileBytes() const = 0;
};
struct BatchResult {
SequenceNumber sequence = 0;
std::unique_ptr<WriteBatch> writeBatchPtr;
};
// A TransactionLogIterator is used to iterate over the transactions in a db.
// One run of the iterator is continuous, i.e. the iterator will stop at the
// beginning of any gap in sequences
class TransactionLogIterator {
public:
TransactionLogIterator() {}
virtual ~TransactionLogIterator() {}
// An iterator is either positioned at a WriteBatch or not valid.
// This method returns true if the iterator is valid.
// Can read data from a valid iterator.
virtual bool Valid() = 0;
// Moves the iterator to the next WriteBatch.
// REQUIRES: Valid() to be true.
virtual void Next() = 0;
// Returns ok if the iterator is valid.
// Returns the Error when something has gone wrong.
virtual Status status() = 0;
  // If valid, returns the current write_batch and the sequence number of the
// earliest transaction contained in the batch.
// ONLY use if Valid() is true and status() is OK.
virtual BatchResult GetBatch() = 0;
// The read options for TransactionLogIterator.
struct ReadOptions {
// If true, all data read from underlying storage will be
// verified against corresponding checksums.
// Default: true
bool verify_checksums_;
ReadOptions() : verify_checksums_(true) {}
explicit ReadOptions(bool verify_checksums)
: verify_checksums_(verify_checksums) {}
};
};
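// Illustrative usage sketch (commented out, not part of this header). It
// assumes "rocksdb/db.h" is included and `db` is an open DB*; GetUpdatesSince
// is the DB entry point that hands back this iterator, and `start_seq` is a
// caller-chosen sequence number.
//
//   std::unique_ptr<TransactionLogIterator> iter;
//   Status s = db->GetUpdatesSince(start_seq, &iter);
//   if (s.ok()) {
//     for (; iter->Valid(); iter->Next()) {
//       BatchResult batch = iter->GetBatch();
//       // batch.sequence is the first sequence number in batch.writeBatchPtr
//     }
//     s = iter->status();  // distinguishes end-of-log from an error
//   }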
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_

20
include/rocksdb/types.h Normal file

@@ -0,0 +1,20 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_INCLUDE_TYPES_H_
#define STORAGE_ROCKSDB_INCLUDE_TYPES_H_
#include <stdint.h>
namespace rocksdb {
// Define all public custom types here.
// Represents a sequence number in a WAL file.
typedef uint64_t SequenceNumber;
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_TYPES_H_


@@ -0,0 +1,84 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H
#define STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H
#include <stdint.h>
#include <climits>
#include <vector>
namespace rocksdb {
//
// Algorithm used to make a compaction request stop picking new files
// into a single compaction run
//
enum CompactionStopStyle {
kCompactionStopStyleSimilarSize, // pick files of similar size
kCompactionStopStyleTotalSize // total size of picked files > next file
};
class CompactionOptionsUniversal {
public:
  // Percentage flexibility while comparing file size. If the candidate file(s)
  // size is 1% smaller than the next file's size, then include next file into
  // this candidate set. Default: 1
unsigned int size_ratio;
// The minimum number of files in a single compaction run. Default: 2
unsigned int min_merge_width;
// The maximum number of files in a single compaction run. Default: UINT_MAX
unsigned int max_merge_width;
// The size amplification is defined as the amount (in percentage) of
// additional storage needed to store a single byte of data in the database.
// For example, a size amplification of 2% means that a database that
// contains 100 bytes of user-data may occupy up to 102 bytes of
// physical storage. By this definition, a fully compacted database has
// a size amplification of 0%. Rocksdb uses the following heuristic
// to calculate size amplification: it assumes that all files excluding
// the earliest file contribute to the size amplification.
// Default: 200, which means that a 100 byte database could require up to
// 300 bytes of storage.
unsigned int max_size_amplification_percent;
// If this option is set to be -1 (the default value), all the output files
// will follow the compression type specified.
//
// If this option is not negative, we will try to make sure compressed
// size is just above this value. In normal cases, at least this percentage
// of data will be compressed.
// When we are compacting to a new file, here is the criterion for whether
// it needs to be compressed: assuming the following is the list of files
// sorted by generation time:
// A1...An B1...Bm C1...Ct
// where A1 is the newest and Ct is the oldest, and we are going to compact
// B1...Bm, we calculate the total size of all the files as total_size, as
// well as the total size of C1...Ct as total_C, the compaction output file
// will be compressed iff
// total_C / total_size < this percentage
// Default: -1
int compression_size_percent;
// The algorithm used to stop picking files into a single compaction run
// Default: kCompactionStopStyleTotalSize
CompactionStopStyle stop_style;
// Default set of parameters
CompactionOptionsUniversal()
: size_ratio(1),
min_merge_width(2),
max_merge_width(UINT_MAX),
max_size_amplification_percent(200),
compression_size_percent(-1),
stop_style(kCompactionStopStyleTotalSize) {}
};
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H
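
A short sketch (editorial illustration, not part of the header) of how these fields are typically plugged into an Options object; compaction_style, kCompactionStyleUniversal and compaction_options_universal are assumed to come from rocksdb/options.h, and the values simply restate the defaults documented above.

#include <climits>
#include "rocksdb/options.h"

// Illustrative helper: build Options that use universal compaction.
rocksdb::Options MakeUniversalCompactionOptions() {
  rocksdb::Options options;
  options.compaction_style = rocksdb::kCompactionStyleUniversal;
  rocksdb::CompactionOptionsUniversal univ;
  univ.size_ratio = 1;                        // 1% size flexibility
  univ.min_merge_width = 2;                   // at least two files per run
  univ.max_merge_width = UINT_MAX;            // no upper bound on files per run
  univ.max_size_amplification_percent = 200;  // tolerate up to 3x space usage
  univ.compression_size_percent = -1;         // compress all output files
  univ.stop_style = rocksdb::kCompactionStopStyleTotalSize;
  options.compaction_options_universal = univ;
  return options;
}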

View File

@@ -0,0 +1,252 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#ifndef ROCKSDB_LITE
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <string>
#include <map>
#include <vector>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
namespace rocksdb {
struct BackupableDBOptions {
// Where to keep the backup files. Has to be different than dbname_
// Best to set this to dbname_ + "/backups"
// Required
std::string backup_dir;
// Backup Env object. It will be used for backup file I/O. If it's
// nullptr, backups will be written out using the DB's Env. If it's
// non-nullptr, backup's I/O will be performed using this object.
// If you want to have backups on HDFS, use HDFS Env here!
// Default: nullptr
Env* backup_env;
// If share_table_files == true, backup will assume that table files with
// same name have the same contents. This enables incremental backups and
// avoids unnecessary data copies.
// If share_table_files == false, each backup will be on its own and will
// not share any data with other backups.
// default: true
bool share_table_files;
// Backup info and error messages will be written to info_log
// if non-nullptr.
// Default: nullptr
Logger* info_log;
// If sync == true, we can guarantee you'll get a consistent backup even
// on a machine crash/reboot. Backup process is slower with sync enabled.
// If sync == false, we don't guarantee anything on machine reboot. However,
// chances are some of the backups are consistent.
// Default: true
bool sync;
// If true, it will delete whatever backups there are already
// Default: false
bool destroy_old_data;
// If false, we won't back up log files. This option can be useful for backing
// up in-memory databases where log files are persisted, but table files are in
// memory.
// Default: true
bool backup_log_files;
// Max bytes that can be transferred in a second during backup.
// If 0, go as fast as you can
// Default: 0
uint64_t backup_rate_limit;
// Max bytes that can be transferred in a second during restore.
// If 0, go as fast as you can
// Default: 0
uint64_t restore_rate_limit;
// Only used if share_table_files is set to true. If true, will consider that
// backups can come from different databases, hence an SST file is not uniquely
// identified by its name, but by the triple (file name, crc32, file length)
// Default: false
// Note: this is an experimental option, and you'll need to set it manually
// *turn it on only if you know what you're doing*
bool share_files_with_checksum;
void Dump(Logger* logger) const;
explicit BackupableDBOptions(const std::string& _backup_dir,
Env* _backup_env = nullptr,
bool _share_table_files = true,
Logger* _info_log = nullptr, bool _sync = true,
bool _destroy_old_data = false,
bool _backup_log_files = true,
uint64_t _backup_rate_limit = 0,
uint64_t _restore_rate_limit = 0)
: backup_dir(_backup_dir),
backup_env(_backup_env),
share_table_files(_share_table_files),
info_log(_info_log),
sync(_sync),
destroy_old_data(_destroy_old_data),
backup_log_files(_backup_log_files),
backup_rate_limit(_backup_rate_limit),
restore_rate_limit(_restore_rate_limit),
share_files_with_checksum(false) {
assert(share_table_files || !share_files_with_checksum);
}
};
struct RestoreOptions {
// If true, restore won't overwrite the existing log files in wal_dir. It will
// also move all log files from archive directory to wal_dir. Use this option
// in combination with BackupableDBOptions::backup_log_files = false for
// persisting in-memory databases.
// Default: false
bool keep_log_files;
explicit RestoreOptions(bool _keep_log_files = false)
: keep_log_files(_keep_log_files) {}
};
typedef uint32_t BackupID;
struct BackupInfo {
BackupID backup_id;
int64_t timestamp;
uint64_t size;
BackupInfo() {}
BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size)
: backup_id(_backup_id), timestamp(_timestamp), size(_size) {}
};
class BackupEngineReadOnly {
public:
virtual ~BackupEngineReadOnly() {}
static BackupEngineReadOnly* NewReadOnlyBackupEngine(
Env* db_env, const BackupableDBOptions& options);
// You can GetBackupInfo safely, even with other BackupEngine performing
// backups on the same directory
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
// Restoring DB from backup is NOT safe when there is another BackupEngine
// running that might call DeleteBackup() or PurgeOldBackups(). It is the caller's
// responsibility to synchronize the operation, i.e. don't delete the backup
// when you're restoring from it
virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status RestoreDBFromLatestBackup(
const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
};
// Please see the documentation in BackupableDB and RestoreBackupableDB
class BackupEngine {
public:
virtual ~BackupEngine() {}
static BackupEngine* NewBackupEngine(Env* db_env,
const BackupableDBOptions& options);
virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false) = 0;
virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
virtual Status DeleteBackup(BackupID backup_id) = 0;
virtual void StopBackup() = 0;
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status RestoreDBFromLatestBackup(
const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
};
// Stack your DB with BackupableDB to be able to backup the DB
class BackupableDB : public StackableDB {
public:
// BackupableDBOptions have to be the same as the ones used in a previous
// incarnation of the DB
//
// BackupableDB owns the pointer `DB* db` now. You should not delete it or
// use it after the invocation of BackupableDB
BackupableDB(DB* db, const BackupableDBOptions& options);
virtual ~BackupableDB();
// Captures the state of the database in the latest backup
// NOT a thread safe call
Status CreateNewBackup(bool flush_before_backup = false);
// Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// deletes old backups, keeping latest num_backups_to_keep alive
Status PurgeOldBackups(uint32_t num_backups_to_keep);
// deletes a specific backup
Status DeleteBackup(BackupID backup_id);
// Call this from another thread if you want to stop the backup
// that is currently happening. It will return immediately and will
// not wait for the backup to stop.
// The backup will stop ASAP and the call to CreateNewBackup will
// return Status::Incomplete(). It will not clean up after itself, but
// the state will remain consistent. The state will be cleaned up
// next time you create BackupableDB or RestoreBackupableDB.
void StopBackup();
private:
BackupEngine* backup_engine_;
};
// Use this class to access information about backups and restore from them
class RestoreBackupableDB {
public:
RestoreBackupableDB(Env* db_env, const BackupableDBOptions& options);
~RestoreBackupableDB();
// Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// restore from backup with backup_id
// IMPORTANT -- if options_.share_table_files == true and you restore DB
// from some backup that is not the latest, and you start creating new
// backups from the new DB, they will probably fail
//
// Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
// If you add new data to the DB and try creating a new backup now, the
// database will diverge from backups 4 and 5 and the new backup will fail.
// If you want to create new backup, you will first have to delete backups 4
// and 5.
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
const std::string& wal_dir,
const RestoreOptions& restore_options =
RestoreOptions());
// restore from the latest backup
Status RestoreDBFromLatestBackup(const std::string& db_dir,
const std::string& wal_dir,
const RestoreOptions& restore_options =
RestoreOptions());
// deletes old backups, keeping latest num_backups_to_keep alive
Status PurgeOldBackups(uint32_t num_backups_to_keep);
// deletes a specific backup
Status DeleteBackup(BackupID backup_id);
private:
BackupEngine* backup_engine_;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
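
A minimal sketch (editorial illustration, not part of the header) of the backup/restore flow described above; the /tmp paths are illustrative, BackupableDB takes ownership of the wrapped DB, and the restore goes into a separate directory so no other BackupEngine is touching the backups while restoring.

#include "rocksdb/db.h"
#include "rocksdb/utilities/backupable_db.h"

// Illustrative helper: take one backup, then restore the latest one elsewhere.
rocksdb::Status BackupAndRestore() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/rocksdb_example", &db);
  if (!s.ok()) {
    return s;
  }
  // BackupableDB owns `db` from here on.
  rocksdb::BackupableDB backupable_db(
      db, rocksdb::BackupableDBOptions("/tmp/rocksdb_example_backups"));
  s = backupable_db.CreateNewBackup(true /* flush_before_backup */);
  if (!s.ok()) {
    return s;
  }
  // Restore the latest backup into a different directory.
  rocksdb::RestoreBackupableDB restore(
      rocksdb::Env::Default(),
      rocksdb::BackupableDBOptions("/tmp/rocksdb_example_backups"));
  return restore.RestoreDBFromLatestBackup("/tmp/rocksdb_example_restored",
                                           "/tmp/rocksdb_example_restored");
}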

View File

@@ -0,0 +1,68 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/db.h"
namespace rocksdb {
// Database with TTL support.
//
// USE-CASES:
// This API should be used to open the db when key-values inserted are
// meant to be removed from the db in a non-strict 'ttl' amount of time.
// Therefore, this guarantees that key-values inserted will remain in the
// db for >= ttl amount of time and the db will make efforts to remove the
// key-values as soon as possible after ttl seconds of their insertion.
//
// BEHAVIOUR:
// TTL is accepted in seconds
// (int32_t)Timestamp(creation) is suffixed to values in Put internally
// Expired TTL values are deleted in compaction only: (Timestamp + ttl < time_now)
// Get/Iterator may return expired entries (compaction has not run on them yet)
// Different TTL may be used during different Opens
// Example: Open1 at t=0 with ttl=4 and insert k1,k2, close at t=2
// Open2 at t=3 with ttl=5. Now k1,k2 should be deleted at t>=5
// read_only=true opens in the usual read-only mode. Compactions will not be
// triggered (neither manual nor automatic), so no expired entries are removed
//
// CONSTRAINTS:
// Not specifying/passing or non-positive TTL behaves like TTL = infinity
//
// !!!WARNING!!!:
// Calling DB::Open directly to re-open a db created by this API will return
// corrupt values (timestamp suffixed) and there will be no TTL effect
// during the second Open, so use this API consistently to open the db
// Be careful when passing ttl with a small positive value because the
// whole database may be deleted in a small amount of time
class DBWithTTL : public StackableDB {
public:
virtual Status CreateColumnFamilyWithTtl(
const ColumnFamilyOptions& options, const std::string& column_family_name,
ColumnFamilyHandle** handle, int ttl) = 0;
static Status Open(const Options& options, const std::string& dbname,
DBWithTTL** dbptr, int32_t ttl = 0,
bool read_only = false);
static Status Open(const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles,
DBWithTTL** dbptr, std::vector<int32_t> ttls,
bool read_only = false);
protected:
explicit DBWithTTL(DB* db) : StackableDB(db) {}
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
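
A minimal sketch (editorial illustration, not part of the header) of opening a TTL database as described above; the path and the 300-second TTL are illustrative.

#include "rocksdb/utilities/db_ttl.h"

// Illustrative helper: open a DB whose entries expire ~300 seconds after Put.
rocksdb::Status OpenWithTtl() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DBWithTTL* db = nullptr;
  rocksdb::Status s =
      rocksdb::DBWithTTL::Open(options, "/tmp/rocksdb_ttl_example", &db, 300);
  if (!s.ok()) {
    return s;
  }
  // Values are timestamp-suffixed internally; expired entries are dropped
  // during compaction, not on read.
  s = db->Put(rocksdb::WriteOptions(), "key", "value");
  delete db;
  return s;
}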

View File

@@ -0,0 +1,149 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/utilities/json_document.h"
#include "rocksdb/db.h"
namespace rocksdb {
// IMPORTANT: DocumentDB is a work in progress. It is unstable and we might
// change the API without warning. Talk to RocksDB team before using this in
// production ;)
// DocumentDB is a layer on top of RocksDB that provides a very simple JSON API.
// When creating a DB, you specify a list of indexes you want to keep on your
// data. You can insert a JSON document to the DB, which is automatically
// indexed. Every document added to the DB needs to have an "_id" field, which
// is automatically indexed and is a unique primary key. All other indexes are
// non-unique.
// NOTE: field names in the JSON are NOT allowed to start with '$' or
// contain '.'. We don't currently enforce that rule, but the DB will start
// behaving badly if it is violated.
// Cursor is what you get as a result of executing a query. To get all
// results from a query, call Next() on a Cursor while Valid() returns true
class Cursor {
public:
Cursor() = default;
virtual ~Cursor() {}
virtual bool Valid() const = 0;
virtual void Next() = 0;
// Lifecycle of the returned JSONDocument is until the next Next() call
virtual const JSONDocument& document() const = 0;
virtual Status status() const = 0;
private:
// No copying allowed
Cursor(const Cursor&);
void operator=(const Cursor&);
};
struct DocumentDBOptions {
int background_threads = 4;
uint64_t memtable_size = 128 * 1024 * 1024; // 128 MB
uint64_t cache_size = 1 * 1024 * 1024 * 1024; // 1 GB
};
// TODO(icanadi) Add `JSONDocument* info` parameter to all calls that can be
// used by the caller to get more information about the call execution (number
// of dropped records, number of updated records, etc.)
class DocumentDB : public StackableDB {
public:
struct IndexDescriptor {
// Currently, you can only define an index on a single field. To specify an
// index on a field X, set index description to JSON "{X: 1}"
// Currently the value needs to be 1, which means ascending.
// In the future, we plan to also support indexes on multiple keys, where
// you could mix ascending sorting (1) with descending sorting indexes (-1)
JSONDocument* description;
std::string name;
};
// Open DocumentDB with specified indexes. The list of indexes has to be
// complete, i.e. include all indexes present in the DB, except the primary
// key index.
// Otherwise, Open() will return an error
static Status Open(const DocumentDBOptions& options, const std::string& name,
const std::vector<IndexDescriptor>& indexes,
DocumentDB** db, bool read_only = false);
explicit DocumentDB(DB* db) : StackableDB(db) {}
// Create a new index. It will stop all writes for the duration of the call.
// All current documents in the DB are scanned and corresponding index entries
// are created
virtual Status CreateIndex(const WriteOptions& write_options,
const IndexDescriptor& index) = 0;
// Drop an index. The client is responsible for making sure that the index is
// not being used by currently executing queries
virtual Status DropIndex(const std::string& name) = 0;
// Insert a document to the DB. The document needs to have a primary key "_id"
// which can either be a string or an integer. Otherwise the write will fail
// with InvalidArgument.
virtual Status Insert(const WriteOptions& options,
const JSONDocument& document) = 0;
// Deletes all documents matching a filter atomically
virtual Status Remove(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& query) = 0;
// Does this sequence of operations:
// 1. Find all documents matching a filter
// 2. For all documents, atomically:
// 2.1. apply the update operators
// 2.2. update the secondary indexes
//
// Currently only $set update operator is supported.
// Syntax is: {$set: {key1: value1, key2: value2, etc...}}
// This operator will change a document's key1 field to value1, key2 to
// value2, etc. New values will be set even if a document didn't have an entry
// for the specified key.
//
// You cannot change the primary key of a document.
//
// Update example: Update({id: {$gt: 5}, $index: id}, {$set: {enabled: true}})
virtual Status Update(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& filter,
const JSONDocument& updates) = 0;
// query has to be an array in which every element is an operator. Currently
// only $filter operator is supported. Syntax of $filter operator is:
// {$filter: {key1: condition1, key2: condition2, etc.}} where conditions can
// be either:
// 1) a single value, in which case the condition is an equality condition, or
// 2) a defined operator, like {$gt: 4}, which will match all documents that
// have the key greater than 4.
//
// Supported operators are:
// 1) $gt -- greater than
// 2) $gte -- greater than or equal
// 3) $lt -- less than
// 4) $lte -- less than or equal
// If you want the filter to use an index, you need to specify it like this:
// {$filter: {...(conditions)..., $index: index_name}}
//
// Example query:
// * [{$filter: {name: John, age: {$gte: 18}, $index: age}}]
// will return all Johns whose age is greater than or equal to 18, and it will
// use index "age" to satisfy the query.
virtual Cursor* Query(const ReadOptions& read_options,
const JSONDocument& query) = 0;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
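
A minimal sketch (editorial illustration, not part of the header) of the insert path described above; the path is illustrative, no secondary indexes are declared, and whether Open() creates a missing database is an assumption of this sketch.

#include <memory>
#include <vector>
#include "rocksdb/utilities/document_db.h"

// Illustrative helper: open a DocumentDB and insert one JSON document.
rocksdb::Status InsertOneDocument() {
  rocksdb::DocumentDBOptions db_options;
  std::vector<rocksdb::DocumentDB::IndexDescriptor> indexes;  // none besides "_id"
  rocksdb::DocumentDB* db = nullptr;
  rocksdb::Status s = rocksdb::DocumentDB::Open(
      db_options, "/tmp/rocksdb_document_example", indexes, &db);
  if (!s.ok()) {
    return s;
  }
  std::unique_ptr<rocksdb::JSONDocument> doc(rocksdb::JSONDocument::ParseJSON(
      "{\"_id\": 1, \"name\": \"John\", \"age\": 20}"));
  if (doc != nullptr) {
    s = db->Insert(rocksdb::WriteOptions(), *doc);
  }
  delete db;
  return s;
}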

View File

@@ -0,0 +1,105 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#ifndef ROCKSDB_LITE
#pragma once
#include <string>
#include <vector>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/status.h"
namespace rocksdb {
//
// Configurable options needed for setting up a Geo database
//
struct GeoDBOptions {
// Backup info and error messages will be written to info_log
// if non-nullptr.
// Default: nullptr
Logger* info_log;
explicit GeoDBOptions(Logger* _info_log = nullptr):info_log(_info_log) { }
};
//
// A position in the earth's geoid
//
class GeoPosition {
public:
double latitude;
double longitude;
explicit GeoPosition(double la = 0, double lo = 0) :
latitude(la), longitude(lo) {
}
};
//
// Description of an object on the Geoid. It is located by a GPS location,
// and is identified by the id. The value associated with this object is
// an opaque string 'value'. Different objects identified by unique id's
// can have the same gps-location associated with them.
//
class GeoObject {
public:
GeoPosition position;
std::string id;
std::string value;
GeoObject() {}
GeoObject(const GeoPosition& pos, const std::string& i,
const std::string& val) :
position(pos), id(i), value(val) {
}
};
//
// Stack your DB with GeoDB to be able to get geo-spatial support
//
class GeoDB : public StackableDB {
public:
// GeoDBOptions have to be the same as the ones used in a previous
// incarnation of the DB
//
// GeoDB owns the pointer `DB* db` now. You should not delete it or
// use it after the invocation of GeoDB
// GeoDB(DB* db, const GeoDBOptions& options) : StackableDB(db) {}
GeoDB(DB* db, const GeoDBOptions& options) : StackableDB(db) {}
virtual ~GeoDB() {}
// Insert a new object into the location database. The object is
// uniquely identified by the id. If an object with the same id already
// exists in the db, then the old one is overwritten by the new
// object being inserted here.
virtual Status Insert(const GeoObject& object) = 0;
// Retrieve the value of the object located at the specified GPS
// location and identified by the 'id'.
virtual Status GetByPosition(const GeoPosition& pos,
const Slice& id, std::string* value) = 0;
// Retrieve the value of the object identified by the 'id'. This method
// could be potentially slower than GetByPosition
virtual Status GetById(const Slice& id, GeoObject* object) = 0;
// Delete the specified object
virtual Status Remove(const Slice& id) = 0;
// Returns a list of all items within a circular radius from the
// specified gps location. If 'number_of_values' is specified,
// then this call returns at most that many objects.
// The radius is specified in 'meters'.
virtual Status SearchRadial(const GeoPosition& pos,
double radius,
std::vector<GeoObject>* values,
int number_of_values = INT_MAX) = 0;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
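
A minimal sketch (editorial illustration, not part of the header) of the insert/search calls, assuming `geo_db` points at a concrete GeoDB implementation obtained elsewhere; the coordinates, id, value and radius are illustrative.

#include <vector>
#include "rocksdb/utilities/geo_db.h"

// Illustrative helper: store one object and search around a position.
rocksdb::Status InsertAndSearch(rocksdb::GeoDB* geo_db) {
  rocksdb::GeoObject object(rocksdb::GeoPosition(37.7749, -122.4194),
                            "object-1", "opaque value blob");
  rocksdb::Status s = geo_db->Insert(object);
  if (!s.ok()) {
    return s;
  }
  std::vector<rocksdb::GeoObject> results;
  // Everything within 5000 meters of the query position, at most 10 objects.
  return geo_db->SearchRadial(rocksdb::GeoPosition(37.77, -122.42), 5000,
                              &results, 10);
}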

View File

@@ -0,0 +1,174 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <map>
#include <unordered_map>
#include <vector>
#include "rocksdb/slice.h"
// We use JSONDocument for DocumentDB API
// Implementation inspired by folly::dynamic and rapidjson
namespace rocksdb {
// NOTE: none of this is thread-safe
class JSONDocument {
public:
// return nullptr on parse failure
static JSONDocument* ParseJSON(const char* json);
enum Type {
kNull,
kArray,
kBool,
kDouble,
kInt64,
kObject,
kString,
};
JSONDocument(); // null
/* implicit */ JSONDocument(bool b);
/* implicit */ JSONDocument(double d);
/* implicit */ JSONDocument(int64_t i);
/* implicit */ JSONDocument(const std::string& s);
/* implicit */ JSONDocument(const char* s);
// constructs JSONDocument of specific type with default value
explicit JSONDocument(Type type);
// copy constructor
JSONDocument(const JSONDocument& json_document);
~JSONDocument();
Type type() const;
// REQUIRES: IsObject()
bool Contains(const std::string& key) const;
// Returns nullptr if !Contains()
// don't delete the returned pointer
// REQUIRES: IsObject()
const JSONDocument* Get(const std::string& key) const;
// REQUIRES: IsObject()
JSONDocument& operator[](const std::string& key);
// REQUIRES: IsObject()
const JSONDocument& operator[](const std::string& key) const;
// returns `this`, so you can chain operations.
// Copies value
// REQUIRES: IsObject()
JSONDocument* Set(const std::string& key, const JSONDocument& value);
// REQUIRES: IsArray() == true || IsObject() == true
size_t Count() const;
// REQUIRES: IsArray()
const JSONDocument* GetFromArray(size_t i) const;
// REQUIRES: IsArray()
JSONDocument& operator[](size_t i);
// REQUIRES: IsArray()
const JSONDocument& operator[](size_t i) const;
// returns `this`, so you can chain operations.
// Copies the value
// REQUIRES: IsArray() && i < Count()
JSONDocument* SetInArray(size_t i, const JSONDocument& value);
// REQUIRES: IsArray()
JSONDocument* PushBack(const JSONDocument& value);
bool IsNull() const;
bool IsArray() const;
bool IsBool() const;
bool IsDouble() const;
bool IsInt64() const;
bool IsObject() const;
bool IsString() const;
// REQUIRES: IsBool() == true
bool GetBool() const;
// REQUIRES: IsDouble() == true
double GetDouble() const;
// REQUIRES: IsInt64() == true
int64_t GetInt64() const;
// REQUIRES: IsString() == true
const std::string& GetString() const;
bool operator==(const JSONDocument& rhs) const;
std::string DebugString() const;
private:
class ItemsIteratorGenerator;
public:
// REQUIRES: IsObject()
ItemsIteratorGenerator Items() const;
// appends serialized object to dst
void Serialize(std::string* dst) const;
// returns nullptr if Slice doesn't represent valid serialized JSONDocument
static JSONDocument* Deserialize(const Slice& src);
private:
void SerializeInternal(std::string* dst, bool type_prefix) const;
// returns false if Slice doesn't represent valid serialized JSONDocument.
// Otherwise, true
bool DeserializeInternal(Slice* input);
typedef std::vector<JSONDocument*> Array;
typedef std::unordered_map<std::string, JSONDocument*> Object;
// iteration on objects
class const_item_iterator {
public:
typedef Object::const_iterator It;
typedef Object::value_type value_type;
/* implicit */ const_item_iterator(It it) : it_(it) {}
It& operator++() { return ++it_; }
bool operator!=(const const_item_iterator& other) {
return it_ != other.it_;
}
value_type operator*() { return *it_; }
private:
It it_;
};
class ItemsIteratorGenerator {
public:
/* implicit */ ItemsIteratorGenerator(const Object& object)
: object_(object) {}
const_item_iterator begin() { return object_.begin(); }
const_item_iterator end() { return object_.end(); }
private:
const Object& object_;
};
union Data {
Data() : n(nullptr) {}
~Data() {}
void* n;
Array a;
bool b;
double d;
int64_t i;
std::string s;
Object o;
} data_;
const Type type_;
// Our serialization format's first byte specifies the encoding version. That
// way, we can easily change our format while providing backwards
// compatibility. This constant specifies the current version of the
// serialization format
static const char kSerializationFormatVersion;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
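
A minimal sketch (editorial illustration, not part of the header) of parsing, field access and the binary round trip described above; the JSON payload is illustrative.

#include <stdint.h>
#include <memory>
#include <string>
#include "rocksdb/utilities/json_document.h"

// Illustrative helper: parse a document, read a field, round-trip serialize it.
void JsonDocumentRoundTrip() {
  std::unique_ptr<rocksdb::JSONDocument> doc(rocksdb::JSONDocument::ParseJSON(
      "{\"name\": \"John\", \"age\": 20}"));
  if (doc == nullptr) {
    return;  // parse failure
  }
  if (doc->Contains("age") && doc->Get("age")->IsInt64()) {
    int64_t age = doc->Get("age")->GetInt64();
    (void)age;
  }
  std::string serialized;
  doc->Serialize(&serialized);
  // Deserialize returns nullptr if the input is not a valid serialized doc.
  std::unique_ptr<rocksdb::JSONDocument> copy(
      rocksdb::JSONDocument::Deserialize(rocksdb::Slice(serialized)));
}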

View File

@@ -0,0 +1,236 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <unordered_map>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/utilities/stackable_db.h"
namespace rocksdb {
namespace spatial {
// NOTE: SpatialDB is experimental and we might change its API without warning.
// Please talk to us before developing against SpatialDB API.
//
// SpatialDB provides support for spatial indexes built on top of RocksDB.
// When creating a new SpatialDB, clients specify a list of spatial indexes to
// build on their data. Each spatial index is defined by the area and
// granularity. If you're storing map data, different spatial index
// granularities can be used for different zoom levels.
//
// Each element inserted into SpatialDB has:
// * a bounding box, which determines how the element will be indexed
// * a string blob, which will usually be the WKB representation of the polygon
// (http://en.wikipedia.org/wiki/Well-known_text)
// * feature set, which is a map of key-value pairs, where value can be null,
// int, double, bool, string
// * a list of indexes to insert the element in
//
// Each query is executed on a single spatial index. Query guarantees that it
// will return all elements intersecting the specified bounding box, but it
// might also return some extra non-intersecting elements.
// Variant is a class that can be many things: null, bool, int, double or string
// It is used to store different value types in FeatureSet (see below)
struct Variant {
// Don't change the values here, they are persisted on disk
enum Type {
kNull = 0x0,
kBool = 0x1,
kInt = 0x2,
kDouble = 0x3,
kString = 0x4,
};
Variant() : type_(kNull) {}
/* implicit */ Variant(bool b) : type_(kBool) { data_.b = b; }
/* implicit */ Variant(uint64_t i) : type_(kInt) { data_.i = i; }
/* implicit */ Variant(double d) : type_(kDouble) { data_.d = d; }
/* implicit */ Variant(const std::string& s) : type_(kString) {
new (&data_.s) std::string(s);
}
Variant(const Variant& v);
~Variant() {
if (type_ == kString) {
using std::string;
(&data_.s)->~string();
}
}
Type type() const { return type_; }
bool get_bool() const { return data_.b; }
uint64_t get_int() const { return data_.i; }
double get_double() const { return data_.d; }
const std::string& get_string() const { return data_.s; }
bool operator==(const Variant& other);
bool operator!=(const Variant& other);
private:
Type type_;
union Data {
Data() {}
~Data() {}
bool b;
uint64_t i;
double d;
std::string s;
} data_;
};
// FeatureSet is a map of key-value pairs. One feature set is associated with
// each element in SpatialDB. It can be used to add rich data about the element.
class FeatureSet {
private:
typedef std::unordered_map<std::string, Variant> map;
public:
class iterator {
public:
/* implicit */ iterator(const map::const_iterator itr) : itr_(itr) {}
iterator& operator++() {
++itr_;
return *this;
}
bool operator!=(const iterator& other) { return itr_ != other.itr_; }
bool operator==(const iterator& other) { return itr_ == other.itr_; }
map::value_type operator*() { return *itr_; }
private:
map::const_iterator itr_;
};
FeatureSet() = default;
FeatureSet* Set(const std::string& key, const Variant& value);
bool Contains(const std::string& key) const;
// REQUIRES: Contains(key)
const Variant& Get(const std::string& key) const;
iterator Find(const std::string& key) const;
iterator begin() const { return map_.begin(); }
iterator end() const { return map_.end(); }
void Clear();
size_t Size() const { return map_.size(); }
void Serialize(std::string* output) const;
// REQUIRED: empty FeatureSet
bool Deserialize(const Slice& input);
std::string DebugString() const;
private:
map map_;
};
// BoundingBox is a helper structure for defining rectangles representing
// bounding boxes of spatial elements.
template <typename T>
struct BoundingBox {
T min_x, min_y, max_x, max_y;
BoundingBox() = default;
BoundingBox(T _min_x, T _min_y, T _max_x, T _max_y)
: min_x(_min_x), min_y(_min_y), max_x(_max_x), max_y(_max_y) {}
bool Intersects(const BoundingBox<T>& a) const {
return !(min_x > a.max_x || min_y > a.max_y || a.min_x > max_x ||
a.min_y > max_y);
}
};
struct SpatialDBOptions {
uint64_t cache_size = 1 * 1024 * 1024 * 1024LL; // 1GB
int num_threads = 16;
bool bulk_load = true;
};
// Cursor is used to return data from the query to the client. To get all the
// data from the query, just call Next() while Valid() is true
class Cursor {
public:
Cursor() = default;
virtual ~Cursor() {}
virtual bool Valid() const = 0;
// REQUIRES: Valid()
virtual void Next() = 0;
// The returned slice is valid until the next call to Next()
// REQUIRES: Valid()
virtual const Slice blob() = 0;
// The returned feature set is valid until the next call to Next()
// REQUIRES: Valid()
virtual const FeatureSet& feature_set() = 0;
virtual Status status() const = 0;
private:
// No copying allowed
Cursor(const Cursor&);
void operator=(const Cursor&);
};
// SpatialIndexOptions defines a spatial index that will be built on the data
struct SpatialIndexOptions {
// Spatial indexes are referenced by names
std::string name;
// An area that is indexed. If the element is not intersecting with spatial
// index's bbox, it will not be inserted into the index
BoundingBox<double> bbox;
// tile_bits controls the granularity of the spatial index. Each dimension of
// the bbox will be split into (1 << tile_bits) tiles, so there will be a
// total of (1 << tile_bits)^2 tiles. It is recommended to configure the tile
// size to be approximately the size of a typical query on that spatial index
uint32_t tile_bits;
SpatialIndexOptions() {}
SpatialIndexOptions(const std::string& _name,
const BoundingBox<double>& _bbox, uint32_t _tile_bits)
: name(_name), bbox(_bbox), tile_bits(_tile_bits) {}
};
class SpatialDB : public StackableDB {
public:
// Creates the SpatialDB with specified list of indexes.
// REQUIRED: db doesn't exist
static Status Create(const SpatialDBOptions& options, const std::string& name,
const std::vector<SpatialIndexOptions>& spatial_indexes);
// Open the existing SpatialDB. The resulting db object will be returned
// through db parameter.
// REQUIRED: db was created using SpatialDB::Create
static Status Open(const SpatialDBOptions& options, const std::string& name,
SpatialDB** db, bool read_only = false);
explicit SpatialDB(DB* db) : StackableDB(db) {}
// Insert the element into the DB. Element will be inserted into specified
// spatial_indexes, based on specified bbox.
// REQUIRES: spatial_indexes.size() > 0
virtual Status Insert(const WriteOptions& write_options,
const BoundingBox<double>& bbox, const Slice& blob,
const FeatureSet& feature_set,
const std::vector<std::string>& spatial_indexes) = 0;
// Calling Compact() after inserting a bunch of elements should speed up
// reading. This is especially useful if you use SpatialDBOptions::bulk_load
virtual Status Compact() = 0;
// Query the specified spatial_index. Query will return all elements that
// intersect bbox, but it may also return some extra elements.
virtual Cursor* Query(const ReadOptions& read_options,
const BoundingBox<double>& bbox,
const std::string& spatial_index) = 0;
};
} // namespace spatial
} // namespace rocksdb
#endif // ROCKSDB_LITE
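
A minimal sketch (editorial illustration, not part of the header) of Create/Open/Insert under an illustrative path, with one index named "main" over a 100x100 bounding box split into 32x32 tiles; the blob contents and feature values are placeholders.

#include <string>
#include <vector>
#include "rocksdb/utilities/spatial_db.h"

// Illustrative helper: create a spatial DB with one index and insert a blob.
rocksdb::Status CreateAndInsert() {
  using namespace rocksdb::spatial;
  SpatialDBOptions options;
  std::vector<SpatialIndexOptions> indexes = {
      SpatialIndexOptions("main", BoundingBox<double>(0, 0, 100, 100), 5)};
  rocksdb::Status s =
      SpatialDB::Create(options, "/tmp/rocksdb_spatial_example", indexes);
  if (!s.ok()) {
    return s;
  }
  SpatialDB* db = nullptr;
  s = SpatialDB::Open(options, "/tmp/rocksdb_spatial_example", &db);
  if (!s.ok()) {
    return s;
  }
  FeatureSet features;
  features.Set("name", std::string("point of interest"));
  s = db->Insert(rocksdb::WriteOptions(), BoundingBox<double>(10, 10, 11, 11),
                 "wkb-or-other-opaque-blob", features, {"main"});
  delete db;
  return s;
}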

View File

@@ -0,0 +1,226 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/db.h"
namespace rocksdb {
// This class contains APIs to stack RocksDB wrappers, e.g. stack TTL over a base DB
class StackableDB : public DB {
public:
// StackableDB is the owner of db now!
explicit StackableDB(DB* db) : db_(db) {}
~StackableDB() {
delete db_;
}
virtual DB* GetBaseDB() {
return db_;
}
virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
const std::string& column_family_name,
ColumnFamilyHandle** handle) {
return db_->CreateColumnFamily(options, column_family_name, handle);
}
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) {
return db_->DropColumnFamily(column_family);
}
using DB::Put;
virtual Status Put(const WriteOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
const Slice& val) override {
return db_->Put(options, column_family, key, val);
}
using DB::Get;
virtual Status Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value) override {
return db_->Get(options, column_family, key, value);
}
using DB::MultiGet;
virtual std::vector<Status> MultiGet(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_family,
const std::vector<Slice>& keys,
std::vector<std::string>* values) override {
return db_->MultiGet(options, column_family, keys, values);
}
using DB::KeyMayExist;
virtual bool KeyMayExist(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
std::string* value,
bool* value_found = nullptr) override {
return db_->KeyMayExist(options, column_family, key, value, value_found);
}
using DB::Delete;
virtual Status Delete(const WriteOptions& wopts,
ColumnFamilyHandle* column_family,
const Slice& key) override {
return db_->Delete(wopts, column_family, key);
}
using DB::Merge;
virtual Status Merge(const WriteOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value) override {
return db_->Merge(options, column_family, key, value);
}
virtual Status Write(const WriteOptions& opts, WriteBatch* updates)
override {
return db_->Write(opts, updates);
}
using DB::NewIterator;
virtual Iterator* NewIterator(const ReadOptions& opts,
ColumnFamilyHandle* column_family) override {
return db_->NewIterator(opts, column_family);
}
virtual Status NewIterators(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_families,
std::vector<Iterator*>* iterators) {
return db_->NewIterators(options, column_families, iterators);
}
virtual const Snapshot* GetSnapshot() override {
return db_->GetSnapshot();
}
virtual void ReleaseSnapshot(const Snapshot* snapshot) override {
return db_->ReleaseSnapshot(snapshot);
}
using DB::GetProperty;
virtual bool GetProperty(ColumnFamilyHandle* column_family,
const Slice& property, std::string* value) override {
return db_->GetProperty(column_family, property, value);
}
using DB::GetIntProperty;
virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
const Slice& property, uint64_t* value) override {
return db_->GetIntProperty(column_family, property, value);
}
using DB::GetApproximateSizes;
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
const Range* r, int n,
uint64_t* sizes) override {
return db_->GetApproximateSizes(column_family, r, n, sizes);
}
using DB::CompactRange;
virtual Status CompactRange(ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end,
bool reduce_level = false, int target_level = -1,
uint32_t target_path_id = 0) override {
return db_->CompactRange(column_family, begin, end, reduce_level,
target_level, target_path_id);
}
using DB::NumberLevels;
virtual int NumberLevels(ColumnFamilyHandle* column_family) override {
return db_->NumberLevels(column_family);
}
using DB::MaxMemCompactionLevel;
virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family)
override {
return db_->MaxMemCompactionLevel(column_family);
}
using DB::Level0StopWriteTrigger;
virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family)
override {
return db_->Level0StopWriteTrigger(column_family);
}
virtual const std::string& GetName() const override {
return db_->GetName();
}
virtual Env* GetEnv() const override {
return db_->GetEnv();
}
using DB::GetOptions;
virtual const Options& GetOptions(ColumnFamilyHandle* column_family) const
override {
return db_->GetOptions(column_family);
}
using DB::Flush;
virtual Status Flush(const FlushOptions& fopts,
ColumnFamilyHandle* column_family) override {
return db_->Flush(fopts, column_family);
}
virtual Status DisableFileDeletions() override {
return db_->DisableFileDeletions();
}
virtual Status EnableFileDeletions(bool force) override {
return db_->EnableFileDeletions(force);
}
virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData>* metadata) override {
db_->GetLiveFilesMetaData(metadata);
}
virtual Status GetLiveFiles(std::vector<std::string>& vec, uint64_t* mfs,
bool flush_memtable = true) override {
return db_->GetLiveFiles(vec, mfs, flush_memtable);
}
virtual SequenceNumber GetLatestSequenceNumber() const override {
return db_->GetLatestSequenceNumber();
}
virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
return db_->GetSortedWalFiles(files);
}
virtual Status DeleteFile(std::string name) override {
return db_->DeleteFile(name);
}
virtual Status GetDbIdentity(std::string& identity) {
return db_->GetDbIdentity(identity);
}
using DB::GetPropertiesOfAllTables;
virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
TablePropertiesCollection* props) {
return db_->GetPropertiesOfAllTables(column_family, props);
}
virtual Status GetUpdatesSince(
SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
const TransactionLogIterator::ReadOptions& read_options) override {
return db_->GetUpdatesSince(seq_number, iter, read_options);
}
virtual ColumnFamilyHandle* DefaultColumnFamily() const override {
return db_->DefaultColumnFamily();
}
protected:
DB* db_;
};
} // namespace rocksdb
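
A minimal sketch (editorial illustration, not part of the header) of stacking a wrapper on top of a DB: CountingDB is a hypothetical subclass that intercepts Put() and lets StackableDB forward every other operation to the wrapped DB.

#include <atomic>
#include <cstdint>
#include "rocksdb/utilities/stackable_db.h"

// Hypothetical wrapper: counts Put() calls, forwards everything else unchanged.
class CountingDB : public rocksdb::StackableDB {
 public:
  // CountingDB (via StackableDB) takes ownership of `db`.
  explicit CountingDB(rocksdb::DB* db) : rocksdb::StackableDB(db) {}
  using rocksdb::StackableDB::Put;
  virtual rocksdb::Status Put(const rocksdb::WriteOptions& options,
                              rocksdb::ColumnFamilyHandle* column_family,
                              const rocksdb::Slice& key,
                              const rocksdb::Slice& val) override {
    puts_.fetch_add(1, std::memory_order_relaxed);
    return rocksdb::StackableDB::Put(options, column_family, key, val);
  }
  uint64_t put_count() const { return puts_.load(std::memory_order_relaxed); }
 private:
  std::atomic<uint64_t> puts_{0};
};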

View File

@@ -0,0 +1,30 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#ifndef ROCKSDB_LITE
#include <vector>
#include <string>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/utilities/db_ttl.h"
#include "rocksdb/db.h"
namespace rocksdb {
// Please don't use this class. It's deprecated
class UtilityDB {
public:
// This function is here only for backwards compatibility. Please use the
// functions defined in DBWithTTL (rocksdb/utilities/db_ttl.h)
// (deprecated)
__attribute__((deprecated)) static Status OpenTtlDB(const Options& options,
const std::string& name,
StackableDB** dbptr,
int32_t ttl = 0,
bool read_only = false);
};
} // namespace rocksdb
#endif // ROCKSDB_LITE

17
include/rocksdb/version.h Normal file
View File

@@ -0,0 +1,17 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
// Also update Makefile if you change these
#define ROCKSDB_MAJOR 3
#define ROCKSDB_MINOR 4
#define ROCKSDB_PATCH 0
// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these
// at some point
#define __ROCKSDB_MAJOR__ ROCKSDB_MAJOR
#define __ROCKSDB_MINOR__ ROCKSDB_MINOR
#define __ROCKSDB_PATCH__ ROCKSDB_PATCH

View File

@@ -0,0 +1,162 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// WriteBatch holds a collection of updates to apply atomically to a DB.
//
// The updates are applied in the order in which they are added
// to the WriteBatch. For example, the value of "key" will be "v3"
// after the following batch is written:
//
// batch.Put("key", "v1");
// batch.Delete("key");
// batch.Put("key", "v2");
// batch.Put("key", "v3");
//
// Multiple threads can invoke const methods on a WriteBatch without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same WriteBatch must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
#define STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
#include <string>
#include "rocksdb/status.h"
namespace rocksdb {
class Slice;
class ColumnFamilyHandle;
struct SliceParts;
class WriteBatch {
public:
explicit WriteBatch(size_t reserved_bytes = 0);
~WriteBatch();
// Store the mapping "key->value" in the database.
void Put(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value);
void Put(const Slice& key, const Slice& value) {
Put(nullptr, key, value);
}
// Variant of Put() that gathers output like writev(2). The key and value
// that will be written to the database are concatenations of arrays of
// slices.
void Put(ColumnFamilyHandle* column_family, const SliceParts& key,
const SliceParts& value);
void Put(const SliceParts& key, const SliceParts& value) {
Put(nullptr, key, value);
}
// Merge "value" with the existing value of "key" in the database.
// "key->merge(existing, value)"
void Merge(ColumnFamilyHandle* column_family, const Slice& key,
const Slice& value);
void Merge(const Slice& key, const Slice& value) {
Merge(nullptr, key, value);
}
// If the database contains a mapping for "key", erase it. Else do nothing.
void Delete(ColumnFamilyHandle* column_family, const Slice& key);
void Delete(const Slice& key) { Delete(nullptr, key); }
// variant that takes SliceParts
void Delete(ColumnFamilyHandle* column_family, const SliceParts& key);
void Delete(const SliceParts& key) { Delete(nullptr, key); }
// Append a blob of arbitrary size to the records in this batch. The blob will
// be stored in the transaction log but not in any other file. In particular,
// it will not be persisted to the SST files. When iterating over this
// WriteBatch, WriteBatch::Handler::LogData will be called with the contents
// of the blob as it is encountered. Blobs, puts, deletes, and merges will be
// encountered in the same order in which they were inserted. The blob will
// NOT consume sequence number(s) and will NOT increase the count of the batch
//
// Example application: add timestamps to the transaction log for use in
// replication.
void PutLogData(const Slice& blob);
// Clear all updates buffered in this batch.
void Clear();
// Support for iterating over the contents of a batch.
class Handler {
public:
virtual ~Handler();
// The default implementation just calls Put() without a column family, for
// backwards compatibility. If the column family is not the default one,
// it returns Status::InvalidArgument()
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
if (column_family_id == 0) {
// Put() historically doesn't return status. We didn't want to be
// backwards incompatible so we didn't change the return status
// (this is a public API). We just call Put() and return Status::OK()
Put(key, value);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and PutCF not implemented");
}
virtual void Put(const Slice& key, const Slice& value);
// Merge and LogData are not pure virtual. Otherwise, we would break
// existing clients of Handler on a source code level. The default
// implementation of Merge simply throws a runtime exception.
virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
const Slice& value) {
if (column_family_id == 0) {
Merge(key, value);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and MergeCF not implemented");
}
virtual void Merge(const Slice& key, const Slice& value);
// The default implementation of LogData does nothing.
virtual void LogData(const Slice& blob);
virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
if (column_family_id == 0) {
Delete(key);
return Status::OK();
}
return Status::InvalidArgument(
"non-default column family and DeleteCF not implemented");
}
virtual void Delete(const Slice& key);
// Continue is called by WriteBatch::Iterate. If it returns false,
// iteration is halted. Otherwise, it continues iterating. The default
// implementation always returns true.
virtual bool Continue();
};
Status Iterate(Handler* handler) const;
// Retrieve the serialized version of this batch.
const std::string& Data() const { return rep_; }
// Retrieve data size of the batch.
size_t GetDataSize() const { return rep_.size(); }
// Returns the number of updates in the batch
int Count() const;
// Constructor with a serialized string object
explicit WriteBatch(std::string rep): rep_(rep) {}
private:
friend class WriteBatchInternal;
std::string rep_; // See comment in write_batch.cc for the format of rep_
// Intentionally copyable
};
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_
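
A minimal sketch (editorial illustration, not part of the header) of the Handler-based iteration described above; KeyCollector is a hypothetical handler that records which keys a batch touches.

#include <string>
#include <vector>
#include "rocksdb/slice.h"
#include "rocksdb/write_batch.h"

// Hypothetical handler: collects the keys touched by Put and Delete records.
class KeyCollector : public rocksdb::WriteBatch::Handler {
 public:
  virtual void Put(const rocksdb::Slice& key, const rocksdb::Slice& value) {
    keys.push_back(key.ToString());
  }
  virtual void Delete(const rocksdb::Slice& key) {
    keys.push_back(key.ToString());
  }
  std::vector<std::string> keys;
};

// Illustrative usage: after batch.Put("a", "1") and batch.Delete("b"),
// CollectKeys(batch) returns {"a", "b"}.
std::vector<std::string> CollectKeys(const rocksdb::WriteBatch& batch) {
  KeyCollector collector;
  rocksdb::Status s = batch.Iterate(&collector);
  (void)s;  // status ignored in this sketch
  return collector.keys;
}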

View File

@@ -0,0 +1,12 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#warning This file was moved to rocksdb/utilities/backupable_db.h
#include "rocksdb/utilities/backupable_db.h"

View File

@@ -0,0 +1,8 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#warning This file was moved to rocksdb/utilities/db_ttl.h
#include "rocksdb/utilities/db_ttl.h"

View File

@@ -0,0 +1,8 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#warning This file was moved to rocksdb/utilities/document_db.h
#include "rocksdb/utilities/document_db.h"

View File

@@ -0,0 +1,8 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#warning This file was moved to rocksdb/utilities/geo_db.h
#include "rocksdb/utilities/geo_db.h"

View File

@@ -0,0 +1,7 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#warning This file was moved to rocksdb/utilities/json_document.h
#include "rocksdb/utilities/json_document.h"

View File

@@ -0,0 +1,7 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#warning This file was moved to rocksdb/utilities/stackable_db.h
#include "rocksdb/utilities/stackable_db.h"

View File

@@ -0,0 +1,7 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#warning This file was moved to rocksdb/utilities/utility_db.h
#include "rocksdb/utilities/utility_db.h"