Squashed 'src/rocksdb/' content from commit 224932d

git-subtree-dir: src/rocksdb git-subtree-split: 224932d4d0b561712107d747c662df181c39644d
2025-12-06 17:27:52 +00:00 · 2014-08-08 11:57:41 -07:00
commit f86d9fd626
435 changed files with 123706 additions and 0 deletions
--- a/utilities/document/document_db.cc
+++ b/utilities/document/document_db.cc
--- a/utilities/document/document_db_test.cc
+++ b/utilities/document/document_db_test.cc
@@ -0,0 +1,263 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+
+#include <algorithm>
+
+#include "rocksdb/utilities/json_document.h"
+#include "rocksdb/utilities/document_db.h"
+
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+namespace rocksdb {
+
+// Fixture shared by the DocumentDB tests. It computes a scratch database path
+// under the test tmp dir, destroys whatever is stored at that path on
+// construction and destruction, and deletes the handle held in db_.
+class DocumentDBTest {
+ public:
+  // db_ starts out as nullptr so that the destructor's delete is well defined
+  // even if a test never assigns a database handle to it.
+  DocumentDBTest() : db_(nullptr) {
+    dbname_ = test::TmpDir() + "/document_db_test";
+    DestroyDB(dbname_, Options());
+  }
+  ~DocumentDBTest() {
+    delete db_;
+    DestroyDB(dbname_, Options());
+  }
+
+  // Walks the cursor while it is valid, collecting the "_id" of every
+  // document, and asserts that the collected ids equal `expected`. Both lists
+  // are sorted before the comparison, so the order in which the cursor yields
+  // documents does not matter.
+  void AssertCursorIDs(Cursor* cursor, std::vector<int64_t> expected) {
+    std::vector<int64_t> got;
+    while (cursor->Valid()) {
+      ASSERT_TRUE(cursor->document().Contains("_id"));
+      got.push_back(cursor->document()["_id"].GetInt64());
+      cursor->Next();
+    }
+    std::sort(expected.begin(), expected.end());
+    std::sort(got.begin(), got.end());
+    ASSERT_TRUE(got == expected);
+  }
+
+  // converts ' to ", so that we don't have to escape " all over the place
+  std::string ConvertQuotes(const std::string& input) {
+    std::string output(input);
+    std::replace(output.begin(), output.end(), '\'', '\"');
+    return output;
+  }
+
+  // Creates every index in `indexes` on db_, asserting that each call
+  // succeeds.
+  void CreateIndexes(std::vector<DocumentDB::IndexDescriptor> indexes) {
+    for (const auto& index : indexes) {
+      ASSERT_OK(db_->CreateIndex(WriteOptions(), index));
+    }
+  }
+
+  // Parses single-quoted pseudo-JSON (see ConvertQuotes). The returned pointer
+  // is whatever JSONDocument::ParseJSON() returns; the tests delete it (or
+  // wrap it in a std::unique_ptr) themselves.
+  JSONDocument* Parse(const std::string& doc) {
+    return JSONDocument::ParseJSON(ConvertQuotes(doc).c_str());
+  }
+
+  std::string dbname_;
+  DocumentDB* db_;
+};
+
+// Exercises a single secondary index on "name": creating it, reopening the
+// database with the index declared, inserting, rejecting a duplicate primary
+// key, querying with and without the index, and removing by filter.
+TEST(DocumentDBTest, SimpleQueryTest) {
+  DocumentDBOptions options;
+  DocumentDB::IndexDescriptor index;
+  index.description = Parse("{'name': 1}");
+  ASSERT_TRUE(index.description != nullptr);
+  index.name = "name_index";
+
+  ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
+  CreateIndexes({index});
+  delete db_;
+  // don't leave a dangling handle behind for the fixture destructor
+  db_ = nullptr;
+  // now there is index present
+  ASSERT_OK(DocumentDB::Open(options, dbname_, {index}, &db_));
+  delete index.description;
+
+  std::vector<std::string> json_objects = {
+      "{'_id': 1, 'name': 'One'}",   "{'_id': 2, 'name': 'Two'}",
+      "{'_id': 3, 'name': 'Three'}", "{'_id': 4, 'name': 'Four'}"};
+
+  for (auto& json : json_objects) {
+    std::unique_ptr<JSONDocument> document(Parse(json));
+    ASSERT_TRUE(document.get() != nullptr);
+    ASSERT_OK(db_->Insert(WriteOptions(), *document));
+  }
+
+  // inserting a document with existing primary key should return failure
+  {
+    std::unique_ptr<JSONDocument> document(Parse(json_objects[0]));
+    ASSERT_TRUE(document.get() != nullptr);
+    Status s = db_->Insert(WriteOptions(), *document);
+    ASSERT_TRUE(s.IsInvalidArgument());
+  }
+
+  // find equal to "Two"
+  {
+    std::unique_ptr<JSONDocument> query(
+        Parse("[{'$filter': {'name': 'Two', '$index': 'name_index'}}]"));
+    ASSERT_TRUE(query.get() != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {2});
+  }
+
+  // find less than "Three"
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'name': {'$lt': 'Three'}, '$index': "
+        "'name_index'}}]"));
+    ASSERT_TRUE(query.get() != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+
+    AssertCursorIDs(cursor.get(), {1, 4});
+  }
+
+  // find less than "Three" without index
+  {
+    std::unique_ptr<JSONDocument> query(
+        Parse("[{'$filter': {'name': {'$lt': 'Three'} }}]"));
+    ASSERT_TRUE(query.get() != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {1, 4});
+  }
+
+  // remove less or equal to "Three"
+  {
+    std::unique_ptr<JSONDocument> query(
+        Parse("{'name': {'$lte': 'Three'}, '$index': 'name_index'}"));
+    ASSERT_TRUE(query.get() != nullptr);
+    ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query));
+  }
+
+  // find all -- only "Two" left, everything else should be deleted
+  {
+    std::unique_ptr<JSONDocument> query(Parse("[]"));
+    ASSERT_TRUE(query.get() != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {2});
+  }
+}
+
+// Exercises three secondary indexes (priority, progress, job_name) with
+// compound range filters, removal by filter, $set updates and DropIndex.
+TEST(DocumentDBTest, ComplexQueryTest) {
+  DocumentDBOptions options;
+  DocumentDB::IndexDescriptor priority_index;
+  priority_index.description = Parse("{'priority': 1}");
+  ASSERT_TRUE(priority_index.description != nullptr);
+  priority_index.name = "priority";
+  DocumentDB::IndexDescriptor job_name_index;
+  job_name_index.description = Parse("{'job_name': 1}");
+  ASSERT_TRUE(job_name_index.description != nullptr);
+  job_name_index.name = "job_name";
+  DocumentDB::IndexDescriptor progress_index;
+  progress_index.description = Parse("{'progress': 1}");
+  ASSERT_TRUE(progress_index.description != nullptr);
+  progress_index.name = "progress";
+
+  ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
+  CreateIndexes({priority_index, progress_index});
+  delete priority_index.description;
+  delete progress_index.description;
+
+  std::vector<std::string> json_objects = {
+      "{'_id': 1, 'job_name': 'play', 'priority': 10, 'progress': 14.2}",
+      "{'_id': 2, 'job_name': 'white', 'priority': 2, 'progress': 45.1}",
+      "{'_id': 3, 'job_name': 'straw', 'priority': 5, 'progress': 83.2}",
+      "{'_id': 4, 'job_name': 'temporary', 'priority': 3, 'progress': 14.9}",
+      "{'_id': 5, 'job_name': 'white', 'priority': 4, 'progress': 44.2}",
+      "{'_id': 6, 'job_name': 'tea', 'priority': 1, 'progress': 12.4}",
+      "{'_id': 7, 'job_name': 'delete', 'priority': 2, 'progress': 77.54}",
+      "{'_id': 8, 'job_name': 'rock', 'priority': 3, 'progress': 93.24}",
+      "{'_id': 9, 'job_name': 'steady', 'priority': 3, 'progress': 9.1}",
+      "{'_id': 10, 'job_name': 'white', 'priority': 1, 'progress': 61.4}",
+      "{'_id': 11, 'job_name': 'who', 'priority': 4, 'progress': 39.41}", };
+
+  // add index on the fly!
+  CreateIndexes({job_name_index});
+  delete job_name_index.description;
+
+  for (auto& json : json_objects) {
+    std::unique_ptr<JSONDocument> document(Parse(json));
+    ASSERT_TRUE(document != nullptr);
+    ASSERT_OK(db_->Insert(WriteOptions(), *document));
+  }
+
+  // 2 < priority < 4 AND progress > 10.0, index priority
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
+        "10.0}, '$index': 'priority'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {4, 8});
+  }
+
+  // 2 < priority < 4 AND progress > 10.0, index progress
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
+        "10.0}, '$index': 'progress'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {4, 8});
+  }
+
+  // job_name == 'white' AND priority >= 2, index job_name
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'job_name': 'white', 'priority': {'$gte': "
+        "2}, '$index': 'job_name'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {2, 5});
+  }
+
+  // 35.0 <= progress < 65.5, index progress
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'progress': {'$gt': 5.0, '$gte': 35.0, '$lt': 65.5}, "
+        "'$index': 'progress'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {2, 5, 10, 11});
+  }
+
+  // 2 < priority <= 4, index priority
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'priority': {'$gt': 2, '$lt': 8, '$lte': 4}, "
+        "'$index': 'priority'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {4, 5, 8, 9, 11});
+  }
+
+  // Delete all whose progress is bigger than 50%
+  {
+    std::unique_ptr<JSONDocument> query(
+        Parse("{'progress': {'$gt': 50.0}, '$index': 'progress'}"));
+    ASSERT_TRUE(query != nullptr);
+    ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query));
+  }
+
+  // 2 < priority < 6, index priority
+  {
+    std::unique_ptr<JSONDocument> query(Parse(
+        "[{'$filter': {'priority': {'$gt': 2, '$lt': 6}, "
+        "'$index': 'priority'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    AssertCursorIDs(cursor.get(), {4, 5, 9, 11});
+  }
+
+  // update set priority to 10 where job_name is 'white'
+  {
+    std::unique_ptr<JSONDocument> query(Parse("{'job_name': 'white'}"));
+    std::unique_ptr<JSONDocument> update(Parse("{'$set': {'priority': 10}}"));
+    ASSERT_TRUE(query != nullptr);
+    ASSERT_TRUE(update != nullptr);
+    ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update));
+  }
+
+  // 4 < priority
+  {
+    std::unique_ptr<JSONDocument> query(
+        Parse("[{'$filter': {'priority': {'$gt': 4}, '$index': 'priority'}}]"));
+    ASSERT_TRUE(query != nullptr);
+    std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
+    ASSERT_OK(cursor->status());
+    AssertCursorIDs(cursor.get(), {1, 2, 5});
+  }
+
+  // dropping an unknown index must fail, dropping an existing one must work
+  Status s = db_->DropIndex("doesnt-exist");
+  ASSERT_TRUE(!s.ok());
+  ASSERT_OK(db_->DropIndex("priority"));
+}
+
+}  //  namespace rocksdb
+
+// Entry point of the test binary; the command-line arguments are not used.
+int main(int /*argc*/, char** /*argv*/) {
+  return rocksdb::test::RunAllTests();
+}
--- a/utilities/document/json_document.cc
+++ b/utilities/document/json_document.cc
@@ -0,0 +1,617 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+#ifndef ROCKSDB_LITE
+
+#include "rocksdb/utilities/json_document.h"
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <cassert>
+#include <string>
+#include <map>
+#include <vector>
+
+#include "third-party/rapidjson/reader.h"
+#include "util/coding.h"
+
+namespace rocksdb {
+
+// Value constructors: each one records the matching type tag in type_ and
+// stores the value in the corresponding member of data_.
+
+// null document, carries no payload
+JSONDocument::JSONDocument() : type_(kNull) {}
+
+JSONDocument::JSONDocument(bool b) : type_(kBool) {
+  data_.b = b;
+}
+
+JSONDocument::JSONDocument(double d) : type_(kDouble) {
+  data_.d = d;
+}
+
+JSONDocument::JSONDocument(int64_t i) : type_(kInt64) {
+  data_.i = i;
+}
+
+// the string member is constructed in place inside data_ with placement new
+JSONDocument::JSONDocument(const std::string& s) : type_(kString) {
+  new (&data_.s) std::string(s);
+}
+
+JSONDocument::JSONDocument(const char* s) : type_(kString) {
+  new (&data_.s) std::string(s);
+}
+// Creates an "empty" document of the requested type: false / 0.0 / 0 for the
+// scalar types and an empty string, array or object otherwise. Any value that
+// is not one of the known types trips the assert.
+JSONDocument::JSONDocument(Type type) : type_(type) {
+  // TODO(icanadi) make all of this better by using templates
+  switch (type) {
+    // nothing to set up for null
+    case kNull:
+      break;
+
+    // plain scalars are simply zeroed
+    case kBool:
+      data_.b = false;
+      break;
+    case kInt64:
+      data_.i = 0;
+      break;
+    case kDouble:
+      data_.d = 0.0;
+      break;
+
+    // non-trivial members are constructed in place
+    case kString:
+      new (&data_.s) std::string();
+      break;
+    case kArray:
+      new (&data_.a) Array;
+      break;
+    case kObject:
+      new (&data_.o) Object;
+      break;
+
+    default:
+      assert(false);
+  }
+}
+
+// Deep copy. The delegated-to constructor first builds an empty document of
+// the same type; the switch below then fills it in. Children of arrays and
+// objects are cloned recursively, so the copy shares no child pointers with
+// the source.
+JSONDocument::JSONDocument(const JSONDocument& json_document)
+    : JSONDocument(json_document.type_) {
+  const auto& source = json_document.data_;
+  switch (json_document.type_) {
+    case kNull:
+      break;
+    case kBool:
+      data_.b = source.b;
+      break;
+    case kInt64:
+      data_.i = source.i;
+      break;
+    case kDouble:
+      data_.d = source.d;
+      break;
+    case kString:
+      data_.s = source.s;
+      break;
+    case kArray:
+      data_.a.reserve(source.a.size());
+      for (const auto& element : source.a) {
+        // clone every element
+        data_.a.push_back(new JSONDocument(*element));
+      }
+      break;
+    case kObject:
+      for (const auto& member : source.o) {
+        // clone every value, keys are copied as is
+        data_.o.insert({member.first, new JSONDocument(*member.second)});
+      }
+      break;
+    default:
+      assert(false);
+  }
+}
+
+// Releases whatever the active member of data_ holds. Objects and arrays
+// delete their children first and then run the container destructor by hand;
+// strings run the std::string destructor by hand. The remaining types hold
+// plain values and need no cleanup.
+JSONDocument::~JSONDocument() {
+  switch (type_) {
+    case kObject:
+      // iterate by reference: copying each entry would copy its key string
+      for (const auto& entry : data_.o) {
+        delete entry.second;
+      }
+      (&data_.o)->~Object();
+      break;
+    case kArray:
+      for (JSONDocument* element : data_.a) {
+        delete element;
+      }
+      (&data_.a)->~Array();
+      break;
+    case kString:
+      using std::string;
+      (&data_.s)->~string();
+      break;
+    default:
+      // we're cool, no need for destructors for others
+      break;
+  }
+}
+
+// Returns the type tag stored in this document.
+JSONDocument::Type JSONDocument::type() const {
+  return type_;
+}
+
+// Returns true if this object has a member named `key`.
+// Must only be called on objects (asserted).
+bool JSONDocument::Contains(const std::string& key) const {
+  assert(type_ == kObject);
+  return data_.o.find(key) != data_.o.end();
+}
+
+// Looks up the member named `key` in this object. Returns a pointer to the
+// stored child, or nullptr when there is no such member.
+// Must only be called on objects (asserted).
+const JSONDocument* JSONDocument::Get(const std::string& key) const {
+  assert(type_ == kObject);
+  const auto found = data_.o.find(key);
+  return found != data_.o.end() ? found->second : nullptr;
+}
+
+// Mutable access to the member named `key`. Both the object type and the
+// presence of the key are preconditions, checked only through assert.
+JSONDocument& JSONDocument::operator[](const std::string& key) {
+  assert(type_ == kObject);
+  const auto found = data_.o.find(key);
+  assert(found != data_.o.end());
+  JSONDocument* child = found->second;
+  return *child;
+}
+
+// Read-only access to the member named `key`. Both the object type and the
+// presence of the key are preconditions, checked only through assert.
+const JSONDocument& JSONDocument::operator[](const std::string& key) const {
+  assert(type_ == kObject);
+  const auto found = data_.o.find(key);
+  assert(found != data_.o.end());
+  const JSONDocument* child = found->second;
+  return *child;
+}
+
+// Stores a deep copy of `value` under `key`, replacing (and deleting) any
+// value previously stored under that key. Returns this so calls can be
+// chained. Must only be called on objects (asserted).
+JSONDocument* JSONDocument::Set(const std::string& key,
+                                const JSONDocument& value) {
+  assert(type_ == kObject);
+  // Make the copy before touching the map: `value` may be the element that is
+  // about to be replaced (or live somewhere inside it), in which case deleting
+  // the old element first would leave us copying from freed memory.
+  JSONDocument* copy = new JSONDocument(value);
+  auto itr = data_.o.find(key);
+  if (itr == data_.o.end()) {
+    // insert
+    data_.o.insert({key, copy});
+  } else {
+    // overwrite
+    delete itr->second;
+    itr->second = copy;
+  }
+  return this;
+}
+
+// Number of elements of an array or number of members of an object.
+// Calling this on any other type trips the asserts; without asserts it
+// returns 0.
+size_t JSONDocument::Count() const {
+  assert(type_ == kArray || type_ == kObject);
+  switch (type_) {
+    case kArray:
+      return data_.a.size();
+    case kObject:
+      return data_.o.size();
+    default:
+      break;
+  }
+  assert(false);
+  return 0;
+}
+
+// Returns a pointer to the i-th element of this array, or nullptr when `i` is
+// past the end (mirroring Get(key), which returns nullptr for a missing key)
+// instead of indexing out of bounds.
+// Must only be called on arrays (asserted).
+const JSONDocument* JSONDocument::GetFromArray(size_t i) const {
+  assert(type_ == kArray);
+  if (i >= data_.a.size()) {
+    return nullptr;
+  }
+  return data_.a[i];
+}
+
+// Mutable access to the i-th array element. The array type and the index
+// range are preconditions, checked only through assert.
+JSONDocument& JSONDocument::operator[](size_t i) {
+  assert(type_ == kArray && i < data_.a.size());
+  JSONDocument* element = data_.a[i];
+  return *element;
+}
+
+// Read-only access to the i-th array element. The array type and the index
+// range are preconditions, checked only through assert.
+const JSONDocument& JSONDocument::operator[](size_t i) const {
+  assert(type_ == kArray && i < data_.a.size());
+  const JSONDocument* element = data_.a[i];
+  return *element;
+}
+
+// Replaces the i-th array element with a deep copy of `value` and deletes the
+// element that was stored there. Returns this so calls can be chained.
+// The array type and the index range are preconditions (asserted).
+JSONDocument* JSONDocument::SetInArray(size_t i, const JSONDocument& value) {
+  assert(IsArray() && i < data_.a.size());
+  // Copy before deleting: `value` may be the element being replaced (or live
+  // somewhere inside it), and would otherwise be read after it was freed.
+  JSONDocument* copy = new JSONDocument(value);
+  delete data_.a[i];
+  data_.a[i] = copy;
+  return this;
+}
+
+// Appends a deep copy of `value` to this array and returns this so calls can
+// be chained. Must only be called on arrays (asserted).
+JSONDocument* JSONDocument::PushBack(const JSONDocument& value) {
+  assert(IsArray());
+  JSONDocument* copy = new JSONDocument(value);
+  data_.a.push_back(copy);
+  return this;
+}
+
+// Type predicates: each one compares the stored type tag with one Type value.
+bool JSONDocument::IsNull() const { return type_ == kNull; }
+bool JSONDocument::IsArray() const { return type_ == kArray; }
+bool JSONDocument::IsBool() const { return type_ == kBool; }
+bool JSONDocument::IsDouble() const { return type_ == kDouble; }
+bool JSONDocument::IsInt64() const { return type_ == kInt64; }
+bool JSONDocument::IsObject() const { return type_ == kObject; }
+bool JSONDocument::IsString() const { return type_ == kString; }
+
+// Typed accessors. Each one asserts that the document has the matching type
+// and then returns the stored value; the type is not checked in any other
+// way.
+
+bool JSONDocument::GetBool() const {
+  assert(IsBool());
+  return data_.b;
+}
+
+double JSONDocument::GetDouble() const {
+  assert(IsDouble());
+  return data_.d;
+}
+
+int64_t JSONDocument::GetInt64() const {
+  assert(IsInt64());
+  return data_.i;
+}
+
+// returns a reference to the string stored inside this document
+const std::string& JSONDocument::GetString() const {
+  assert(IsString());
+  return data_.s;
+}
+
+// Deep equality. Documents of different types are never equal (so an int64
+// never equals a double). Arrays are compared element by element in order;
+// objects must have the same number of members and every member must have an
+// equal counterpart under the same key.
+bool JSONDocument::operator==(const JSONDocument& rhs) const {
+  if (type_ != rhs.type_) {
+    return false;
+  }
+  switch (type_) {
+    case kNull:
+      return true;  // null == null
+    case kBool:
+      return data_.b == rhs.data_.b;
+    case kInt64:
+      return data_.i == rhs.data_.i;
+    case kDouble:
+      return data_.d == rhs.data_.d;
+    case kString:
+      return data_.s == rhs.data_.s;
+    case kArray: {
+      const size_t count = data_.a.size();
+      if (count != rhs.data_.a.size()) {
+        return false;
+      }
+      for (size_t idx = 0; idx != count; ++idx) {
+        if (!(*data_.a[idx] == *rhs.data_.a[idx])) {
+          return false;
+        }
+      }
+      return true;
+    }
+    case kObject: {
+      if (data_.o.size() != rhs.data_.o.size()) {
+        return false;
+      }
+      for (const auto& member : data_.o) {
+        const auto counterpart = rhs.data_.o.find(member.first);
+        if (counterpart == rhs.data_.o.end()) {
+          return false;
+        }
+        if (!(*(counterpart->second) == *member.second)) {
+          return false;
+        }
+      }
+      return true;
+    }
+    default:
+      assert(false);
+  }
+  // it can't come to here, but we don't want the compiler to complain
+  return false;
+}
+
+// Renders the document as human readable text. Object keys are printed
+// without quotes and strings are not escaped, so the result is meant for
+// debugging output only.
+std::string JSONDocument::DebugString() const {
+  switch (type_) {
+    case kNull:
+      return "null";
+    case kBool:
+      return data_.b ? "true" : "false";
+    case kString:
+      return "\"" + data_.s + "\"";
+    case kDouble: {
+      char formatted[100];
+      snprintf(formatted, sizeof(formatted), "%lf", data_.d);
+      return formatted;
+    }
+    case kInt64: {
+      char formatted[100];
+      snprintf(formatted, sizeof(formatted), "%" PRIi64, data_.i);
+      return formatted;
+    }
+    case kArray: {
+      std::string text = "[";
+      // empty before the first element, ", " before every later one
+      const char* separator = "";
+      for (const auto* element : data_.a) {
+        text += separator;
+        text += element->DebugString();
+        separator = ", ";
+      }
+      text += "]";
+      return text;
+    }
+    case kObject: {
+      std::string text = "{";
+      const char* separator = "";
+      for (const auto& member : data_.o) {
+        text += separator;
+        text += member.first + ": ";
+        text += member.second->DebugString();
+        separator = ", ";
+      }
+      text += "}";
+      return text;
+    }
+    default:
+      assert(false);
+  }
+  // only reachable for an unknown type when asserts are compiled out
+  return std::string();
+}
+
+// Returns an ItemsIteratorGenerator built from this object's member map, for
+// use in range-based for loops. Must only be called on objects (asserted).
+JSONDocument::ItemsIteratorGenerator JSONDocument::Items() const {
+  assert(type_ == kObject);
+  return ItemsIteratorGenerator(data_.o);
+}
+
+// parsing with rapidjson
+// TODO(icanadi) (perf) allocate objects with arena
+// Parses the NUL-terminated JSON text `json` and returns a newly allocated
+// document that the caller must delete, or nullptr if the text does not
+// parse.
+JSONDocument* JSONDocument::ParseJSON(const char* json) {
+  // Handler fed by rapidjson::Reader. Every value the reader reports is
+  // pushed onto stack_ as its own heap-allocated document. When an object or
+  // array ends, the values pushed since the matching Start*() call are moved
+  // from the stack into the container, which takes ownership of them.
+  class JSONDocumentBuilder {
+   public:
+    JSONDocumentBuilder() {}
+
+    void Null() { stack_.push_back(new JSONDocument()); }
+    void Bool(bool b) { stack_.push_back(new JSONDocument(b)); }
+    void Int(int i) { Int64(static_cast<int64_t>(i)); }
+    void Uint(unsigned i) { Int64(static_cast<int64_t>(i)); }
+    void Int64(int64_t i) { stack_.push_back(new JSONDocument(i)); }
+    // NOTE(review): values that do not fit into int64_t are converted by the
+    // cast below and no longer hold the number that was in the input.
+    void Uint64(uint64_t i) { Int64(static_cast<int64_t>(i)); }
+    void Double(double d) { stack_.push_back(new JSONDocument(d)); }
+    void String(const char* str, size_t length, bool copy) {
+      assert(copy);
+      stack_.push_back(new JSONDocument(std::string(str, length)));
+    }
+    void StartObject() { stack_.push_back(new JSONDocument(kObject)); }
+    void EndObject(size_t member_count) {
+      // the stack holds the object itself followed by key, value, key, ...
+      assert(stack_.size() > 2 * member_count);
+      auto object_base_iter = stack_.end() - member_count * 2 - 1;
+      assert((*object_base_iter)->type_ == kObject);
+      auto& object_map = (*object_base_iter)->data_.o;
+      // iter will always be stack_.end() at some point (i.e. will not advance
+      // past it) because of the way we calculate object_base_iter
+      for (auto iter = object_base_iter + 1; iter != stack_.end(); iter += 2) {
+        assert((*iter)->type_ == kString);
+        JSONDocument* value = *(iter + 1);
+        if (!object_map.insert({(*iter)->data_.s, value}).second) {
+          // Duplicate key: the first occurrence stays in the map. This value
+          // is about to be erased from the stack, so nobody else would ever
+          // free it.
+          delete value;
+        }
+        // the key was copied into the map, its document is no longer needed
+        delete *iter;
+      }
+      stack_.erase(object_base_iter + 1, stack_.end());
+    }
+    void StartArray() { stack_.push_back(new JSONDocument(kArray)); }
+    void EndArray(size_t element_count) {
+      // the stack holds the array itself followed by its elements
+      assert(stack_.size() > element_count);
+      auto array_base_iter = stack_.end() - element_count - 1;
+      assert((*array_base_iter)->type_ == kArray);
+      (*array_base_iter)->data_.a.assign(array_base_iter + 1, stack_.end());
+      stack_.erase(array_base_iter + 1, stack_.end());
+    }
+
+    // Returns the finished document, or nullptr unless exactly one document
+    // is left on the stack.
+    JSONDocument* GetDocument() {
+      if (stack_.size() != 1) {
+        return nullptr;
+      }
+      return stack_.back();
+    }
+
+    // Frees everything that is still on the stack (used on failure).
+    void DeleteAllDocumentsOnStack() {
+      for (auto document : stack_) {
+        delete document;
+      }
+      stack_.clear();
+    }
+
+   private:
+    std::vector<JSONDocument*> stack_;
+  };
+
+  rapidjson::StringStream stream(json);
+  rapidjson::Reader reader;
+  JSONDocumentBuilder handler;
+  bool ok = reader.Parse<0>(stream, handler);
+  if (!ok) {
+    handler.DeleteAllDocumentsOnStack();
+    return nullptr;
+  }
+  auto document = handler.GetDocument();
+  assert(document != nullptr);
+  if (document == nullptr) {
+    // not expected after a successful parse; don't leak the stack when
+    // asserts are compiled out
+    handler.DeleteAllDocumentsOnStack();
+  }
+  return document;
+}
+
+// serialization and deserialization
+// format:
+// ------
+// document  ::= header(char) object
+// object    ::= varint32(n) key_value*(n times)
+// key_value ::= string element
+// element   ::= 0x01                     (kNull)
+//            |  0x02 array               (kArray)
+//            |  0x03 byte                (kBool)
+//            |  0x04 double              (kDouble)
+//            |  0x05 int64               (kInt64)
+//            |  0x06 object              (kObject)
+//            |  0x07 string              (kString)
+// array ::= varint32(n) element*(n times)
+// TODO(icanadi) evaluate string vs cstring format
+// string ::= varint32(n) byte*(n times)
+// double ::= 64-bit IEEE 754 floating point (8 bytes)
+// int64  ::= 8 bytes, 64-bit signed integer, little endian
+
+namespace {
+// Maps a document type to the one-byte prefix used in the serialized form.
+// The table is indexed with the numeric value of `type`.
+inline char GetPrefixFromType(JSONDocument::Type type) {
+  static const char kPrefixByType[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
+  return kPrefixByType[type];
+}
+
+// Reads the one-byte type prefix at the front of `input`. On success the byte
+// is consumed, *type is set and true is returned. Returns false, leaving
+// `input` untouched, when the input is empty or the byte is not one of the
+// prefixes 0x1..0x7.
+inline bool GetNextType(Slice* input, JSONDocument::Type* type) {
+  static const JSONDocument::Type kTypeByPrefix[] = {
+      JSONDocument::kNull,   JSONDocument::kArray, JSONDocument::kBool,
+      JSONDocument::kDouble, JSONDocument::kInt64, JSONDocument::kObject,
+      JSONDocument::kString};
+  if (input->size() == 0) {
+    return false;
+  }
+  const size_t tag = static_cast<size_t>((*input)[0]);
+  if (tag < 0x1 || tag > 0x7) {
+    return false;
+  }
+  input->remove_prefix(1);
+  *type = kTypeByPrefix[tag - 1];
+  return true;
+}
+
+// TODO(icanadi): Make sure this works on all platforms we support. Some
+// platforms may store double in different binary format (our specification says
+// we need IEEE 754)
+// Appends the in-memory bytes of `d` to `dst`.
+inline void PutDouble(std::string* dst, double d) {
+  const char* raw = reinterpret_cast<char*>(&d);
+  dst->append(raw, sizeof(d));
+}
+
+// Copies sizeof(double) bytes from the front of `input` into *d and consumes
+// them. Returns false, consuming nothing, if the input is too short.
+bool DecodeDouble(Slice* input, double* d) {
+  const size_t width = sizeof(double);
+  if (input->size() < width) {
+    return false;
+  }
+  memcpy(d, input->data(), width);
+  input->remove_prefix(width);
+  return true;
+}
+}  // namespace
+
+// Appends the serialized form of this document to `dst`: one header byte
+// followed by the document body written without a type prefix.
+void JSONDocument::Serialize(std::string* dst) const {
+  // The first byte is reserved for the header. Currently the header is just
+  // the format version number, which will help us provide backwards
+  // compatibility. We might also store more information here if necessary.
+  dst->push_back(kSerializationFormatVersion);
+  const bool with_type_prefix = false;
+  SerializeInternal(dst, with_type_prefix);
+}
+
+// Appends this value to `dst`, preceded by its one-byte type prefix when
+// `type_prefix` is set. Children of arrays and objects are always written
+// with their prefix.
+void JSONDocument::SerializeInternal(std::string* dst, bool type_prefix) const {
+  if (type_prefix) {
+    dst->push_back(GetPrefixFromType(type_));
+  }
+  switch (type_) {
+    case kNull:
+      // just the prefix is all we need
+      break;
+    case kBool:
+      dst->push_back(static_cast<char>(data_.b));
+      break;
+    case kInt64:
+      PutFixed64(dst, static_cast<uint64_t>(data_.i));
+      break;
+    case kDouble:
+      PutDouble(dst, data_.d);
+      break;
+    case kString:
+      PutLengthPrefixedSlice(dst, Slice(data_.s));
+      break;
+    case kArray: {
+      // element count, then the elements
+      const uint32_t count = static_cast<uint32_t>(data_.a.size());
+      PutVarint32(dst, count);
+      for (const auto& child : data_.a) {
+        child->SerializeInternal(dst, true);
+      }
+      break;
+    }
+    case kObject: {
+      // member count, then key / value pairs
+      const uint32_t count = static_cast<uint32_t>(data_.o.size());
+      PutVarint32(dst, count);
+      for (const auto& member : data_.o) {
+        PutLengthPrefixedSlice(dst, Slice(member.first));
+        member.second->SerializeInternal(dst, true);
+      }
+      break;
+    }
+    default:
+      assert(false);
+  }
+}
+
+// Value of the header byte written by Serialize() and required by
+// Deserialize().
+const char JSONDocument::kSerializationFormatVersion = 1;
+
+// Rebuilds a document from bytes produced by Serialize(). Returns a newly
+// allocated document that the caller must delete, or nullptr when the input
+// is empty, carries a different header byte, fails to decode, or has bytes
+// left over after the root object.
+JSONDocument* JSONDocument::Deserialize(const Slice& src) {
+  Slice remaining(src);
+  if (src.size() == 0) {
+    return nullptr;
+  }
+  if (remaining[0] != kSerializationFormatVersion) {
+    // don't understand this header (possibly newer version format and we don't
+    // support downgrade)
+    return nullptr;
+  }
+  remaining.remove_prefix(1);
+
+  // the root of a serialized document is always read as an object
+  JSONDocument* document = new JSONDocument(kObject);
+  if (document->DeserializeInternal(&remaining) && remaining.size() == 0) {
+    return document;
+  }
+  // parsing failure :(
+  delete document;
+  return nullptr;
+}
+
+// Fills this (freshly constructed) document with the value of type type_
+// found at the front of `input`, consuming the bytes it reads. Returns false
+// if the input is truncated or malformed. In that case the document may be
+// left partially filled; whatever it already holds is owned by it and is
+// released by its destructor.
+bool JSONDocument::DeserializeInternal(Slice* input) {
+  switch (type_) {
+    case kNull:
+      break;
+    case kArray: {
+      uint32_t size;
+      if (!GetVarint32(input, &size)) {
+        return false;
+      }
+      // Every element takes at least one byte (its type prefix), so a count
+      // larger than the remaining input can only come from corrupt data.
+      // Reject it before allocating anything based on it.
+      if (size > input->size()) {
+        return false;
+      }
+      data_.a.reserve(size);
+      for (uint32_t i = 0; i < size; ++i) {
+        Type type;
+        if (!GetNextType(input, &type)) {
+          return false;
+        }
+        // hand the element to the array first so it is owned (and eventually
+        // freed) even when decoding it fails
+        data_.a.push_back(new JSONDocument(type));
+        if (!data_.a.back()->DeserializeInternal(input)) {
+          return false;
+        }
+      }
+      break;
+    }
+    case kBool:
+      if (input->size() < 1) {
+        return false;
+      }
+      data_.b = static_cast<bool>((*input)[0]);
+      input->remove_prefix(1);
+      break;
+    case kDouble:
+      if (!DecodeDouble(input, &data_.d)) {
+        return false;
+      }
+      break;
+    case kInt64: {
+      uint64_t tmp;
+      if (!GetFixed64(input, &tmp)) {
+        return false;
+      }
+      data_.i = static_cast<int64_t>(tmp);
+      break;
+    }
+    case kObject: {
+      uint32_t num_elements;
+      bool ok = GetVarint32(input, &num_elements);
+      for (uint32_t i = 0; ok && i < num_elements; ++i) {
+        Slice key;
+        ok = GetLengthPrefixedSlice(input, &key);
+        Type type;
+        ok = ok && GetNextType(input, &type);
+        if (ok) {
+          std::unique_ptr<JSONDocument> value(new JSONDocument(type));
+          ok = value->DeserializeInternal(input);
+          if (ok && data_.o.insert({key.ToString(), value.get()}).second) {
+            // the map owns the value now
+            value.release();
+          }
+          // otherwise (decode failure, or a key that is already present and
+          // therefore was not inserted) the unique_ptr frees the value
+        }
+      }
+      if (!ok) {
+        return false;
+      }
+      break;
+    }
+    case kString: {
+      Slice key;
+      if (!GetLengthPrefixedSlice(input, &key)) {
+        return false;
+      }
+      data_.s = key.ToString();
+      break;
+    }
+    default:
+      // this is an assert and not a return because DeserializeInternal() will
+      // always be called with a valid type_. In case there has been data
+      // corruption, GetNextType() is the function that will detect that and
+      // return corruption
+      assert(false);
+  }
+  return true;
+}
+
+}  // namespace rocksdb
+#endif  // ROCKSDB_LITE
--- a/utilities/document/json_document_test.cc
+++ b/utilities/document/json_document_test.cc
@@ -0,0 +1,170 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+
+#include <set>
+
+#include "rocksdb/utilities/json_document.h"
+
+#include "util/testutil.h"
+#include "util/testharness.h"
+
+namespace rocksdb {
+namespace {
+// AssertField overloads: each one asserts that `json` contains `field` and
+// that the stored value has the type matching the overload; the overloads
+// that take `expected` also assert that the value equals it.
+
+// field must be present and null
+void AssertField(const JSONDocument& json, const std::string& field) {
+  ASSERT_TRUE(json.Contains(field));
+  const JSONDocument& value = json[field];
+  ASSERT_TRUE(value.IsNull());
+}
+
+// field must be present and hold the given string
+void AssertField(const JSONDocument& json, const std::string& field,
+                 const std::string& expected) {
+  ASSERT_TRUE(json.Contains(field));
+  const JSONDocument& value = json[field];
+  ASSERT_TRUE(value.IsString());
+  ASSERT_EQ(expected, value.GetString());
+}
+
+// field must be present and hold the given integer
+void AssertField(const JSONDocument& json, const std::string& field,
+                 int64_t expected) {
+  ASSERT_TRUE(json.Contains(field));
+  const JSONDocument& value = json[field];
+  ASSERT_TRUE(value.IsInt64());
+  ASSERT_EQ(expected, value.GetInt64());
+}
+
+// field must be present and hold the given boolean
+void AssertField(const JSONDocument& json, const std::string& field,
+                 bool expected) {
+  ASSERT_TRUE(json.Contains(field));
+  const JSONDocument& value = json[field];
+  ASSERT_TRUE(value.IsBool());
+  ASSERT_EQ(expected, value.GetBool());
+}
+
+// field must be present and hold exactly the given double
+void AssertField(const JSONDocument& json, const std::string& field,
+                 double expected) {
+  ASSERT_TRUE(json.Contains(field));
+  const JSONDocument& value = json[field];
+  ASSERT_TRUE(value.IsDouble());
+  ASSERT_EQ(expected, value.GetDouble());
+}
+}  // namespace
+
+// Fixture for the JSONDocument tests: holds two sample JSON texts and a
+// checker for the structure described by the first one.
+class JSONDocumentTest {
+ public:
+  // Asserts that `json` has exactly the content described by kSampleJSON.
+  void AssertSampleJSON(const JSONDocument& json) {
+    AssertField(json, "title", std::string("json"));
+    AssertField(json, "type", std::string("object"));
+
+    // properties
+    ASSERT_TRUE(json.Contains("properties"));
+    const JSONDocument& properties = json["properties"];
+    ASSERT_TRUE(properties.Contains("flags"));
+    const JSONDocument& flags = properties["flags"];
+    ASSERT_TRUE(flags.IsArray());
+    ASSERT_EQ(3u, flags.Count());
+    ASSERT_TRUE(flags[0].IsInt64());
+    ASSERT_EQ(10, flags[0].GetInt64());
+    ASSERT_TRUE(flags[1].IsString());
+    ASSERT_EQ("parse", flags[1].GetString());
+    ASSERT_TRUE(flags[2].IsObject());
+    AssertField(flags[2], "tag", std::string("no"));
+    AssertField(flags[2], std::string("status"));
+    AssertField(properties, "age", 110.5e-4);
+    AssertField(properties, "depth", static_cast<int64_t>(-10));
+
+    // test iteration: every key must be one of the expected ones, and all of
+    // the expected ones must show up
+    std::set<std::string> remaining({"flags", "age", "depth"});
+    for (const auto& item : properties.Items()) {
+      ASSERT_TRUE(remaining.erase(item.first) == 1);
+    }
+    ASSERT_EQ(0U, remaining.size());
+
+    // latlong
+    ASSERT_TRUE(json.Contains("latlong"));
+    const JSONDocument& latlong = json["latlong"];
+    ASSERT_TRUE(latlong.IsArray());
+    ASSERT_EQ(2u, latlong.Count());
+    ASSERT_TRUE(latlong[0].IsDouble());
+    ASSERT_EQ(53.25, latlong[0].GetDouble());
+    ASSERT_TRUE(latlong[1].IsDouble());
+    ASSERT_EQ(43.75, latlong[1].GetDouble());
+
+    AssertField(json, "enabled", true);
+  }
+
+  // the document checked by AssertSampleJSON()
+  const std::string kSampleJSON =
+      "{ \"title\" : \"json\", \"type\" : \"object\", \"properties\" : { "
+      "\"flags\": [10, \"parse\", {\"tag\": \"no\", \"status\": null}], "
+      "\"age\": 110.5e-4, \"depth\": -10 }, \"latlong\": [53.25, 43.75], "
+      "\"enabled\": true }";
+
+  // same as kSampleJSON except that "status" is 2 instead of null
+  const std::string kSampleJSONDifferent =
+      "{ \"title\" : \"json\", \"type\" : \"object\", \"properties\" : { "
+      "\"flags\": [10, \"parse\", {\"tag\": \"no\", \"status\": 2}], "
+      "\"age\": 110.5e-4, \"depth\": -10 }, \"latlong\": [53.25, 43.75], "
+      "\"enabled\": true }";
+};
+
+// Covers construction from a value, parsing of the sample text, deep copying,
+// (in)equality and the failure path of the parser.
+TEST(JSONDocumentTest, Parsing) {
+  JSONDocument five(static_cast<int64_t>(5));
+  ASSERT_TRUE(five.IsInt64());
+
+  // make sure it's correctly parsed
+  JSONDocument* sample = JSONDocument::ParseJSON(kSampleJSON.c_str());
+  ASSERT_TRUE(sample != nullptr);
+  AssertSampleJSON(*sample);
+
+  // test deep copying: the copy is still used after the source is deleted
+  JSONDocument clone(*sample);
+  AssertSampleJSON(clone);
+  ASSERT_TRUE(clone == *sample);
+  delete sample;
+
+  JSONDocument* different =
+      JSONDocument::ParseJSON(kSampleJSONDifferent.c_str());
+  ASSERT_TRUE(different != nullptr);
+  ASSERT_TRUE(!(*different == clone));
+  delete different;
+
+  // parse error: the sample with its last 10 characters cut off
+  const std::string kFaultyJSON =
+      kSampleJSON.substr(0, kSampleJSON.size() - 10);
+  JSONDocument* faulty = JSONDocument::ParseJSON(kFaultyJSON.c_str());
+  ASSERT_TRUE(faulty == nullptr);
+}
+
+// Round-trips the sample document through Serialize() / Deserialize() and
+// checks that truncated input is rejected.
+TEST(JSONDocumentTest, Serialization) {
+  std::string bytes;
+  {
+    JSONDocument* original = JSONDocument::ParseJSON(kSampleJSON.c_str());
+    ASSERT_TRUE(original != nullptr);
+    original->Serialize(&bytes);
+    delete original;
+  }
+
+  JSONDocument* restored = JSONDocument::Deserialize(Slice(bytes));
+  ASSERT_TRUE(restored != nullptr);
+  AssertSampleJSON(*restored);
+  delete restored;
+
+  // deserialization failure: the last 10 bytes are missing
+  const Slice truncated(bytes.data(), bytes.size() - 10);
+  ASSERT_TRUE(JSONDocument::Deserialize(truncated) == nullptr);
+}
+
+// Checks that Set(), SetInArray() and PushBack() change a document in place
+// so that it compares equal to a document parsed from the target text.
+TEST(JSONDocumentTest, Mutation) {
+  // object member: turn "status": 2 back into "status": null
+  {
+    JSONDocument* sample = JSONDocument::ParseJSON(kSampleJSON.c_str());
+    ASSERT_TRUE(sample != nullptr);
+    JSONDocument* different =
+        JSONDocument::ParseJSON(kSampleJSONDifferent.c_str());
+    ASSERT_TRUE(different != nullptr);
+
+    JSONDocument& last_flag = (*different)["properties"]["flags"][2];
+    last_flag.Set("status", JSONDocument());
+
+    ASSERT_TRUE(*different == *sample);
+
+    delete different;
+    delete sample;
+  }
+
+  // array: overwrite the first element, then append one
+  {
+    JSONDocument* json1 = JSONDocument::ParseJSON("{\"a\": [1, 2, 3]}");
+    JSONDocument* json2 = JSONDocument::ParseJSON("{\"a\": [2, 2, 3, 4]}");
+    ASSERT_TRUE(json1 != nullptr && json2 != nullptr);
+
+    JSONDocument& array = (*json1)["a"];
+    array.SetInArray(0, static_cast<int64_t>(2));
+    array.PushBack(static_cast<int64_t>(4));
+    ASSERT_TRUE(*json1 == *json2);
+
+    delete json1;
+    delete json2;
+  }
+}
+
+}  //  namespace rocksdb
+
+// Entry point of the test binary; the command-line arguments are not used.
+int main(int /*argc*/, char** /*argv*/) {
+  return rocksdb::test::RunAllTests();
+}