From 0754d4cb3bdc5728076f181f78d8f51acd3c73cd Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 24 Jul 2014 16:39:33 -0400 Subject: [PATCH] SpatialDB change API Summary: I changed SpatialDB API so that we only specify list of indexes when we create the database. That way, whoever is querying the DB doesn't need to know the full list of indexes and their options. Test Plan: spatial_db_test Reviewers: yinwang Reviewed By: yinwang Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D20571 --- include/rocksdb/utilities/spatial_db.h | 14 +- utilities/spatialdb/spatial_db.cc | 251 ++++++++++++++++--------- utilities/spatialdb/spatial_db_test.cc | 37 ++-- utilities/spatialdb/utils.h | 95 ++++++++++ 4 files changed, 279 insertions(+), 118 deletions(-) create mode 100644 utilities/spatialdb/utils.h diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h index d5f9941321..ac900959e1 100644 --- a/include/rocksdb/utilities/spatial_db.h +++ b/include/rocksdb/utilities/spatial_db.h @@ -196,13 +196,15 @@ struct SpatialIndexOptions { class SpatialDB : public StackableDB { public: - // Open the SpatialDB. List of spatial_indexes need to include all indexes - // that already exist in the DB (if the DB already exists). It can include new - // indexes, which will be created and initialized as empty (data will not be - // re-indexed). The resulting db object will be returned through db parameter. - // TODO(icanadi) read_only = true doesn't yet work because of #4743185 + // Creates the SpatialDB with specified list of indexes. + // REQUIRED: db doesn't exist + static Status Create(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes); + + // Open the existing SpatialDB. The resulting db object will be returned + // through db parameter. 
+ // REQUIRED: db was created using SpatialDB::Create static Status Open(const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes, SpatialDB** db, bool read_only = false); explicit SpatialDB(DB* db) : StackableDB(db) {} diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc index bf4a268fc5..c928d03037 100644 --- a/utilities/spatialdb/spatial_db.cc +++ b/utilities/spatialdb/spatial_db.cc @@ -15,10 +15,41 @@ #include "rocksdb/utilities/stackable_db.h" #include "rocksdb/utilities/spatial_db.h" #include "util/coding.h" +#include "utilities/spatialdb/utils.h" namespace rocksdb { namespace spatial { +// Column families are used to store element's data and spatial indexes. We use +// [default] column family to store the element data. This is the format of +// [default] column family: +// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set +// (serialized) +// We have one additional column family for each spatial index. The name of the +// column family is [spatial$]. The format is: +// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" +// We store information about indexes in [metadata] column family. 
Format is: +// * spatial$ -> bbox (4 double encodings) tile_bits +// (varint32) + +namespace { +const std::string kMetadataColumnFamilyName("metadata"); +inline std::string GetSpatialIndexColumnFamilyName( + const std::string& spatial_index_name) { + return "spatial$" + spatial_index_name; +} +inline bool GetSpatialIndexName(const std::string& column_family_name, + Slice* dst) { + *dst = Slice(column_family_name); + if (dst->starts_with("spatial$")) { + dst->remove_prefix(8); // strlen("spatial$") + return true; + } + return false; +} + +} // namespace + Variant::Variant(const Variant& v) : type_(v.type_) { switch (v.type_) { case kNull: @@ -100,8 +131,7 @@ void FeatureSet::Serialize(std::string* output) const { PutVarint64(output, iter.second.get_int()); break; case Variant::kDouble: { - double d = iter.second.get_double(); - output->append(reinterpret_cast(&d), sizeof(double)); + PutDouble(output, iter.second.get_double()); break; } case Variant::kString: @@ -145,13 +175,11 @@ bool FeatureSet::Deserialize(const Slice& input) { break; } case Variant::kDouble: { - if (s.size() < sizeof(double)) { + double d; + if (!GetDouble(&s, &d)) { return false; } - double d; - memcpy(&d, s.data(), sizeof(double)); map_.insert({key.ToString(), Variant(d)}); - s.remove_prefix(sizeof(double)); break; } case Variant::kString: { @@ -169,72 +197,6 @@ bool FeatureSet::Deserialize(const Slice& input) { return true; } -namespace { -// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx -inline uint64_t GetTileFromCoord(double x, double start, double end, - uint32_t tile_bits) { - if (x < start) { - return 0; - } - uint64_t tiles = static_cast(1) << tile_bits; - uint64_t r = ((x - start) / (end - start)) * tiles; - return std::min(r, tiles - 1); -} -inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, - uint32_t tile_bits) { - uint64_t quad_key = 0; - for (uint32_t i = 0; i < tile_bits; ++i) { - uint32_t mask = (1LL << i); - quad_key |= (tile_x & 
mask) << i; - quad_key |= (tile_y & mask) << (i + 1); - } - return quad_key; -} -inline BoundingBox GetTileBoundingBox( - const SpatialIndexOptions& spatial_index, BoundingBox bbox) { - return BoundingBox( - GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits)); -} - -// big endian can be compared using memcpy -inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { - char buf[sizeof(value)]; - buf[0] = (value >> 56) & 0xff; - buf[1] = (value >> 48) & 0xff; - buf[2] = (value >> 40) & 0xff; - buf[3] = (value >> 32) & 0xff; - buf[4] = (value >> 24) & 0xff; - buf[5] = (value >> 16) & 0xff; - buf[6] = (value >> 8) & 0xff; - buf[7] = value & 0xff; - dst->append(buf, sizeof(buf)); -} -// big endian can be compared using memcpy -inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { - if (input.size() < sizeof(uint64_t)) { - return false; - } - auto ptr = input.data(); - *value = (static_cast(static_cast(ptr[0])) << 56) | - (static_cast(static_cast(ptr[1])) << 48) | - (static_cast(static_cast(ptr[2])) << 40) | - (static_cast(static_cast(ptr[3])) << 32) | - (static_cast(static_cast(ptr[4])) << 24) | - (static_cast(static_cast(ptr[5])) << 16) | - (static_cast(static_cast(ptr[6])) << 8) | - static_cast(static_cast(ptr[7])); - return true; -} - -} // namespace - class SpatialIndexCursor : public Cursor { public: SpatialIndexCursor(Iterator* spatial_iterator, Iterator* data_iterator, @@ -432,14 +394,6 @@ class ErrorCursor : public Cursor { FeatureSet trash_; }; -// Column families are used to store element's data and spatial indexes. 
We use -// [default] column family to store the element data. This is the format of -// [default] column family: -// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set -// (serialized) -// We have one additional column family for each spatial index. The name of the -// column family is [spatial$]. The format is: -// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" class SpatialDBImpl : public SpatialDB { public: // * db -- base DB that needs to be forwarded to StackableDB @@ -518,11 +472,19 @@ class SpatialDBImpl : public SpatialDB { virtual Status Compact() override { Status s, t; for (auto& iter : name_to_index_) { + t = Flush(FlushOptions(), iter.second.column_family); + if (!t.ok()) { + s = t; + } t = CompactRange(iter.second.column_family, nullptr, nullptr); if (!t.ok()) { s = t; } } + t = Flush(FlushOptions(), data_column_family_); + if (!t.ok()) { + s = t; + } t = CompactRange(data_column_family_, nullptr, nullptr); if (!t.ok()) { s = t; @@ -580,24 +542,119 @@ Options GetRocksDBOptionsFromOptions(const SpatialDBOptions& options) { } } // namespace -Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes, - SpatialDB** db, bool read_only) { +class MetadataStorage { + public: + MetadataStorage(DB* db, ColumnFamilyHandle* cf) : db_(db), cf_(cf) {} + ~MetadataStorage() {} + + // format: + // + Status AddIndex(const SpatialIndexOptions& index) { + std::string encoded_index; + PutDouble(&encoded_index, index.bbox.min_x); + PutDouble(&encoded_index, index.bbox.min_y); + PutDouble(&encoded_index, index.bbox.max_x); + PutDouble(&encoded_index, index.bbox.max_y); + PutVarint32(&encoded_index, index.tile_bits); + return db_->Put(WriteOptions(), cf_, + GetSpatialIndexColumnFamilyName(index.name), encoded_index); + } + + Status GetIndex(const std::string& name, SpatialIndexOptions* dst) { + std::string value; + Status s = db_->Get(ReadOptions(), cf_, + 
GetSpatialIndexColumnFamilyName(name), &value); + if (!s.ok()) { + return s; + } + dst->name = name; + Slice encoded_index(value); + bool ok = GetDouble(&encoded_index, &(dst->bbox.min_x)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.min_y)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_x)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_y)); + ok = ok && GetVarint32(&encoded_index, &(dst->tile_bits)); + return ok ? Status::OK() : Status::Corruption("Index encoding corrupted"); + } + + private: + DB* db_; + ColumnFamilyHandle* cf_; +}; + +Status SpatialDB::Create( + const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes) { Options rocksdb_options = GetRocksDBOptionsFromOptions(options); rocksdb_options.create_if_missing = true; rocksdb_options.create_missing_column_families = true; + rocksdb_options.error_if_exists = true; std::vector column_families; column_families.push_back(ColumnFamilyDescriptor( kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + column_families.push_back(ColumnFamilyDescriptor( + kMetadataColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); for (const auto& index : spatial_indexes) { - column_families.emplace_back("spatial$" + index.name, + column_families.emplace_back(GetSpatialIndexColumnFamilyName(index.name), + ColumnFamilyOptions(rocksdb_options)); + } + + std::vector handles; + DB* base_db; + Status s = DB::Open(DBOptions(rocksdb_options), name, column_families, + &handles, &base_db); + if (!s.ok()) { + return s; + } + MetadataStorage metadata(base_db, handles[1]); + for (const auto& index : spatial_indexes) { + s = metadata.AddIndex(index); + if (!s.ok()) { + break; + } + } + + for (auto h : handles) { + delete h; + } + delete base_db; + + return s; +} + +Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, + SpatialDB** db, bool read_only) { + Options rocksdb_options = GetRocksDBOptionsFromOptions(options); + + Status s; + 
std::vector existing_column_families; + std::vector spatial_indexes; + s = DB::ListColumnFamilies(DBOptions(rocksdb_options), name, + &existing_column_families); + if (!s.ok()) { + return s; + } + for (const auto& cf_name : existing_column_families) { + Slice spatial_index; + if (GetSpatialIndexName(cf_name, &spatial_index)) { + spatial_indexes.emplace_back(spatial_index.data(), spatial_index.size()); + } + } + + std::vector column_families; + column_families.push_back(ColumnFamilyDescriptor( + kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + column_families.push_back(ColumnFamilyDescriptor( + kMetadataColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + + for (const auto& index : spatial_indexes) { + column_families.emplace_back(GetSpatialIndexColumnFamilyName(index), ColumnFamilyOptions(rocksdb_options)); } std::vector handles; DB* base_db; - Status s; if (read_only) { s = DB::OpenForReadOnly(DBOptions(rocksdb_options), name, column_families, &handles, &base_db); @@ -609,14 +666,21 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, return s; } + MetadataStorage metadata(base_db, handles[1]); + std::vector> index_cf; - assert(handles.size() == spatial_indexes.size() + 1); + assert(handles.size() == spatial_indexes.size() + 2); for (size_t i = 0; i < spatial_indexes.size(); ++i) { - index_cf.emplace_back(spatial_indexes[i], handles[i + 1]); + SpatialIndexOptions index_options; + s = metadata.GetIndex(spatial_indexes[i], &index_options); + if (!s.ok()) { + break; + } + index_cf.emplace_back(index_options, handles[i + 2]); } uint64_t next_id; - { + if (s.ok()) { // find next_id Iterator* iter = base_db->NewIterator(ReadOptions(), handles[0]); iter->SeekToLast(); @@ -624,7 +688,7 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, uint64_t last_id; bool ok = GetFixed64BigEndian(iter->key(), &last_id); if (!ok) { - return Status::Corruption("Invalid key in data column family"); + 
s = Status::Corruption("Invalid key in data column family"); } next_id = last_id + 1; } else { @@ -632,7 +696,16 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, } delete iter; } + if (!s.ok()) { + for (auto h : handles) { + delete h; + } + delete base_db; + return s; + } + // I don't need metadata column family any more, so delete it + delete handles[1]; *db = new SpatialDBImpl(base_db, handles[0], index_cf, next_id); return Status::OK(); } diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc index b4d5c23cc4..4cd2c8eed9 100644 --- a/utilities/spatialdb/spatial_db_test.cc +++ b/utilities/spatialdb/spatial_db_test.cc @@ -94,43 +94,33 @@ TEST(SpatialDBTest, FeatureSetSerializeTest) { } TEST(SpatialDBTest, TestNextID) { - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), "one", FeatureSet(), {"simple"})); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(10, 10, 15, 15), "two", FeatureSet(), {"simple"})); delete db_; - ASSERT_OK(SpatialDB::Open( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); - + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(55, 55, 65, 65), "three", FeatureSet(), {"simple"})); - delete db_; - ASSERT_OK(SpatialDB::Open( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); - + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); AssertCursorResults(BoundingBox(0, 0, 100, 100), "simple", {"one", "two", "three"}); - delete db_; } TEST(SpatialDBTest, FeatureSetTest) { -
ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); FeatureSet fs; fs.Set("a", std::string("b")); @@ -161,10 +151,10 @@ TEST(SpatialDBTest, FeatureSetTest) { } TEST(SpatialDBTest, SimpleTest) { - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)}, - &db_)); + {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(33, 17, 63, 79), "one", FeatureSet(), {"index"})); @@ -229,9 +219,10 @@ TEST(SpatialDBTest, RandomizedTest) { std::vector>> elements; BoundingBox spatial_index_bounds(0, 0, (1LL << 32), (1LL << 32)); - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", spatial_index_bounds, 7)}, &db_)); + {SpatialIndexOptions("index", spatial_index_bounds, 7)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); double step = (1LL << 32) / (1 << 7); for (int i = 0; i < 1000; ++i) { diff --git a/utilities/spatialdb/utils.h b/utilities/spatialdb/utils.h new file mode 100644 index 0000000000..eaf3c9b4e0 --- /dev/null +++ b/utilities/spatialdb/utils.h @@ -0,0 +1,95 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once +#include +#include + +#include "rocksdb/utilities/spatial_db.h" + +namespace rocksdb { +namespace spatial { + +// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx +inline uint64_t GetTileFromCoord(double x, double start, double end, + uint32_t tile_bits) { + if (x < start) { + return 0; + } + uint64_t tiles = static_cast(1) << tile_bits; + uint64_t r = ((x - start) / (end - start)) * tiles; + return std::min(r, tiles - 1); +} + +inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, + uint32_t tile_bits) { + uint64_t quad_key = 0; + for (uint32_t i = 0; i < tile_bits; ++i) { + uint32_t mask = (1LL << i); + quad_key |= (tile_x & mask) << i; + quad_key |= (tile_y & mask) << (i + 1); + } + return quad_key; +} + +inline BoundingBox GetTileBoundingBox( + const SpatialIndexOptions& spatial_index, BoundingBox bbox) { + return BoundingBox( + GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits)); +} + +// big endian can be compared using memcpy +inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; + dst->append(buf, sizeof(buf)); +} + +// big endian can be compared using memcpy +inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { + if (input.size() < sizeof(uint64_t)) { + return false; + } + auto ptr = input.data(); + *value = 
(static_cast<uint64_t>(static_cast<unsigned char>(ptr[0])) << 56) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[1])) << 48) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[2])) << 40) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[3])) << 32) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[4])) << 24) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[5])) << 16) | + (static_cast<uint64_t>(static_cast<unsigned char>(ptr[6])) << 8) | + static_cast<uint64_t>(static_cast<unsigned char>(ptr[7])); + return true; +} + +inline void PutDouble(std::string* dst, double d) { + dst->append(reinterpret_cast<char*>(&d), sizeof(double)); +} + +inline bool GetDouble(Slice* input, double* d) { + if (input->size() < sizeof(double)) { + return false; + } + memcpy(d, input->data(), sizeof(double)); + input->remove_prefix(sizeof(double)); + return true; +} + +} // namespace spatial +} // namespace rocksdb