diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h index d5f9941321..ac900959e1 100644 --- a/include/rocksdb/utilities/spatial_db.h +++ b/include/rocksdb/utilities/spatial_db.h @@ -196,13 +196,15 @@ struct SpatialIndexOptions { class SpatialDB : public StackableDB { public: - // Open the SpatialDB. List of spatial_indexes need to include all indexes - // that already exist in the DB (if the DB already exists). It can include new - // indexes, which will be created and initialized as empty (data will not be - // re-indexed). The resulting db object will be returned through db parameter. - // TODO(icanadi) read_only = true doesn't yet work because of #4743185 + // Creates the SpatialDB with specified list of indexes. + // REQUIRED: db doesn't exist + static Status Create(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes); + + // Open the existing SpatialDB. The resulting db object will be returned + // through db parameter. + // REQUIRED: db was created using SpatialDB::Create static Status Open(const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes, SpatialDB** db, bool read_only = false); explicit SpatialDB(DB* db) : StackableDB(db) {} diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc index bf4a268fc5..c928d03037 100644 --- a/utilities/spatialdb/spatial_db.cc +++ b/utilities/spatialdb/spatial_db.cc @@ -15,10 +15,41 @@ #include "rocksdb/utilities/stackable_db.h" #include "rocksdb/utilities/spatial_db.h" #include "util/coding.h" +#include "utilities/spatialdb/utils.h" namespace rocksdb { namespace spatial { +// Column families are used to store element's data and spatial indexes. We use +// [default] column family to store the element data. 
This is the format of +// [default] column family: +// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set +// (serialized) +// We have one additional column family for each spatial index. The name of the +// column family is [spatial$]. The format is: +// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" +// We store information about indexes in [metadata] column family. Format is: +// * spatial$ -> bbox (4 double encodings) tile_bits +// (varint32) + +namespace { +const std::string kMetadataColumnFamilyName("metadata"); +inline std::string GetSpatialIndexColumnFamilyName( + const std::string& spatial_index_name) { + return "spatial$" + spatial_index_name; +} +inline bool GetSpatialIndexName(const std::string& column_family_name, + Slice* dst) { + *dst = Slice(column_family_name); + if (dst->starts_with("spatial$")) { + dst->remove_prefix(8); // strlen("spatial$") + return true; + } + return false; +} + +} // namespace + Variant::Variant(const Variant& v) : type_(v.type_) { switch (v.type_) { case kNull: @@ -100,8 +131,7 @@ void FeatureSet::Serialize(std::string* output) const { PutVarint64(output, iter.second.get_int()); break; case Variant::kDouble: { - double d = iter.second.get_double(); - output->append(reinterpret_cast(&d), sizeof(double)); + PutDouble(output, iter.second.get_double()); break; } case Variant::kString: @@ -145,13 +175,11 @@ bool FeatureSet::Deserialize(const Slice& input) { break; } case Variant::kDouble: { - if (s.size() < sizeof(double)) { + double d; + if (!GetDouble(&s, &d)) { return false; } - double d; - memcpy(&d, s.data(), sizeof(double)); map_.insert({key.ToString(), Variant(d)}); - s.remove_prefix(sizeof(double)); break; } case Variant::kString: { @@ -169,72 +197,6 @@ bool FeatureSet::Deserialize(const Slice& input) { return true; } -namespace { -// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx -inline uint64_t GetTileFromCoord(double x, double start, double end, - 
uint32_t tile_bits) { - if (x < start) { - return 0; - } - uint64_t tiles = static_cast(1) << tile_bits; - uint64_t r = ((x - start) / (end - start)) * tiles; - return std::min(r, tiles - 1); -} -inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, - uint32_t tile_bits) { - uint64_t quad_key = 0; - for (uint32_t i = 0; i < tile_bits; ++i) { - uint32_t mask = (1LL << i); - quad_key |= (tile_x & mask) << i; - quad_key |= (tile_y & mask) << (i + 1); - } - return quad_key; -} -inline BoundingBox GetTileBoundingBox( - const SpatialIndexOptions& spatial_index, BoundingBox bbox) { - return BoundingBox( - GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, - spatial_index.bbox.max_x, spatial_index.tile_bits), - GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, - spatial_index.bbox.max_y, spatial_index.tile_bits)); -} - -// big endian can be compared using memcpy -inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { - char buf[sizeof(value)]; - buf[0] = (value >> 56) & 0xff; - buf[1] = (value >> 48) & 0xff; - buf[2] = (value >> 40) & 0xff; - buf[3] = (value >> 32) & 0xff; - buf[4] = (value >> 24) & 0xff; - buf[5] = (value >> 16) & 0xff; - buf[6] = (value >> 8) & 0xff; - buf[7] = value & 0xff; - dst->append(buf, sizeof(buf)); -} -// big endian can be compared using memcpy -inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { - if (input.size() < sizeof(uint64_t)) { - return false; - } - auto ptr = input.data(); - *value = (static_cast(static_cast(ptr[0])) << 56) | - (static_cast(static_cast(ptr[1])) << 48) | - (static_cast(static_cast(ptr[2])) << 40) | - (static_cast(static_cast(ptr[3])) << 32) | - (static_cast(static_cast(ptr[4])) << 24) | - (static_cast(static_cast(ptr[5])) << 16) | - 
(static_cast(static_cast(ptr[6])) << 8) | - static_cast(static_cast(ptr[7])); - return true; -} - -} // namespace - class SpatialIndexCursor : public Cursor { public: SpatialIndexCursor(Iterator* spatial_iterator, Iterator* data_iterator, @@ -432,14 +394,6 @@ class ErrorCursor : public Cursor { FeatureSet trash_; }; -// Column families are used to store element's data and spatial indexes. We use -// [default] column family to store the element data. This is the format of -// [default] column family: -// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set -// (serialized) -// We have one additional column family for each spatial index. The name of the -// column family is [spatial$]. The format is: -// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" class SpatialDBImpl : public SpatialDB { public: // * db -- base DB that needs to be forwarded to StackableDB @@ -518,11 +472,19 @@ class SpatialDBImpl : public SpatialDB { virtual Status Compact() override { Status s, t; for (auto& iter : name_to_index_) { + t = Flush(FlushOptions(), iter.second.column_family); + if (!t.ok()) { + s = t; + } t = CompactRange(iter.second.column_family, nullptr, nullptr); if (!t.ok()) { s = t; } } + t = Flush(FlushOptions(), data_column_family_); + if (!t.ok()) { + s = t; + } t = CompactRange(data_column_family_, nullptr, nullptr); if (!t.ok()) { s = t; @@ -580,24 +542,119 @@ Options GetRocksDBOptionsFromOptions(const SpatialDBOptions& options) { } } // namespace -Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, - const std::vector& spatial_indexes, - SpatialDB** db, bool read_only) { +class MetadataStorage { + public: + MetadataStorage(DB* db, ColumnFamilyHandle* cf) : db_(db), cf_(cf) {} + ~MetadataStorage() {} + + // format: + // + Status AddIndex(const SpatialIndexOptions& index) { + std::string encoded_index; + PutDouble(&encoded_index, index.bbox.min_x); + PutDouble(&encoded_index, index.bbox.min_y); + 
PutDouble(&encoded_index, index.bbox.max_x); + PutDouble(&encoded_index, index.bbox.max_y); + PutVarint32(&encoded_index, index.tile_bits); + return db_->Put(WriteOptions(), cf_, + GetSpatialIndexColumnFamilyName(index.name), encoded_index); + } + + Status GetIndex(const std::string& name, SpatialIndexOptions* dst) { + std::string value; + Status s = db_->Get(ReadOptions(), cf_, + GetSpatialIndexColumnFamilyName(name), &value); + if (!s.ok()) { + return s; + } + dst->name = name; + Slice encoded_index(value); + bool ok = GetDouble(&encoded_index, &(dst->bbox.min_x)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.min_y)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_x)); + ok = ok && GetDouble(&encoded_index, &(dst->bbox.max_y)); + ok = ok && GetVarint32(&encoded_index, &(dst->tile_bits)); + return ok ? Status::OK() : Status::Corruption("Index encoding corrupted"); + } + + private: + DB* db_; + ColumnFamilyHandle* cf_; +}; + +Status SpatialDB::Create( + const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes) { Options rocksdb_options = GetRocksDBOptionsFromOptions(options); rocksdb_options.create_if_missing = true; rocksdb_options.create_missing_column_families = true; + rocksdb_options.error_if_exists = true; std::vector column_families; column_families.push_back(ColumnFamilyDescriptor( kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + column_families.push_back(ColumnFamilyDescriptor( + kMetadataColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); for (const auto& index : spatial_indexes) { - column_families.emplace_back("spatial$" + index.name, + column_families.emplace_back(GetSpatialIndexColumnFamilyName(index.name), + ColumnFamilyOptions(rocksdb_options)); + } + + std::vector handles; + DB* base_db; + Status s = DB::Open(DBOptions(rocksdb_options), name, column_families, + &handles, &base_db); + if (!s.ok()) { + return s; + } + MetadataStorage metadata(base_db, handles[1]); + for 
(const auto& index : spatial_indexes) { + s = metadata.AddIndex(index); + if (!s.ok()) { + break; + } + } + + for (auto h : handles) { + delete h; + } + delete base_db; + + return s; +} + +Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, + SpatialDB** db, bool read_only) { + Options rocksdb_options = GetRocksDBOptionsFromOptions(options); + + Status s; + std::vector existing_column_families; + std::vector spatial_indexes; + s = DB::ListColumnFamilies(DBOptions(rocksdb_options), name, + &existing_column_families); + if (!s.ok()) { + return s; + } + for (const auto& cf_name : existing_column_families) { + Slice spatial_index; + if (GetSpatialIndexName(cf_name, &spatial_index)) { + spatial_indexes.emplace_back(spatial_index.data(), spatial_index.size()); + } + } + + std::vector column_families; + column_families.push_back(ColumnFamilyDescriptor( + kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + column_families.push_back(ColumnFamilyDescriptor( + kMetadataColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + + for (const auto& index : spatial_indexes) { + column_families.emplace_back(GetSpatialIndexColumnFamilyName(index), ColumnFamilyOptions(rocksdb_options)); } std::vector handles; DB* base_db; - Status s; if (read_only) { s = DB::OpenForReadOnly(DBOptions(rocksdb_options), name, column_families, &handles, &base_db); @@ -609,14 +666,21 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, return s; } + MetadataStorage metadata(base_db, handles[1]); + std::vector> index_cf; - assert(handles.size() == spatial_indexes.size() + 1); + assert(handles.size() == spatial_indexes.size() + 2); for (size_t i = 0; i < spatial_indexes.size(); ++i) { - index_cf.emplace_back(spatial_indexes[i], handles[i + 1]); + SpatialIndexOptions index_options; + s = metadata.GetIndex(spatial_indexes[i], &index_options); + if (!s.ok()) { + break; + } + index_cf.emplace_back(index_options, handles[i + 2]); } 
uint64_t next_id; - { + if (s.ok()) { // find next_id Iterator* iter = base_db->NewIterator(ReadOptions(), handles[0]); iter->SeekToLast(); @@ -624,7 +688,7 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, uint64_t last_id; bool ok = GetFixed64BigEndian(iter->key(), &last_id); if (!ok) { - return Status::Corruption("Invalid key in data column family"); + s = Status::Corruption("Invalid key in data column family"); } next_id = last_id + 1; } else { @@ -632,7 +696,16 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, } delete iter; } + if (!s.ok()) { + for (auto h : handles) { + delete h; + } + delete base_db; + return s; + } + // I don't need metadata column family any more, so delete it + delete handles[1]; *db = new SpatialDBImpl(base_db, handles[0], index_cf, next_id); return Status::OK(); } diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc index b4d5c23cc4..4cd2c8eed9 100644 --- a/utilities/spatialdb/spatial_db_test.cc +++ b/utilities/spatialdb/spatial_db_test.cc @@ -94,43 +94,33 @@ TEST(SpatialDBTest, FeatureSetSerializeTest) { } TEST(SpatialDBTest, TestNextID) { - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), "one", FeatureSet(), {"simple"})); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(10, 10, 15, 15), "two", FeatureSet(), {"simple"})); delete db_; - ASSERT_OK(SpatialDB::Open( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); - + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(55, 55, 65, 65), "three", FeatureSet(), 
{"simple"})); - delete db_; - ASSERT_OK(SpatialDB::Open( - SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); - + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); AssertCursorResults(BoundingBox(0, 0, 100, 100), "simple", {"one", "two", "three"}); - delete db_; } TEST(SpatialDBTest, FeatureSetTest) { - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, - &db_)); + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); FeatureSet fs; fs.Set("a", std::string("b")); @@ -161,10 +151,10 @@ TEST(SpatialDBTest, FeatureSetTest) { } TEST(SpatialDBTest, SimpleTest) { - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)}, - &db_)); + {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(33, 17, 63, 79), "one", FeatureSet(), {"index"})); @@ -229,9 +219,10 @@ TEST(SpatialDBTest, RandomizedTest) { std::vector>> elements; BoundingBox spatial_index_bounds(0, 0, (1LL << 32), (1LL << 32)); - ASSERT_OK(SpatialDB::Open( + ASSERT_OK(SpatialDB::Create( SpatialDBOptions(), dbname_, - {SpatialIndexOptions("index", spatial_index_bounds, 7)}, &db_)); + {SpatialIndexOptions("index", spatial_index_bounds, 7)})); + ASSERT_OK(SpatialDB::Open(SpatialDBOptions(), dbname_, &db_)); double step = (1LL << 32) / (1 << 7); for (int i = 0; i < 1000; ++i) { diff --git a/utilities/spatialdb/utils.h b/utilities/spatialdb/utils.h new file mode 100644 index 0000000000..eaf3c9b4e0 --- /dev/null +++ b/utilities/spatialdb/utils.h @@ -0,0 +1,95 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once +#include +#include + +#include "rocksdb/utilities/spatial_db.h" + +namespace rocksdb { +namespace spatial { + +// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx +inline uint64_t GetTileFromCoord(double x, double start, double end, + uint32_t tile_bits) { + if (x < start) { + return 0; + } + uint64_t tiles = static_cast(1) << tile_bits; + uint64_t r = ((x - start) / (end - start)) * tiles; + return std::min(r, tiles - 1); +} + +inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, + uint32_t tile_bits) { + uint64_t quad_key = 0; + for (uint32_t i = 0; i < tile_bits; ++i) { + uint32_t mask = (1LL << i); + quad_key |= (tile_x & mask) << i; + quad_key |= (tile_y & mask) << (i + 1); + } + return quad_key; +} + +inline BoundingBox GetTileBoundingBox( + const SpatialIndexOptions& spatial_index, BoundingBox bbox) { + return BoundingBox( + GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits)); +} + +// big endian can be compared using memcpy +inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; + dst->append(buf, 
sizeof(buf)); +} + +// big endian can be compared using memcpy +inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { + if (input.size() < sizeof(uint64_t)) { + return false; + } + auto ptr = input.data(); + *value = (static_cast(static_cast(ptr[0])) << 56) | + (static_cast(static_cast(ptr[1])) << 48) | + (static_cast(static_cast(ptr[2])) << 40) | + (static_cast(static_cast(ptr[3])) << 32) | + (static_cast(static_cast(ptr[4])) << 24) | + (static_cast(static_cast(ptr[5])) << 16) | + (static_cast(static_cast(ptr[6])) << 8) | + static_cast(static_cast(ptr[7])); + return true; +} + +inline void PutDouble(std::string* dst, double d) { + dst->append(reinterpret_cast(&d), sizeof(double)); +} + +inline bool GetDouble(Slice* input, double* d) { + if (input->size() < sizeof(double)) { + return false; + } + memcpy(d, input->data(), sizeof(double)); + input->remove_prefix(sizeof(double)); + return true; +} + +} // namespace spatial +} // namespace rocksdb