diff --git a/src/ripple/shamap/SHAMap.h b/src/ripple/shamap/SHAMap.h
index 38466450e..f2974001f 100644
--- a/src/ripple/shamap/SHAMap.h
+++ b/src/ripple/shamap/SHAMap.h
@@ -111,8 +111,10 @@ private:
     mutable bool full_ = false;  // Map is believed complete in database
 
 public:
-    /** Each non-leaf node has 16 children (the 'radix tree' part of the map) */
-    static inline constexpr unsigned int branchFactor = 16;
+    /** Number of children each non-leaf node has (the 'radix tree' part of the
+     * map) */
+    static inline constexpr unsigned int branchFactor =
+        SHAMapInnerNode::branchFactor;
 
     /** The depth of the hash map: data is only present in the leaves */
     static inline constexpr unsigned int leafDepth = 64;
diff --git a/src/ripple/shamap/SHAMapInnerNode.h b/src/ripple/shamap/SHAMapInnerNode.h
index d77884113..727745788 100644
--- a/src/ripple/shamap/SHAMapInnerNode.h
+++ b/src/ripple/shamap/SHAMapInnerNode.h
@@ -25,6 +25,7 @@
 #include <ripple/shamap/SHAMapItem.h>
 #include <ripple/shamap/SHAMapNodeID.h>
 #include <ripple/shamap/SHAMapTreeNode.h>
+#include <ripple/shamap/impl/TaggedPointer.h>
 
 #include <cstdint>
 #include <memory>
@@ -38,15 +39,75 @@ namespace ripple {
 class SHAMapInnerNode final : public SHAMapTreeNode,
                               public CountedObject<SHAMapInnerNode>
 {
-    std::array<SHAMapHash, 16> mHashes;
-    std::shared_ptr<SHAMapTreeNode> mChildren[16];
-    int mIsBranch = 0;
-    std::uint32_t mFullBelowGen = 0;
+public:
+    /** Each inner node has 16 children (the 'radix tree' part of the map) */
+    static inline constexpr unsigned int branchFactor = 16;
+
+private:
+    /** Opaque type that contains the `hashes` array (array of type
+        `SHAMapHash`) and the `children` array (array of type
+        `std::shared_ptr<SHAMapTreeNode>`).
+    */
+    TaggedPointer hashesAndChildren_;
+
+    std::uint32_t fullBelowGen_ = 0;
+    std::uint16_t isBranch_ = 0;
 
     static std::mutex childLock;
 
+    /** Convert arrays stored in `hashesAndChildren_` so they can store the
+        requested number of children.
+
+        @param toAllocate allocate space for at least this number of children
+        (must be <= branchFactor)
+
+        @note the arrays may allocate more than the requested value in
+        `toAllocate`. This is due to the implementation of TaggedPointer,
+        which only supports allocating arrays of 4 different sizes.
+    */
+    void
+    resizeChildArrays(std::uint8_t toAllocate);
+
+    /** Get the child's index inside the `hashes` or `children` array (stored
+        in `hashesAndChildren_`). These arrays may or may not be sparse. The
+        optional will be empty if an empty branch is requested and the arrays
+        are sparse.
+
+        @param i index of the requested child
+    */
+    std::optional<int>
+    getChildIndex(int i) const;
+
+    /** Call the `f` callback for all 16 (branchFactor) branches - even if
+        the branch is empty.
+
+        @param f a one parameter callback function. The parameter is the
+        child's hash.
+    */
+    template <class F>
+    void
+    iterChildren(F&& f) const;
+
+    /** Call the `f` callback for all non-empty branches.
+
+        @param f a two parameter callback function. The first parameter is
+        the branch number, the second parameter is the index into the array.
+        For dense formats these are the same, for sparse they may be
+        different.
+    */
+    template <class F>
+    void
+    iterNonEmptyChildIndexes(F&& f) const;
+
 public:
-    SHAMapInnerNode(std::uint32_t cowid);
+    explicit SHAMapInnerNode(
+        std::uint32_t cowid,
+        std::uint8_t numAllocatedChildren = branchFactor);
+
+    SHAMapInnerNode(SHAMapInnerNode const&) = delete;
+    SHAMapInnerNode&
+    operator=(SHAMapInnerNode const&) = delete;
 
     std::shared_ptr<SHAMapTreeNode>
     clone(std::uint32_t cowid) const override;
 
@@ -71,27 +132,35 @@ public:
     bool
     isEmpty() const;
+
     bool
     isEmptyBranch(int m) const;
+
     int
     getBranchCount() const;
+
     SHAMapHash const&
     getChildHash(int m) const;
 
     void
     setChild(int m, std::shared_ptr<SHAMapTreeNode> const& child);
+
     void
     shareChild(int m, std::shared_ptr<SHAMapTreeNode> const& child);
+
     SHAMapTreeNode*
     getChildPointer(int branch);
+
     std::shared_ptr<SHAMapTreeNode>
     getChild(int branch);
+
     virtual std::shared_ptr<SHAMapTreeNode>
     canonicalizeChild(int branch, std::shared_ptr<SHAMapTreeNode> node);
 
     // sync functions
     bool
     isFullBelow(std::uint32_t generation) const;
+
     void
     setFullBelowGen(std::uint32_t gen);
 
@@ -121,34 +190,22 @@ public:
     makeCompressedInner(Slice data);
 };
 
-inline SHAMapInnerNode::SHAMapInnerNode(std::uint32_t cowid)
-    : SHAMapTreeNode(cowid)
-{
-}
-
 inline bool
 SHAMapInnerNode::isEmptyBranch(int m) const
 {
-    return (mIsBranch & (1 << m)) == 0;
-}
-
-inline SHAMapHash const&
-SHAMapInnerNode::getChildHash(int m) const
-{
-    assert(m >= 0 && m < 16);
-    return mHashes[m];
+    return (isBranch_ & (1 << m)) == 0;
 }
 
 inline bool
 SHAMapInnerNode::isFullBelow(std::uint32_t generation) const
 {
-    return mFullBelowGen == generation;
+    return fullBelowGen_ == generation;
 }
 
 inline void
 SHAMapInnerNode::setFullBelowGen(std::uint32_t gen)
 {
-    mFullBelowGen = gen;
+    fullBelowGen_ = gen;
 }
 
 }  // namespace ripple
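
The header comments above describe the sparse child layout: occupied branches are recorded in the `isBranch_` bitset, and the stored children sit contiguously in branch order, so locating a child reduces to a popcount over the bits below it. A minimal standalone sketch of that mapping — `sparseIndex` is a hypothetical helper, not part of the rippled API:

```cpp
#include <bitset>
#include <cassert>
#include <cstdint>
#include <optional>

// A child's slot in a sparse array is the number of occupied branches
// below it; empty branches are not stored at all.
std::optional<int>
sparseIndex(std::uint16_t isBranch, int branch)
{
    if ((isBranch & (1 << branch)) == 0)
        return std::nullopt;  // empty branch: nothing stored
    // Count the occupied branches strictly below `branch`.
    std::uint16_t const below = isBranch & ((1u << branch) - 1);
    return static_cast<int>(std::bitset<16>(below).count());
}

int
main()
{
    // Only branches 2 and 14 are occupied.
    std::uint16_t const isBranch = (1 << 2) | (1 << 14);
    assert(sparseIndex(isBranch, 2) == 0);   // first stored child
    assert(sparseIndex(isBranch, 14) == 1);  // second stored child
    assert(!sparseIndex(isBranch, 7));       // empty branch
}
```

`TaggedPointer::getChildIndex` in the implementation file further below does the same computation with a `popcnt16` helper.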
diff --git a/src/ripple/shamap/impl/SHAMapInnerNode.cpp b/src/ripple/shamap/impl/SHAMapInnerNode.cpp
index 62258544a..fc945dcd1 100644
--- a/src/ripple/shamap/impl/SHAMapInnerNode.cpp
+++ b/src/ripple/shamap/impl/SHAMapInnerNode.cpp
@@ -28,12 +28,12 @@
 #include <ripple/protocol/HashPrefix.h>
 #include <ripple/protocol/digest.h>
 #include <ripple/shamap/SHAMapInnerNode.h>
+#include <ripple/shamap/impl/TaggedPointer.ipp>
 
 #include <openssl/sha.h>
 
 #include <algorithm>
-#include <array>
 #include <iterator>
 #include <mutex>
 #include <string>
 #include <utility>
@@ -42,17 +42,85 @@ namespace ripple {
 
 std::mutex SHAMapInnerNode::childLock;
 
+SHAMapInnerNode::SHAMapInnerNode(
+    std::uint32_t cowid,
+    std::uint8_t numAllocatedChildren)
+    : SHAMapTreeNode(cowid), hashesAndChildren_(numAllocatedChildren)
+{
+}
+
+template <class F>
+void
+SHAMapInnerNode::iterChildren(F&& f) const
+{
+    hashesAndChildren_.iterChildren(isBranch_, std::forward<F>(f));
+}
+
+template <class F>
+void
+SHAMapInnerNode::iterNonEmptyChildIndexes(F&& f) const
+{
+    hashesAndChildren_.iterNonEmptyChildIndexes(isBranch_, std::forward<F>(f));
+}
+
+void
+SHAMapInnerNode::resizeChildArrays(std::uint8_t toAllocate)
+{
+    hashesAndChildren_ =
+        TaggedPointer(std::move(hashesAndChildren_), isBranch_, toAllocate);
+}
+
+std::optional<int>
+SHAMapInnerNode::getChildIndex(int i) const
+{
+    return hashesAndChildren_.getChildIndex(isBranch_, i);
+}
+
 std::shared_ptr<SHAMapTreeNode>
 SHAMapInnerNode::clone(std::uint32_t cowid) const
 {
-    auto p = std::make_shared<SHAMapInnerNode>(cowid);
+    auto const branchCount = getBranchCount();
+    auto const thisIsSparse = !hashesAndChildren_.isDense();
+    auto p = std::make_shared<SHAMapInnerNode>(cowid, branchCount);
     p->hash_ = hash_;
-    p->mIsBranch = mIsBranch;
-    p->mFullBelowGen = mFullBelowGen;
-    p->mHashes = mHashes;
+    p->isBranch_ = isBranch_;
+    p->fullBelowGen_ = fullBelowGen_;
+    SHAMapHash *cloneHashes, *thisHashes;
+    std::shared_ptr<SHAMapTreeNode> *cloneChildren, *thisChildren;
+    // structured bindings can't be captured in c++ 17; use tie instead
+    std::tie(std::ignore, cloneHashes, cloneChildren) =
+        p->hashesAndChildren_.getHashesAndChildren();
+    std::tie(std::ignore, thisHashes, thisChildren) =
+        hashesAndChildren_.getHashesAndChildren();
+
+    if (thisIsSparse)
+    {
+        int cloneChildIndex = 0;
+        iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+            cloneHashes[cloneChildIndex++] = thisHashes[indexNum];
+        });
+    }
+    else
+    {
+        iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+            cloneHashes[branchNum] = thisHashes[indexNum];
+        });
+    }
     std::lock_guard lock(childLock);
-    for (int i = 0; i < 16; ++i)
-        p->mChildren[i] = mChildren[i];
+    if (thisIsSparse)
+    {
+        int cloneChildIndex = 0;
+        iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+            cloneChildren[cloneChildIndex++] = thisChildren[indexNum];
+        });
+    }
+    else
+    {
+        iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+            cloneChildren[branchNum] = thisChildren[indexNum];
+        });
+    }
+
     return p;
 }
@@ -65,18 +133,21 @@ SHAMapInnerNode::makeFullInner(
     if (data.size() != 512)
         Throw<std::runtime_error>("Invalid FI node");
 
-    auto ret = std::make_shared<SHAMapInnerNode>(0);
+    auto ret = std::make_shared<SHAMapInnerNode>(0, branchFactor);
 
     Serializer s(data.data(), data.size());
 
-    for (int i = 0; i < 16; ++i)
+    auto retHashes = ret->hashesAndChildren_.getHashes();
+    for (int i = 0; i < branchFactor; ++i)
     {
-        s.getBitString(ret->mHashes[i].as_uint256(), i * 32);
+        s.getBitString(retHashes[i].as_uint256(), i * 32);
 
-        if (ret->mHashes[i].isNonZero())
-            ret->mIsBranch |= (1 << i);
+        if (retHashes[i].isNonZero())
+            ret->isBranch_ |= (1 << i);
     }
 
+    ret->resizeChildArrays(ret->getBranchCount());
+
     if (hashValid)
         ret->hash_ = hash;
     else
@@ -91,8 +162,9 @@ SHAMapInnerNode::makeCompressedInner(Slice data)
 
     int len = s.getLength();
 
-    auto ret = std::make_shared<SHAMapInnerNode>(0);
+    auto ret = std::make_shared<SHAMapInnerNode>(0, branchFactor);
 
+    auto retHashes = ret->hashesAndChildren_.getHashes();
     for (int i = 0; i < (len / 33); ++i)
     {
         int pos;
@@ -100,15 +172,17 @@
         if (!s.get8(pos, 32 + (i * 33)))
             Throw<std::runtime_error>("short CI node");
 
-        if ((pos < 0) || (pos >= 16))
+        if ((pos < 0) || (pos >= branchFactor))
             Throw<std::runtime_error>("invalid CI node");
 
-        s.getBitString(ret->mHashes[pos].as_uint256(), i * 33);
+        s.getBitString(retHashes[pos].as_uint256(), i * 33);
 
-        if (ret->mHashes[pos].isNonZero())
-            ret->mIsBranch |= (1 << pos);
+        if (retHashes[pos].isNonZero())
+            ret->isBranch_ |= (1 << pos);
     }
 
+    ret->resizeChildArrays(ret->getBranchCount());
+
     ret->updateHash();
 
     return ret;
@@ -118,13 +192,12 @@ void
 SHAMapInnerNode::updateHash()
 {
     uint256 nh;
-    if (mIsBranch != 0)
+    if (isBranch_ != 0)
     {
         sha512_half_hasher h;
         using beast::hash_append;
         hash_append(h, HashPrefix::innerNode);
-        for (auto const& hh : mHashes)
-            hash_append(h, hh);
+        iterChildren([&](SHAMapHash const& hh) { hash_append(h, hh); });
         nh = static_cast<typename sha512_half_hasher::result_type>(h);
     }
     hash_ = SHAMapHash{nh};
@@ -133,11 +206,15 @@ SHAMapInnerNode::updateHash()
 void
 SHAMapInnerNode::updateHashDeep()
 {
-    for (auto pos = 0; pos < 16; ++pos)
-    {
-        if (mChildren[pos] != nullptr)
-            mHashes[pos] = mChildren[pos]->getHash();
-    }
+    SHAMapHash* hashes;
+    std::shared_ptr<SHAMapTreeNode>* children;
+    // structured bindings can't be captured in c++ 17; use tie instead
+    std::tie(std::ignore, hashes, children) =
+        hashesAndChildren_.getHashesAndChildren();
+    iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+        if (children[indexNum] != nullptr)
+            hashes[indexNum] = children[indexNum]->getHash();
+    });
     updateHash();
 }
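
The next hunk, `serializeForWire`, picks between the two wire formats that `makeFullInner` and `makeCompressedInner` above parse: a full inner node is all 16 hashes (512 bytes) plus a type byte, while the compressed form is one 33-byte record per non-empty branch — the 32-byte hash followed by the branch number — plus a type byte. A hedged sketch of the compressed encoding; `Hash256` and `encodeCompressed` are illustrative stand-ins, not rippled types:

```cpp
#include <array>
#include <cstdint>
#include <vector>

using Hash256 = std::array<std::uint8_t, 32>;  // stand-in for SHAMapHash's value

std::vector<std::uint8_t>
encodeCompressed(
    std::array<Hash256, 16> const& hashes,
    std::uint16_t isBranch,
    std::uint8_t typeByte)
{
    std::vector<std::uint8_t> out;
    for (int i = 0; i < 16; ++i)
    {
        if ((isBranch & (1 << i)) == 0)
            continue;  // empty branches are skipped entirely
        out.insert(out.end(), hashes[i].begin(), hashes[i].end());  // 32 bytes
        out.push_back(static_cast<std::uint8_t>(i));  // 1-byte branch number
    }
    out.push_back(typeByte);
    return out;
}
```

With n non-empty branches this costs 33n + 1 bytes versus 513 for the full form; the code below uses the compressed form when `getBranchCount() < 12`.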
@@ -150,22 +227,17 @@
     if (getBranchCount() < 12)
     {
         // compressed node
-        for (int i = 0; i < mHashes.size(); ++i)
-        {
-            if (!isEmptyBranch(i))
-            {
-                s.addBitString(mHashes[i].as_uint256());
-                s.add8(i);
-            }
-        }
-
+        auto hashes = hashesAndChildren_.getHashes();
+        iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+            s.addBitString(hashes[indexNum].as_uint256());
+            s.add8(branchNum);
+        });
         s.add8(wireTypeCompressedInner);
     }
     else
     {
-        for (auto const& hh : mHashes)
-            s.addBitString(hh.as_uint256());
-
+        iterChildren(
+            [&](SHAMapHash const& hh) { s.addBitString(hh.as_uint256()); });
         s.add8(wireTypeInner);
     }
 }
@@ -176,42 +248,33 @@ SHAMapInnerNode::serializeWithPrefix(Serializer& s) const
     assert(!isEmpty());
     s.add32(HashPrefix::innerNode);
-    for (auto const& hh : mHashes)
-        s.addBitString(hh.as_uint256());
+    iterChildren(
+        [&](SHAMapHash const& hh) { s.addBitString(hh.as_uint256()); });
 }
 
 bool
 SHAMapInnerNode::isEmpty() const
 {
-    return mIsBranch == 0;
+    return isBranch_ == 0;
 }
 
 int
 SHAMapInnerNode::getBranchCount() const
 {
-    int count = 0;
-
-    for (int i = 0; i < 16; ++i)
-        if (!isEmptyBranch(i))
-            ++count;
-
-    return count;
+    return popcnt16(isBranch_);
 }
 
 std::string
 SHAMapInnerNode::getString(const SHAMapNodeID& id) const
 {
     std::string ret = SHAMapTreeNode::getString(id);
-    for (int i = 0; i < mHashes.size(); ++i)
-    {
-        if (!isEmptyBranch(i))
-        {
-            ret += "\n";
-            ret += std::to_string(i);
-            ret += " = ";
-            ret += to_string(mHashes[i]);
-        }
-    }
+    auto hashes = hashesAndChildren_.getHashes();
+    iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
+        ret += "\nb";
+        ret += std::to_string(branchNum);
+        ret += " = ";
+        ret += to_string(hashes[indexNum]);
+    });
     return ret;
 }
@@ -219,46 +282,79 @@ SHAMapInnerNode::getString(const SHAMapNodeID& id) const
 void
 SHAMapInnerNode::setChild(int m, std::shared_ptr<SHAMapTreeNode> const& child)
 {
-    assert((m >= 0) && (m < 16));
+    assert((m >= 0) && (m < branchFactor));
     assert(cowid_ != 0);
     assert(child.get() != this);
-    mHashes[m].zero();
-    hash_.zero();
+
+    auto const dstIsBranch = [&] {
+        if (child)
+            return isBranch_ | (1 << m);
+        else
+            return isBranch_ & ~(1 << m);
+    }();
+
+    auto const dstToAllocate = popcnt16(dstIsBranch);
+    // change hashesAndChildren to remove the element, or make room for the
+    // added element, if necessary
+    hashesAndChildren_ = TaggedPointer(
+        std::move(hashesAndChildren_), isBranch_, dstIsBranch, dstToAllocate);
+
+    isBranch_ = dstIsBranch;
+
     if (child)
-        mIsBranch |= (1 << m);
-    else
-        mIsBranch &= ~(1 << m);
-    mChildren[m] = child;
+    {
+        auto const childIndex = *getChildIndex(m);
+        auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
+        hashes[childIndex].zero();
+        children[childIndex] = child;
+    }
+
+    hash_.zero();
+
+    assert(getBranchCount() <= hashesAndChildren_.capacity());
 }
 
 // finished modifying, now make shareable
 void
 SHAMapInnerNode::shareChild(int m, std::shared_ptr<SHAMapTreeNode> const& child)
 {
-    assert((m >= 0) && (m < 16));
+    assert((m >= 0) && (m < branchFactor));
     assert(cowid_ != 0);
     assert(child);
     assert(child.get() != this);
+    assert(!isEmptyBranch(m));
 
-    mChildren[m] = child;
+    hashesAndChildren_.getChildren()[*getChildIndex(m)] = child;
 }
 
 SHAMapTreeNode*
 SHAMapInnerNode::getChildPointer(int branch)
 {
-    assert(branch >= 0 && branch < 16);
+    assert(branch >= 0 && branch < branchFactor);
+    assert(!isEmptyBranch(branch));
 
     std::lock_guard lock(childLock);
-    return mChildren[branch].get();
+    return hashesAndChildren_.getChildren()[*getChildIndex(branch)].get();
 }
 
 std::shared_ptr<SHAMapTreeNode>
 SHAMapInnerNode::getChild(int branch)
 {
-    assert(branch >= 0 && branch < 16);
+    assert(branch >= 0 && branch < branchFactor);
+    assert(!isEmptyBranch(branch));
 
     std::lock_guard lock(childLock);
-    return mChildren[branch];
+    return hashesAndChildren_.getChildren()[*getChildIndex(branch)];
+}
+
+SHAMapHash const&
+SHAMapInnerNode::getChildHash(int m) const
+{
+    assert((m >= 0) && (m < branchFactor));
+    if (auto const i = getChildIndex(m))
+        return hashesAndChildren_.getHashes()[*i];
+
+    return zeroSHAMapHash;
 }
 
 std::shared_ptr<SHAMapTreeNode>
@@ -266,20 +362,23 @@ SHAMapInnerNode::canonicalizeChild(
     int branch,
     std::shared_ptr<SHAMapTreeNode> node)
 {
-    assert(branch >= 0 && branch < 16);
+    assert(branch >= 0 && branch < branchFactor);
     assert(node);
-    assert(node->getHash() == mHashes[branch]);
+    assert(!isEmptyBranch(branch));
+    auto const childIndex = *getChildIndex(branch);
+    auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
+    assert(node->getHash() == hashes[childIndex]);
     std::lock_guard lock(childLock);
-    if (mChildren[branch])
+    if (children[childIndex])
     {
         // There is already a node hooked up, return it
-        node = mChildren[branch];
+        node = children[childIndex];
     }
     else
     {
         // Hook this node up
-        mChildren[branch] = node;
+        children[childIndex] = node;
     }
     return node;
 }
@@ -288,20 +387,38 @@ void
 SHAMapInnerNode::invariants(bool is_root) const
 {
     unsigned count = 0;
-    for (int i = 0; i < 16; ++i)
+    auto [numAllocated, hashes, children] =
+        hashesAndChildren_.getHashesAndChildren();
+
+    if (numAllocated != branchFactor)
     {
-        if (mHashes[i].isNonZero())
+        auto const branchCount = getBranchCount();
+        for (int i = 0; i < branchCount; ++i)
         {
-            assert((mIsBranch & (1 << i)) != 0);
-            if (mChildren[i] != nullptr)
-                mChildren[i]->invariants();
+            assert(hashes[i].isNonZero());
+            if (children[i] != nullptr)
+                children[i]->invariants();
             ++count;
         }
-        else
+    }
+    else
+    {
+        for (int i = 0; i < branchFactor; ++i)
         {
-            assert((mIsBranch & (1 << i)) == 0);
+            if (hashes[i].isNonZero())
+            {
+                assert((isBranch_ & (1 << i)) != 0);
+                if (children[i] != nullptr)
+                    children[i]->invariants();
+                ++count;
+            }
+            else
+            {
+                assert((isBranch_ & (1 << i)) == 0);
+            }
         }
     }
+
     if (!is_root)
     {
         assert(hash_.isNonZero());
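
The remaining files introduce the new `TaggedPointer` class, which packs a two-bit tag into the low bits of an aligned pointer. A minimal sketch of that trick in isolation — `Node`, `encode`, `pointerOf`, and `tagOf` are hypothetical names, and the only assumption is that the pointee's alignment keeps the low two address bits clear:

```cpp
#include <cassert>
#include <cstdint>

struct alignas(4) Node  // 4-byte alignment frees the low two address bits
{
    int v;
};

std::uintptr_t
encode(Node* p, std::uintptr_t tag)
{
    assert(tag < 4);
    auto const bits = reinterpret_cast<std::uintptr_t>(p);
    assert((bits & 3) == 0);  // guaranteed by alignof(Node) >= 4
    return bits | tag;        // tag rides in the otherwise-zero bits
}

Node*
pointerOf(std::uintptr_t tp)
{
    return reinterpret_cast<Node*>(tp & ~std::uintptr_t{3});  // mask tag off
}

std::uintptr_t
tagOf(std::uintptr_t tp)
{
    return tp & 3;  // keep only the low two bits
}

int
main()
{
    Node n{42};
    auto const tp = encode(&n, 2);
    assert(pointerOf(tp) == &n);
    assert(tagOf(tp) == 2);
    return pointerOf(tp)->v == 42 ? 0 : 1;
}
```

`TaggedPointer` uses the same masks (`tagMask`/`ptrMask`) and enforces the alignment assumption with a `static_assert` on `SHAMapHash`.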
diff --git a/src/ripple/shamap/impl/TaggedPointer.h b/src/ripple/shamap/impl/TaggedPointer.h
new file mode 100644
index 000000000..02ad087ca
--- /dev/null
+++ b/src/ripple/shamap/impl/TaggedPointer.h
@@ -0,0 +1,227 @@
+//------------------------------------------------------------------------------
+/*
+    This file is part of rippled: https://github.com/ripple/rippled
+    Copyright (c) 2020 Ripple Labs Inc.
+
+    Permission to use, copy, modify, and/or distribute this software for any
+    purpose with or without fee is hereby granted, provided that the above
+    copyright notice and this permission notice appear in all copies.
+
+    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+//==============================================================================
+
+#ifndef RIPPLE_SHAMAP_TAGGEDPOINTER_H_INCLUDED
+#define RIPPLE_SHAMAP_TAGGEDPOINTER_H_INCLUDED
+
+#include <ripple/shamap/SHAMapTreeNode.h>
+
+#include <cstdint>
+#include <optional>
+
+namespace ripple {
+
+/** TaggedPointer is a combination of a pointer and a mask stored in the
+    lowest two bits.
+
+    Since pointers do not have arbitrary alignment, the lowest bits in the
+    pointer are guaranteed to be zero. TaggedPointer stores information in
+    these low bits. When dereferencing the pointer, these low "tag" bits are
+    set to zero. When accessing the tag bits, the high "pointer" bits are set
+    to zero.
+
+    The "pointer" part points to the equivalent of an array of
+    `SHAMapHash` followed immediately by an array of
+    `std::shared_ptr<SHAMapTreeNode>`. The sizes of these arrays are
+    determined by the tag. The tag is an index into an array (`boundaries`,
+    defined in the cpp file) that specifies the size. Both arrays are the
+    same size. Note that the sizes may be smaller than the full 16 elements
+    needed to explicitly store all the children. In this case, the arrays
+    only store the non-empty children. The non-empty children are stored in
+    index order. For example, if only children `2` and `14` are non-empty, a
+    two-element array would store child `2` in array index 0 and child `14`
+    in array index 1. There are functions to convert between a child's tree
+    index and the child's index in a sparse array.
+
+    The motivation for this class is saving RAM. A large percentage of inner
+    nodes only store a small number of children. Memory can be saved by
+    storing the inner node's children in sparse arrays. Measurements show
+    that on average a typical SHAMap's inner nodes can be stored using only
+    25% of the original space.
+*/
+class TaggedPointer
+{
+    static_assert(
+        alignof(SHAMapHash) >= 4,
+        "Bad alignment: Tag pointer requires low two bits to be zero.");
+
+    /** Upper bits are the pointer, lowest two bits are the tag
+        A moved-from object will have a tp_ of zero.
+    */
+    std::uintptr_t tp_ = 0;
+
+    /** bit-and with this mask to get the tag bits (lowest two bits) */
+    static constexpr std::uintptr_t tagMask = 3;
+
+    /** bit-and with this mask to get the pointer bits (mask out the tag) */
+    static constexpr std::uintptr_t ptrMask = ~tagMask;
+
+    /** Deallocate memory and run destructors */
+    void
+    destroyHashesAndChildren();
+
+    struct RawAllocateTag
+    {
+    };
+
+    /** This constructor allocates space for the hashes and children, but
+        does not run constructors.
+
+        @param RawAllocateTag used to select overload only
+
+        @param numChildren allocate space for at least this number of
+        children (must be <= branchFactor)
+
+        @note Since the hashes/children destructors are always run in the
+        TaggedPointer destructor, this means those constructors _must_ be run
+        after this constructor is run. This constructor is private and only
+        used in places where the hashes/children constructors are
+        subsequently run.
+    */
+    explicit TaggedPointer(RawAllocateTag, std::uint8_t numChildren);
+
+public:
+    TaggedPointer() = delete;
+
+    explicit TaggedPointer(std::uint8_t numChildren);
+
+    /** Constructor used to change the number of allocated children.
+
+        Existing children from `other` are copied (toAllocate must be >= the
+        number of children). The motivation for making this a constructor is
+        it saves unneeded copying and zeroing out of hashes if this were
+        implemented directly in the SHAMapInnerNode class.
+
+        @param other children and hashes are moved from this param
+
+        @param isBranch bitset of non-empty children in `other`
+
+        @param toAllocate allocate space for at least this number of children
+        (must be <= branchFactor)
+    */
+    explicit TaggedPointer(
+        TaggedPointer&& other,
+        std::uint16_t isBranch,
+        std::uint8_t toAllocate);
+
+    /** Given `other` with the specified children in `srcBranches`, create a
+        new TaggedPointer with the allocated number of children and the
+        children specified in `dstBranches`.
+
+        @param other children and hashes are moved from this param
+
+        @param srcBranches bitset of non-empty children in `other`
+
+        @param dstBranches bitset of children to copy from `other` (or space
+        to leave in a sparse array - see note below)
+
+        @param toAllocate allocate space for at least this number of children
+        (must be <= branchFactor)
+
+        @note a child may be absent in srcBranches but present in dstBranches
+        (if dst has a sparse representation, space for the new child will be
+        left in the sparse array). Typically, srcBranches and dstBranches
+        will differ by at most one bit. The function works correctly if they
+        differ by more, but there are likely more efficient algorithms to
+        consider if this becomes a common use-case.
+    */
+    explicit TaggedPointer(
+        TaggedPointer&& other,
+        std::uint16_t srcBranches,
+        std::uint16_t dstBranches,
+        std::uint8_t toAllocate);
+
+    TaggedPointer(TaggedPointer const&) = delete;
+
+    TaggedPointer(TaggedPointer&&);
+
+    TaggedPointer&
+    operator=(TaggedPointer&&);
+
+    ~TaggedPointer();
+
+    /** Decode the tagged pointer into its tag and pointer */
+    [[nodiscard]] std::pair<std::uint8_t, void*>
+    decode() const;
+
+    /** Get the number of elements allocated for each array */
+    [[nodiscard]] std::uint8_t
+    capacity() const;
+
+    /** Check if the arrays have a dense format.
+
+        @note The dense format is when there is an array element for all 16
+        (branchFactor) possible children.
+    */
+    [[nodiscard]] bool
+    isDense() const;
+
+    /** Get the number of elements in each array and a pointer to the start
+        of each array.
+    */
+    [[nodiscard]] std::
+        tuple<std::uint8_t, SHAMapHash*, std::shared_ptr<SHAMapTreeNode>*>
+        getHashesAndChildren() const;
+
+    /** Get the `hashes` array */
+    [[nodiscard]] SHAMapHash*
+    getHashes() const;
+
+    /** Get the `children` array */
+    [[nodiscard]] std::shared_ptr<SHAMapTreeNode>*
+    getChildren() const;
+
+    /** Call the `f` callback for all 16 (branchFactor) branches - even if
+        the branch is empty.
+
+        @param isBranch bitset of non-empty children
+
+        @param f a one parameter callback function. The parameter is the
+        child's hash.
+    */
+    template <class F>
+    void
+    iterChildren(std::uint16_t isBranch, F&& f) const;
+
+    /** Call the `f` callback for all non-empty branches.
+
+        @param isBranch bitset of non-empty children
+
+        @param f a two parameter callback function. The first parameter is
+        the branch number, the second parameter is the index into the array.
+        For dense formats these are the same, for sparse they may be
+        different.
+    */
+    template <class F>
+    void
+    iterNonEmptyChildIndexes(std::uint16_t isBranch, F&& f) const;
+
+    /** Get the child's index inside the `hashes` or `children` array (which
+        may or may not be sparse). The optional will be empty if an empty
+        branch is requested and the children are sparse.
+
+        @param isBranch bitset of non-empty children
+
+        @param i index of the requested child
+    */
+    std::optional<int>
+    getChildIndex(std::uint16_t isBranch, int i) const;
+};
+
+inline TaggedPointer::~TaggedPointer()
+{
+    destroyHashesAndChildren();
+}
+
+}  // namespace ripple
+
+#endif
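
The implementation file that follows defines four size classes, `boundaries = {2, 4, 6, 16}`, rounds every allocation request up to the first class that fits with `std::lower_bound`, and reuses the class's position as the two-bit tag. A small sketch of that rounding under the same boundary values:

```cpp
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

constexpr std::array<std::uint8_t, 4> boundaries{2, 4, 6, 16};

// First boundary >= n; mirrors numAllocatedChildren() below.
std::uint8_t
roundUp(std::uint8_t n)
{
    return *std::lower_bound(boundaries.begin(), boundaries.end(), n);
}

int
main()
{
    assert(roundUp(0) == 2);
    assert(roundUp(3) == 4);
    assert(roundUp(5) == 6);
    assert(roundUp(7) == 16);   // anything above 6 gets the dense layout
    assert(roundUp(16) == 16);
}
```

This is why `resizeChildArrays` may allocate more slots than requested: a 3-child node gets the 4-slot class, and only growth or shrinkage past a boundary triggers a real reallocation.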
diff --git a/src/ripple/shamap/impl/TaggedPointer.ipp b/src/ripple/shamap/impl/TaggedPointer.ipp
new file mode 100644
index 000000000..93403932d
--- /dev/null
+++ b/src/ripple/shamap/impl/TaggedPointer.ipp
@@ -0,0 +1,639 @@
+//------------------------------------------------------------------------------
+/*
+    This file is part of rippled: https://github.com/ripple/rippled
+    Copyright (c) 2020 Ripple Labs Inc.
+
+    Permission to use, copy, modify, and/or distribute this software for any
+    purpose with or without fee is hereby granted, provided that the above
+    copyright notice and this permission notice appear in all copies.
+
+    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+//==============================================================================
+
+#include <ripple/shamap/SHAMapInnerNode.h>
+
+#include <ripple/basics/ByteUtilities.h>
+
+#include <algorithm>
+
+// #define FORCE_BOOST_POOL 1
+#if FORCE_BOOST_POOL || !__has_include(<memory_resource>)
+#define USE_BOOST_POOL 1
+#else
+#define USE_BOOST_POOL 0
+#endif
+
+#if USE_BOOST_POOL
+#include <boost/pool/pool_alloc.hpp>
+#else
+#include <memory_resource>
+#endif
+
+namespace ripple {
+
+namespace {
+// Sparse array size boundaries.
+// Given n children, an array of size `*std::lower_bound(boundaries.begin(),
+// boundaries.end(), n);` is used to store the children. Note that the last
+// element must be the number of children in a dense array.
+constexpr std::array<std::uint8_t, 4> boundaries{
+    2,
+    4,
+    6,
+    SHAMapInnerNode::branchFactor};
+static_assert(
+    boundaries.size() <= 4,
+    "The hashesAndChildren member uses a tagged array format with two bits "
+    "reserved for the tag. This supports at most 4 values.");
+static_assert(
+    boundaries.back() == SHAMapInnerNode::branchFactor,
+    "Last element of boundaries must be number of children in a dense array");
+
+// Terminology: A chunk is the memory being allocated from a block. A block
+// contains multiple chunks. This is the terminology the boost documentation
+// uses. Pools use "Simple Segregated Storage" as their storage format.
+constexpr size_t elementSizeBytes =
+    (sizeof(SHAMapHash) + sizeof(std::shared_ptr<SHAMapTreeNode>));
+
+constexpr size_t blockSizeBytes = kilobytes(512);
+
+template <std::size_t... I>
+constexpr std::array<size_t, boundaries.size()> initArrayChunkSizeBytes(
+    std::index_sequence<I...>)
+{
+    return std::array<size_t, boundaries.size()>{
+        boundaries[I] * elementSizeBytes...,
+    };
+}
+constexpr auto arrayChunkSizeBytes =
+    initArrayChunkSizeBytes(std::make_index_sequence<boundaries.size()>{});
+
+template <std::size_t... I>
+constexpr std::array<size_t, boundaries.size()> initArrayChunksPerBlock(
+    std::index_sequence<I...>)
+{
+    return std::array<size_t, boundaries.size()>{
+        blockSizeBytes / arrayChunkSizeBytes[I]...,
+    };
+}
+constexpr auto chunksPerBlock =
+    initArrayChunksPerBlock(std::make_index_sequence<boundaries.size()>{});
+
+[[nodiscard]] inline std::uint8_t
+numAllocatedChildren(std::uint8_t n)
+{
+    assert(n <= SHAMapInnerNode::branchFactor);
+    return *std::lower_bound(boundaries.begin(), boundaries.end(), n);
+}
+
+[[nodiscard]] inline std::size_t
+boundariesIndex(std::uint8_t numChildren)
+{
+    assert(numChildren <= SHAMapInnerNode::branchFactor);
+    return std::distance(
+        boundaries.begin(),
+        std::lower_bound(boundaries.begin(), boundaries.end(), numChildren));
+}
+
+#if USE_BOOST_POOL
+
+template <std::size_t... I>
+std::array<void* (*)(), boundaries.size()> initAllocateArrayFuns(
+    std::index_sequence<I...>)
+{
+    return std::array<void* (*)(), boundaries.size()>{
+        boost::singleton_pool<
+            boost::fast_pool_allocator_tag,
+            arrayChunkSizeBytes[I],
+            boost::default_user_allocator_new_delete,
+            std::mutex,
+            chunksPerBlock[I],
+            chunksPerBlock[I]>::malloc...,
+    };
+}
+std::array<void* (*)(), boundaries.size()> const allocateArrayFuns =
+    initAllocateArrayFuns(std::make_index_sequence<boundaries.size()>{});
+
+template <std::size_t... I>
+std::array<void (*)(void*), boundaries.size()> initFreeArrayFuns(
+    std::index_sequence<I...>)
+{
+    return std::array<void (*)(void*), boundaries.size()>{
+        static_cast<void (*)(void*)>(boost::singleton_pool<
+                                     boost::fast_pool_allocator_tag,
+                                     arrayChunkSizeBytes[I],
+                                     boost::default_user_allocator_new_delete,
+                                     std::mutex,
+                                     chunksPerBlock[I],
+                                     chunksPerBlock[I]>::free)...,
+    };
+}
+std::array<void (*)(void*), boundaries.size()> const freeArrayFuns =
+    initFreeArrayFuns(std::make_index_sequence<boundaries.size()>{});
+
+template <std::size_t... I>
+std::array<bool (*)(void*), boundaries.size()> initIsFromArrayFuns(
+    std::index_sequence<I...>)
+{
+    return std::array<bool (*)(void*), boundaries.size()>{
+        boost::singleton_pool<
+            boost::fast_pool_allocator_tag,
+            arrayChunkSizeBytes[I],
+            boost::default_user_allocator_new_delete,
+            std::mutex,
+            chunksPerBlock[I],
+            chunksPerBlock[I]>::is_from...,
+    };
+}
+std::array<bool (*)(void*), boundaries.size()> const isFromArrayFuns =
+    initIsFromArrayFuns(std::make_index_sequence<boundaries.size()>{});
+
+// This function returns an untagged pointer
+[[nodiscard]] inline std::pair<std::uint8_t, void*>
+allocateArrays(std::uint8_t numChildren)
+{
+    auto const i = boundariesIndex(numChildren);
+    return {i, allocateArrayFuns[i]()};
+}
+
+// This function takes an untagged pointer
+inline void
+deallocateArrays(std::uint8_t boundaryIndex, void* p)
+{
+    assert(isFromArrayFuns[boundaryIndex](p));
+    freeArrayFuns[boundaryIndex](p);
+}
+
+#else
+
+template <std::size_t... I>
+std::array<std::pmr::synchronized_pool_resource, boundaries.size()>
+initPmrArrayFuns(std::index_sequence<I...>)
+{
+    return std::array<std::pmr::synchronized_pool_resource, boundaries.size()>{
+        std::pmr::synchronized_pool_resource{std::pmr::pool_options{
+            /* max_blocks_per_chunk */ chunksPerBlock[I],
+            /* largest_required_pool_block */ chunksPerBlock[I]}}...,
+    };
+}
+std::array<std::pmr::synchronized_pool_resource, boundaries.size()>
+    pmrArrayFuns =
+        initPmrArrayFuns(std::make_index_sequence<boundaries.size()>{});
+
+// This function returns an untagged pointer
+[[nodiscard]] inline std::pair<std::uint8_t, void*>
+allocateArrays(std::uint8_t numChildren)
+{
+    auto const i = boundariesIndex(numChildren);
+    return {i, pmrArrayFuns[i].allocate(arrayChunkSizeBytes[i])};
+}
+// This function takes an untagged pointer
+inline void
+deallocateArrays(std::uint8_t boundaryIndex, void* p)
+{
+    return pmrArrayFuns[boundaryIndex].deallocate(
+        p, arrayChunkSizeBytes[boundaryIndex]);
+}
+#endif
+
+[[nodiscard]] inline int
+popcnt16(std::uint16_t a)
+{
+#if defined(__clang__) || defined(__GNUC__)
+    return __builtin_popcount(a);
+#else
+    // fallback to table lookup
+    static auto constexpr tbl = []() {
+        std::array<std::uint8_t, 256> ret{};
+        for (int i = 0; i != 256; ++i)
+        {
+            for (int j = 0; j != 8; ++j)
+            {
+                if (i & (1 << j))
+                    ret[i]++;
+            }
+        }
+        return ret;
+    }();
+    return tbl[a & 0xff] + tbl[a >> 8];
+#endif
+}
+
+// Used in `iterChildren` and elsewhere as the hash value for sparse arrays
+// when the hash isn't actually stored in the array.
+static SHAMapHash const zeroSHAMapHash;
+
+}  // namespace
+
+template <class F>
+void
+TaggedPointer::iterChildren(std::uint16_t isBranch, F&& f) const
+{
+    auto [numAllocated, hashes, _] = getHashesAndChildren();
+    if (numAllocated == SHAMapInnerNode::branchFactor)
+    {
+        // dense case
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+            f(hashes[i]);
+    }
+    else
+    {
+        // sparse case
+        int curHashI = 0;
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            if ((1 << i) & isBranch)
+            {
+                f(hashes[curHashI++]);
+            }
+            else
+            {
+                f(zeroSHAMapHash);
+            }
+        }
+    }
+}
+
+template <class F>
+void
+TaggedPointer::iterNonEmptyChildIndexes(std::uint16_t isBranch, F&& f) const
+{
+    if (capacity() == SHAMapInnerNode::branchFactor)
+    {
+        // dense case
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            if ((1 << i) & isBranch)
+            {
+                f(i, i);
+            }
+        }
+    }
+    else
+    {
+        // sparse case
+        int curHashI = 0;
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            if ((1 << i) & isBranch)
+            {
+                f(i, curHashI++);
+            }
+        }
+    }
+}
+
+inline void
+TaggedPointer::destroyHashesAndChildren()
+{
+    if (!tp_)
+        return;
+
+    auto [numAllocated, hashes, children] = getHashesAndChildren();
+    for (std::size_t i = 0; i < numAllocated; ++i)
+    {
+        hashes[i].~SHAMapHash();
+        children[i].~shared_ptr<SHAMapTreeNode>();
+    }
+
+    auto [tag, ptr] = decode();
+    deallocateArrays(tag, ptr);
+}
+
+inline std::optional<int>
+TaggedPointer::getChildIndex(std::uint16_t isBranch, int i) const
+{
+    if (isDense())
+        return i;
+
+    // Sparse case
+    if ((isBranch & (1 << i)) == 0)
+    {
+        // Empty branch. Sparse children do not store empty branches
+        return {};
+    }
+
+    // Sparse children are stored sorted. This means the index
+    // of a child in the array is the number of non-empty children
+    // before it. Since `isBranch_` is a bitset of the stored
+    // children, we simply need to mask out (and set to zero) all
+    // the bits in `isBranch_` equal to or higher than `i` and count
+    // the bits.
+
+    // mask sets all the bits >=i to zero and all the bits <i to one.
+    auto const mask = (1 << i) - 1;
+    return popcnt16(isBranch & mask);
+}
+
+inline TaggedPointer::TaggedPointer(RawAllocateTag, std::uint8_t numChildren)
+{
+    numChildren = numAllocatedChildren(numChildren);
+    auto [tag, p] = allocateArrays(numChildren);
+    assert(tag < boundaries.size());
+    assert(
+        (reinterpret_cast<std::uintptr_t>(p) & ptrMask) ==
+        reinterpret_cast<std::uintptr_t>(p));
+    tp_ = reinterpret_cast<std::uintptr_t>(p) + tag;
+}
+
+inline TaggedPointer::TaggedPointer(
+    TaggedPointer&& other,
+    std::uint16_t srcBranches,
+    std::uint16_t dstBranches,
+    std::uint8_t toAllocate)
+{
+    assert(toAllocate >= popcnt16(dstBranches));
+
+    if (other.capacity() == numAllocatedChildren(toAllocate))
+    {
+        // in place
+        *this = std::move(other);
+        auto [srcDstNumAllocated, srcDstHashes, srcDstChildren] =
+            getHashesAndChildren();
+        bool const srcDstIsDense = isDense();
+        int srcDstIndex = 0;
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            auto const mask = (1 << i);
+            bool const inSrc = (srcBranches & mask);
+            bool const inDst = (dstBranches & mask);
+            if (inSrc && inDst)
+            {
+                // keep
+                ++srcDstIndex;
+            }
+            else if (inSrc && !inDst)
+            {
+                // remove
+                if (srcDstIsDense)
+                {
+                    srcDstHashes[srcDstIndex].zero();
+                    srcDstChildren[srcDstIndex].reset();
+                    ++srcDstIndex;
+                }
+                else
+                {
+                    // sparse
+                    // need to shift all the elements to the left by
+                    // one
+                    for (int c = srcDstIndex; c < srcDstNumAllocated - 1; ++c)
+                    {
+                        srcDstHashes[c] = srcDstHashes[c + 1];
+                        srcDstChildren[c] = std::move(srcDstChildren[c + 1]);
+                    }
+                    srcDstHashes[srcDstNumAllocated - 1].zero();
+                    srcDstChildren[srcDstNumAllocated - 1].reset();
+                    // do not increment the index
+                }
+            }
+            else if (!inSrc && inDst)
+            {
+                // add
+                if (srcDstIsDense)
+                {
+                    // nothing to do, child is already present in the dense rep
+                    ++srcDstIndex;
+                }
+                else
+                {
+                    // sparse
+                    // need to create a hole by shifting all the elements to the
+                    // right by one
+                    for (int c = srcDstNumAllocated - 1; c > srcDstIndex; --c)
+                    {
+                        srcDstHashes[c] = srcDstHashes[c - 1];
+                        srcDstChildren[c] = std::move(srcDstChildren[c - 1]);
+                    }
+                    srcDstHashes[srcDstIndex].zero();
+                    srcDstChildren[srcDstIndex].reset();
+                    ++srcDstIndex;
+                }
+            }
+            else if (!inSrc && !inDst)
+            {
+                // in neither
+                if (srcDstIsDense)
+                {
+                    ++srcDstIndex;
+                }
+            }
+        }
+    }
+    else
+    {
+        // not in place
+        TaggedPointer dst{RawAllocateTag{}, toAllocate};
+        auto [dstNumAllocated, dstHashes, dstChildren] =
+            dst.getHashesAndChildren();
+        // Move `other` into a local var so it's not in a partially moved from
+        // state after this function runs
+        TaggedPointer src(std::move(other));
+        auto [srcNumAllocated, srcHashes, srcChildren] =
+            src.getHashesAndChildren();
+        bool const srcIsDense = src.isDense();
+        bool const dstIsDense = dst.isDense();
+        int srcIndex = 0, dstIndex = 0;
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            auto const mask = (1 << i);
+            bool const inSrc = (srcBranches & mask);
+            bool const inDst = (dstBranches & mask);
+            if (inSrc && inDst)
+            {
+                // keep
+                new (&dstHashes[dstIndex]) SHAMapHash{srcHashes[srcIndex]};
+                new (&dstChildren[dstIndex]) std::shared_ptr<SHAMapTreeNode>{
+                    std::move(srcChildren[srcIndex])};
+                ++dstIndex;
+                ++srcIndex;
+            }
+            else if (inSrc && !inDst)
+            {
+                // remove
+                ++srcIndex;
+                if (dstIsDense)
+                {
+                    new (&dstHashes[dstIndex]) SHAMapHash{};
+                    new (&dstChildren[dstIndex])
+                        std::shared_ptr<SHAMapTreeNode>{};
+                    ++dstIndex;
+                }
+            }
+            else if (!inSrc && inDst)
+            {
+                // add
+                new (&dstHashes[dstIndex]) SHAMapHash{};
+                new (&dstChildren[dstIndex]) std::shared_ptr<SHAMapTreeNode>{};
+                ++dstIndex;
+                if (srcIsDense)
+                {
+                    ++srcIndex;
+                }
+            }
+            else if (!inSrc && !inDst)
+            {
+                // in neither
+                if (dstIsDense)
+                {
+                    new (&dstHashes[dstIndex]) SHAMapHash{};
+                    new (&dstChildren[dstIndex])
+                        std::shared_ptr<SHAMapTreeNode>{};
+                    ++dstIndex;
+                }
+                if (srcIsDense)
+                {
+                    ++srcIndex;
+                }
+            }
+        }
+        // If sparse, may need to run additional constructors
+        assert(!dstIsDense || dstIndex == dstNumAllocated);
+        for (int i = dstIndex; i < dstNumAllocated; ++i)
+        {
+            new (&dstHashes[i]) SHAMapHash{};
+            new (&dstChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
+        }
+        *this = std::move(dst);
+    }
+}
+
+inline TaggedPointer::TaggedPointer(
+    TaggedPointer&& other,
+    std::uint16_t isBranch,
+    std::uint8_t toAllocate)
+    : TaggedPointer(std::move(other))
+{
+    auto const oldNumAllocated = capacity();
+    toAllocate = numAllocatedChildren(toAllocate);
+    if (toAllocate == oldNumAllocated)
+        return;
+
+    // allocate hashes and children, but do not run constructors
+    TaggedPointer newHashesAndChildren{RawAllocateTag{}, toAllocate};
+    SHAMapHash *newHashes, *oldHashes;
+    std::shared_ptr<SHAMapTreeNode> *newChildren, *oldChildren;
+    std::uint8_t newNumAllocated;
+    // structured bindings can't be captured in c++ 17; use tie instead
+    std::tie(newNumAllocated, newHashes, newChildren) =
+        newHashesAndChildren.getHashesAndChildren();
+    std::tie(std::ignore, oldHashes, oldChildren) = getHashesAndChildren();
+
+    if (newNumAllocated == SHAMapInnerNode::branchFactor)
+    {
+        // new arrays are dense, old arrays are sparse
+        iterNonEmptyChildIndexes(isBranch, [&](auto branchNum, auto indexNum) {
+            new (&newHashes[branchNum]) SHAMapHash{oldHashes[indexNum]};
+            new (&newChildren[branchNum]) std::shared_ptr<SHAMapTreeNode>{
+                std::move(oldChildren[indexNum])};
+        });
+        // Run the constructors for the remaining elements
+        for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
+        {
+            if ((1 << i) & isBranch)
+                continue;
+            new (&newHashes[i]) SHAMapHash{};
+            new (&newChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
+        }
+    }
+    else
+    {
+        // new arrays are sparse, old arrays may be sparse or dense
+        int curCompressedIndex = 0;
+        iterNonEmptyChildIndexes(isBranch, [&](auto branchNum, auto indexNum) {
+            new (&newHashes[curCompressedIndex])
+                SHAMapHash{oldHashes[indexNum]};
+            new (&newChildren[curCompressedIndex])
+                std::shared_ptr<SHAMapTreeNode>{
+                    std::move(oldChildren[indexNum])};
+            ++curCompressedIndex;
+        });
+        // Run the constructors for the remaining elements
+        for (int i = curCompressedIndex; i < newNumAllocated; ++i)
+        {
+            new (&newHashes[i]) SHAMapHash{};
+            new (&newChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
+        }
+    }
+
+    *this = std::move(newHashesAndChildren);
+}
+
+inline TaggedPointer::TaggedPointer(std::uint8_t numChildren)
+    : TaggedPointer(TaggedPointer::RawAllocateTag{}, numChildren)
+{
+    auto [numAllocated, hashes, children] = getHashesAndChildren();
+    for (std::size_t i = 0; i < numAllocated; ++i)
+    {
+        new (&hashes[i]) SHAMapHash{};
+        new (&children[i]) std::shared_ptr<SHAMapTreeNode>{};
+    }
+}
+
+inline TaggedPointer::TaggedPointer(TaggedPointer&& other) : tp_{other.tp_}
+{
+    other.tp_ = 0;
+}
+
+inline TaggedPointer&
+TaggedPointer::operator=(TaggedPointer&& other)
+{
+    if (this == &other)
+        return *this;
+    destroyHashesAndChildren();
+    tp_ = other.tp_;
+    other.tp_ = 0;
+    return *this;
+}
+
+[[nodiscard]] inline std::pair<std::uint8_t, void*>
+TaggedPointer::decode() const
+{
+    return {tp_ & tagMask, reinterpret_cast<void*>(tp_ & ptrMask)};
+}
+
+[[nodiscard]] inline std::uint8_t
+TaggedPointer::capacity() const
+{
+    return boundaries[tp_ & tagMask];
+}
+
+[[nodiscard]] inline bool
+TaggedPointer::isDense() const
+{
+    return (tp_ & tagMask) == boundaries.size() - 1;
+}
+
+[[nodiscard]] inline std::
+    tuple<std::uint8_t, SHAMapHash*, std::shared_ptr<SHAMapTreeNode>*>
+    TaggedPointer::getHashesAndChildren() const
+{
+    auto const [tag, ptr] = decode();
+    auto const hashes = reinterpret_cast<SHAMapHash*>(ptr);
+    std::uint8_t numAllocated = boundaries[tag];
+    auto const children = reinterpret_cast<std::shared_ptr<SHAMapTreeNode>*>(
+        hashes + numAllocated);
+    return {numAllocated, hashes, children};
+}
+
+[[nodiscard]] inline SHAMapHash*
+TaggedPointer::getHashes() const
+{
+    return reinterpret_cast<SHAMapHash*>(tp_ & ptrMask);
+}
+
+[[nodiscard]] inline std::shared_ptr<SHAMapTreeNode>*
+TaggedPointer::getChildren() const
+{
+    auto [unused1, unused2, result] = getHashesAndChildren();
+    return result;
+}
+
+}  // namespace ripple
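
As a closing sanity check on the ~25% figure quoted in the `TaggedPointer` class comment — a sketch assuming a typical 64-bit ABI where `SHAMapHash` is 32 bytes and a `std::shared_ptr` is 16 bytes, so each child slot costs 48 bytes:

```cpp
#include <cstdio>

int
main()
{
    constexpr unsigned elementSize = 32 + 16;     // hash + shared_ptr (assumed)
    constexpr unsigned boundaries[] = {2, 4, 6, 16};
    constexpr unsigned dense = 16 * elementSize;  // 768 bytes
    for (unsigned b : boundaries)
        std::printf(
            "%2u slots: %3u bytes (%.1f%% of dense)\n",
            b,
            b * elementSize,
            100.0 * (b * elementSize) / dense);
}
```

The 2-, 4-, and 6-slot classes come to 96, 192, and 288 bytes — 12.5%, 25%, and 37.5% of the 768-byte dense layout — so a map whose inner nodes cluster in the small classes lands near the quoted average.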