Store InnerNode children in sparse arrays:

A large percentage of inner nodes only store a small number of children. Memory
can be saved by storing the inner node's children in sparse arrays. Measurements
show that on average a typical SHAMap's inner nodes can be stored using only 25%
of the original space.
seelabs
2020-11-12 14:46:16 -05:00
committed by Nik Bougalis
parent 5c8e072b7f
commit cb0572d66e
5 changed files with 1146 additions and 104 deletions
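For context on the numbers quoted in the commit message, a minimal back-of-the-envelope sketch of the per-node footprint, assuming typical 64-bit sizes (a 32-byte SHAMapHash and a 16-byte std::shared_ptr); the real per-slot size is computed with sizeof in TaggedPointer.ipp below:

#include <cstddef>
// Assumed sizes for a typical 64-bit build; the real value is
// sizeof(SHAMapHash) + sizeof(std::shared_ptr<SHAMapTreeNode>).
constexpr std::size_t slotBytes = 32 + 16;             // one hash + one child pointer
constexpr std::size_t denseNodeBytes = 16 * slotBytes; // 768 bytes: full 16-slot node
constexpr std::size_t sparseNodeBytes = 2 * slotBytes; //  96 bytes: node with <= 2 children
static_assert(sparseNodeBytes * 8 == denseNodeBytes);  // a 2-child node takes 1/8 the space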

View File

@@ -111,8 +111,10 @@ private:
mutable bool full_ = false; // Map is believed complete in database
public:
/** Each non-leaf node has 16 children (the 'radix tree' part of the map) */
static inline constexpr unsigned int branchFactor = 16;
/** Number of children each non-leaf node has (the 'radix tree' part of the
* map) */
static inline constexpr unsigned int branchFactor =
SHAMapInnerNode::branchFactor;
/** The depth of the hash map: data is only present in the leaves */
static inline constexpr unsigned int leafDepth = 64;

View File

@@ -25,6 +25,7 @@
#include <ripple/shamap/SHAMapItem.h>
#include <ripple/shamap/SHAMapNodeID.h>
#include <ripple/shamap/SHAMapTreeNode.h>
#include <ripple/shamap/impl/TaggedPointer.h>
#include <bitset>
#include <cstdint>
@@ -38,15 +39,75 @@ namespace ripple {
class SHAMapInnerNode final : public SHAMapTreeNode,
public CountedObject<SHAMapInnerNode>
{
std::array<SHAMapHash, 16> mHashes;
std::shared_ptr<SHAMapTreeNode> mChildren[16];
int mIsBranch = 0;
std::uint32_t mFullBelowGen = 0;
public:
/** Each inner node has 16 children (the 'radix tree' part of the map) */
static inline constexpr unsigned int branchFactor = 16;
private:
/** Opaque type that contains the `hashes` array (array of type
`SHAMapHash`) and the `children` array (array of type
`std::shared_ptr<SHAMapTreeNode>`).
*/
TaggedPointer hashesAndChildren_;
std::uint32_t fullBelowGen_ = 0;
std::uint16_t isBranch_ = 0;
static std::mutex childLock;
/** Convert arrays stored in `hashesAndChildren_` so they can store the
requested number of children.
@param toAllocate allocate space for at least this number of children
(must be <= branchFactor)
@note the arrays may be allocated larger than the requested value in
`toAllocate`. This is due to the implementation of TaggedPointer, which
only supports allocating arrays of 4 different sizes.
*/
void
resizeChildArrays(std::uint8_t toAllocate);
/** Get the child's index inside the `hashes` or `children` array (stored in
`hashesAndChildren_`; these arrays may or may not be sparse). The optional
will be empty if an empty branch is requested and the arrays are sparse.
@param i index of the requested child
*/
std::optional<int>
getChildIndex(int i) const;
/** Call the `f` callback for all 16 (branchFactor) branches - even if
the branch is empty.
@param f a one parameter callback function. The parameter is the
child's hash.
*/
template <class F>
void
iterChildren(F&& f) const;
/** Call the `f` callback for all non-empty branches.
@param f a two parameter callback function. The first parameter is
the branch number, the second parameter is the index into the array.
For dense formats these are the same, for sparse they may be
different.
*/
template <class F>
void
iterNonEmptyChildIndexes(F&& f) const;
public:
SHAMapInnerNode(std::uint32_t cowid);
explicit SHAMapInnerNode(
std::uint32_t cowid,
std::uint8_t numAllocatedChildren = branchFactor);
SHAMapInnerNode(SHAMapInnerNode const&) = delete;
SHAMapInnerNode&
operator=(SHAMapInnerNode const&) = delete;
std::shared_ptr<SHAMapTreeNode>
clone(std::uint32_t cowid) const override;
@@ -71,27 +132,35 @@ public:
bool
isEmpty() const;
bool
isEmptyBranch(int m) const;
int
getBranchCount() const;
SHAMapHash const&
getChildHash(int m) const;
void
setChild(int m, std::shared_ptr<SHAMapTreeNode> const& child);
void
shareChild(int m, std::shared_ptr<SHAMapTreeNode> const& child);
SHAMapTreeNode*
getChildPointer(int branch);
std::shared_ptr<SHAMapTreeNode>
getChild(int branch);
virtual std::shared_ptr<SHAMapTreeNode>
canonicalizeChild(int branch, std::shared_ptr<SHAMapTreeNode> node);
// sync functions
bool
isFullBelow(std::uint32_t generation) const;
void
setFullBelowGen(std::uint32_t gen);
@@ -121,34 +190,22 @@ public:
makeCompressedInner(Slice data);
};
inline SHAMapInnerNode::SHAMapInnerNode(std::uint32_t cowid)
: SHAMapTreeNode(cowid)
{
}
inline bool
SHAMapInnerNode::isEmptyBranch(int m) const
{
return (mIsBranch & (1 << m)) == 0;
}
inline SHAMapHash const&
SHAMapInnerNode::getChildHash(int m) const
{
assert(m >= 0 && m < 16);
return mHashes[m];
return (isBranch_ & (1 << m)) == 0;
}
inline bool
SHAMapInnerNode::isFullBelow(std::uint32_t generation) const
{
return mFullBelowGen == generation;
return fullBelowGen_ == generation;
}
inline void
SHAMapInnerNode::setFullBelowGen(std::uint32_t gen)
{
mFullBelowGen = gen;
fullBelowGen_ = gen;
}
} // namespace ripple

View File

@@ -28,12 +28,12 @@
#include <ripple/protocol/HashPrefix.h>
#include <ripple/protocol/digest.h>
#include <ripple/shamap/SHAMapTreeNode.h>
#include <ripple/shamap/impl/TaggedPointer.ipp>
#include <openssl/sha.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <iterator>
#include <mutex>
#include <utility>
@@ -42,17 +42,85 @@ namespace ripple {
std::mutex SHAMapInnerNode::childLock;
SHAMapInnerNode::SHAMapInnerNode(
std::uint32_t cowid,
std::uint8_t numAllocatedChildren)
: SHAMapTreeNode(cowid), hashesAndChildren_(numAllocatedChildren)
{
}
template <class F>
void
SHAMapInnerNode::iterChildren(F&& f) const
{
hashesAndChildren_.iterChildren(isBranch_, std::forward<F>(f));
}
template <class F>
void
SHAMapInnerNode::iterNonEmptyChildIndexes(F&& f) const
{
hashesAndChildren_.iterNonEmptyChildIndexes(isBranch_, std::forward<F>(f));
}
void
SHAMapInnerNode::resizeChildArrays(std::uint8_t toAllocate)
{
hashesAndChildren_ =
TaggedPointer(std::move(hashesAndChildren_), isBranch_, toAllocate);
}
std::optional<int>
SHAMapInnerNode::getChildIndex(int i) const
{
return hashesAndChildren_.getChildIndex(isBranch_, i);
}
std::shared_ptr<SHAMapTreeNode>
SHAMapInnerNode::clone(std::uint32_t cowid) const
{
auto p = std::make_shared<SHAMapInnerNode>(cowid);
auto const branchCount = getBranchCount();
auto const thisIsSparse = !hashesAndChildren_.isDense();
auto p = std::make_shared<SHAMapInnerNode>(cowid, branchCount);
p->hash_ = hash_;
p->mIsBranch = mIsBranch;
p->mFullBelowGen = mFullBelowGen;
p->mHashes = mHashes;
p->isBranch_ = isBranch_;
p->fullBelowGen_ = fullBelowGen_;
SHAMapHash *cloneHashes, *thisHashes;
std::shared_ptr<SHAMapTreeNode>*cloneChildren, *thisChildren;
// structured bindings can't be captured in c++ 17; use tie instead
std::tie(std::ignore, cloneHashes, cloneChildren) =
p->hashesAndChildren_.getHashesAndChildren();
std::tie(std::ignore, thisHashes, thisChildren) =
hashesAndChildren_.getHashesAndChildren();
if (thisIsSparse)
{
int cloneChildIndex = 0;
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
cloneHashes[cloneChildIndex++] = thisHashes[indexNum];
});
}
else
{
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
cloneHashes[branchNum] = thisHashes[indexNum];
});
}
std::lock_guard lock(childLock);
for (int i = 0; i < 16; ++i)
p->mChildren[i] = mChildren[i];
if (thisIsSparse)
{
int cloneChildIndex = 0;
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
cloneChildren[cloneChildIndex++] = thisChildren[indexNum];
});
}
else
{
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
cloneChildren[branchNum] = thisChildren[indexNum];
});
}
return p;
}
@@ -65,18 +133,21 @@ SHAMapInnerNode::makeFullInner(
if (data.size() != 512)
Throw<std::runtime_error>("Invalid FI node");
auto ret = std::make_shared<SHAMapInnerNode>(0);
auto ret = std::make_shared<SHAMapInnerNode>(0, branchFactor);
Serializer s(data.data(), data.size());
for (int i = 0; i < 16; ++i)
auto retHashes = ret->hashesAndChildren_.getHashes();
for (int i = 0; i < branchFactor; ++i)
{
s.getBitString(ret->mHashes[i].as_uint256(), i * 32);
s.getBitString(retHashes[i].as_uint256(), i * 32);
if (ret->mHashes[i].isNonZero())
ret->mIsBranch |= (1 << i);
if (retHashes[i].isNonZero())
ret->isBranch_ |= (1 << i);
}
ret->resizeChildArrays(ret->getBranchCount());
if (hashValid)
ret->hash_ = hash;
else
@@ -91,8 +162,9 @@ SHAMapInnerNode::makeCompressedInner(Slice data)
int len = s.getLength();
auto ret = std::make_shared<SHAMapInnerNode>(0);
auto ret = std::make_shared<SHAMapInnerNode>(0, branchFactor);
auto retHashes = ret->hashesAndChildren_.getHashes();
for (int i = 0; i < (len / 33); ++i)
{
int pos;
@@ -100,15 +172,17 @@ SHAMapInnerNode::makeCompressedInner(Slice data)
if (!s.get8(pos, 32 + (i * 33)))
Throw<std::runtime_error>("short CI node");
if ((pos < 0) || (pos >= 16))
if ((pos < 0) || (pos >= branchFactor))
Throw<std::runtime_error>("invalid CI node");
s.getBitString(ret->mHashes[pos].as_uint256(), i * 33);
s.getBitString(retHashes[pos].as_uint256(), i * 33);
if (ret->mHashes[pos].isNonZero())
ret->mIsBranch |= (1 << pos);
if (retHashes[pos].isNonZero())
ret->isBranch_ |= (1 << pos);
}
ret->resizeChildArrays(ret->getBranchCount());
ret->updateHash();
return ret;
@@ -118,13 +192,12 @@ void
SHAMapInnerNode::updateHash()
{
uint256 nh;
if (mIsBranch != 0)
if (isBranch_ != 0)
{
sha512_half_hasher h;
using beast::hash_append;
hash_append(h, HashPrefix::innerNode);
for (auto const& hh : mHashes)
hash_append(h, hh);
iterChildren([&](SHAMapHash const& hh) { hash_append(h, hh); });
nh = static_cast<typename sha512_half_hasher::result_type>(h);
}
hash_ = SHAMapHash{nh};
@@ -133,11 +206,15 @@ SHAMapInnerNode::updateHash()
void
SHAMapInnerNode::updateHashDeep()
{
for (auto pos = 0; pos < 16; ++pos)
{
if (mChildren[pos] != nullptr)
mHashes[pos] = mChildren[pos]->getHash();
}
SHAMapHash* hashes;
std::shared_ptr<SHAMapTreeNode>* children;
// structured bindings can't be captured in c++ 17; use tie instead
std::tie(std::ignore, hashes, children) =
hashesAndChildren_.getHashesAndChildren();
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
if (children[indexNum] != nullptr)
hashes[indexNum] = children[indexNum]->getHash();
});
updateHash();
}
@@ -150,22 +227,17 @@ SHAMapInnerNode::serializeForWire(Serializer& s) const
if (getBranchCount() < 12)
{
// compressed node
for (int i = 0; i < mHashes.size(); ++i)
{
if (!isEmptyBranch(i))
{
s.addBitString(mHashes[i].as_uint256());
s.add8(i);
}
}
auto hashes = hashesAndChildren_.getHashes();
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
s.addBitString(hashes[indexNum].as_uint256());
s.add8(branchNum);
});
s.add8(wireTypeCompressedInner);
}
else
{
for (auto const& hh : mHashes)
s.addBitString(hh.as_uint256());
iterChildren(
[&](SHAMapHash const& hh) { s.addBitString(hh.as_uint256()); });
s.add8(wireTypeInner);
}
}
@@ -176,42 +248,33 @@ SHAMapInnerNode::serializeWithPrefix(Serializer& s) const
assert(!isEmpty());
s.add32(HashPrefix::innerNode);
for (auto const& hh : mHashes)
s.addBitString(hh.as_uint256());
iterChildren(
[&](SHAMapHash const& hh) { s.addBitString(hh.as_uint256()); });
}
bool
SHAMapInnerNode::isEmpty() const
{
return mIsBranch == 0;
return isBranch_ == 0;
}
int
SHAMapInnerNode::getBranchCount() const
{
int count = 0;
for (int i = 0; i < 16; ++i)
if (!isEmptyBranch(i))
++count;
return count;
return popcnt16(isBranch_);
}
std::string
SHAMapInnerNode::getString(const SHAMapNodeID& id) const
{
std::string ret = SHAMapTreeNode::getString(id);
for (int i = 0; i < mHashes.size(); ++i)
{
if (!isEmptyBranch(i))
{
ret += "\n";
ret += std::to_string(i);
ret += " = ";
ret += to_string(mHashes[i]);
}
}
auto hashes = hashesAndChildren_.getHashes();
iterNonEmptyChildIndexes([&](auto branchNum, auto indexNum) {
ret += "\nb";
ret += std::to_string(branchNum);
ret += " = ";
ret += to_string(hashes[indexNum]);
});
return ret;
}
@@ -219,46 +282,79 @@ SHAMapInnerNode::getString(const SHAMapNodeID& id) const
void
SHAMapInnerNode::setChild(int m, std::shared_ptr<SHAMapTreeNode> const& child)
{
assert((m >= 0) && (m < 16));
assert((m >= 0) && (m < branchFactor));
assert(cowid_ != 0);
assert(child.get() != this);
mHashes[m].zero();
hash_.zero();
auto const dstIsBranch = [&] {
if (child)
return isBranch_ | (1 << m);
else
return isBranch_ & ~(1 << m);
}();
auto const dstToAllocate = popcnt16(dstIsBranch);
// change hashesAndChildren to remove the element, or make room for the
// added element, if necessary
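// (for example, clearing one of three children drops the target allocation
// from 4 slots to 2, while adding a seventh child to a 6-slot allocation
// grows it to the dense 16-slot form)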
hashesAndChildren_ = TaggedPointer(
std::move(hashesAndChildren_), isBranch_, dstIsBranch, dstToAllocate);
isBranch_ = dstIsBranch;
if (child)
mIsBranch |= (1 << m);
else
mIsBranch &= ~(1 << m);
mChildren[m] = child;
{
auto const childIndex = *getChildIndex(m);
auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
hashes[childIndex].zero();
children[childIndex] = child;
}
hash_.zero();
assert(getBranchCount() <= hashesAndChildren_.capacity());
}
// finished modifying, now make shareable
void
SHAMapInnerNode::shareChild(int m, std::shared_ptr<SHAMapTreeNode> const& child)
{
assert((m >= 0) && (m < 16));
assert((m >= 0) && (m < branchFactor));
assert(cowid_ != 0);
assert(child);
assert(child.get() != this);
mChildren[m] = child;
assert(!isEmptyBranch(m));
hashesAndChildren_.getChildren()[*getChildIndex(m)] = child;
}
SHAMapTreeNode*
SHAMapInnerNode::getChildPointer(int branch)
{
assert(branch >= 0 && branch < 16);
assert(branch >= 0 && branch < branchFactor);
assert(!isEmptyBranch(branch));
std::lock_guard lock(childLock);
return mChildren[branch].get();
return hashesAndChildren_.getChildren()[*getChildIndex(branch)].get();
}
std::shared_ptr<SHAMapTreeNode>
SHAMapInnerNode::getChild(int branch)
{
assert(branch >= 0 && branch < 16);
assert(branch >= 0 && branch < branchFactor);
assert(!isEmptyBranch(branch));
std::lock_guard lock(childLock);
return mChildren[branch];
return hashesAndChildren_.getChildren()[*getChildIndex(branch)];
}
SHAMapHash const&
SHAMapInnerNode::getChildHash(int m) const
{
assert((m >= 0) && (m < branchFactor));
if (auto const i = getChildIndex(m))
return hashesAndChildren_.getHashes()[*i];
return zeroSHAMapHash;
}
std::shared_ptr<SHAMapTreeNode>
@@ -266,20 +362,23 @@ SHAMapInnerNode::canonicalizeChild(
int branch,
std::shared_ptr<SHAMapTreeNode> node)
{
assert(branch >= 0 && branch < 16);
assert(branch >= 0 && branch < branchFactor);
assert(node);
assert(node->getHash() == mHashes[branch]);
assert(!isEmptyBranch(branch));
auto const childIndex = *getChildIndex(branch);
auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
assert(node->getHash() == hashes[childIndex]);
std::lock_guard lock(childLock);
if (mChildren[branch])
if (children[childIndex])
{
// There is already a node hooked up, return it
node = mChildren[branch];
node = children[childIndex];
}
else
{
// Hook this node up
mChildren[branch] = node;
children[childIndex] = node;
}
return node;
}
@@ -288,20 +387,38 @@ void
SHAMapInnerNode::invariants(bool is_root) const
{
unsigned count = 0;
for (int i = 0; i < 16; ++i)
auto [numAllocated, hashes, children] =
hashesAndChildren_.getHashesAndChildren();
if (numAllocated != branchFactor)
{
if (mHashes[i].isNonZero())
auto const branchCount = getBranchCount();
for (int i = 0; i < branchCount; ++i)
{
assert((mIsBranch & (1 << i)) != 0);
if (mChildren[i] != nullptr)
mChildren[i]->invariants();
assert(hashes[i].isNonZero());
if (children[i] != nullptr)
children[i]->invariants();
++count;
}
else
}
else
{
for (int i = 0; i < branchFactor; ++i)
{
assert((mIsBranch & (1 << i)) == 0);
if (hashes[i].isNonZero())
{
assert((isBranch_ & (1 << i)) != 0);
if (children[i] != nullptr)
children[i]->invariants();
++count;
}
else
{
assert((isBranch_ & (1 << i)) == 0);
}
}
}
if (!is_root)
{
assert(hash_.isNonZero());

View File

@@ -0,0 +1,227 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2020 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef RIPPLE_SHAMAP_TAGGEDPOINTER_H_INCLUDED
#define RIPPLE_SHAMAP_TAGGEDPOINTER_H_INCLUDED
#include <ripple/shamap/SHAMapTreeNode.h>
#include <cstdint>
#include <optional>
namespace ripple {
/** TaggedPointer is a combination of a pointer and a mask stored in the
lowest two bits.
Since pointers do not have arbitrary alignment, the lowest bits in the
pointer are guaranteed to be zero. TaggedPointer stores information in these
low bits. When dereferencing the pointer, these low "tag" bits are set to
zero. When accessing the tag bits, the high "pointer" bits are set to zero.
The "pointer" part points to to the equivalent to an array of
`SHAMapHash` followed immediately by an array of
`shared_ptr<SHAMapTreeNode>`. The sizes of these arrays are
determined by the tag. The tag is an index into an array (`boundaries`,
defined in the cpp file) that specifies the size. Both arrays are the
same size. Note that the sizes may be smaller than the full 16 elements
needed to explicitly store all the children. In this case, the arrays
only store the non-empty children. The non-empty children are stored in
index order. For example, if only children `2` and `14` are non-empty, a
two-element array would store child `2` in array index 0 and child `14`
in array index 1. There are functions to convert between a child's tree
index and the child's index in a sparse array.
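For instance, for that node `getChildIndex(isBranch, 14)` returns sparse
index 1, while `getChildIndex(isBranch, 5)` returns an empty optional
because branch 5 is empty.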
The motivation for this class is saving RAM. A large percentage of inner
nodes only store a small number of children. Memory can be saved by
storing the inner node's children in sparse arrays. Measurements show
that on average a typical SHAMap's inner nodes can be stored using only
25% of the original space.
*/
class TaggedPointer
{
static_assert(
alignof(SHAMapHash) >= 4,
"Bad alignment: Tag pointer requires low two bits to be zero.");
/** Upper bits are the pointer, lowest two bits are the tag
A moved-from object will have a tp_ of zero.
*/
std::uintptr_t tp_ = 0;
/** bit-and with this mask to get the tag bits (lowest two bits) */
static constexpr std::uintptr_t tagMask = 3;
/** bit-and with this mask to get the pointer bits (mask out the tag) */
static constexpr std::uintptr_t ptrMask = ~tagMask;
/** Deallocate memory and run destructors */
void
destroyHashesAndChildren();
struct RawAllocateTag
{
};
/** This constructor allocates space for the hashes and children, but
does not run constructors.
@param RawAllocateTag used to select overload only
@param numChildren allocate space for at least this number of children
(must be <= branchFactor)
@note Since the hashes/children destructors are always run in the
TaggedPointer destructor, this means those constructors _must_ be run
after this constructor is run. This constructor is private and only used
in places where the hashes/children constructors are subsequently run.
*/
explicit TaggedPointer(RawAllocateTag, std::uint8_t numChildren);
public:
TaggedPointer() = delete;
explicit TaggedPointer(std::uint8_t numChildren);
/** Constructor used to change the number of allocated children.
Existing children from `other` are copied (toAllocate must be >= the
number of children). The motivation for making this a constructor is that
it avoids the unneeded copying and zeroing of hashes that would occur if
this were implemented directly in the SHAMapInnerNode class.
@param other children and hashes are moved from this param
@param isBranch bitset of non-empty children in `other`
@param toAllocate allocate space for at least this number of children
(must be <= branchFactor)
*/
explicit TaggedPointer(
TaggedPointer&& other,
std::uint16_t isBranch,
std::uint8_t toAllocate);
/** Given `other` with the children specified in `srcBranches`, create a
new TaggedPointer with space allocated for `toAllocate` children and
holding the children specified in `dstBranches`.
@param other children and hashes are moved from this param
@param srcBranches bitset of non-empty children in `other`
@param dstBranches bitset of children to copy from `other` (or space to
leave in a sparse array - see note below)
@param toAllocate allocate space for at least this number of children
(must be <= branchFactor)
@note a child may be absent in srcBranches but present in dstBranches
(if dst has a sparse representation, space for the new child will be
left in the sparse array). Typically, srcBranches and dstBranches will
differ by at most one bit. The function works correctly if they differ
by more, but there are likely more efficient algorithms to consider if
this becomes a common use-case.
*/
explicit TaggedPointer(
TaggedPointer&& other,
std::uint16_t srcBranches,
std::uint16_t dstBranches,
std::uint8_t toAllocate);
TaggedPointer(TaggedPointer const&) = delete;
TaggedPointer(TaggedPointer&&);
TaggedPointer&
operator=(TaggedPointer&&);
~TaggedPointer();
/** Decode the tagged pointer into its tag and pointer */
[[nodiscard]] std::pair<std::uint8_t, void*>
decode() const;
/** Get the number of elements allocated for each array */
[[nodiscard]] std::uint8_t
capacity() const;
/** Check if the arrays have a dense format.
@note The dense format is when there is an array element for all 16
(branchFactor) possible children.
*/
[[nodiscard]] bool
isDense() const;
/** Get the number of elements in each array and a pointer to the start
of each array.
*/
[[nodiscard]] std::
tuple<std::uint8_t, SHAMapHash*, std::shared_ptr<SHAMapTreeNode>*>
getHashesAndChildren() const;
/** Get the `hashes` array */
[[nodiscard]] SHAMapHash*
getHashes() const;
/** Get the `children` array */
[[nodiscard]] std::shared_ptr<SHAMapTreeNode>*
getChildren() const;
/** Call the `f` callback for all 16 (branchFactor) branches - even if
the branch is empty.
@param isBranch bitset of non-empty children
@param f a one parameter callback function. The parameter is the
child's hash.
*/
template <class F>
void
iterChildren(std::uint16_t isBranch, F&& f) const;
/** Call the `f` callback for all non-empty branches.
@param isBranch bitset of non-empty children
@param f a two parameter callback function. The first parameter is
the branch number, the second parameter is the index into the array.
For dense formats these are the same, for sparse they may be
different.
*/
template <class F>
void
iterNonEmptyChildIndexes(std::uint16_t isBranch, F&& f) const;
/** Get the child's index inside the `hashes` or `children` array (which
may or may not be sparse). The optional will be empty if an empty
branch is requested and the children are sparse.
@param isBranch bitset of non-empty children
@param i index of the requested child
*/
std::optional<int>
getChildIndex(std::uint16_t isBranch, int i) const;
};
inline TaggedPointer::~TaggedPointer()
{
destroyHashesAndChildren();
}
} // namespace ripple
#endif

View File

@@ -0,0 +1,639 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2020 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <ripple/shamap/impl/TaggedPointer.h>
#include <ripple/shamap/SHAMapInnerNode.h>
#include <array>
// #define FORCE_BOOST_POOL 1
#if FORCE_BOOST_POOL || !__has_include(<memory_resource>)
#define USE_BOOST_POOL 1
#else
#define USE_BOOST_POOL 0
#endif
#if USE_BOOST_POOL
#include <boost/pool/pool_alloc.hpp>
#else
#include <memory_resource>
#endif
namespace ripple {
namespace {
// Sparse array size boundaries.
// Given n children, an array of size `*std::lower_bound(boundaries.begin(),
// boundaries.end(), n);` is used to store the children. Note that the last
// element must be the number of children in a dense array.
constexpr std::array<std::uint8_t, 4> boundaries{
2,
4,
6,
SHAMapInnerNode::branchFactor};
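// For example, a node with 3 non-empty children is allocated 4 slots, one
// with 5 children gets 6 slots, and anything with 7 or more children uses
// the dense 16-slot form.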
static_assert(
boundaries.size() <= 4,
"The hashesAndChildren member uses a tagged array format with two bits "
"reserved for the tag. This supports at most 4 values.");
static_assert(
boundaries.back() == SHAMapInnerNode::branchFactor,
"Last element of boundaries must be number of children in a dense array");
// Terminology: A chunk is the memory being allocated from a block. A block
// contains multiple chunks. This is the terminology the boost documentation
// uses. Pools use "Simple Segregated Storage" as their storage format.
constexpr size_t elementSizeBytes =
(sizeof(SHAMapHash) + sizeof(std::shared_ptr<SHAMapTreeNode>));
constexpr size_t blockSizeBytes = kilobytes(512);
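// On a typical 64-bit build (32-byte SHAMapHash, 16-byte shared_ptr) each
// element is 48 bytes, giving chunk sizes of 96, 192, 288, and 768 bytes
// carved out of 512 KiB blocks; the exact values come from the sizeof
// expressions above.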
template <std::size_t... I>
constexpr std::array<size_t, boundaries.size()> initArrayChunkSizeBytes(
std::index_sequence<I...>)
{
return std::array<size_t, boundaries.size()>{
boundaries[I] * elementSizeBytes...,
};
}
constexpr auto arrayChunkSizeBytes =
initArrayChunkSizeBytes(std::make_index_sequence<boundaries.size()>{});
template <std::size_t... I>
constexpr std::array<size_t, boundaries.size()> initArrayChunksPerBlock(
std::index_sequence<I...>)
{
return std::array<size_t, boundaries.size()>{
blockSizeBytes / arrayChunkSizeBytes[I]...,
};
}
constexpr auto chunksPerBlock =
initArrayChunksPerBlock(std::make_index_sequence<boundaries.size()>{});
[[nodiscard]] inline std::uint8_t
numAllocatedChildren(std::uint8_t n)
{
assert(n <= SHAMapInnerNode::branchFactor);
return *std::lower_bound(boundaries.begin(), boundaries.end(), n);
}
[[nodiscard]] inline std::size_t
boundariesIndex(std::uint8_t numChildren)
{
assert(numChildren <= SHAMapInnerNode::branchFactor);
return std::distance(
boundaries.begin(),
std::lower_bound(boundaries.begin(), boundaries.end(), numChildren));
}
#if USE_BOOST_POOL
template <std::size_t... I>
std::array<std::function<void*()>, boundaries.size()> initAllocateArrayFuns(
std::index_sequence<I...>)
{
return std::array<std::function<void*()>, boundaries.size()>{
boost::singleton_pool<
boost::fast_pool_allocator_tag,
arrayChunkSizeBytes[I],
boost::default_user_allocator_new_delete,
std::mutex,
chunksPerBlock[I],
chunksPerBlock[I]>::malloc...,
};
}
std::array<std::function<void*()>, boundaries.size()> const allocateArrayFuns =
initAllocateArrayFuns(std::make_index_sequence<boundaries.size()>{});
template <std::size_t... I>
std::array<std::function<void(void*)>, boundaries.size()> initFreeArrayFuns(
std::index_sequence<I...>)
{
return std::array<std::function<void(void*)>, boundaries.size()>{
static_cast<void (*)(void*)>(boost::singleton_pool<
boost::fast_pool_allocator_tag,
arrayChunkSizeBytes[I],
boost::default_user_allocator_new_delete,
std::mutex,
chunksPerBlock[I],
chunksPerBlock[I]>::free)...,
};
}
std::array<std::function<void(void*)>, boundaries.size()> const freeArrayFuns =
initFreeArrayFuns(std::make_index_sequence<boundaries.size()>{});
template <std::size_t... I>
std::array<std::function<bool(void*)>, boundaries.size()> initIsFromArrayFuns(
std::index_sequence<I...>)
{
return std::array<std::function<bool(void*)>, boundaries.size()>{
boost::singleton_pool<
boost::fast_pool_allocator_tag,
arrayChunkSizeBytes[I],
boost::default_user_allocator_new_delete,
std::mutex,
chunksPerBlock[I],
chunksPerBlock[I]>::is_from...,
};
}
std::array<std::function<bool(void*)>, boundaries.size()> const
isFromArrayFuns =
initIsFromArrayFuns(std::make_index_sequence<boundaries.size()>{});
// This function returns an untagged pointer
[[nodiscard]] inline std::pair<std::uint8_t, void*>
allocateArrays(std::uint8_t numChildren)
{
auto const i = boundariesIndex(numChildren);
return {i, allocateArrayFuns[i]()};
}
// This function takes an untagged pointer
inline void
deallocateArrays(std::uint8_t boundaryIndex, void* p)
{
assert(isFromArrayFuns[boundaryIndex](p));
freeArrayFuns[boundaryIndex](p);
}
#else
template <std::size_t... I>
std::array<std::pmr::synchronized_pool_resource, boundaries.size()>
initPmrArrayFuns(std::index_sequence<I...>)
{
return std::array<std::pmr::synchronized_pool_resource, boundaries.size()>{
std::pmr::synchronized_pool_resource{std::pmr::pool_options{
/* max_blocks_per_chunk */ chunksPerBlock[I],
/* largest_required_pool_block */ chunksPerBlock[I]}}...,
};
}
std::array<std::pmr::synchronized_pool_resource, boundaries.size()>
pmrArrayFuns =
initPmrArrayFuns(std::make_index_sequence<boundaries.size()>{});
// This function returns an untagged pointer
[[nodiscard]] inline std::pair<std::uint8_t, void*>
allocateArrays(std::uint8_t numChildren)
{
auto const i = boundariesIndex(numChildren);
return {i, pmrArrayFuns[i].allocate(arrayChunkSizeBytes[i])};
}
// This function takes an untagged pointer
inline void
deallocateArrays(std::uint8_t boundaryIndex, void* p)
{
return pmrArrayFuns[boundaryIndex].deallocate(
p, arrayChunkSizeBytes[boundaryIndex]);
}
#endif
[[nodiscard]] inline int
popcnt16(std::uint16_t a)
{
#if defined(__clang__) || defined(__GNUC__)
return __builtin_popcount(a);
#else
// fallback to table lookup
static auto constexpr const tbl = []() {
std::array<std::uint8_t, 256> ret{};
for (int i = 0; i != 256; ++i)
{
for (int j = 0; j != 8; ++j)
{
if (i & (1 << j))
ret[i]++;
}
}
return ret;
}();
return tbl[a & 0xff] + tbl[a >> 8];
#endif
}
// Used in `iterChildren` and elsewhere as the hash value for sparse arrays when
// the hash isn't actually stored in the array.
static SHAMapHash const zeroSHAMapHash;
} // namespace
template <class F>
void
TaggedPointer::iterChildren(std::uint16_t isBranch, F&& f) const
{
auto [numAllocated, hashes, _] = getHashesAndChildren();
if (numAllocated == SHAMapInnerNode::branchFactor)
{
// dense case
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
f(hashes[i]);
}
else
{
// sparse case
int curHashI = 0;
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
if ((1 << i) & isBranch)
{
f(hashes[curHashI++]);
}
else
{
f(zeroSHAMapHash);
}
}
}
}
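// Example of the (branchNum, indexNum) pairs passed to `f` below: a dense
// node yields (b, b) for every non-empty branch b, while a sparse node
// holding only branches 2 and 14 yields (2, 0) and (14, 1).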
template <class F>
void
TaggedPointer::iterNonEmptyChildIndexes(std::uint16_t isBranch, F&& f) const
{
if (capacity() == SHAMapInnerNode::branchFactor)
{
// dense case
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
if ((1 << i) & isBranch)
{
f(i, i);
}
}
}
else
{
// sparse case
int curHashI = 0;
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
if ((1 << i) & isBranch)
{
f(i, curHashI++);
}
}
}
}
inline void
TaggedPointer::destroyHashesAndChildren()
{
if (!tp_)
return;
auto [numAllocated, hashes, children] = getHashesAndChildren();
for (std::size_t i = 0; i < numAllocated; ++i)
{
hashes[i].~SHAMapHash();
children[i].~shared_ptr<SHAMapTreeNode>();
}
auto [tag, ptr] = decode();
deallocateArrays(tag, ptr);
}
inline std::optional<int>
TaggedPointer::getChildIndex(std::uint16_t isBranch, int i) const
{
if (isDense())
return i;
// Sparse case
if ((isBranch & (1 << i)) == 0)
{
// Empty branch. Sparse children do not store empty branches
return {};
}
// Sparse children are stored sorted. This means the index
// of a child in the array is the number of non-empty children
// before it. Since `isBranch_` is a bitset of the stored
// children, we simply need to mask out (and set to zero) all
the bits in `isBranch_` equal to or higher than `i` and count
// the bits.
// mask sets all the bits >=i to zero and all the bits <i to
// one.
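// Example: isBranch == 0b0100'0000'0000'0100 (branches 2 and 14 set) and
// i == 14: mask == 0x3fff, so popcnt16(isBranch & mask) == 1 and branch 14
// is stored at sparse index 1.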
auto const mask = (1 << i) - 1;
return popcnt16(isBranch & mask);
}
inline TaggedPointer::TaggedPointer(RawAllocateTag, std::uint8_t numChildren)
{
auto [tag, p] = allocateArrays(numChildren);
assert(tag < boundaries.size());
assert(
(reinterpret_cast<std::uintptr_t>(p) & ptrMask) ==
reinterpret_cast<std::uintptr_t>(p));
tp_ = reinterpret_cast<std::uintptr_t>(p) + tag;
}
inline TaggedPointer::TaggedPointer(
TaggedPointer&& other,
std::uint16_t srcBranches,
std::uint16_t dstBranches,
std::uint8_t toAllocate)
{
assert(toAllocate >= popcnt16(dstBranches));
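// Illustration: if `other` holds branches {2, 14} in a 2-slot allocation
// and dstBranches adds branch 5, then toAllocate is 3 and
// numAllocatedChildren(3) == 4, so the "not in place" path below copies
// into a fresh 4-slot array, leaving a default-constructed hole at sparse
// index 1 for the new branch 5.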
if (other.capacity() == numAllocatedChildren(toAllocate))
{
// in place
*this = std::move(other);
auto [srcDstNumAllocated, srcDstHashes, srcDstChildren] =
getHashesAndChildren();
bool const srcDstIsDense = isDense();
int srcDstIndex = 0;
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
auto const mask = (1 << i);
bool const inSrc = (srcBranches & mask);
bool const inDst = (dstBranches & mask);
if (inSrc && inDst)
{
// keep
++srcDstIndex;
}
else if (inSrc && !inDst)
{
// remove
if (srcDstIsDense)
{
srcDstHashes[srcDstIndex].zero();
srcDstChildren[srcDstIndex].reset();
++srcDstIndex;
}
else
{
// sparse
// need to shift all the elements to the left by
// one
for (int c = srcDstIndex; c < srcDstNumAllocated - 1; ++c)
{
srcDstHashes[c] = srcDstHashes[c + 1];
srcDstChildren[c] = std::move(srcDstChildren[c + 1]);
}
srcDstHashes[srcDstNumAllocated - 1].zero();
srcDstChildren[srcDstNumAllocated - 1].reset();
// do not increment the index
}
}
else if (!inSrc && inDst)
{
// add
if (srcDstIsDense)
{
// nothing to do, child is already present in the dense rep
++srcDstIndex;
}
else
{
// sparse
// need to create a hole by shifting all the elements to the
// right by one
for (int c = srcDstNumAllocated - 1; c > srcDstIndex; --c)
{
srcDstHashes[c] = srcDstHashes[c - 1];
srcDstChildren[c] = std::move(srcDstChildren[c - 1]);
}
srcDstHashes[srcDstIndex].zero();
srcDstChildren[srcDstIndex].reset();
++srcDstIndex;
}
}
else if (!inSrc && !inDst)
{
// in neither
if (srcDstIsDense)
{
++srcDstIndex;
}
}
}
}
else
{
// not in place
TaggedPointer dst{RawAllocateTag{}, toAllocate};
auto [dstNumAllocated, dstHashes, dstChildren] =
dst.getHashesAndChildren();
// Move `other` into a local var so it's not in a partially moved from
// state after this function runs
TaggedPointer src(std::move(other));
auto [srcNumAllocated, srcHashes, srcChildren] =
src.getHashesAndChildren();
bool const srcIsDense = src.isDense();
bool const dstIsDense = dst.isDense();
int srcIndex = 0, dstIndex = 0;
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
auto const mask = (1 << i);
bool const inSrc = (srcBranches & mask);
bool const inDst = (dstBranches & mask);
if (inSrc && inDst)
{
// keep
new (&dstHashes[dstIndex]) SHAMapHash{srcHashes[srcIndex]};
new (&dstChildren[dstIndex]) std::shared_ptr<SHAMapTreeNode>{
std::move(srcChildren[srcIndex])};
++dstIndex;
++srcIndex;
}
else if (inSrc && !inDst)
{
// remove
++srcIndex;
if (dstIsDense)
{
new (&dstHashes[dstIndex]) SHAMapHash{};
new (&dstChildren[dstIndex])
std::shared_ptr<SHAMapTreeNode>{};
++dstIndex;
}
}
else if (!inSrc && inDst)
{
// add
new (&dstHashes[dstIndex]) SHAMapHash{};
new (&dstChildren[dstIndex]) std::shared_ptr<SHAMapTreeNode>{};
++dstIndex;
if (srcIsDense)
{
++srcIndex;
}
}
else if (!inSrc && !inDst)
{
// in neither
if (dstIsDense)
{
new (&dstHashes[dstIndex]) SHAMapHash{};
new (&dstChildren[dstIndex])
std::shared_ptr<SHAMapTreeNode>{};
++dstIndex;
}
if (srcIsDense)
{
++srcIndex;
}
}
}
// If sparse, may need to run additional constructors
assert(!dstIsDense || dstIndex == dstNumAllocated);
for (int i = dstIndex; i < dstNumAllocated; ++i)
{
new (&dstHashes[i]) SHAMapHash{};
new (&dstChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
}
*this = std::move(dst);
}
}
inline TaggedPointer::TaggedPointer(
TaggedPointer&& other,
std::uint16_t isBranch,
std::uint8_t toAllocate)
: TaggedPointer(std::move(other))
{
auto const oldNumAllocated = capacity();
toAllocate = numAllocatedChildren(toAllocate);
if (toAllocate == oldNumAllocated)
return;
// allocate hashes and children, but do not run constructors
TaggedPointer newHashesAndChildren{RawAllocateTag{}, toAllocate};
SHAMapHash *newHashes, *oldHashes;
std::shared_ptr<SHAMapTreeNode>*newChildren, *oldChildren;
std::uint8_t newNumAllocated;
// structured bindings can't be captured in c++ 17; use tie instead
std::tie(newNumAllocated, newHashes, newChildren) =
newHashesAndChildren.getHashesAndChildren();
std::tie(std::ignore, oldHashes, oldChildren) = getHashesAndChildren();
if (newNumAllocated == SHAMapInnerNode::branchFactor)
{
// new arrays are dense, old arrays are sparse
iterNonEmptyChildIndexes(isBranch, [&](auto branchNum, auto indexNum) {
new (&newHashes[branchNum]) SHAMapHash{oldHashes[indexNum]};
new (&newChildren[branchNum]) std::shared_ptr<SHAMapTreeNode>{
std::move(oldChildren[indexNum])};
});
// Run the constructors for the remaining elements
for (int i = 0; i < SHAMapInnerNode::branchFactor; ++i)
{
if ((1 << i) & isBranch)
continue;
new (&newHashes[i]) SHAMapHash{};
new (&newChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
}
}
else
{
// new arrays are sparse, old arrays may be sparse or dense
int curCompressedIndex = 0;
iterNonEmptyChildIndexes(isBranch, [&](auto branchNum, auto indexNum) {
new (&newHashes[curCompressedIndex])
SHAMapHash{oldHashes[indexNum]};
new (&newChildren[curCompressedIndex])
std::shared_ptr<SHAMapTreeNode>{
std::move(oldChildren[indexNum])};
++curCompressedIndex;
});
// Run the constructors for the remaining elements
for (int i = curCompressedIndex; i < newNumAllocated; ++i)
{
new (&newHashes[i]) SHAMapHash{};
new (&newChildren[i]) std::shared_ptr<SHAMapTreeNode>{};
}
}
*this = std::move(newHashesAndChildren);
}
inline TaggedPointer::TaggedPointer(std::uint8_t numChildren)
: TaggedPointer(TaggedPointer::RawAllocateTag{}, numChildren)
{
auto [numAllocated, hashes, children] = getHashesAndChildren();
for (std::size_t i = 0; i < numAllocated; ++i)
{
new (&hashes[i]) SHAMapHash{};
new (&children[i]) std::shared_ptr<SHAMapTreeNode>{};
}
}
inline TaggedPointer::TaggedPointer(TaggedPointer&& other) : tp_{other.tp_}
{
other.tp_ = 0;
}
inline TaggedPointer&
TaggedPointer::operator=(TaggedPointer&& other)
{
if (this == &other)
return *this;
destroyHashesAndChildren();
tp_ = other.tp_;
other.tp_ = 0;
return *this;
}
[[nodiscard]] inline std::pair<std::uint8_t, void*>
TaggedPointer::decode() const
{
return {tp_ & tagMask, reinterpret_cast<void*>(tp_ & ptrMask)};
}
[[nodiscard]] inline std::uint8_t
TaggedPointer::capacity() const
{
return boundaries[tp_ & tagMask];
}
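// The last boundary index (tag == boundaries.size() - 1) selects the only
// dense layout: all 16 (branchFactor) slots are allocated.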
[[nodiscard]] inline bool
TaggedPointer::isDense() const
{
return (tp_ & tagMask) == boundaries.size() - 1;
}
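// Memory layout of a chunk: `numAllocated` SHAMapHash objects followed
// immediately by `numAllocated` shared_ptr<SHAMapTreeNode> objects, as
// described in TaggedPointer.h.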
[[nodiscard]] inline std::
tuple<std::uint8_t, SHAMapHash*, std::shared_ptr<SHAMapTreeNode>*>
TaggedPointer::getHashesAndChildren() const
{
auto const [tag, ptr] = decode();
auto const hashes = reinterpret_cast<SHAMapHash*>(ptr);
std::uint8_t numAllocated = boundaries[tag];
auto const children = reinterpret_cast<std::shared_ptr<SHAMapTreeNode>*>(
hashes + numAllocated);
return {numAllocated, hashes, children};
};
[[nodiscard]] inline SHAMapHash*
TaggedPointer::getHashes() const
{
return reinterpret_cast<SHAMapHash*>(tp_ & ptrMask);
};
[[nodiscard]] inline std::shared_ptr<SHAMapTreeNode>*
TaggedPointer::getChildren() const
{
auto [unused1, unused2, result] = getHashesAndChildren();
return result;
};
} // namespace ripple