From 0eb0c9bb821e47e1c97c175cc8af3965fe198d08 Mon Sep 17 00:00:00 2001
From: Kosie van der Merwe
Date: Fri, 7 Dec 2012 10:42:19 -0800
Subject: [PATCH] Added methods to write small ints to bit streams.

Summary:
Added BitStreamPutInt() and BitStreamGetInt(), which take a stream of chars
and can write and read integers of arbitrary bit sizes at arbitrary bit
positions in that stream. There are also convenience versions of these
functions that take std::strings and leveldb::Slices.

Test Plan: make check

Reviewers: sheki, vamsi, dhruba, emayanke

Reviewed By: vamsi

CC: leveldb

Differential Revision: https://reviews.facebook.net/D7071
---
 util/coding.cc      | 100 ++++++++++++++++++++++++++++++++++++++++++++
 util/coding.h       |  26 ++++++++++++
 util/coding_test.cc |  93 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 219 insertions(+)

diff --git a/util/coding.cc b/util/coding.cc
index 9148713e0a..7dae9eb1b7 100644
--- a/util/coding.cc
+++ b/util/coding.cc
@@ -4,6 +4,8 @@

 #include "util/coding.h"

+#include <algorithm>
+
 namespace leveldb {

 void EncodeFixed32(char* buf, uint32_t value) {
@@ -191,4 +193,102 @@ bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
   }
 }

+void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
+                     uint32_t bits, uint64_t value) {
+  assert((offset + bits + 7)/8 <= dstlen);
+  assert(bits <= 64);
+
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+
+  size_t byteOffset = offset / 8;
+  size_t bitOffset = offset % 8;
+
+  // This prevents unused variable warnings when compiling.
+#ifndef NDEBUG
+  // Store truncated value.
+  uint64_t origValue = (bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value;
+  uint32_t origBits = bits;
+#endif
+
+  while (bits > 0) {
+    size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
+    unsigned char mask = ((1 << bitsToGet) - 1);
+
+    ptr[byteOffset] = (ptr[byteOffset] & ~(mask << bitOffset)) +
+                      ((value & mask) << bitOffset);
+
+    value >>= bitsToGet;
+    byteOffset += 1;
+    bitOffset = 0;
+    bits -= bitsToGet;
+  }
+
+  assert(origValue == BitStreamGetInt(dst, dstlen, offset, origBits));
+}
+
+uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
+                         uint32_t bits) {
+  assert((offset + bits + 7)/8 <= srclen);
+  assert(bits <= 64);
+
+  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(src);
+
+  uint64_t result = 0;
+
+  size_t byteOffset = offset / 8;
+  size_t bitOffset = offset % 8;
+  size_t shift = 0;
+
+  while (bits > 0) {
+    size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
+    unsigned char mask = ((1 << bitsToGet) - 1);
+
+    result += (uint64_t)((ptr[byteOffset] >> bitOffset) & mask) << shift;
+
+    shift += bitsToGet;
+    byteOffset += 1;
+    bitOffset = 0;
+    bits -= bitsToGet;
+  }
+
+  return result;
+}
+
+void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
+                     uint64_t value) {
+  assert((offset + bits + 7)/8 <= dst->size());
+
+  const size_t kTmpBufLen = sizeof(value) + 1;
+  char tmpBuf[kTmpBufLen];
+
+  // Number of bytes of tmpBuf being used
+  const size_t kUsedBytes = (offset%8 + bits)/8;
+
+  // Copy relevant parts of dst to tmpBuf
+  for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
+    tmpBuf[idx] = (*dst)[offset/8 + idx];
+  }
+
+  BitStreamPutInt(tmpBuf, kTmpBufLen, offset%8, bits, value);
+
+  // Copy tmpBuf back to dst
+  for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
+    (*dst)[offset/8 + idx] = tmpBuf[idx];
+  }
+
+  // Do the check here too, as we went through a temporary buffer.
+  assert(((bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value) ==
+         BitStreamGetInt(dst, offset, bits));
+}
+
+uint64_t BitStreamGetInt(const std::string* src, size_t offset,
+                         uint32_t bits) {
+  return BitStreamGetInt(src->data(), src->size(), offset, bits);
+}
+
+uint64_t BitStreamGetInt(const Slice* src, size_t offset,
+                         uint32_t bits) {
+  return BitStreamGetInt(src->data(), src->size(), offset, bits);
+}
+
 }  // namespace leveldb
diff --git a/util/coding.h b/util/coding.h
index 3993c4a755..d70bab7b66 100644
--- a/util/coding.h
+++ b/util/coding.h
@@ -99,6 +99,32 @@ inline const char* GetVarint32Ptr(const char* p,
   return GetVarint32PtrFallback(p, limit, value);
 }

+// Writes an unsigned integer of the given bit width (bits), with its least
+// significant bit at bit position offset.
+// Bits are numbered from 0 to 7 in the first byte, 8 to 15 in the second and
+// so on.
+// value is truncated to the given number of least significant bits.
+// REQUIRES: (offset+bits+7)/8 <= dstlen
+// REQUIRES: bits <= 64
+extern void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
+                            uint32_t bits, uint64_t value);
+
+// Reads an unsigned integer of the given bit width (bits), with its least
+// significant bit at bit position offset.
+// Bits are numbered in the same way as in BitStreamPutInt().
+// REQUIRES: (offset+bits+7)/8 <= srclen
+// REQUIRES: bits <= 64
+extern uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
+                                uint32_t bits);
+
+// Convenience functions
+extern void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
+                            uint64_t value);
+extern uint64_t BitStreamGetInt(const std::string* src, size_t offset,
+                                uint32_t bits);
+extern uint64_t BitStreamGetInt(const Slice* src, size_t offset,
+                                uint32_t bits);
+
 }  // namespace leveldb

 #endif  // STORAGE_LEVELDB_UTIL_CODING_H_
diff --git a/util/coding_test.cc b/util/coding_test.cc
index 465a88cffc..a5d7cb0d16 100644
--- a/util/coding_test.cc
+++ b/util/coding_test.cc
@@ -189,6 +189,99 @@ TEST(Coding, Strings) {
   ASSERT_EQ("", input.ToString());
 }

+TEST(Coding, BitStream) {
+  const int kNumBytes = 10;
+  char bytes[kNumBytes+1];
+  for (int i = 0; i < kNumBytes + 1; ++i) {
+    bytes[i] = '\0';
+  }
+
+  // Simple byte-aligned test.
+  for (int i = 0; i < kNumBytes; ++i) {
+    BitStreamPutInt(bytes, kNumBytes, i*8, 8, 255-i);
+
+    ASSERT_EQ((unsigned char)bytes[i], (unsigned char)(255-i));
+  }
+  for (int i = 0; i < kNumBytes; ++i) {
+    ASSERT_EQ(BitStreamGetInt(bytes, kNumBytes, i*8, 8), (uint32_t)(255-i));
+  }
+  ASSERT_EQ(bytes[kNumBytes], '\0');
+
+  // Write and read back at strange offsets
+  for (int i = 0; i < kNumBytes + 1; ++i) {
+    bytes[i] = '\0';
+  }
+  for (int i = 0; i < kNumBytes; ++i) {
+    BitStreamPutInt(bytes, kNumBytes, i*5+1, 4, (i * 7) % (1 << 4));
+  }
+  for (int i = 0; i < kNumBytes; ++i) {
+    ASSERT_EQ(BitStreamGetInt(bytes, kNumBytes, i*5+1, 4),
+              (uint32_t)((i * 7) % (1 << 4)));
+  }
+  ASSERT_EQ(bytes[kNumBytes], '\0');
+
+  // Create 11011011 as a bit pattern in every byte
+  for (int i = 0; i < kNumBytes + 1; ++i) {
+    bytes[i] = '\0';
+  }
+  for (int i = 0; i < kNumBytes; ++i) {
+    BitStreamPutInt(bytes, kNumBytes, i*8, 2, 3);
+    BitStreamPutInt(bytes, kNumBytes, i*8+3, 2, 3);
+    BitStreamPutInt(bytes, kNumBytes, i*8+6, 2, 3);
+
+    ASSERT_EQ((unsigned char)bytes[i],
+              (unsigned char)(3 + (3 << 3) + (3 << 6)));
+  }
+  ASSERT_EQ(bytes[kNumBytes], '\0');
+
+
+  // Test large values
+  for (int i = 0; i < kNumBytes + 1; ++i) {
+    bytes[i] = '\0';
+  }
+  BitStreamPutInt(bytes, kNumBytes, 0, 64, (uint64_t)(-1));
+  for (int i = 0; i < 64/8; ++i) {
+    ASSERT_EQ((unsigned char)bytes[i],
+              (unsigned char)(255));
+  }
+  ASSERT_EQ(bytes[64/8+1], '\0');
+
+
+}
+
+TEST(Coding, BitStreamConvenienceFuncs) {
+  std::string bytes(1, '\0');
+
+  // Check that independent changes to a byte are preserved.
+  BitStreamPutInt(&bytes, 0, 2, 3);
+  BitStreamPutInt(&bytes, 3, 2, 3);
+  BitStreamPutInt(&bytes, 6, 2, 3);
+  ASSERT_EQ((unsigned char)bytes[0], (unsigned char)(3 + (3 << 3) + (3 << 6)));
+  ASSERT_EQ(BitStreamGetInt(&bytes, 0, 2), 3u);
+  ASSERT_EQ(BitStreamGetInt(&bytes, 3, 2), 3u);
+  ASSERT_EQ(BitStreamGetInt(&bytes, 6, 2), 3u);
+  Slice slice(bytes);
+  ASSERT_EQ(BitStreamGetInt(&slice, 0, 2), 3u);
+  ASSERT_EQ(BitStreamGetInt(&slice, 3, 2), 3u);
+  ASSERT_EQ(BitStreamGetInt(&slice, 6, 2), 3u);
+
+  // Test a value that crosses a byte boundary
+  bytes = std::string(2, '\0');
+  BitStreamPutInt(&bytes, 6, 4, 15);
+  ASSERT_EQ((unsigned char)bytes[0], 3 << 6);
+  ASSERT_EQ((unsigned char)bytes[1], 3);
+  ASSERT_EQ(BitStreamGetInt(&bytes, 6, 4), 15u);
+  slice = Slice(bytes);
+  ASSERT_EQ(BitStreamGetInt(&slice, 6, 4), 15u);
+
+  // Test 64-bit number
+  bytes = std::string(64/8, '\0');
+  BitStreamPutInt(&bytes, 0, 64, (uint64_t)(-1));
+  ASSERT_EQ(BitStreamGetInt(&bytes, 0, 64), (uint64_t)(-1));
+  slice = Slice(bytes);
+  ASSERT_EQ(BitStreamGetInt(&slice, 0, 64), (uint64_t)(-1));
+}
+
 }  // namespace leveldb

 int main(int argc, char** argv) {