rippled
Loading...
Searching...
No Matches
codec.h
1#pragma once
2
3// Disable lz4 deprecation warning due to incompatibility with clang attributes
4#define LZ4_DISABLE_DEPRECATE_WARNINGS
5
6#include <xrpl/basics/contract.h>
7#include <xrpl/basics/safe_cast.h>
8#include <xrpl/nodestore/NodeObject.h>
9#include <xrpl/nodestore/detail/varint.h>
10#include <xrpl/protocol/HashPrefix.h>
11
12#include <nudb/detail/field.hpp>
13
14#include <lz4.h>
15
16#include <cstddef>
17#include <cstring>
18#include <string>
19
20namespace xrpl {
21namespace NodeStore {
22
23template <class BufferFactory>
25lz4_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
26{
27 if (static_cast<int>(in_size) < 0)
28 Throw<std::runtime_error>("lz4_decompress: integer overflow (input)");
29
30 std::size_t outSize = 0;
31
32 auto const n = read_varint(reinterpret_cast<std::uint8_t const*>(in), in_size, outSize);
33
34 if (n == 0 || n >= in_size)
35 Throw<std::runtime_error>("lz4_decompress: invalid blob");
36
37 if (static_cast<int>(outSize) <= 0)
38 Throw<std::runtime_error>("lz4_decompress: integer overflow (output)");
39
40 void* const out = bf(outSize);
41
42 if (LZ4_decompress_safe(
43 reinterpret_cast<char const*>(in) + n,
44 reinterpret_cast<char*>(out),
45 static_cast<int>(in_size - n),
46 static_cast<int>(outSize)) != static_cast<int>(outSize))
47 Throw<std::runtime_error>("lz4_decompress: LZ4_decompress_safe");
48
49 return {out, outSize};
50}
51
52template <class BufferFactory>
54lz4_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
55{
57 using namespace nudb::detail;
60 auto const n = write_varint(vi.data(), in_size);
61 auto const out_max = LZ4_compressBound(in_size);
62 std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(n + out_max));
63 result.first = out;
64 std::memcpy(out, vi.data(), n);
65 auto const out_size =
66 LZ4_compress_default(reinterpret_cast<char const*>(in), reinterpret_cast<char*>(out + n), in_size, out_max);
67 if (out_size == 0)
68 Throw<std::runtime_error>("lz4 compress");
69 result.second = n + out_size;
70 return result;
71}
72
73//------------------------------------------------------------------------------
74
75/*
76 object types:
77
78 0 = Uncompressed
79 1 = lz4 compressed
80 2 = inner node compressed
81 3 = full inner node
82*/
83
// Decode a codec blob back into its serialized form.
//
// The blob begins with a varint type tag (read with read_varint) followed
// by a type-specific payload:
//   0  payload is returned as-is; the result points into the input buffer
//      and bf is not called.
//   1  payload is handed to lz4_decompress, which obtains its buffer from bf.
//   2  a 16-bit mask followed by one 32-byte hash for every set mask bit;
//      expanded into a 525-byte buffer obtained from bf.
//   3  exactly 16 * 32 bytes of hashes; expanded into a 525-byte buffer
//      obtained from bf.
//
// For types 2 and 3 the 525 bytes are: two zero uint32 fields, a uint8
// holding hotUNKNOWN, a uint32 holding HashPrefix::innerNode, then 16
// hashes of 32 bytes each (4 + 4 + 1 + 4 + 512).
//
// Returns the (pointer, size) pair describing the decoded data.
// Throws std::runtime_error if the type tag cannot be read, the type is
// unknown, or the payload size does not match what the type requires.
//
// NOTE(review): the return-type line and the declaration of `result` are
// not visible in this listing.
84template <class BufferFactory>
86nodeobject_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
87{
88 using namespace nudb::detail;
89
90 std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
91 std::size_t type;
// vn is the number of bytes the type tag occupied; 0 is treated as failure.
92 auto const vn = read_varint(p, in_size, type);
93 if (vn == 0)
94 Throw<std::runtime_error>("nodeobject decompress");
// From here on p / in_size describe only the payload after the tag.
95 p += vn;
96 in_size -= vn;
97
99 switch (type)
100 {
101 case 0: // uncompressed
102 {
103 result.first = p;
104 result.second = in_size;
105 break;
106 }
107 case 1: // lz4
108 {
109 result = lz4_decompress(p, in_size, bf);
110 break;
111 }
112 case 2: // compressed v1 inner node
113 {
114 auto const hs = field<std::uint16_t>::size; // Mask
// Require room for the mask plus at least one hash.
115 if (in_size < hs + 32)
116 Throw<std::runtime_error>(
117 "nodeobject codec v1: short inner node size: " + std::string("in_size = ") +
118 std::to_string(in_size) + " hs = " + std::to_string(hs));
119 istream is(p, in_size);
120 std::uint16_t mask;
121 read<std::uint16_t>(is, mask); // Mask
// in_size now counts only the hash bytes that remain after the mask.
122 in_size -= hs;
123 result.second = 525;
124 void* const out = bf(result.second);
125 result.first = out;
126 ostream os(out, result.second);
// Fixed 13-byte header preceding the 16 hash slots.
127 write<std::uint32_t>(os, 0);
128 write<std::uint32_t>(os, 0);
129 write<std::uint8_t>(os, hotUNKNOWN);
130 write<std::uint32_t>(os, static_cast<std::uint32_t>(HashPrefix::innerNode));
131 if (mask == 0)
132 Throw<std::runtime_error>("nodeobject codec v1: empty inner node");
// Walk the 16 slots from the highest mask bit down: a set bit consumes
// the next 32 input bytes, a clear bit emits 32 zero bytes.
133 std::uint16_t bit = 0x8000;
134 for (int i = 16; i--; bit >>= 1)
135 {
136 if (mask & bit)
137 {
138 if (in_size < 32)
139 Throw<std::runtime_error>(
140 "nodeobject codec v1: short inner node subsize: " + std::string("in_size = ") +
141 std::to_string(in_size) + " i = " + std::to_string(i));
142 std::memcpy(os.data(32), is(32), 32);
143 in_size -= 32;
144 }
145 else
146 {
147 std::memset(os.data(32), 0, 32);
148 }
149 }
// Input bytes left over after all set bits were served are an error.
150 if (in_size > 0)
151 Throw<std::runtime_error>("nodeobject codec v1: long inner node, in_size = " + std::to_string(in_size));
152 break;
153 }
154 case 3: // full v1 inner node
155 {
156 if (in_size != 16 * 32) // hashes
157 Throw<std::runtime_error>(
158 "nodeobject codec v1: short full inner node, in_size = " + std::to_string(in_size));
159 istream is(p, in_size);
160 result.second = 525;
161 void* const out = bf(result.second);
162 result.first = out;
163 ostream os(out, result.second);
// Same 13-byte header as case 2, followed by all 512 hash bytes verbatim.
164 write<std::uint32_t>(os, 0);
165 write<std::uint32_t>(os, 0);
166 write<std::uint8_t>(os, hotUNKNOWN);
167 write<std::uint32_t>(os, static_cast<std::uint32_t>(HashPrefix::innerNode));
168 write(os, is(512), 512);
169 break;
170 }
171 default:
172 Throw<std::runtime_error>("nodeobject codec: bad type=" + std::to_string(type));
173 };
174 return result;
175}
176
/** Returns a pointer to 32 zero bytes.

    The bytes live in a function-local static, so the pointer stays valid
    for the lifetime of the program and is the same on every call. The
    storage is constexpr, so it cannot be modified through any path.

    @return Pointer to a read-only block of 32 zero bytes.
*/
template <class = void>
void const*
zero32()
{
    static constexpr std::array<char, 32> v{};
    return v.data();
}
184
// Encode a serialized object into a codec blob.
//
// Two encodings are produced:
//   * If the input is exactly 525 bytes and the uint32 read after the first
//     nine bytes equals HashPrefix::innerNode, the 16 following 32-byte
//     hashes are scanned and all-zero ones are dropped.
//       - fewer than 16 non-zero hashes: type 2 = varint tag, 16-bit mask,
//         then the non-zero hashes in order. The mask has bit 0x8000 for
//         the first slot, down to bit 0x0001 for the last.
//       - all 16 non-zero: type 3 = varint tag followed by all 16 hashes.
//   * Everything else: type 1 = varint tag followed by the output of
//     lz4_compress.
//
// The output buffer is always obtained from bf. Returns the (pointer, size)
// pair describing the encoded blob.
//
// NOTE(review): the return-type line and the declarations of `result`,
// `vh` and `vi` are not visible in this listing.
185template <class BufferFactory>
187nodeobject_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
188{
189 using std::runtime_error;
190 using namespace nudb::detail;
191
192 // Check for inner node v1
193 if (in_size == 525)
194 {
195 istream is(in, in_size);
196 std::uint32_t index;
197 std::uint32_t unused;
198 std::uint8_t kind;
199 std::uint32_t prefix;
// The first three fields are read to advance the stream; only prefix is
// inspected below.
200 read<std::uint32_t>(is, index);
201 read<std::uint32_t>(is, unused);
202 read<std::uint8_t>(is, kind);
203 read<std::uint32_t>(is, prefix);
204 if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
205 {
// n counts the non-zero hashes; they are packed contiguously into vh.
206 std::size_t n = 0;
207 std::uint16_t mask = 0;
209 for (unsigned bit = 0x8000; bit; bit >>= 1)
210 {
211 void const* const h = is(32);
212 if (std::memcmp(h, zero32(), 32) == 0)
213 continue;
214 std::memcpy(vh.data() + 32 * n, h, 32);
215 mask |= bit;
216 ++n;
217 }
219 if (n < 16)
220 {
221 // 2 = v1 inner node compressed
222 auto const type = 2U;
223 auto const vs = size_varint(type);
224 result.second = vs + field<std::uint16_t>::size + // mask
225 n * 32; // hashes
226 std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(result.second));
227 result.first = out;
228 ostream os(out, result.second);
229 write<varint>(os, type);
230 write<std::uint16_t>(os, mask);
231 write(os, vh.data(), n * 32);
232 return result;
233 }
234 // 3 = full v1 inner node
235 auto const type = 3U;
236 auto const vs = size_varint(type);
237 result.second = vs + n * 32; // hashes
238 std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(result.second));
239 result.first = out;
240 ostream os(out, result.second);
241 write<varint>(os, type);
242 write(os, vh.data(), n * 32);
243 return result;
244 }
245 }
246
248
// General path: the type tag is staged in vi, then copied in front of the
// LZ4 output once the buffer exists.
249 constexpr std::size_t codecType = 1;
250 auto const vn = write_varint(vi.data(), codecType);
252 switch (codecType)
253 {
254 // case 0 was uncompressed data; we always compress now.
255 case 1: // lz4
256 {
257 std::uint8_t* p;
// The wrapper factory asks bf for vn extra bytes, remembers the start of
// the buffer in p, and gives lz4_compress the position just past the tag.
258 auto const lzr = NodeStore::lz4_compress(in, in_size, [&p, &vn, &bf](std::size_t n) {
259 p = reinterpret_cast<std::uint8_t*>(bf(vn + n));
260 return p + vn;
261 });
262 std::memcpy(p, vi.data(), vn);
263 result.first = p;
264 result.second = vn + lzr.second;
265 break;
266 }
267 default:
268 Throw<std::logic_error>("nodeobject codec: unknown=" + std::to_string(codecType));
269 };
270 return result;
271}
272
273// Modifies an inner node to erase the ledger
274// sequence and type information so the codec
275// verification can pass.
276//
277template <class = void>
278void
280{
281 using namespace nudb::detail;
282
283 // Check for inner node
284 if (in_size == 525)
285 {
286 istream is(in, in_size);
287 std::uint32_t index;
288 std::uint32_t unused;
289 std::uint8_t kind;
290 std::uint32_t prefix;
291 read<std::uint32_t>(is, index);
292 read<std::uint32_t>(is, unused);
293 read<std::uint8_t>(is, kind);
294 read<std::uint32_t>(is, prefix);
295 if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
296 {
297 ostream os(in, 9);
298 write<std::uint32_t>(os, 0);
299 write<std::uint32_t>(os, 0);
300 write<std::uint8_t>(os, hotUNKNOWN);
301 }
302 }
303}
304
305} // namespace NodeStore
306} // namespace xrpl
T data(T... args)
T memcmp(T... args)
T memcpy(T... args)
T memset(T... args)
std::pair< void const *, std::size_t > nodeobject_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition codec.h:187
void filter_inner(void *in, std::size_t in_size)
Definition codec.h:279
std::pair< void const *, std::size_t > lz4_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition codec.h:54
std::pair< void const *, std::size_t > lz4_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition codec.h:25
std::size_t size_varint(T v)
Definition varint.h:70
void write(nudb::detail::ostream &os, std::size_t t)
Definition varint.h:114
std::pair< void const *, std::size_t > nodeobject_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition codec.h:86
std::size_t write_varint(void *p0, std::size_t v)
Definition varint.h:83
std::size_t read_varint(void const *buf, std::size_t buflen, std::size_t &t)
Definition varint.h:37
void const * zero32()
Definition codec.h:179
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition algorithm.h:5
@ hotUNKNOWN
Definition NodeObject.h:13
@ innerNode
inner node in V1 tree
T to_string(T... args)