rippled
codec.h
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #ifndef RIPPLE_NODESTORE_CODEC_H_INCLUDED
21 #define RIPPLE_NODESTORE_CODEC_H_INCLUDED
22 
23 // Disable lz4 deprecation warning due to incompatibility with clang attributes
24 #define LZ4_DISABLE_DEPRECATE_WARNINGS
25 
26 #include <ripple/basics/contract.h>
27 #include <ripple/basics/safe_cast.h>
28 #include <ripple/nodestore/NodeObject.h>
29 #include <ripple/nodestore/impl/varint.h>
30 #include <ripple/protocol/HashPrefix.h>
31 #include <cstddef>
32 #include <cstring>
33 #include <lz4.h>
34 #include <nudb/detail/field.hpp>
35 #include <string>
36 #include <utility>
37 
38 namespace ripple {
39 namespace NodeStore {
40 
41 template <class BufferFactory>
43 lz4_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
44 {
45  using std::runtime_error;
46  using namespace nudb::detail;
48  std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
49  auto const n = read_varint(p, in_size, result.second);
50  if (n == 0)
51  Throw<std::runtime_error>("lz4 decompress: n == 0");
52  void* const out = bf(result.second);
53  result.first = out;
54  if (LZ4_decompress_fast(
55  reinterpret_cast<char const*>(in) + n,
56  reinterpret_cast<char*>(out),
57  result.second) +
58  n !=
59  in_size)
60  Throw<std::runtime_error>("lz4 decompress: LZ4_decompress_fast");
61  return result;
62 }
63 
64 template <class BufferFactory>
66 lz4_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
67 {
68  using std::runtime_error;
69  using namespace nudb::detail;
72  auto const n = write_varint(vi.data(), in_size);
73  auto const out_max = LZ4_compressBound(in_size);
74  std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(n + out_max));
75  result.first = out;
76  std::memcpy(out, vi.data(), n);
77  auto const out_size = LZ4_compress_default(
78  reinterpret_cast<char const*>(in),
79  reinterpret_cast<char*>(out + n),
80  in_size,
81  out_max);
82  if (out_size == 0)
83  Throw<std::runtime_error>("lz4 compress");
84  result.second = n + out_size;
85  return result;
86 }
87 
88 //------------------------------------------------------------------------------
89 
90 /*
91  object types:
92 
93  0 = Uncompressed
94  1 = lz4 compressed
95  2 = inner node compressed
96  3 = full inner node
97 */
98 
99 template <class BufferFactory>
101 nodeobject_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
102 {
103  using namespace nudb::detail;
104 
105  std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
106  std::size_t type;
107  auto const vn = read_varint(p, in_size, type);
108  if (vn == 0)
109  Throw<std::runtime_error>("nodeobject decompress");
110  p += vn;
111  in_size -= vn;
112 
114  switch (type)
115  {
116  case 0: // uncompressed
117  {
118  result.first = p;
119  result.second = in_size;
120  break;
121  }
122  case 1: // lz4
123  {
124  result = lz4_decompress(p, in_size, bf);
125  break;
126  }
127  case 2: // compressed v1 inner node
128  {
129  auto const hs = field<std::uint16_t>::size; // Mask
130  if (in_size < hs + 32)
131  Throw<std::runtime_error>(
132  "nodeobject codec v1: short inner node size: " +
133  std::string("in_size = ") + std::to_string(in_size) +
134  " hs = " + std::to_string(hs));
135  istream is(p, in_size);
136  std::uint16_t mask;
137  read<std::uint16_t>(is, mask); // Mask
138  in_size -= hs;
139  result.second = 525;
140  void* const out = bf(result.second);
141  result.first = out;
142  ostream os(out, result.second);
143  write<std::uint32_t>(os, 0);
144  write<std::uint32_t>(os, 0);
145  write<std::uint8_t>(os, hotUNKNOWN);
146  write<std::uint32_t>(
147  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
148  if (mask == 0)
149  Throw<std::runtime_error>(
150  "nodeobject codec v1: empty inner node");
151  std::uint16_t bit = 0x8000;
152  for (int i = 16; i--; bit >>= 1)
153  {
154  if (mask & bit)
155  {
156  if (in_size < 32)
157  Throw<std::runtime_error>(
158  "nodeobject codec v1: short inner node subsize: " +
159  std::string("in_size = ") +
160  std::to_string(in_size) +
161  " i = " + std::to_string(i));
162  std::memcpy(os.data(32), is(32), 32);
163  in_size -= 32;
164  }
165  else
166  {
167  std::memset(os.data(32), 0, 32);
168  }
169  }
170  if (in_size > 0)
171  Throw<std::runtime_error>(
172  "nodeobject codec v1: long inner node, in_size = " +
173  std::to_string(in_size));
174  break;
175  }
176  case 3: // full v1 inner node
177  {
178  if (in_size != 16 * 32) // hashes
179  Throw<std::runtime_error>(
180  "nodeobject codec v1: short full inner node, in_size = " +
181  std::to_string(in_size));
182  istream is(p, in_size);
183  result.second = 525;
184  void* const out = bf(result.second);
185  result.first = out;
186  ostream os(out, result.second);
187  write<std::uint32_t>(os, 0);
188  write<std::uint32_t>(os, 0);
189  write<std::uint8_t>(os, hotUNKNOWN);
190  write<std::uint32_t>(
191  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
192  write(os, is(512), 512);
193  break;
194  }
195  default:
196  Throw<std::runtime_error>(
197  "nodeobject codec: bad type=" + std::to_string(type));
198  };
199  return result;
200 }
201 
/** Returns a pointer to 32 bytes that are all zero.

    The storage has static duration, so the pointer stays valid for the
    life of the program and every call returns the same address.
*/
template <class = void>
void const*
zero32()
{
    static std::array<char, 32> const zeros{};
    return zeros.data();
}
209 
210 template <class BufferFactory>
212 nodeobject_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
213 {
214  using std::runtime_error;
215  using namespace nudb::detail;
216 
217  // Check for inner node v1
218  if (in_size == 525)
219  {
220  istream is(in, in_size);
221  std::uint32_t index;
222  std::uint32_t unused;
223  std::uint8_t kind;
224  std::uint32_t prefix;
225  read<std::uint32_t>(is, index);
226  read<std::uint32_t>(is, unused);
227  read<std::uint8_t>(is, kind);
228  read<std::uint32_t>(is, prefix);
229  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
230  {
231  std::size_t n = 0;
232  std::uint16_t mask = 0;
234  for (unsigned bit = 0x8000; bit; bit >>= 1)
235  {
236  void const* const h = is(32);
237  if (std::memcmp(h, zero32(), 32) == 0)
238  continue;
239  std::memcpy(vh.data() + 32 * n, h, 32);
240  mask |= bit;
241  ++n;
242  }
244  if (n < 16)
245  {
246  // 2 = v1 inner node compressed
247  auto const type = 2U;
248  auto const vs = size_varint(type);
249  result.second = vs + field<std::uint16_t>::size + // mask
250  n * 32; // hashes
251  std::uint8_t* out =
252  reinterpret_cast<std::uint8_t*>(bf(result.second));
253  result.first = out;
254  ostream os(out, result.second);
255  write<varint>(os, type);
256  write<std::uint16_t>(os, mask);
257  write(os, vh.data(), n * 32);
258  return result;
259  }
260  // 3 = full v1 inner node
261  auto const type = 3U;
262  auto const vs = size_varint(type);
263  result.second = vs + n * 32; // hashes
264  std::uint8_t* out =
265  reinterpret_cast<std::uint8_t*>(bf(result.second));
266  result.first = out;
267  ostream os(out, result.second);
268  write<varint>(os, type);
269  write(os, vh.data(), n * 32);
270  return result;
271  }
272  }
273 
275 
276  constexpr std::size_t codecType = 1;
277  auto const vn = write_varint(vi.data(), codecType);
279  switch (codecType)
280  {
281  // case 0 was uncompressed data; we always compress now.
282  case 1: // lz4
283  {
284  std::uint8_t* p;
285  auto const lzr = NodeStore::lz4_compress(
286  in, in_size, [&p, &vn, &bf](std::size_t n) {
287  p = reinterpret_cast<std::uint8_t*>(bf(vn + n));
288  return p + vn;
289  });
290  std::memcpy(p, vi.data(), vn);
291  result.first = p;
292  result.second = vn + lzr.second;
293  break;
294  }
295  default:
296  Throw<std::logic_error>(
297  "nodeobject codec: unknown=" + std::to_string(codecType));
298  };
299  return result;
300 }
301 
302 // Modifies an inner node to erase the ledger
303 // sequence and type information so the codec
304 // verification can pass.
305 //
306 template <class = void>
307 void
308 filter_inner(void* in, std::size_t in_size)
309 {
310  using namespace nudb::detail;
311 
312  // Check for inner node
313  if (in_size == 525)
314  {
315  istream is(in, in_size);
316  std::uint32_t index;
317  std::uint32_t unused;
318  std::uint8_t kind;
319  std::uint32_t prefix;
320  read<std::uint32_t>(is, index);
321  read<std::uint32_t>(is, unused);
322  read<std::uint8_t>(is, kind);
323  read<std::uint32_t>(is, prefix);
324  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
325  {
326  ostream os(in, 9);
327  write<std::uint32_t>(os, 0);
328  write<std::uint32_t>(os, 0);
329  write<std::uint8_t>(os, hotUNKNOWN);
330  }
331  }
332 }
333 
334 } // namespace NodeStore
335 } // namespace ripple
336 
337 #endif
ripple::NodeStore::nodeobject_decompress
std::pair< void const *, std::size_t > nodeobject_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:101
ripple::hotUNKNOWN
@ hotUNKNOWN
Definition: NodeObject.h:33
std::string
STL class.
utility
cstring
std::pair
ripple::NodeStore::lz4_decompress
std::pair< void const *, std::size_t > lz4_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:43
ripple::QualityDirection::in
@ in
ripple::NodeStore::write
void write(nudb::detail::ostream &os, std::size_t t)
Definition: varint.h:131
ripple::QualityDirection::out
@ out
ripple::HashPrefix::innerNode
@ innerNode
inner node in V1 tree
ripple::NodeStore::read_varint
std::size_t read_varint(void const *buf, std::size_t buflen, std::size_t &t)
Definition: varint.h:56
ripple::NodeStore::lz4_compress
std::pair< void const *, std::size_t > lz4_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:66
cstddef
std::to_string
T to_string(T... args)
ripple::NodeStore::filter_inner
void filter_inner(void *in, std::size_t in_size)
Definition: codec.h:308
std::array
STL class.
std::runtime_error
STL class.
std::uint8_t
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::NodeStore::zero32
void const * zero32()
Definition: codec.h:204
ripple::NodeStore::size_varint
std::size_t size_varint(T v)
Definition: varint.h:87
std::size_t
std::memcpy
T memcpy(T... args)
ripple::NodeStore::nodeobject_compress
std::pair< void const *, std::size_t > nodeobject_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:212
std::memcmp
T memcmp(T... args)
std::array::data
T data(T... args)
ripple::NodeStore::write_varint
std::size_t write_varint(void *p0, std::size_t v)
Definition: varint.h:100
std::memset
T memset(T... args)
string