rippled
codec.h
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #ifndef RIPPLE_NODESTORE_CODEC_H_INCLUDED
21 #define RIPPLE_NODESTORE_CODEC_H_INCLUDED
22 
23 // Disable lz4 deprecation warning due to incompatibility with clang attributes
24 #define LZ4_DISABLE_DEPRECATE_WARNINGS
25 
#include <ripple/basics/contract.h>
#include <ripple/basics/safe_cast.h>
#include <ripple/nodestore/NodeObject.h>
#include <ripple/nodestore/impl/varint.h>
#include <ripple/protocol/HashPrefix.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <lz4.h>
#include <nudb/detail/field.hpp>
#include <stdexcept>
#include <string>
#include <utility>
37 
38 namespace ripple {
39 namespace NodeStore {
40 
41 template <class BufferFactory>
43 lz4_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
44 {
45  using std::runtime_error;
46  using namespace nudb::detail;
48  std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
49  auto const n = read_varint(p, in_size, result.second);
50  if (n == 0)
51  Throw<std::runtime_error>("lz4 decompress: n == 0");
52  void* const out = bf(result.second);
53  result.first = out;
54  if (LZ4_decompress_safe(
55  reinterpret_cast<char const*>(in) + n,
56  reinterpret_cast<char*>(out),
57  in_size - n,
58  result.second) != result.second)
59  Throw<std::runtime_error>("lz4 decompress: LZ4_decompress_safe");
60  return result;
61 }
62 
63 template <class BufferFactory>
65 lz4_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
66 {
67  using std::runtime_error;
68  using namespace nudb::detail;
71  auto const n = write_varint(vi.data(), in_size);
72  auto const out_max = LZ4_compressBound(in_size);
73  std::uint8_t* out = reinterpret_cast<std::uint8_t*>(bf(n + out_max));
74  result.first = out;
75  std::memcpy(out, vi.data(), n);
76  auto const out_size = LZ4_compress_default(
77  reinterpret_cast<char const*>(in),
78  reinterpret_cast<char*>(out + n),
79  in_size,
80  out_max);
81  if (out_size == 0)
82  Throw<std::runtime_error>("lz4 compress");
83  result.second = n + out_size;
84  return result;
85 }
86 
87 //------------------------------------------------------------------------------
88 
89 /*
90  object types:
91 
92  0 = Uncompressed
93  1 = lz4 compressed
94  2 = inner node compressed
95  3 = full inner node
96 */
97 
98 template <class BufferFactory>
100 nodeobject_decompress(void const* in, std::size_t in_size, BufferFactory&& bf)
101 {
102  using namespace nudb::detail;
103 
104  std::uint8_t const* p = reinterpret_cast<std::uint8_t const*>(in);
105  std::size_t type;
106  auto const vn = read_varint(p, in_size, type);
107  if (vn == 0)
108  Throw<std::runtime_error>("nodeobject decompress");
109  p += vn;
110  in_size -= vn;
111 
113  switch (type)
114  {
115  case 0: // uncompressed
116  {
117  result.first = p;
118  result.second = in_size;
119  break;
120  }
121  case 1: // lz4
122  {
123  result = lz4_decompress(p, in_size, bf);
124  break;
125  }
126  case 2: // compressed v1 inner node
127  {
128  auto const hs = field<std::uint16_t>::size; // Mask
129  if (in_size < hs + 32)
130  Throw<std::runtime_error>(
131  "nodeobject codec v1: short inner node size: " +
132  std::string("in_size = ") + std::to_string(in_size) +
133  " hs = " + std::to_string(hs));
134  istream is(p, in_size);
135  std::uint16_t mask;
136  read<std::uint16_t>(is, mask); // Mask
137  in_size -= hs;
138  result.second = 525;
139  void* const out = bf(result.second);
140  result.first = out;
141  ostream os(out, result.second);
142  write<std::uint32_t>(os, 0);
143  write<std::uint32_t>(os, 0);
144  write<std::uint8_t>(os, hotUNKNOWN);
145  write<std::uint32_t>(
146  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
147  if (mask == 0)
148  Throw<std::runtime_error>(
149  "nodeobject codec v1: empty inner node");
150  std::uint16_t bit = 0x8000;
151  for (int i = 16; i--; bit >>= 1)
152  {
153  if (mask & bit)
154  {
155  if (in_size < 32)
156  Throw<std::runtime_error>(
157  "nodeobject codec v1: short inner node subsize: " +
158  std::string("in_size = ") +
159  std::to_string(in_size) +
160  " i = " + std::to_string(i));
161  std::memcpy(os.data(32), is(32), 32);
162  in_size -= 32;
163  }
164  else
165  {
166  std::memset(os.data(32), 0, 32);
167  }
168  }
169  if (in_size > 0)
170  Throw<std::runtime_error>(
171  "nodeobject codec v1: long inner node, in_size = " +
172  std::to_string(in_size));
173  break;
174  }
175  case 3: // full v1 inner node
176  {
177  if (in_size != 16 * 32) // hashes
178  Throw<std::runtime_error>(
179  "nodeobject codec v1: short full inner node, in_size = " +
180  std::to_string(in_size));
181  istream is(p, in_size);
182  result.second = 525;
183  void* const out = bf(result.second);
184  result.first = out;
185  ostream os(out, result.second);
186  write<std::uint32_t>(os, 0);
187  write<std::uint32_t>(os, 0);
188  write<std::uint8_t>(os, hotUNKNOWN);
189  write<std::uint32_t>(
190  os, static_cast<std::uint32_t>(HashPrefix::innerNode));
191  write(os, is(512), 512);
192  break;
193  }
194  default:
195  Throw<std::runtime_error>(
196  "nodeobject codec: bad type=" + std::to_string(type));
197  };
198  return result;
199 }
200 
/** Returns a pointer to a block of 32 zero bytes.

    The storage has static duration, so every call yields the same
    address. The block is immutable.
*/
template <class = void>
void const*
zero32()
{
    static constexpr std::array<char, 32> v{};
    return v.data();
}
208 
209 template <class BufferFactory>
211 nodeobject_compress(void const* in, std::size_t in_size, BufferFactory&& bf)
212 {
213  using std::runtime_error;
214  using namespace nudb::detail;
215 
216  // Check for inner node v1
217  if (in_size == 525)
218  {
219  istream is(in, in_size);
220  std::uint32_t index;
221  std::uint32_t unused;
222  std::uint8_t kind;
223  std::uint32_t prefix;
224  read<std::uint32_t>(is, index);
225  read<std::uint32_t>(is, unused);
226  read<std::uint8_t>(is, kind);
227  read<std::uint32_t>(is, prefix);
228  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
229  {
230  std::size_t n = 0;
231  std::uint16_t mask = 0;
233  for (unsigned bit = 0x8000; bit; bit >>= 1)
234  {
235  void const* const h = is(32);
236  if (std::memcmp(h, zero32(), 32) == 0)
237  continue;
238  std::memcpy(vh.data() + 32 * n, h, 32);
239  mask |= bit;
240  ++n;
241  }
243  if (n < 16)
244  {
245  // 2 = v1 inner node compressed
246  auto const type = 2U;
247  auto const vs = size_varint(type);
248  result.second = vs + field<std::uint16_t>::size + // mask
249  n * 32; // hashes
250  std::uint8_t* out =
251  reinterpret_cast<std::uint8_t*>(bf(result.second));
252  result.first = out;
253  ostream os(out, result.second);
254  write<varint>(os, type);
255  write<std::uint16_t>(os, mask);
256  write(os, vh.data(), n * 32);
257  return result;
258  }
259  // 3 = full v1 inner node
260  auto const type = 3U;
261  auto const vs = size_varint(type);
262  result.second = vs + n * 32; // hashes
263  std::uint8_t* out =
264  reinterpret_cast<std::uint8_t*>(bf(result.second));
265  result.first = out;
266  ostream os(out, result.second);
267  write<varint>(os, type);
268  write(os, vh.data(), n * 32);
269  return result;
270  }
271  }
272 
274 
275  constexpr std::size_t codecType = 1;
276  auto const vn = write_varint(vi.data(), codecType);
278  switch (codecType)
279  {
280  // case 0 was uncompressed data; we always compress now.
281  case 1: // lz4
282  {
283  std::uint8_t* p;
284  auto const lzr = NodeStore::lz4_compress(
285  in, in_size, [&p, &vn, &bf](std::size_t n) {
286  p = reinterpret_cast<std::uint8_t*>(bf(vn + n));
287  return p + vn;
288  });
289  std::memcpy(p, vi.data(), vn);
290  result.first = p;
291  result.second = vn + lzr.second;
292  break;
293  }
294  default:
295  Throw<std::logic_error>(
296  "nodeobject codec: unknown=" + std::to_string(codecType));
297  };
298  return result;
299 }
300 
301 // Modifies an inner node to erase the ledger
302 // sequence and type information so the codec
303 // verification can pass.
304 //
305 template <class = void>
306 void
307 filter_inner(void* in, std::size_t in_size)
308 {
309  using namespace nudb::detail;
310 
311  // Check for inner node
312  if (in_size == 525)
313  {
314  istream is(in, in_size);
315  std::uint32_t index;
316  std::uint32_t unused;
317  std::uint8_t kind;
318  std::uint32_t prefix;
319  read<std::uint32_t>(is, index);
320  read<std::uint32_t>(is, unused);
321  read<std::uint8_t>(is, kind);
322  read<std::uint32_t>(is, prefix);
323  if (safe_cast<HashPrefix>(prefix) == HashPrefix::innerNode)
324  {
325  ostream os(in, 9);
326  write<std::uint32_t>(os, 0);
327  write<std::uint32_t>(os, 0);
328  write<std::uint8_t>(os, hotUNKNOWN);
329  }
330  }
331 }
332 
333 } // namespace NodeStore
334 } // namespace ripple
335 
336 #endif
ripple::NodeStore::nodeobject_decompress
std::pair< void const *, std::size_t > nodeobject_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:100
ripple::hotUNKNOWN
@ hotUNKNOWN
Definition: NodeObject.h:33
std::string
STL class.
utility
cstring
std::pair
ripple::NodeStore::lz4_decompress
std::pair< void const *, std::size_t > lz4_decompress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:43
ripple::QualityDirection::in
@ in
ripple::NodeStore::write
void write(nudb::detail::ostream &os, std::size_t t)
Definition: varint.h:133
ripple::QualityDirection::out
@ out
ripple::HashPrefix::innerNode
@ innerNode
inner node in V1 tree
ripple::NodeStore::read_varint
std::size_t read_varint(void const *buf, std::size_t buflen, std::size_t &t)
Definition: varint.h:56
ripple::NodeStore::lz4_compress
std::pair< void const *, std::size_t > lz4_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:65
cstddef
std::to_string
T to_string(T... args)
ripple::NodeStore::filter_inner
void filter_inner(void *in, std::size_t in_size)
Definition: codec.h:307
std::array
STL class.
std::runtime_error
STL class.
std::uint8_t
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::NodeStore::zero32
void const * zero32()
Definition: codec.h:203
ripple::NodeStore::size_varint
std::size_t size_varint(T v)
Definition: varint.h:89
std::size_t
std::memcpy
T memcpy(T... args)
ripple::NodeStore::nodeobject_compress
std::pair< void const *, std::size_t > nodeobject_compress(void const *in, std::size_t in_size, BufferFactory &&bf)
Definition: codec.h:211
std::memcmp
T memcmp(T... args)
std::array::data
T data(T... args)
ripple::NodeStore::write_varint
std::size_t write_varint(void *p0, std::size_t v)
Definition: varint.h:102
std::memset
T memset(T... args)
string