rippled
ReportingETL.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2020 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/app/rdb/backend/RelationalDBInterfacePostgres.h>
21 #include <ripple/app/reporting/ReportingETL.h>
22 
23 #include <ripple/beast/core/CurrentThreadName.h>
24 #include <ripple/json/json_reader.h>
25 #include <ripple/json/json_writer.h>
26 #include <boost/asio/connect.hpp>
27 #include <boost/asio/ip/tcp.hpp>
28 #include <boost/beast/core.hpp>
29 #include <boost/beast/websocket.hpp>
30 #include <cstdlib>
31 #include <iostream>
32 #include <string>
33 #include <variant>
34 
35 namespace ripple {
36 
37 namespace detail {
40 toString(LedgerInfo const& info)
41 {
43  ss << "LedgerInfo { Sequence : " << info.seq
44  << " Hash : " << strHex(info.hash) << " TxHash : " << strHex(info.txHash)
45  << " AccountHash : " << strHex(info.accountHash)
46  << " ParentHash : " << strHex(info.parentHash) << " }";
47  return ss.str();
48 }
49 } // namespace detail
50 
51 void
55 {
57  size_t num = 0;
58  while (!stopping_ && (sle = writeQueue.pop()))
59  {
60  assert(sle);
61  if (!ledger->exists(sle->key()))
62  ledger->rawInsert(sle);
63 
64  if (flushInterval_ != 0 && (num % flushInterval_) == 0)
65  {
66  JLOG(journal_.debug()) << "Flushing! key = " << strHex(sle->key());
67  ledger->stateMap().flushDirty(hotACCOUNT_NODE);
68  }
69  ++num;
70  }
71 }
72 
76  org::xrpl::rpc::v1::GetLedgerResponse& data)
77 {
79  for (auto& txn : data.transactions_list().transactions())
80  {
81  auto& raw = txn.transaction_blob();
82 
83  SerialIter it{raw.data(), raw.size()};
84  STTx sttx{it};
85 
86  auto txSerializer = std::make_shared<Serializer>(sttx.getSerializer());
87 
88  TxMeta txMeta{
89  sttx.getTransactionID(), ledger->info().seq, txn.metadata_blob()};
90 
91  auto metaSerializer =
92  std::make_shared<Serializer>(txMeta.getAsObject().getSerializer());
93 
94  JLOG(journal_.trace())
95  << __func__ << " : "
96  << "Inserting transaction = " << sttx.getTransactionID();
97  uint256 nodestoreHash = ledger->rawTxInsertWithHash(
98  sttx.getTransactionID(), txSerializer, metaSerializer);
99  accountTxData.emplace_back(txMeta, std::move(nodestoreHash), journal_);
100  }
101  return accountTxData;
102 }
103 
105 ReportingETL::loadInitialLedger(uint32_t startingSequence)
106 {
107  // check that database is actually empty
108  auto ledger = std::const_pointer_cast<Ledger>(
110  if (ledger)
111  {
112  JLOG(journal_.fatal()) << __func__ << " : "
113  << "Database is not empty";
114  assert(false);
115  return {};
116  }
117 
118  // fetch the ledger from the network. This function will not return until
119  // either the fetch is successful, or the server is being shutdown. This
120  // only fetches the ledger header and the transactions+metadata
122  fetchLedgerData(startingSequence)};
123  if (!ledgerData)
124  return {};
125 
126  LedgerInfo lgrInfo =
127  deserializeHeader(makeSlice(ledgerData->ledger_header()), true);
128 
129  JLOG(journal_.debug()) << __func__ << " : "
130  << "Deserialized ledger header. "
131  << detail::toString(lgrInfo);
132 
133  ledger =
134  std::make_shared<Ledger>(lgrInfo, app_.config(), app_.getNodeFamily());
135  ledger->stateMap().clearSynching();
136  ledger->txMap().clearSynching();
137 
138 #ifdef RIPPLED_REPORTING
140  insertTransactions(ledger, *ledgerData);
141 #endif
142 
144 
146  std::thread asyncWriter{[this, &ledger, &writeQueue]() {
147  consumeLedgerData(ledger, writeQueue);
148  }};
149 
150  // download the full account state map. This function downloads full ledger
151  // data and pushes the downloaded data into the writeQueue. asyncWriter
152  // consumes from the queue and inserts the data into the Ledger object.
153  // Once the below call returns, all data has been pushed into the queue
154  loadBalancer_.loadInitialLedger(startingSequence, writeQueue);
155 
156  // null is used to respresent the end of the queue
158  writeQueue.push(null);
159  // wait for the writer to finish
160  asyncWriter.join();
161 
162  if (!stopping_)
163  {
164  flushLedger(ledger);
165  if (app_.config().reporting())
166  {
167 #ifdef RIPPLED_REPORTING
168  dynamic_cast<RelationalDBInterfacePostgres*>(
170  ->writeLedgerAndTransactions(ledger->info(), accountTxData);
171 #endif
172  }
173  }
174  auto end = std::chrono::system_clock::now();
175  JLOG(journal_.debug()) << "Time to download and store ledger = "
176  << ((end - start).count()) / 1000000000.0;
177  return ledger;
178 }
179 
180 void
182 {
183  JLOG(journal_.debug()) << __func__ << " : "
184  << "Flushing ledger. "
185  << detail::toString(ledger->info());
186  // These are recomputed in setImmutable
187  auto& accountHash = ledger->info().accountHash;
188  auto& txHash = ledger->info().txHash;
189  auto& ledgerHash = ledger->info().hash;
190 
191  ledger->setImmutable(app_.config(), false);
193 
194  auto numFlushed = ledger->stateMap().flushDirty(hotACCOUNT_NODE);
195 
196  auto numTxFlushed = ledger->txMap().flushDirty(hotTRANSACTION_NODE);
197 
198  {
199  Serializer s(128);
201  addRaw(ledger->info(), s);
203  hotLEDGER,
204  std::move(s.modData()),
205  ledger->info().hash,
206  ledger->info().seq);
207  }
208 
209  app_.getNodeStore().sync();
210 
211  auto end = std::chrono::system_clock::now();
212 
213  JLOG(journal_.debug()) << __func__ << " : "
214  << "Flushed " << numFlushed
215  << " nodes to nodestore from stateMap";
216  JLOG(journal_.debug()) << __func__ << " : "
217  << "Flushed " << numTxFlushed
218  << " nodes to nodestore from txMap";
219 
220  JLOG(journal_.debug()) << __func__ << " : "
221  << "Flush took "
222  << (end - start).count() / 1000000000.0
223  << " seconds";
224 
225  if (numFlushed == 0)
226  {
227  JLOG(journal_.fatal()) << __func__ << " : "
228  << "Flushed 0 nodes from state map";
229  assert(false);
230  }
231  if (numTxFlushed == 0)
232  {
233  JLOG(journal_.warn()) << __func__ << " : "
234  << "Flushed 0 nodes from tx map";
235  }
236 
237  // Make sure calculated hashes are correct
238  if (ledger->stateMap().getHash().as_uint256() != accountHash)
239  {
240  JLOG(journal_.fatal())
241  << __func__ << " : "
242  << "State map hash does not match. "
243  << "Expected hash = " << strHex(accountHash) << "Actual hash = "
244  << strHex(ledger->stateMap().getHash().as_uint256());
245  Throw<std::runtime_error>("state map hash mismatch");
246  }
247 
248  if (ledger->txMap().getHash().as_uint256() != txHash)
249  {
250  JLOG(journal_.fatal())
251  << __func__ << " : "
252  << "Tx map hash does not match. "
253  << "Expected hash = " << strHex(txHash) << "Actual hash = "
254  << strHex(ledger->txMap().getHash().as_uint256());
255  Throw<std::runtime_error>("tx map hash mismatch");
256  }
257 
258  if (ledger->info().hash != ledgerHash)
259  {
260  JLOG(journal_.fatal())
261  << __func__ << " : "
262  << "Ledger hash does not match. "
263  << "Expected hash = " << strHex(ledgerHash)
264  << "Actual hash = " << strHex(ledger->info().hash);
265  Throw<std::runtime_error>("ledger hash mismatch");
266  }
267 
268  JLOG(journal_.info()) << __func__ << " : "
269  << "Successfully flushed ledger! "
270  << detail::toString(ledger->info());
271 }
272 
273 void
275 {
276  app_.getOPs().pubLedger(ledger);
277 
278  setLastPublish();
279 }
280 
281 bool
282 ReportingETL::publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts)
283 {
284  JLOG(journal_.info()) << __func__ << " : "
285  << "Attempting to publish ledger = "
286  << ledgerSequence;
287  size_t numAttempts = 0;
288  while (!stopping_)
289  {
290  auto ledger = app_.getLedgerMaster().getLedgerBySeq(ledgerSequence);
291 
292  if (!ledger)
293  {
294  JLOG(journal_.warn())
295  << __func__ << " : "
296  << "Trying to publish. Could not find ledger with sequence = "
297  << ledgerSequence;
298  // We try maxAttempts times to publish the ledger, waiting one
299  // second in between each attempt.
300  // If the ledger is not present in the database after maxAttempts,
301  // we attempt to take over as the writer. If the takeover fails,
302  // doContinuousETL will return, and this node will go back to
303  // publishing.
304  // If the node is in strict read only mode, we simply
305  // skip publishing this ledger and return false indicating the
306  // publish failed
307  if (numAttempts >= maxAttempts)
308  {
309  JLOG(journal_.error()) << __func__ << " : "
310  << "Failed to publish ledger after "
311  << numAttempts << " attempts.";
312  if (!readOnly_)
313  {
314  JLOG(journal_.info()) << __func__ << " : "
315  << "Attempting to become ETL writer";
316  return false;
317  }
318  else
319  {
320  JLOG(journal_.debug())
321  << __func__ << " : "
322  << "In strict read-only mode. "
323  << "Skipping publishing this ledger. "
324  << "Beginning fast forward.";
325  return false;
326  }
327  }
328  else
329  {
331  ++numAttempts;
332  }
333  continue;
334  }
335 
336  publishStrand_.post([this, ledger, fname = __func__]() {
337  app_.getOPs().pubLedger(ledger);
338  setLastPublish();
339  JLOG(journal_.info())
340  << fname << " : "
341  << "Published ledger. " << detail::toString(ledger->info());
342  });
343  return true;
344  }
345  return false;
346 }
347 
350 {
351  JLOG(journal_.debug()) << __func__ << " : "
352  << "Attempting to fetch ledger with sequence = "
353  << idx;
354 
356  loadBalancer_.fetchLedger(idx, false);
357  JLOG(journal_.trace()) << __func__ << " : "
358  << "GetLedger reply = " << response->DebugString();
359  return response;
360 }
361 
364 {
365  JLOG(journal_.debug()) << __func__ << " : "
366  << "Attempting to fetch ledger with sequence = "
367  << idx;
368 
370  loadBalancer_.fetchLedger(idx, true);
371  JLOG(journal_.trace()) << __func__ << " : "
372  << "GetLedger reply = " << response->DebugString();
373  return response;
374 }
375 
379  org::xrpl::rpc::v1::GetLedgerResponse& rawData)
380 {
381  JLOG(journal_.info()) << __func__ << " : "
382  << "Beginning ledger update";
383 
384  LedgerInfo lgrInfo =
385  deserializeHeader(makeSlice(rawData.ledger_header()), true);
386 
387  JLOG(journal_.debug()) << __func__ << " : "
388  << "Deserialized ledger header. "
389  << detail::toString(lgrInfo);
390 
391  next->setLedgerInfo(lgrInfo);
392 
393  next->stateMap().clearSynching();
394  next->txMap().clearSynching();
395 
397  insertTransactions(next, rawData)};
398 
399  JLOG(journal_.debug())
400  << __func__ << " : "
401  << "Inserted all transactions. Number of transactions = "
402  << rawData.transactions_list().transactions_size();
403 
404  for (auto& obj : rawData.ledger_objects().objects())
405  {
406  auto key = uint256::fromVoid(obj.key().data());
407  auto& data = obj.data();
408 
409  // indicates object was deleted
410  if (data.size() == 0)
411  {
412  JLOG(journal_.trace()) << __func__ << " : "
413  << "Erasing object = " << key;
414  if (next->exists(key))
415  next->rawErase(key);
416  }
417  else
418  {
419  SerialIter it{data.data(), data.size()};
420  std::shared_ptr<SLE> sle = std::make_shared<SLE>(it, key);
421 
422  if (next->exists(key))
423  {
424  JLOG(journal_.trace()) << __func__ << " : "
425  << "Replacing object = " << key;
426  next->rawReplace(sle);
427  }
428  else
429  {
430  JLOG(journal_.trace()) << __func__ << " : "
431  << "Inserting object = " << key;
432  next->rawInsert(sle);
433  }
434  }
435  }
436  JLOG(journal_.debug())
437  << __func__ << " : "
438  << "Inserted/modified/deleted all objects. Number of objects = "
439  << rawData.ledger_objects().objects_size();
440 
441  if (!rawData.skiplist_included())
442  {
443  next->updateSkipList();
444  JLOG(journal_.warn())
445  << __func__ << " : "
446  << "tx process is not sending skiplist. This indicates that the tx "
447  "process is parsing metadata instead of doing a SHAMap diff. "
448  "Make sure tx process is running the same code as reporting to "
449  "use SHAMap diff instead of parsing metadata";
450  }
451 
452  JLOG(journal_.debug()) << __func__ << " : "
453  << "Finished ledger update. "
454  << detail::toString(next->info());
455  return {std::move(next), std::move(accountTxData)};
456 }
457 
458 // Database must be populated when this starts
460 ReportingETL::runETLPipeline(uint32_t startSequence)
461 {
462  /*
463  * Behold, mortals! This function spawns three separate threads, which talk
464  * to each other via 2 different thread safe queues and 1 atomic variable.
465  * All threads and queues are function local. This function returns when all
466  * of the threads exit. There are two termination conditions: the first is
467  * if the load thread encounters a write conflict. In this case, the load
468  * thread sets writeConflict, an atomic bool, to true, which signals the
469  * other threads to stop. The second termination condition is when the
470  * entire server is shutting down, which is detected in one of three ways:
471  * 1. isStopping() returns true if the server is shutting down
472  * 2. networkValidatedLedgers_.waitUntilValidatedByNetwork returns
473  * false, signaling the wait was aborted.
474  * 3. fetchLedgerDataAndDiff returns an empty optional, signaling the fetch
475  * was aborted.
476  * In all cases, the extract thread detects this condition,
477  * and pushes an empty optional onto the transform queue. The transform
478  * thread, upon popping an empty optional, pushes an empty optional onto the
479  * load queue, and then returns. The load thread, upon popping an empty
480  * optional, returns.
481  */
482 
483  JLOG(journal_.debug()) << __func__ << " : "
484  << "Starting etl pipeline";
485  writing_ = true;
486 
487  std::shared_ptr<Ledger> parent = std::const_pointer_cast<Ledger>(
488  app_.getLedgerMaster().getLedgerBySeq(startSequence - 1));
489  if (!parent)
490  {
491  assert(false);
492  Throw<std::runtime_error>("runETLPipeline: parent ledger is null");
493  }
494 
495  std::atomic_bool writeConflict = false;
496  std::optional<uint32_t> lastPublishedSequence;
497  constexpr uint32_t maxQueueSize = 1000;
498 
500  transformQueue{maxQueueSize};
501 
502  std::thread extracter{[this,
503  &startSequence,
504  &writeConflict,
505  &transformQueue]() {
506  beast::setCurrentThreadName("rippled: ReportingETL extract");
507  uint32_t currentSequence = startSequence;
508 
509  // there are two stopping conditions here.
510  // First, if there is a write conflict in the load thread, the ETL
511  // mechanism should stop.
512  // The other stopping condition is if the entire server is shutting
513  // down. This can be detected in a variety of ways. See the comment
514  // at the top of the function
516  currentSequence) &&
517  !writeConflict && !isStopping())
518  {
521  fetchLedgerDataAndDiff(currentSequence)};
522  auto end = std::chrono::system_clock::now();
523 
524  auto time = ((end - start).count()) / 1000000000.0;
525  auto tps =
526  fetchResponse->transactions_list().transactions_size() / time;
527 
528  JLOG(journal_.debug()) << "Extract phase time = " << time
529  << " . Extract phase tps = " << tps;
530  // if the fetch is unsuccessful, stop. fetchLedger only returns
531  // false if the server is shutting down, or if the ledger was
532  // found in the database (which means another process already
533  // wrote the ledger that this process was trying to extract;
534  // this is a form of a write conflict). Otherwise,
535  // fetchLedgerDataAndDiff will keep trying to fetch the
536  // specified ledger until successful
537  if (!fetchResponse)
538  {
539  break;
540  }
541 
542  transformQueue.push(std::move(fetchResponse));
543  ++currentSequence;
544  }
545  // empty optional tells the transformer to shut down
546  transformQueue.push({});
547  }};
548 
552  loadQueue{maxQueueSize};
553  std::thread transformer{[this,
554  &parent,
555  &writeConflict,
556  &loadQueue,
557  &transformQueue]() {
558  beast::setCurrentThreadName("rippled: ReportingETL transform");
559 
560  assert(parent);
561  parent = std::make_shared<Ledger>(*parent, NetClock::time_point{});
562  while (!writeConflict)
563  {
565  transformQueue.pop()};
566  // if fetchResponse is an empty optional, the extracter thread has
567  // stopped and the transformer should stop as well
568  if (!fetchResponse)
569  {
570  break;
571  }
572  if (isStopping())
573  continue;
574 
576  auto [next, accountTxData] =
577  buildNextLedger(parent, *fetchResponse);
578  auto end = std::chrono::system_clock::now();
579 
580  auto duration = ((end - start).count()) / 1000000000.0;
581  JLOG(journal_.debug()) << "transform time = " << duration;
582  // The below line needs to execute before pushing to the queue, in
583  // order to prevent this thread and the loader thread from accessing
584  // the same SHAMap concurrently
585  parent = std::make_shared<Ledger>(*next, NetClock::time_point{});
586  loadQueue.push(
587  std::make_pair(std::move(next), std::move(accountTxData)));
588  }
589  // empty optional tells the loader to shutdown
590  loadQueue.push({});
591  }};
592 
593  std::thread loader{
594  [this, &lastPublishedSequence, &loadQueue, &writeConflict]() {
595  beast::setCurrentThreadName("rippled: ReportingETL load");
596  size_t totalTransactions = 0;
597  double totalTime = 0;
598  while (!writeConflict)
599  {
603  result{loadQueue.pop()};
604  // if result is an empty optional, the transformer thread has
605  // stopped and the loader should stop as well
606  if (!result)
607  break;
608  if (isStopping())
609  continue;
610 
611  auto& ledger = result->first;
612  auto& accountTxData = result->second;
613 
615  // write to the key-value store
616  flushLedger(ledger);
617 
618  auto mid = std::chrono::system_clock::now();
619  // write to RDBMS
620  // if there is a write conflict, some other process has already
621  // written this ledger and has taken over as the ETL writer
622 #ifdef RIPPLED_REPORTING
623  if (!dynamic_cast<RelationalDBInterfacePostgres*>(
626  ledger->info(), accountTxData))
627  writeConflict = true;
628 #endif
629  auto end = std::chrono::system_clock::now();
630 
631  if (!writeConflict)
632  {
633  publishLedger(ledger);
634  lastPublishedSequence = ledger->info().seq;
635  }
636  // print some performance numbers
637  auto kvTime = ((mid - start).count()) / 1000000000.0;
638  auto relationalTime = ((end - mid).count()) / 1000000000.0;
639 
640  size_t numTxns = accountTxData.size();
641  totalTime += kvTime;
642  totalTransactions += numTxns;
643  JLOG(journal_.info())
644  << "Load phase of etl : "
645  << "Successfully published ledger! Ledger info: "
646  << detail::toString(ledger->info())
647  << ". txn count = " << numTxns
648  << ". key-value write time = " << kvTime
649  << ". relational write time = " << relationalTime
650  << ". key-value tps = " << numTxns / kvTime
651  << ". relational tps = " << numTxns / relationalTime
652  << ". total key-value tps = "
653  << totalTransactions / totalTime;
654  }
655  }};
656 
657  // wait for all of the threads to stop
658  loader.join();
659  extracter.join();
660  transformer.join();
661  writing_ = false;
662 
663  JLOG(journal_.debug()) << __func__ << " : "
664  << "Stopping etl pipeline";
665 
666  return lastPublishedSequence;
667 }
668 
669 // main loop. The software begins monitoring the ledgers that are validated
670 // by the nework. The member networkValidatedLedgers_ keeps track of the
671 // sequences of ledgers validated by the network. Whenever a ledger is validated
672 // by the network, the software looks for that ledger in the database. Once the
673 // ledger is found in the database, the software publishes that ledger to the
674 // ledgers stream. If a network validated ledger is not found in the database
675 // after a certain amount of time, then the software attempts to take over
676 // responsibility of the ETL process, where it writes new ledgers to the
677 // database. The software will relinquish control of the ETL process if it
678 // detects that another process has taken over ETL.
679 void
681 {
682  auto ledger = std::const_pointer_cast<Ledger>(
684  if (!ledger)
685  {
686  JLOG(journal_.info()) << __func__ << " : "
687  << "Database is empty. Will download a ledger "
688  "from the network.";
689  if (startSequence_)
690  {
691  JLOG(journal_.info())
692  << __func__ << " : "
693  << "ledger sequence specified in config. "
694  << "Will begin ETL process starting with ledger "
695  << *startSequence_;
697  }
698  else
699  {
700  JLOG(journal_.info())
701  << __func__ << " : "
702  << "Waiting for next ledger to be validated by network...";
703  std::optional<uint32_t> mostRecentValidated =
705  if (mostRecentValidated)
706  {
707  JLOG(journal_.info()) << __func__ << " : "
708  << "Ledger " << *mostRecentValidated
709  << " has been validated. "
710  << "Downloading...";
711  ledger = loadInitialLedger(*mostRecentValidated);
712  }
713  else
714  {
715  JLOG(journal_.info()) << __func__ << " : "
716  << "The wait for the next validated "
717  << "ledger has been aborted. "
718  << "Exiting monitor loop";
719  return;
720  }
721  }
722  }
723  else
724  {
725  if (startSequence_)
726  {
727  Throw<std::runtime_error>(
728  "start sequence specified but db is already populated");
729  }
730  JLOG(journal_.info())
731  << __func__ << " : "
732  << "Database already populated. Picking up from the tip of history";
733  }
734  if (!ledger)
735  {
736  JLOG(journal_.error())
737  << __func__ << " : "
738  << "Failed to load initial ledger. Exiting monitor loop";
739  return;
740  }
741  else
742  {
743  publishLedger(ledger);
744  }
745  uint32_t nextSequence = ledger->info().seq + 1;
746 
747  JLOG(journal_.debug()) << __func__ << " : "
748  << "Database is populated. "
749  << "Starting monitor loop. sequence = "
750  << nextSequence;
751  while (!stopping_ &&
753  {
754  JLOG(journal_.info()) << __func__ << " : "
755  << "Ledger with sequence = " << nextSequence
756  << " has been validated by the network. "
757  << "Attempting to find in database and publish";
758  // Attempt to take over responsibility of ETL writer after 10 failed
759  // attempts to publish the ledger. publishLedger() fails if the
760  // ledger that has been validated by the network is not found in the
761  // database after the specified number of attempts. publishLedger()
762  // waits one second between each attempt to read the ledger from the
763  // database
764  //
765  // In strict read-only mode, when the software fails to find a
766  // ledger in the database that has been validated by the network,
767  // the software will only try to publish subsequent ledgers once,
768  // until one of those ledgers is found in the database. Once the
769  // software successfully publishes a ledger, the software will fall
770  // back to the normal behavior of trying several times to publish
771  // the ledger that has been validated by the network. In this
772  // manner, a reporting processing running in read-only mode does not
773  // need to restart if the database is wiped.
774  constexpr size_t timeoutSeconds = 10;
775  bool success = publishLedger(nextSequence, timeoutSeconds);
776  if (!success)
777  {
778  JLOG(journal_.warn())
779  << __func__ << " : "
780  << "Failed to publish ledger with sequence = " << nextSequence
781  << " . Beginning ETL";
782  // doContinousETLPipelined returns the most recent sequence
783  // published empty optional if no sequence was published
784  std::optional<uint32_t> lastPublished =
785  runETLPipeline(nextSequence);
786  JLOG(journal_.info()) << __func__ << " : "
787  << "Aborting ETL. Falling back to publishing";
788  // if no ledger was published, don't increment nextSequence
789  if (lastPublished)
790  nextSequence = *lastPublished + 1;
791  }
792  else
793  {
794  ++nextSequence;
795  }
796  }
797 }
798 
799 void
801 {
802  JLOG(journal_.debug()) << "Starting reporting in strict read only mode";
803  std::optional<uint32_t> mostRecent =
805  if (!mostRecent)
806  return;
807  uint32_t sequence = *mostRecent;
808  bool success = true;
809  while (!stopping_ &&
811  {
812  success = publishLedger(sequence, success ? 30 : 1);
813  ++sequence;
814  }
815 }
816 
817 void
819 {
820  worker_ = std::thread([this]() {
821  beast::setCurrentThreadName("rippled: ReportingETL worker");
822  if (readOnly_)
823  monitorReadOnly();
824  else
825  monitor();
826  });
827 }
828 
830  : app_(app)
831  , journal_(app.journal("ReportingETL"))
832  , publishStrand_(app_.getIOService())
833  , loadBalancer_(*this)
834 {
835  // if present, get endpoint from config
836  if (app_.config().exists("reporting"))
837  {
838 #ifndef RIPPLED_REPORTING
839  Throw<std::runtime_error>(
840  "Config file specifies reporting, but software was not built with "
841  "-Dreporting=1. To use reporting, configure CMake with "
842  "-Dreporting=1");
843 #endif
844  if (!app_.config().useTxTables())
845  Throw<std::runtime_error>(
846  "Reporting requires tx tables. Set use_tx_tables=1 in config "
847  "file, under [ledger_tx_tables] section");
848  Section section = app_.config().section("reporting");
849 
850  JLOG(journal_.debug()) << "Parsing config info";
851 
852  auto& vals = section.values();
853  for (auto& v : vals)
854  {
855  JLOG(journal_.debug()) << "val is " << v;
856  Section source = app_.config().section(v);
857 
858  std::pair<std::string, bool> ipPair = source.find("source_ip");
859  if (!ipPair.second)
860  continue;
861 
862  std::pair<std::string, bool> wsPortPair =
863  source.find("source_ws_port");
864  if (!wsPortPair.second)
865  continue;
866 
867  std::pair<std::string, bool> grpcPortPair =
868  source.find("source_grpc_port");
869  if (!grpcPortPair.second)
870  {
871  // add source without grpc port
872  // used in read-only mode to detect when new ledgers have
873  // been validated. Used for publishing
874  if (app_.config().reportingReadOnly())
875  loadBalancer_.add(ipPair.first, wsPortPair.first);
876  continue;
877  }
878 
880  ipPair.first, wsPortPair.first, grpcPortPair.first);
881  }
882 
883  // this is true iff --reportingReadOnly was passed via command line
885 
886  // if --reportingReadOnly was not passed via command line, check config
887  // file. Command line takes precedence
888  if (!readOnly_)
889  {
890  std::pair<std::string, bool> ro = section.find("read_only");
891  if (ro.second)
892  {
893  readOnly_ = (ro.first == "true" || ro.first == "1");
895  }
896  }
897 
898  // handle command line arguments
899  if (app_.config().START_UP == Config::StartUpType::FRESH && !readOnly_)
900  {
902  }
903  // if not passed via command line, check config for start sequence
904  if (!startSequence_)
905  {
906  std::pair<std::string, bool> start = section.find("start_sequence");
907  if (start.second)
908  {
909  startSequence_ = std::stoi(start.first);
910  }
911  }
912 
913  std::pair<std::string, bool> flushInterval =
914  section.find("flush_interval");
915  if (flushInterval.second)
916  flushInterval_ = std::stoi(flushInterval.first);
917 
918  std::pair<std::string, bool> numMarkers = section.find("num_markers");
919  if (numMarkers.second)
920  numMarkers_ = std::stoi(numMarkers.first);
921  }
922 }
923 
924 } // namespace ripple
ripple::NetworkOPs::pubLedger
virtual void pubLedger(std::shared_ptr< ReadView const > const &lpAccepted)=0
beast::Journal::fatal
Stream fatal() const
Definition: Journal.h:339
ripple::ReportingETL::flushInterval_
size_t flushInterval_
Used to determine when to write to the database during the initial ledger download.
Definition: ReportingETL.h:115
ripple::Section
Holds a collection of configuration values.
Definition: BasicConfig.h:43
ripple::Application
Definition: Application.h:103
ripple::Application::getNodeFamily
virtual Family & getNodeFamily()=0
ripple::HashPrefix::ledgerMaster
@ ledgerMaster
ledger master data for signing
std::this_thread::sleep_for
T sleep_for(T... args)
ripple::makeSlice
std::enable_if_t< std::is_same< T, char >::value||std::is_same< T, unsigned char >::value, Slice > makeSlice(std::array< T, N > const &a)
Definition: Slice.h:240
ripple::ReportingETL::fetchLedgerData
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerData(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:349
ripple::ReportingETL::loadInitialLedger
std::shared_ptr< Ledger > loadInitialLedger(uint32_t sequence)
Download a ledger with specified sequence in full, via GetLedgerData, and write the data to the datab...
Definition: ReportingETL.cpp:105
ripple::ReportingETL::startSequence_
std::optional< uint32_t > startSequence_
Ledger sequence to start ETL from.
Definition: ReportingETL.h:139
std::string
STL class.
std::shared_ptr< Ledger >
ripple::LedgerInfo::parentHash
uint256 parentHash
Definition: ReadView.h:103
ripple::ThreadSafeQueue
Generic thread-safe queue with an optional maximum size Note, we can't use a lockfree queue here,...
Definition: ETLHelpers.h:105
beast::Journal::trace
Stream trace() const
Severity stream access functions.
Definition: Journal.h:309
ripple::Serializer::modData
Blob & modData()
Definition: Serializer.h:178
std::pair
ripple::ReportingETL::setLastPublish
void setLastPublish()
Definition: ReportingETL.h:155
ripple::LedgerInfo::hash
uint256 hash
Definition: ReadView.h:100
ripple::ReportingETL::insertTransactions
std::vector< AccountTransactionsData > insertTransactions(std::shared_ptr< Ledger > &ledger, org::xrpl::rpc::v1::GetLedgerResponse &data)
Insert all of the extracted transactions into the ledger.
Definition: ReportingETL.cpp:74
ripple::ThreadSafeQueue::push
void push(T const &elt)
Definition: ETLHelpers.h:126
ripple::addRaw
void addRaw(LedgerInfo const &info, Serializer &s, bool includeHash)
Definition: View.cpp:44
ripple::ReportingETL::flushLedger
void flushLedger(std::shared_ptr< Ledger > &ledger)
Write all new data to the key-value store.
Definition: ReportingETL.cpp:181
ripple::hotACCOUNT_NODE
@ hotACCOUNT_NODE
Definition: NodeObject.h:35
std::vector
STL class.
ripple::Application::getRelationalDBInterface
virtual RelationalDBInterface & getRelationalDBInterface()=0
ripple::ReportingETL::loadBalancer_
ETLLoadBalancer loadBalancer_
Mechanism for communicating with ETL sources.
Definition: ReportingETL.h:96
std::chrono::seconds
ripple::ETLLoadBalancer::fetchLedger
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedger(uint32_t ledgerSequence, bool getObjects)
Fetch data for a specific ledger.
Definition: ETLSource.cpp:679
ripple::NetworkValidatedLedgers::waitUntilValidatedByNetwork
bool waitUntilValidatedByNetwork(uint32_t sequence)
Waits for the sequence to be validated by the network.
Definition: ETLHelpers.h:79
ripple::NodeStore::Database::sync
virtual void sync()=0
std::stringstream
STL class.
beast::Journal::warn
Stream warn() const
Definition: Journal.h:327
ripple::NodeStore::Database::store
virtual void store(NodeObjectType type, Blob &&data, uint256 const &hash, std::uint32_t ledgerSeq)=0
Store the object.
ripple::LedgerInfo::seq
LedgerIndex seq
Definition: ReadView.h:92
ripple::hotTRANSACTION_NODE
@ hotTRANSACTION_NODE
Definition: NodeObject.h:36
iostream
ripple::LedgerInfo::txHash
uint256 txHash
Definition: ReadView.h:101
ripple::RelationalDBInterfacePostgres
Definition: RelationalDBInterfacePostgres.h:27
ripple::Application::getOPs
virtual NetworkOPs & getOPs()=0
ripple::Section::values
std::vector< std::string > const & values() const
Returns all the values in the section.
Definition: BasicConfig.h:76
ripple::ReportingETL::networkValidatedLedgers_
NetworkValidatedLedgers networkValidatedLedgers_
Mechanism for detecting when the network has validated a new ledger.
Definition: ReportingETL.h:100
ripple::TxMeta
Definition: TxMeta.h:32
ripple::base_uint< 256 >
ripple::writeLedgerAndTransactions
bool writeLedgerAndTransactions(std::shared_ptr< PgPool > const &pgPool, LedgerInfo const &info, std::vector< AccountTransactionsData > const &accountTxData, beast::Journal &j)
writeLedgerAndTransactions: Write new ledger and transaction data to Postgres.
Definition: RelationalDBInterface_postgres.cpp:762
ripple::ReportingETL::publishLedger
bool publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts=10)
Attempt to read the specified ledger from the database, and then publish that ledger to the ledgers s...
Definition: ReportingETL.cpp:282
ripple::ReportingETL::journal_
beast::Journal journal_
Definition: ReportingETL.h:75
std::stol
T stol(T... args)
ripple::Config::reporting
bool reporting() const
Definition: Config.h:276
std::thread
STL class.
ripple::Application::getLedgerMaster
virtual LedgerMaster & getLedgerMaster()=0
ripple::ReportingETL::writing_
std::atomic_bool writing_
Whether the process is writing to the database. Used by server_info.
Definition: ReportingETL.h:134
ripple::ReportingETL::readOnly_
bool readOnly_
Whether the process is in strict read-only mode.
Definition: ReportingETL.h:131
ripple::ReportingETL::numMarkers_
size_t numMarkers_
This variable controls the number of GetLedgerData calls that will be executed in parallel during the...
Definition: ReportingETL.h:126
ripple::ETLLoadBalancer::loadInitialLedger
void loadInitialLedger(uint32_t sequence, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Load the initial ledger, writing data to the queue.
Definition: ETLSource.cpp:660
ripple::Application::config
virtual Config & config()=0
ripple::Config::useTxTables
bool useTxTables() const
Definition: Config.h:282
ripple::ReportingETL::monitorReadOnly
void monitorReadOnly()
Monitor the database for newly written ledgers.
Definition: ReportingETL.cpp:800
ripple::detail::toString
std::string toString(LedgerInfo const &info)
Convenience function for printing out basic ledger info.
Definition: ReportingETL.cpp:40
beast::Journal::error
Stream error() const
Definition: Journal.h:333
beast::Journal::info
Stream info() const
Definition: Journal.h:321
std::chrono::time_point
ripple::ReportingETL::publishStrand_
boost::asio::io_context::strand publishStrand_
Strand to ensure that ledgers are published in order.
Definition: ReportingETL.h:91
ripple::STTx
Definition: STTx.h:42
ripple::LedgerMaster::getLedgerBySeq
std::shared_ptr< Ledger const > getLedgerBySeq(std::uint32_t index)
Definition: LedgerMaster.cpp:1767
ripple::SerialIter
Definition: Serializer.h:310
ripple::Config::START_UP
StartUpType START_UP
Definition: Config.h:133
std::atomic_bool
ripple::ReportingETL::fetchLedgerDataAndDiff
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerDataAndDiff(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:363
ripple::NetworkValidatedLedgers::getMostRecent
std::optional< uint32_t > getMostRecent()
Get most recently validated sequence.
Definition: ETLHelpers.h:67
ripple::Section::find
std::pair< std::string, bool > find(std::string const &name) const
Retrieve a key/value pair.
Definition: BasicConfig.cpp:113
ripple::Serializer
Definition: Serializer.h:39
ripple::Config::setReportingReadOnly
void setReportingReadOnly(bool b)
Definition: Config.h:294
beast::setCurrentThreadName
void setCurrentThreadName(std::string_view name)
Changes the name of the caller thread.
Definition: CurrentThreadName.cpp:119
std::vector::emplace_back
T emplace_back(T... args)
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::ReportingETL::stopping_
std::atomic_bool stopping_
Whether the software is stopping.
Definition: ReportingETL.h:103
ripple::Application::getNodeStore
virtual NodeStore::Database & getNodeStore()=0
ripple::deserializeHeader
LedgerInfo deserializeHeader(Slice data, bool hasHash)
Deserialize a ledger header from a byte array.
Definition: InboundLedger.cpp:269
cstdlib
ripple::base_uint< 256 >::fromVoid
static base_uint fromVoid(void const *data)
Definition: base_uint.h:311
ripple::LedgerMaster::getValidatedLedger
std::shared_ptr< Ledger const > getValidatedLedger()
Definition: LedgerMaster.cpp:1612
ripple::ReportingETL::app_
Application & app_
Definition: ReportingETL.h:73
std::optional
std::stringstream::str
T str(T... args)
beast::Journal::debug
Stream debug() const
Definition: Journal.h:315
ripple::ReportingETL::start
void start()
Start all of the necessary components and begin ETL.
Definition: ReportingETL.h:326
ripple::ReportingETL::worker_
std::thread worker_
Definition: ReportingETL.h:77
ripple::hotLEDGER
@ hotLEDGER
Definition: NodeObject.h:34
std::make_pair
T make_pair(T... args)
ripple::Serializer::add32
int add32(std::uint32_t i)
Definition: Serializer.cpp:38
ripple::LedgerInfo
Information about the notional ledger backing the view.
Definition: ReadView.h:84
ripple::strHex
std::string strHex(FwdIt begin, FwdIt end)
Definition: strHex.h:45
ripple::Config::reportingReadOnly
bool reportingReadOnly() const
Definition: Config.h:288
ripple::ReportingETL::buildNextLedger
std::pair< std::shared_ptr< Ledger >, std::vector< AccountTransactionsData > > buildNextLedger(std::shared_ptr< Ledger > &parent, org::xrpl::rpc::v1::GetLedgerResponse &rawData)
Build the next ledger using the previous ledger and the extracted data.
Definition: ReportingETL.cpp:377
ripple::ReportingETL::monitor
void monitor()
Monitor the network for newly validated ledgers.
Definition: ReportingETL.cpp:680
ripple::ReportingETL::ReportingETL
ReportingETL(Application &app)
Definition: ReportingETL.cpp:829
ripple::Config::START_LEDGER
std::string START_LEDGER
Definition: Config.h:137
ripple::ReportingETL::isStopping
bool isStopping() const
Definition: ReportingETL.h:282
ripple::ReportingETL::consumeLedgerData
void consumeLedgerData(std::shared_ptr< Ledger > &ledger, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Consume data from a queue and insert that data into the ledger. This function will continue to pull fr...
Definition: ReportingETL.cpp:52
ripple::ReportingETL::doWork
void doWork()
Definition: ReportingETL.cpp:818
ripple::LedgerInfo::accountHash
uint256 accountHash
Definition: ReadView.h:102
ripple::ReportingETL::runETLPipeline
std::optional< uint32_t > runETLPipeline(uint32_t startSequence)
Run ETL.
Definition: ReportingETL.cpp:460
ripple::RelationalDBInterfacePostgres::writeLedgerAndTransactions
virtual bool writeLedgerAndTransactions(LedgerInfo const &info, std::vector< AccountTransactionsData > const &accountTxData)=0
writeLedgerAndTransactions: Write new ledger and transaction data into database.
ripple::BasicConfig::exists
bool exists(std::string const &name) const
Returns true if a section with the given name exists.
Definition: BasicConfig.cpp:132
ripple::BasicConfig::section
Section & section(std::string const &name)
Returns the section with the given name.
Definition: BasicConfig.cpp:138
variant
string
ripple::ETLLoadBalancer::add
void add(std::string &host, std::string &websocketPort, std::string &grpcPort)
Add an ETL source.
Definition: ETLSource.cpp:637
std::chrono::system_clock::now
T now(T... args)