Mirror of https://github.com/XRPLF/rippled.git
* log_report.py is a script to generate debugging reports and combine the logs of the locally run mainchain and sidechain servers.
* Log the address book before pytest starts.
* Clean up test utils.
* Modify log_analyzer so it joins all logs into a single file.
* Organize the "all" log as a dictionary.
* Allow the ConfigFile and Section classes to be pickled: not being picklable caused a bug on mac platforms. Linux did not appear to use pickle.
* Add an account history command to the py scripts.
* Add additional logging.
* Add support to run sidechains under rr: this is an undocumented feature to help with debugging. If the environment variable `RIPPLED_SIDECHAIN_RR` is set, it is assumed to point to the rr executable. Sidechain 0 will then be run under rr.
286 lines · 9.5 KiB · Python · Executable File
#!/usr/bin/env python3

import argparse
from collections import defaultdict
import datetime
import json
import numpy as np
import os
import pandas as pd
import string
import sys
from typing import Dict, Set

from common import eprint
import log_analyzer


def _has_256bit_hex_field_other(data, result: Set[str]):
    return


# Dispatch table mapping a value's type to the handler that searches it for
# 256-bit hex strings; unknown types fall back to the no-op handler above.
_has_256bit_hex_field_overloads = defaultdict(
    lambda: _has_256bit_hex_field_other)


def _has_256bit_hex_field_str(data: str, result: Set[str]):
    if len(data) != 64:
        return
    for c in data:
        o = ord(c.upper())
        if ord('A') <= o <= ord('F'):
            continue
        if ord('0') <= o <= ord('9'):
            continue
        return
    result.add(data)


_has_256bit_hex_field_overloads[str] = _has_256bit_hex_field_str


def _has_256bit_hex_field_dict(data: dict, result: Set[str]):
    for k, v in data.items():
        if k in [
                "meta", "index", "LedgerIndex", "ledger_index", "ledger_hash",
                "SigningPubKey", "suppression"
        ]:
            continue
        _has_256bit_hex_field_overloads[type(v)](v, result)


_has_256bit_hex_field_overloads[dict] = _has_256bit_hex_field_dict


def _has_256bit_hex_field_list(data: list, result: Set[str]):
    for v in data:
        _has_256bit_hex_field_overloads[type(v)](v, result)


_has_256bit_hex_field_overloads[list] = _has_256bit_hex_field_list


def has_256bit_hex_field(data: dict) -> Set[str]:
    '''
    Find all the fields that are strings 64 chars long with only hex digits.
    This is useful when grouping transactions by hash.
    '''
    result = set()
    _has_256bit_hex_field_dict(data, result)
    return result
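
# Illustrative example (added note, not part of the upstream script): for a log item
# such as
#   {"msg": "got transaction",
#    "data": {"hash": "AB" * 32, "ledger_index": 7}}
# has_256bit_hex_field(...) would return the single 64-char hex string under "hash".
# "ledger_index" is skipped because it appears in the exclusion list above, and
# "got transaction" is ignored because it is not 64 hex characters long.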


def group_by_txn(data: dict) -> dict:
    '''
    Return a dictionary where the key is the transaction hash and the value is another
    dictionary. In the second dictionary the key is the server id and the value is a
    list of log items.
    '''
    def _make_default():
        return defaultdict(lambda: list())

    result = defaultdict(_make_default)
    for server_id, log_list in data.items():
        for log_item in log_list:
            if txn_hashes := has_256bit_hex_field(log_item):
                for h in txn_hashes:
                    result[h][server_id].append(log_item)
    return result
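
# Assumed shape of the result (illustrative, not from the upstream script):
#   {<txn hash>: {"mainchain_0": [<log item>, ...],
#                 "sidechain_1": [<log item>, ...],
#                 ...},
#    ...}
# i.e. txn hash -> server id -> the log items on that server that mention the hash.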


def _rekey_dict_by_txn_date(hash_to_timestamp: dict,
                            grouped_by_txn: dict) -> dict:
    '''
    hash_to_timestamp is a dictionary with a key of the txn hash and a value of the timestamp.
    grouped_by_txn is a dictionary with a key of the txn hash and an unspecified value.
    Every key in hash_to_timestamp is also a key in grouped_by_txn.
    This function returns a new grouped_by_txn dictionary with the transactions sorted by date;
    transactions without a known timestamp come first.
    '''
    known_txns = [
        k for k, v in sorted(hash_to_timestamp.items(), key=lambda x: x[1])
    ]
    result = {}
    for k, v in grouped_by_txn.items():
        if k not in known_txns:
            result[k] = v
    for h in known_txns:
        result[h] = grouped_by_txn[h]
    return result
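
# Illustrative example (hypothetical hashes and times, not from the upstream script):
#   hash_to_timestamp = {"AA...": 10:05, "BB...": 10:01}
#   grouped_by_txn    = {"AA...": a, "BB...": b, "CC...": c}   # "CC..." has no timestamp
# returns {"CC...": c, "BB...": b, "AA...": a} -- the unknown-timestamp txn first,
# then the rest in timestamp order.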


def _to_timestamp(str_time: str) -> datetime.datetime:
    return datetime.datetime.strptime(
        str_time.split('.')[0], "%Y-%b-%d %H:%M:%S")
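
# Illustrative example (not part of the upstream script): log timestamps look like
# "2021-Oct-08 21:33:41.731371562 UTC" (see the sample log line in main() below);
# splitting on '.' drops both the fractional seconds and the "UTC" suffix, so
#   _to_timestamp("2021-Oct-08 21:33:41.731371562 UTC")
# returns datetime.datetime(2021, 10, 8, 21, 33, 41).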


class Report:
    def __init__(self, in_dir, out_dir):
        self.in_dir = in_dir
        self.out_dir = out_dir

        self.combined_logs_file_name = f'{self.out_dir}/combined_logs.json'
        self.grouped_by_txn_file_name = f'{self.out_dir}/grouped_by_txn.json'
        self.counts_by_txn_and_server_file_name = f'{self.out_dir}/counts_by_txn_and_server.org'
        self.data = None  # combined logs

        # grouped_by_txn is a dictionary keyed by transaction hash; each value is another
        # dictionary keyed by server id. Mainchain servers have a key of `mainchain_#`
        # and sidechain servers have a key of `sidechain_#`, where `#` is a number.
        self.grouped_by_txn = None

        if not os.path.isdir(in_dir):
            eprint(f'The input {self.in_dir} must be an existing directory')
            sys.exit(1)

        if os.path.exists(self.out_dir):
            if not os.path.isdir(self.out_dir):
                eprint(
                    f'The output: {self.out_dir} exists and is not a directory'
                )
                sys.exit(1)
        else:
            os.makedirs(self.out_dir)

        self.combine_logs()
        with open(self.combined_logs_file_name) as f:
            self.data = json.load(f)
        self.grouped_by_txn = group_by_txn(self.data)

        # counts_by_txn_and_server is a dictionary where the key is the txn_hash
        # and the value is a pandas df with a row for every server and a column for every
        # message; the value is a count of how many times that message appears for that server.
        counts_by_txn_and_server = {}
        # dict where the key is a transaction hash and the value is the transaction
        hash_to_txn = {}
        # dict where the key is a transaction hash and the value is the earliest timestamp in a log file
        hash_to_timestamp = {}
        for txn_hash, server_dict in self.grouped_by_txn.items():
            message_set = set()
            # message list is ordered by when it appears in the log
            message_list = []
            for server_id, messages in server_dict.items():
                for m in messages:
                    # If this log item carries the full transaction JSON, remember it
                    # so it can be included in the report.
                    try:
                        d = m['data']
                        t = None
                        if 'msg' in d and 'transaction' in d['msg']:
                            t = d['msg']['transaction']
                        elif 'tx_json' in d:
                            t = d['tx_json']
                        if t is not None and t['hash'] == txn_hash:
                            hash_to_txn[txn_hash] = t
                    except Exception:
                        pass
                    msg = m['msg']
                    t = _to_timestamp(m['t'])
                    if txn_hash not in hash_to_timestamp:
                        hash_to_timestamp[txn_hash] = t
                    elif hash_to_timestamp[txn_hash] > t:
                        hash_to_timestamp[txn_hash] = t
                    if msg not in message_set:
                        message_set.add(msg)
                        message_list.append(msg)
            df = pd.DataFrame(0,
                              index=server_dict.keys(),
                              columns=message_list)
            for server_id, messages in server_dict.items():
                for m in messages:
                    df.loc[server_id, m['msg']] += 1
            counts_by_txn_and_server[txn_hash] = df

        # sort the transactions by timestamp, but put the txns with unknown timestamps first
        self.grouped_by_txn = _rekey_dict_by_txn_date(hash_to_timestamp,
                                                      self.grouped_by_txn)
        counts_by_txn_and_server = _rekey_dict_by_txn_date(
            hash_to_timestamp, counts_by_txn_and_server)

        with open(self.grouped_by_txn_file_name, 'w') as out:
            print(json.dumps(self.grouped_by_txn, indent=1), file=out)

        with open(self.counts_by_txn_and_server_file_name, 'w') as out:
            for txn_hash, df in counts_by_txn_and_server.items():
                print(f'\n\n* Txn: {txn_hash}', file=out)
                if txn_hash in hash_to_txn:
                    print(json.dumps(hash_to_txn[txn_hash], indent=1),
                          file=out)
                # rename the message columns to single letters so the table stays narrow
                rename_dict = {}
                for column, renamed_column in zip(df.columns.array,
                                                  string.ascii_uppercase):
                    print(f'{renamed_column} = {column}', file=out)
                    rename_dict[column] = renamed_column
                df.rename(columns=rename_dict, inplace=True)
                print(f'\n{df}', file=out)
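
        # Illustrative sketch of one section of the resulting .org file
        # (hypothetical message names and counts, not actual output):
        #   * Txn: 4A6F...
        #   <transaction json, if it was found in the logs>
        #   A = doing sidechain submit
        #   B = got audit
        #                 A  B
        #   mainchain_0   1  0
        #   sidechain_0   2  1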

    def combine_logs(self):
        try:
            with open(self.combined_logs_file_name, "w") as out:
                log_analyzer.convert_all(self.in_dir, out, pure_json=True)
        except Exception as e:
            eprint(f'Exception: {e}')
            raise e


def main(input_dir_name: str, output_dir_name: str):
    r = Report(input_dir_name, output_dir_name)

    # Values are a list of log lines formatted as json. There are five fields:
    # `t` is the timestamp.
    # `m` is the module.
    # `l` is the log level.
    # `msg` is the message.
    # `data` is the data.
    # For example:
    #
    # {
    #     "t": "2021-Oct-08 21:33:41.731371562 UTC",
    #     "m": "SidechainFederator",
    #     "l": "TRC",
    #     "msg": "no last xchain txn with result",
    #     "data": {
    #         "needsOtherChainLastXChainTxn": true,
    #         "isMainchain": false,
    #         "jlogId": 121
    #     }
    # },

    # Lifecycle of a transaction
    # For each federator record:
    # Transaction detected: amount, seq, destination, chain, hash
    # Signature received: hash, seq
    # Signature sent: hash, seq, federator dst
    # Transaction submitted
    # Result received, and detect if error
    # Detect any field that doesn't match

    # Lifecycle of initialization

    # Chain listener messages


def parse_args():
    parser = argparse.ArgumentParser(description=(
        'python script to generate a log report from a sidechain config directory structure containing the logs'
    ))

    parser.add_argument(
        '--input',
        '-i',
        help=('directory with sidechain config directory structure'),
    )

    parser.add_argument(
        '--output',
        '-o',
        help=('output directory for report files'),
    )

    return parser.parse_known_args()[0]
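
# Example invocation (illustrative, not from the upstream docs):
#   python log_report.py -i <sidechain config dir containing the logs> -o <report dir>
# The report directory will then contain combined_logs.json, grouped_by_txn.json and
# counts_by_txn_and_server.org.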


if __name__ == '__main__':
    try:
        args = parse_args()
        main(args.input, args.output)
    except Exception as e:
        eprint(f'Exception: {e}')
        raise e