feat: Add public key to log messages (#5678)

To protect the identity of UNL validators, the IP addresses are redacted from the log messages sent to the common Grafana instance. However, without such identifying information it is challenging to debug issues. This change adds a node's public key to logs to improve our ability to debug issues.

Co-authored-by: Bart Thomee <11445373+bthomee@users.noreply.github.com>
This commit is contained in:
Jingchen
2025-10-29 14:16:37 +00:00
committed by GitHub
parent a8e4da0b11
commit ed5d6f3e22
14 changed files with 162 additions and 103 deletions

View File

@@ -21,6 +21,7 @@
#define RIPPLE_PROTOCOL_PUBLICKEY_H_INCLUDED
#include <xrpl/basics/Slice.h>
#include <xrpl/beast/net/IPEndpoint.h>
#include <xrpl/protocol/KeyType.h>
#include <xrpl/protocol/STExchange.h>
#include <xrpl/protocol/UintTypes.h>
@@ -264,6 +265,24 @@ calcNodeID(PublicKey const&);
AccountID
calcAccountID(PublicKey const& pk);
/** Build a human-readable identifier for a peer.

    Produces a string containing the peer's IP address and, when
    supplied, its base58-encoded node public key and an id string.
    Used to tag log lines so peers remain identifiable even when
    IP addresses are redacted from aggregated logs.

    @param address The peer's remote IP endpoint.
    @param publicKey Optional node public key of the peer.
    @param id Optional additional identifier (e.g. peer slot id).
    @return The formatted fingerprint string.
*/
inline std::string
getFingerprint(
    beast::IP::Endpoint const& address,
    std::optional<PublicKey> const& publicKey = std::nullopt,
    std::optional<std::string> const& id = std::nullopt)
{
    std::ostringstream out;
    out << "IP Address: " << address;
    if (publicKey)
        out << ", Public Key: " << toBase58(TokenType::NodePublic, *publicKey);
    if (id)
        out << ", Id: " << *id;
    return out.str();
}
} // namespace ripple
//------------------------------------------------------------------------------

View File

@@ -21,6 +21,7 @@
#define RIPPLE_RESOURCE_CONSUMER_H_INCLUDED
#include <xrpl/basics/Log.h>
#include <xrpl/protocol/PublicKey.h>
#include <xrpl/resource/Charge.h>
#include <xrpl/resource/Disposition.h>
@@ -87,6 +88,9 @@ public:
Entry&
entry();
void
setPublicKey(PublicKey const& publicKey);
private:
Logic* m_logic;
Entry* m_entry;

View File

@@ -53,7 +53,7 @@ struct Entry : public beast::List<Entry>::Node
/** Human-readable identifier for this resource entry.

    Returns the peer fingerprint: the remote address plus, when known,
    the peer's public key (stored in `publicKey` below). The previous
    address-only `return key->address.to_string();` was unreachable
    dead code shadowing this and has been removed.
*/
std::string
to_string() const
{
    return getFingerprint(key->address, publicKey);
}
/**
@@ -82,6 +82,9 @@ struct Entry : public beast::List<Entry>::Node
return local_balance.add(charge, now) + remote_balance;
}
// The public key of the peer
std::optional<PublicKey> publicKey;
// Back pointer to the map key (bit of a hack here)
Key const* key;

View File

@@ -148,6 +148,12 @@ Consumer::entry()
return *m_entry;
}
/** Record the peer's public key on the underlying resource entry.

    The key is later included in log fingerprints via Entry::to_string().
    m_entry is a raw pointer (see the class declaration) that may be null
    for a default-constructed Consumer, so guard before dereferencing.
    NOTE(review): confirm whether callers guarantee a non-null entry; if
    so, an assert may be preferable to a silent no-op.
*/
void
Consumer::setPublicKey(PublicKey const& publicKey)
{
    if (m_entry)
        m_entry->publicKey = publicKey;
}
std::ostream&
operator<<(std::ostream& os, Consumer const& v)
{

View File

@@ -325,6 +325,13 @@ public:
return false;
}
// Returns the cached fingerprint string identifying this (mock) peer,
// satisfying the Peer interface used by log messages.
std::string const&
fingerprint() const override
{
    return fingerprint_;
}
std::string fingerprint_;
bool ledgerReplayEnabled_;
PublicKey nodePublicKey_;
};

View File

@@ -475,6 +475,12 @@ public:
return id_;
}
// Returns the cached fingerprint string identifying this (mock) peer,
// satisfying the Peer interface used by log messages.
std::string const&
fingerprint() const override
{
    return fingerprint_;
}
static void
resetId()
{
@@ -508,6 +514,7 @@ public:
private:
inline static id_t sid_ = 0;
std::string fingerprint_;
id_t id_;
Overlay& overlay_;
reduce_relay::Squelch<ManualClock> squelch_;

View File

@@ -795,9 +795,7 @@ ValidatorList::sendValidatorList(
<< " validator list collection(s) containing " << numVLs
<< " validator list(s) for " << strHex(publisherKey)
<< " with sequence range " << peerSequence << ", "
<< newPeerSequence << " to "
<< peer.getRemoteAddress().to_string() << " [" << peer.id()
<< "]";
<< newPeerSequence << " to " << peer.fingerprint();
else
{
XRPL_ASSERT(
@@ -807,8 +805,7 @@ ValidatorList::sendValidatorList(
JLOG(j.debug())
<< "Sent validator list for " << strHex(publisherKey)
<< " with sequence " << newPeerSequence << " to "
<< peer.getRemoteAddress().to_string() << " [" << peer.id()
<< "]";
<< peer.fingerprint();
}
}
}

View File

@@ -112,6 +112,8 @@ public:
virtual void
setPublisherListSequence(PublicKey const&, std::size_t const) = 0;
virtual std::string const&
fingerprint() const = 0;
//
// Ledger
//

View File

@@ -589,6 +589,8 @@ ConnectAttempt::processResponse()
remote_endpoint_.address(),
app_);
usage_.setPublicKey(publicKey);
JLOG(journal_.debug())
<< "Protocol: " << to_string(*negotiatedProtocol);
JLOG(journal_.info())

View File

@@ -262,6 +262,8 @@ OverlayImpl::onHandoff(
remote_endpoint.address(),
app_);
consumer.setPublicKey(publicKey);
{
// The node gets a reserved slot if it is in our cluster
// or if it has a reservation.
@@ -434,6 +436,9 @@ OverlayImpl::connect(beast::IP::Endpoint const& remote_endpoint)
void
OverlayImpl::add_active(std::shared_ptr<PeerImp> const& peer)
{
beast::WrappedSink sink{journal_.sink(), peer->prefix()};
beast::Journal journal{sink};
std::lock_guard lock(mutex_);
{
@@ -457,11 +462,7 @@ OverlayImpl::add_active(std::shared_ptr<PeerImp> const& peer)
list_.emplace(peer.get(), peer);
JLOG(journal_.debug()) << "activated " << peer->getRemoteAddress() << " ("
<< peer->id() << ":"
<< toBase58(
TokenType::NodePublic, peer->getNodePublic())
<< ")";
JLOG(journal.debug()) << "activated";
// As we are not on the strand, run() must be called
// while holding the lock, otherwise new I/O can be
@@ -605,6 +606,9 @@ OverlayImpl::onWrite(beast::PropertyStream::Map& stream)
void
OverlayImpl::activate(std::shared_ptr<PeerImp> const& peer)
{
beast::WrappedSink sink{journal_.sink(), peer->prefix()};
beast::Journal journal{sink};
// Now track this peer
{
std::lock_guard lock(mutex_);
@@ -618,11 +622,7 @@ OverlayImpl::activate(std::shared_ptr<PeerImp> const& peer)
(void)result.second;
}
JLOG(journal_.debug()) << "activated " << peer->getRemoteAddress() << " ("
<< peer->id() << ":"
<< toBase58(
TokenType::NodePublic, peer->getNodePublic())
<< ")";
JLOG(journal.debug()) << "activated";
// We just accepted this peer so we have non-zero active peers
XRPL_ASSERT(size(), "ripple::OverlayImpl::activate : nonzero peers");

View File

@@ -82,8 +82,11 @@ PeerImp::PeerImp(
: Child(overlay)
, app_(app)
, id_(id)
, sink_(app_.journal("Peer"), makePrefix(id))
, p_sink_(app_.journal("Protocol"), makePrefix(id))
, fingerprint_(
getFingerprint(slot->remote_endpoint(), publicKey, to_string(id)))
, prefix_(makePrefix(fingerprint_))
, sink_(app_.journal("Peer"), prefix_)
, p_sink_(app_.journal("Protocol"), prefix_)
, journal_(sink_)
, p_journal_(p_sink_)
, stream_ptr_(std::move(stream_ptr))
@@ -131,8 +134,7 @@ PeerImp::PeerImp(
headers_,
FEATURE_VPRR,
app_.config().VP_REDUCE_RELAY_BASE_SQUELCH_ENABLE)
<< " tx reduce-relay enabled " << txReduceRelayEnabled_ << " on "
<< remote_address_ << " " << id_;
<< " tx reduce-relay enabled " << txReduceRelayEnabled_;
}
PeerImp::~PeerImp()
@@ -280,8 +282,7 @@ PeerImp::send(std::shared_ptr<Message> const& m)
sink && (sendq_size % Tuning::sendQueueLogFreq) == 0)
{
std::string const n = name();
sink << (n.empty() ? remote_address_.to_string() : n)
<< " sendq: " << sendq_size;
sink << n << " sendq: " << sendq_size;
}
send_queue_.push(m);
@@ -585,10 +586,7 @@ PeerImp::fail(std::string const& name, error_code ec)
if (!socket_.is_open())
return;
JLOG(journal_.warn()) << name << " from "
<< toBase58(TokenType::NodePublic, publicKey_)
<< " at " << remote_address_.to_string() << ": "
<< ec.message();
JLOG(journal_.warn()) << name << ": " << ec.message();
shutdown();
}
@@ -611,8 +609,7 @@ PeerImp::fail(std::string const& reason)
if (journal_.active(beast::severities::kWarning))
{
std::string const n = name();
JLOG(journal_.warn()) << (n.empty() ? remote_address_.to_string() : n)
<< " failed: " << reason;
JLOG(journal_.warn()) << n << " failed: " << reason;
}
shutdown();
@@ -730,6 +727,16 @@ PeerImp::setTimer(std::chrono::seconds interval)
&PeerImp::onTimer, shared_from_this(), std::placeholders::_1)));
}
//------------------------------------------------------------------------------
/** Build the log-sink prefix for a peer from its fingerprint.

    @param fingerprint The peer fingerprint (address, key, id).
    @return The fingerprint wrapped in brackets with a trailing space,
            e.g. "[IP Address: ...] ".
*/
std::string
PeerImp::makePrefix(std::string const& fingerprint)
{
    // Plain concatenation; a stringstream is unnecessary for this.
    return "[" + fingerprint + "] ";
}
void
PeerImp::onTimer(error_code const& ec)
{
@@ -810,14 +817,6 @@ PeerImp::cancelTimer() noexcept
}
}
/** Build the log-sink prefix for a peer from its numeric id.

    @param id The peer's id.
    @return The id zero-padded to at least three digits, wrapped in
            brackets with a trailing space, e.g. "[007] ".
*/
std::string
PeerImp::makePrefix(id_t id)
{
    std::string digits = std::to_string(id);
    // Match the previous std::setw(3)/std::setfill('0') formatting.
    if (digits.size() < 3)
        digits.insert(0, 3 - digits.size(), '0');
    return "[" + digits + "] ";
}
//------------------------------------------------------------------------------
void
PeerImp::doAccept()
@@ -826,7 +825,7 @@ PeerImp::doAccept()
read_buffer_.size() == 0,
"ripple::PeerImp::doAccept : empty read buffer");
JLOG(journal_.debug()) << "doAccept: " << remote_address_;
JLOG(journal_.debug()) << "doAccept";
// a shutdown was initiated before the handshake, there is nothing to do
if (shutdown_)
@@ -840,8 +839,6 @@ PeerImp::doAccept()
return fail("makeSharedValue: Unexpected failure");
JLOG(journal_.debug()) << "Protocol: " << to_string(protocol_);
JLOG(journal_.info()) << "Public Key: "
<< toBase58(TokenType::NodePublic, publicKey_);
if (auto member = app_.cluster().member(publicKey_))
{
@@ -2143,8 +2140,7 @@ PeerImp::onValidatorListMessage(
// ValidatorList class rules), so charge accordingly and skip processing.
if (blobs.empty())
{
JLOG(p_journal_.warn()) << "Ignored malformed " << messageType
<< " from peer " << remote_address_;
JLOG(p_journal_.warn()) << "Ignored malformed " << messageType;
// This shouldn't ever happen with a well-behaved peer
fee_.update(Resource::feeHeavyBurdenPeer, "no blobs");
return;
@@ -2152,9 +2148,7 @@ PeerImp::onValidatorListMessage(
auto const hash = sha512Half(manifest, blobs, version);
JLOG(p_journal_.debug())
<< "Received " << messageType << " from " << remote_address_.to_string()
<< " (" << id_ << ")";
JLOG(p_journal_.debug()) << "Received " << messageType;
if (!app_.getHashRouter().addSuppressionPeer(hash, id_))
{
@@ -2181,8 +2175,7 @@ PeerImp::onValidatorListMessage(
<< "Processed " << messageType << " version " << version << " from "
<< (applyResult.publisherKey ? strHex(*applyResult.publisherKey)
: "unknown or invalid publisher")
<< " from " << remote_address_.to_string() << " (" << id_
<< ") with best result " << to_string(applyResult.bestDisposition());
<< " with best result " << to_string(applyResult.bestDisposition());
// Act based on the best result
switch (applyResult.bestDisposition())
@@ -2296,51 +2289,44 @@ PeerImp::onValidatorListMessage(
// New list
case ListDisposition::accepted:
JLOG(p_journal_.debug())
<< "Applied " << count << " new " << messageType
<< "(s) from peer " << remote_address_;
<< "Applied " << count << " new " << messageType;
break;
// Newest list is expired, and that needs to be broadcast, too
case ListDisposition::expired:
JLOG(p_journal_.debug())
<< "Applied " << count << " expired " << messageType
<< "(s) from peer " << remote_address_;
<< "Applied " << count << " expired " << messageType;
break;
// Future list
case ListDisposition::pending:
JLOG(p_journal_.debug())
<< "Processed " << count << " future " << messageType
<< "(s) from peer " << remote_address_;
<< "Processed " << count << " future " << messageType;
break;
case ListDisposition::same_sequence:
JLOG(p_journal_.warn())
<< "Ignored " << count << " " << messageType
<< "(s) with current sequence from peer "
<< remote_address_;
<< "(s) with current sequence";
break;
case ListDisposition::known_sequence:
JLOG(p_journal_.warn())
<< "Ignored " << count << " " << messageType
<< "(s) with future sequence from peer " << remote_address_;
<< "(s) with future sequence";
break;
case ListDisposition::stale:
JLOG(p_journal_.warn())
<< "Ignored " << count << "stale " << messageType
<< "(s) from peer " << remote_address_;
<< "Ignored " << count << "stale " << messageType;
break;
case ListDisposition::untrusted:
JLOG(p_journal_.warn())
<< "Ignored " << count << " untrusted " << messageType
<< "(s) from peer " << remote_address_;
<< "Ignored " << count << " untrusted " << messageType;
break;
case ListDisposition::unsupported_version:
JLOG(p_journal_.warn())
<< "Ignored " << count << "unsupported version "
<< messageType << "(s) from peer " << remote_address_;
<< messageType;
break;
case ListDisposition::invalid:
JLOG(p_journal_.warn())
<< "Ignored " << count << "invalid " << messageType
<< "(s) from peer " << remote_address_;
<< "Ignored " << count << "invalid " << messageType;
break;
// LCOV_EXCL_START
default:
@@ -2374,8 +2360,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidatorList> const& m)
}
catch (std::exception const& e)
{
JLOG(p_journal_.warn()) << "ValidatorList: Exception, " << e.what()
<< " from peer " << remote_address_;
JLOG(p_journal_.warn()) << "ValidatorList: Exception, " << e.what();
using namespace std::string_literals;
fee_.update(Resource::feeInvalidData, e.what());
}
@@ -2414,8 +2399,8 @@ PeerImp::onMessage(
}
catch (std::exception const& e)
{
JLOG(p_journal_.warn()) << "ValidatorListCollection: Exception, "
<< e.what() << " from peer " << remote_address_;
JLOG(p_journal_.warn())
<< "ValidatorListCollection: Exception, " << e.what();
using namespace std::string_literals;
fee_.update(Resource::feeInvalidData, e.what());
}

View File

@@ -134,6 +134,8 @@ private:
Application& app_;
id_t const id_;
std::string fingerprint_;
std::string prefix_;
beast::WrappedSink sink_;
beast::WrappedSink p_sink_;
beast::Journal const journal_;
@@ -635,7 +637,7 @@ private:
cancelTimer() noexcept;
static std::string
makePrefix(id_t id);
makePrefix(std::string const& fingerprint);
void
doAccept();
@@ -689,6 +691,18 @@ private:
handleHaveTransactions(
std::shared_ptr<protocol::TMHaveTransactions> const& m);
// Returns this peer's fingerprint (address, public key, id) computed
// once at construction; used to tag log messages for this peer.
std::string const&
fingerprint() const override
{
    return fingerprint_;
}
// Returns the precomputed log-sink prefix (the bracketed fingerprint)
// applied to every journal line emitted for this peer.
std::string const&
prefix() const
{
    return prefix_;
}
public:
//--------------------------------------------------------------------------
//
@@ -832,8 +846,11 @@ PeerImp::PeerImp(
: Child(overlay)
, app_(app)
, id_(id)
, sink_(app_.journal("Peer"), makePrefix(id))
, p_sink_(app_.journal("Protocol"), makePrefix(id))
, fingerprint_(
getFingerprint(slot->remote_endpoint(), publicKey, to_string(id_)))
, prefix_(makePrefix(fingerprint_))
, sink_(app_.journal("Peer"), prefix_)
, p_sink_(app_.journal("Protocol"), prefix_)
, journal_(sink_)
, p_journal_(p_sink_)
, stream_ptr_(std::move(stream_ptr))

View File

@@ -35,6 +35,7 @@
#include <xrpl/basics/contract.h>
#include <xrpl/basics/random.h>
#include <xrpl/beast/net/IPAddressConversion.h>
#include <xrpl/beast/utility/WrappedSink.h>
#include <algorithm>
#include <functional>
@@ -240,21 +241,23 @@ public:
slot.checked = true;
slot.connectivityCheckInProgress = false;
beast::WrappedSink sink{m_journal.sink(), slot.prefix()};
beast::Journal journal{sink};
if (ec)
{
// VFALCO TODO Should we retry depending on the error?
slot.canAccept = false;
JLOG(m_journal.error())
<< beast::leftw(18) << "Logic testing " << iter->first
<< " with error, " << ec.message();
JLOG(journal.error()) << "Logic testing " << iter->first
<< " with error, " << ec.message();
bootcache_.on_failure(checkedAddress);
return;
}
slot.canAccept = true;
slot.set_listening_port(checkedAddress.port());
JLOG(m_journal.debug()) << beast::leftw(18) << "Logic testing "
<< checkedAddress << " succeeded";
JLOG(journal.debug())
<< "Logic testing " << checkedAddress << " succeeded";
}
//--------------------------------------------------------------------------
@@ -359,9 +362,10 @@ public:
SlotImp::ptr const& slot,
beast::IP::Endpoint const& local_endpoint)
{
JLOG(m_journal.trace())
<< beast::leftw(18) << "Logic connected" << slot->remote_endpoint()
<< " on local " << local_endpoint;
beast::WrappedSink sink{m_journal.sink(), slot->prefix()};
beast::Journal journal{sink};
JLOG(journal.trace()) << "Logic connected on local " << local_endpoint;
std::lock_guard _(lock_);
@@ -381,9 +385,7 @@ public:
iter->second->local_endpoint() == slot->remote_endpoint(),
"ripple::PeerFinder::Logic::onConnected : local and remote "
"endpoints do match");
JLOG(m_journal.warn())
<< beast::leftw(18) << "Logic dropping "
<< slot->remote_endpoint() << " as self connect";
JLOG(journal.warn()) << "Logic dropping as self connect";
return false;
}
}
@@ -398,9 +400,12 @@ public:
Result
activate(SlotImp::ptr const& slot, PublicKey const& key, bool reserved)
{
JLOG(m_journal.debug())
<< beast::leftw(18) << "Logic handshake " << slot->remote_endpoint()
<< " with " << (reserved ? "reserved " : "") << "key " << key;
beast::WrappedSink sink{m_journal.sink(), slot->prefix()};
beast::Journal journal{sink};
JLOG(journal.debug())
<< "Logic handshake " << slot->remote_endpoint() << " with "
<< (reserved ? "reserved " : "") << "key " << key;
std::lock_guard _(lock_);
@@ -462,8 +467,7 @@ public:
"missing from fixed_");
iter->second.success(m_clock.now());
JLOG(m_journal.trace()) << beast::leftw(18) << "Logic fixed "
<< slot->remote_endpoint() << " success";
JLOG(journal.trace()) << "Logic fixed success";
}
return Result::success;
@@ -681,9 +685,10 @@ public:
{
SlotImp::ptr const& slot = t.slot();
auto const& list = t.list();
JLOG(m_journal.trace())
<< beast::leftw(18) << "Logic sending "
<< slot->remote_endpoint() << " with " << list.size()
beast::WrappedSink sink{m_journal.sink(), slot->prefix()};
beast::Journal journal{sink};
JLOG(journal.trace())
<< "Logic sending " << list.size()
<< ((list.size() == 1) ? " endpoint" : " endpoints");
result.push_back(std::make_pair(slot, list));
}
@@ -788,6 +793,9 @@ public:
void
on_endpoints(SlotImp::ptr const& slot, Endpoints list)
{
beast::WrappedSink sink{m_journal.sink(), slot->prefix()};
beast::Journal journal{sink};
// If we're sent too many endpoints, sample them at random:
if (list.size() > Tuning::numberOfEndpointsMax)
{
@@ -795,10 +803,8 @@ public:
list.resize(Tuning::numberOfEndpointsMax);
}
JLOG(m_journal.trace())
<< beast::leftw(18) << "Endpoints from " << slot->remote_endpoint()
<< " contained " << list.size()
<< ((list.size() > 1) ? " entries" : " entry");
JLOG(journal.trace()) << "Endpoints contained " << list.size()
<< ((list.size() > 1) ? " entries" : " entry");
std::lock_guard _(lock_);
@@ -835,9 +841,8 @@ public:
{
if (slot->connectivityCheckInProgress)
{
JLOG(m_journal.debug())
<< beast::leftw(18) << "Logic testing " << ep.address
<< " already in progress";
JLOG(journal.debug()) << "Logic testing " << ep.address
<< " already in progress";
continue;
}
@@ -934,6 +939,9 @@ public:
remove(slot);
beast::WrappedSink sink{m_journal.sink(), slot->prefix()};
beast::Journal journal{sink};
// Mark fixed slot failure
if (slot->fixed() && !slot->inbound() && slot->state() != Slot::active)
{
@@ -944,16 +952,14 @@ public:
"missing from fixed_");
iter->second.failure(m_clock.now());
JLOG(m_journal.debug()) << beast::leftw(18) << "Logic fixed "
<< slot->remote_endpoint() << " failed";
JLOG(journal.debug()) << "Logic fixed failed";
}
// Do state specific bookkeeping
switch (slot->state())
{
case Slot::accept:
JLOG(m_journal.trace()) << beast::leftw(18) << "Logic accept "
<< slot->remote_endpoint() << " failed";
JLOG(journal.trace()) << "Logic accept failed";
break;
case Slot::connect:
@@ -967,13 +973,11 @@ public:
break;
case Slot::active:
JLOG(m_journal.trace()) << beast::leftw(18) << "Logic close "
<< slot->remote_endpoint();
JLOG(journal.trace()) << "Logic close";
break;
case Slot::closing:
JLOG(m_journal.trace()) << beast::leftw(18) << "Logic finished "
<< slot->remote_endpoint();
JLOG(journal.trace()) << "Logic finished";
break;
// LCOV_EXCL_START

View File

@@ -91,6 +91,12 @@ public:
return m_public_key;
}
/** Log prefix identifying this slot.

    Wraps the fingerprint (remote endpoint plus, when known, the
    peer's public key) in brackets for use as a journal sink prefix.
*/
std::string
prefix() const
{
    std::string tag = "[";
    tag += getFingerprint(remote_endpoint(), public_key());
    tag += "] ";
    return tag;
}
std::optional<std::uint16_t>
listening_port() const override
{