fix: Address review feedback in graceful peer disconnect

Three concurrency / correctness fixes:

- Arm the kShutdownTimerInterval (5s) safety timer in PeerImp::shutdown()
  rather than in tryAsyncShutdown(). Otherwise, when shutdown is requested
  while I/O is pending, the existing 60s peer-health timer remains armed
  and onTimer() treats its expiry as a shutdown timeout, delaying forced
  close far beyond the intended 5s budget.

- Track the inbound HTTP-handshake-response write in doAccept() via
  writePending_, matching the pattern used for send() and onWriteMessage().
  Without this, tryAsyncShutdown() could launch async_shutdown() while the
  response write was still in flight on the SSL stream.

- Replace the string-match for "application data after close notify" in
  onShutdown() (both ConnectAttempt and PeerImp) with a category +
  ERR_GET_REASON check against SSL_R_APPLICATION_DATA_AFTER_CLOSE_NOTIFY.
  The previous text comparison was fragile across OpenSSL versions.

Plus cleanup: Doxygen fixes (20s -> 25s timeout, broken @brief markup),
"handshare" -> "handshake" typo, removed extraneous semicolon after
stepToString(), made stepToString return string_view, and simplified the
stop() log-severity comment so it matches the actual code.
This commit is contained in:
Vito
2026-05-18 18:44:25 +02:00
parent ad7232cbc5
commit 5548f1ce40
4 changed files with 41 additions and 16 deletions

View File

@@ -25,6 +25,7 @@
#include <boost/asio/io_context.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <boost/asio/post.hpp>
#include <boost/asio/ssl/error.hpp>
#include <boost/asio/ssl/stream_base.hpp>
#include <boost/asio/ssl/verify_mode.hpp>
#include <boost/asio/strand.hpp>
@@ -34,6 +35,9 @@
#include <boost/beast/http/status.hpp>
#include <boost/system/system_error.hpp>
#include <openssl/err.h>
#include <openssl/sslerr.h>
#include <chrono>
#include <cstdint>
#include <exception>
@@ -179,10 +183,14 @@ ConnectAttempt::onShutdown(error_code ec)
// - stream_truncated: the tcp connection closed (no handshake) it could
// occur if a peer does not perform a graceful disconnect
// - broken_pipe: the peer is gone
// - application data after close notify: benign SSL shutdown condition
// - SSL_R_APPLICATION_DATA_AFTER_CLOSE_NOTIFY: benign SSL shutdown
// condition where the peer sent data after we sent close_notify
bool const isAppDataAfterCloseNotify =
ec.category() == boost::asio::error::get_ssl_category() &&
ERR_GET_REASON(ec.value()) == SSL_R_APPLICATION_DATA_AFTER_CLOSE_NOTIFY;
bool const shouldLog =
(ec != boost::asio::error::eof && ec != boost::asio::error::operation_aborted &&
ec.message().find("application data after close notify") == std::string::npos);
!isAppDataAfterCloseNotify);
if (shouldLog)
{

View File

@@ -3,6 +3,7 @@
#include <xrpld/overlay/detail/OverlayImpl.h>
#include <chrono>
#include <string_view>
namespace xrpl {
@@ -24,7 +25,7 @@ namespace xrpl {
* 5. **Complete**: Connection successfully established
*
* Uses a hybrid timeout approach:
* - **Global Timer**: Hard limit (20s) for entire connection process
* - **Global Timer**: Hard limit (25s) for entire connection process
* - **Step Timers**: Individual timeouts for each connection phase
*
* - All errors result in connection termination
@@ -229,7 +230,7 @@ private:
void
processResponse();
static std::string
static std::string_view
stepToString(ConnectionStep step)
{
switch (step)
@@ -250,7 +251,7 @@ private:
return "ShutdownStarted";
}
return "Unknown";
};
}
template <class = void>
static boost::asio::ip::tcp::endpoint

View File

@@ -70,6 +70,7 @@
#include <boost/asio/completion_condition.hpp>
#include <boost/asio/error.hpp>
#include <boost/asio/post.hpp>
#include <boost/asio/ssl/error.hpp>
#include <boost/asio/strand.hpp>
#include <boost/asio/write.hpp>
#include <boost/beast/core/multi_buffer.hpp>
@@ -77,6 +78,8 @@
#include <boost/beast/core/stream_traits.hpp>
#include <google/protobuf/message.h>
#include <openssl/err.h>
#include <openssl/sslerr.h>
#include <xrpl.pb.h>
@@ -272,10 +275,7 @@ PeerImp::stop()
if (!socket_.is_open())
return;
// The rationale for using different severity levels is that
// outbound connections are under our control and may be logged
// at a higher level, but inbound connections are more numerous and
// uncontrolled so to prevent log flooding the severity is reduced.
// Log peer stop at debug severity to avoid excessive log noise.
JLOG(journal_.debug()) << "stop: Stop";
shutdown();
@@ -675,9 +675,9 @@ PeerImp::tryAsyncShutdown()
shutdownStarted_ = true;
setTimer(kShutdownTimerInterval);
// gracefully shutdown the SSL socket, performing a shutdown handshake
// gracefully shutdown the SSL socket, performing a shutdown handshake.
// The safety timer was armed in shutdown(); it bounds the whole shutdown
// lifecycle (I/O drain + SSL handshake), not just this async_shutdown.
stream_.async_shutdown(bind_executor(
strand_, std::bind(&PeerImp::onShutdown, shared_from_this(), std::placeholders::_1)));
}
@@ -694,6 +694,14 @@ PeerImp::shutdown()
boost::beast::get_lowest_layer(stream_).cancel();
// Arm the shutdown safety timer at request time, replacing any pending
// peer-health timer. Otherwise, if I/O is still pending, the existing
// kPeerTimerInterval (60s) timer could be the one to fire and onTimer()
// would treat its expiry as a shutdown timeout — delaying forced close
// far beyond kShutdownTimerInterval.
cancelTimer();
setTimer(kShutdownTimerInterval);
tryAsyncShutdown();
}
@@ -708,9 +716,14 @@ PeerImp::onShutdown(error_code ec)
// - stream_truncated: the tcp connection closed (no handshake) it could
// occur if a peer does not perform a graceful disconnect
// - broken_pipe: the peer is gone
// - SSL_R_APPLICATION_DATA_AFTER_CLOSE_NOTIFY: benign SSL shutdown
// condition where the peer sent data after we sent close_notify
bool const isAppDataAfterCloseNotify =
ec.category() == boost::asio::error::get_ssl_category() &&
ERR_GET_REASON(ec.value()) == SSL_R_APPLICATION_DATA_AFTER_CLOSE_NOTIFY;
bool const shouldLog =
(ec != boost::asio::error::eof && ec != boost::asio::error::operation_aborted &&
ec.message().find("application data after close notify") == std::string::npos);
!isAppDataAfterCloseNotify);
if (shouldLog)
{
@@ -913,6 +926,9 @@ PeerImp::doAccept()
app_);
// Write the whole buffer and only start protocol when that's done.
// Track the write via writePending_ so a concurrent shutdown() doesn't
// launch async_shutdown while this write is still in flight.
writePending_ = true;
boost::asio::async_write(
stream_,
writeBuffer->data(),
@@ -921,6 +937,7 @@ PeerImp::doAccept()
strand_,
[this, writeBuffer, self = shared_from_this()](
error_code ec, std::size_t bytesTransferred) {
writePending_ = false;
if (!socket_.is_open())
return;
if (ec == boost::asio::error::operation_aborted)
@@ -963,7 +980,7 @@ PeerImp::domain() const
void
PeerImp::doProtocolStart()
{
// a shutdown was initiated before the handshare, there is nothing to do
// a shutdown was initiated before the handshake, there is nothing to do
if (shutdown_)
{
tryAsyncShutdown();

View File

@@ -33,9 +33,8 @@ class SHAMap;
/**
* @class PeerImp
* @brief This class manages established peer-to-peer connections, handles
message exchange, monitors connection health, and graceful shutdown.
* message exchange, monitors connection health, and graceful shutdown.
*
* The PeerImp shutdown mechanism is a multi-stage process
* designed to ensure graceful connection termination while handling ongoing
* I/O operations safely. The shutdown can be initiated from multiple points