Add more fine grained counters (#683)

Fixes #665
This commit is contained in:
Alex Kremer
2023-06-09 16:48:55 +01:00
committed by GitHub
parent 9d28e64383
commit b83d206ced
15 changed files with 454 additions and 502 deletions

View File

@@ -23,13 +23,22 @@
namespace RPC {
void
Counters::rpcFailed(std::string const& method)
{
std::scoped_lock lk(mutex_);
MethodInfo& counters = methodInfo_[method];
++counters.started;
++counters.failed;
}
void
Counters::rpcErrored(std::string const& method)
{
std::scoped_lock lk(mutex_);
MethodInfo& counters = methodInfo_[method];
counters.started++;
counters.errored++;
++counters.started;
++counters.errored;
}
void
@@ -37,8 +46,8 @@ Counters::rpcComplete(std::string const& method, std::chrono::microseconds const
{
std::scoped_lock lk(mutex_);
MethodInfo& counters = methodInfo_[method];
counters.started++;
counters.finished++;
++counters.started;
++counters.finished;
counters.duration += rpcDuration.count();
}
@@ -47,7 +56,45 @@ Counters::rpcForwarded(std::string const& method)
{
std::scoped_lock lk(mutex_);
MethodInfo& counters = methodInfo_[method];
counters.forwarded++;
++counters.forwarded;
}
void
Counters::rpcFailedToForward(std::string const& method)
{
std::scoped_lock lk(mutex_);
MethodInfo& counters = methodInfo_[method];
++counters.failedForward;
}
void
Counters::onTooBusy()
{
++tooBusyCounter_;
}
void
Counters::onNotReady()
{
++notReadyCounter_;
}
void
Counters::onBadSyntax()
{
++badSyntaxCounter_;
}
void
Counters::onUnknownCommand()
{
++unknownCommandCounter_;
}
void
Counters::onInternalError()
{
++internalErrorCounter_;
}
boost::json::object
@@ -65,12 +112,20 @@ Counters::report() const
counters[JS(started)] = std::to_string(info.started);
counters[JS(finished)] = std::to_string(info.finished);
counters[JS(errored)] = std::to_string(info.errored);
counters[JS(failed)] = std::to_string(info.failed);
counters["forwarded"] = std::to_string(info.forwarded);
counters["failed_forward"] = std::to_string(info.failedForward);
counters[JS(duration_us)] = std::to_string(info.duration);
rpc[method] = std::move(counters);
}
obj["too_busy_errors"] = std::to_string(tooBusyCounter_);
obj["not_ready_errors"] = std::to_string(notReadyCounter_);
obj["bad_syntax_errors"] = std::to_string(badSyntaxCounter_);
obj["unknown_command_errors"] = std::to_string(unknownCommandCounter_);
obj["internal_errors"] = std::to_string(internalErrorCounter_);
obj["work_queue"] = workQueue_.get().report();
return obj;

View File

@@ -36,14 +36,23 @@ class Counters
{
std::uint64_t started = 0u;
std::uint64_t finished = 0u;
std::uint64_t failed = 0u;
std::uint64_t errored = 0u;
std::uint64_t forwarded = 0u;
std::uint64_t failedForward = 0u;
std::uint64_t duration = 0u;
};
mutable std::mutex mutex_;
std::unordered_map<std::string, MethodInfo> methodInfo_;
// counters that don't carry RPC method information
std::atomic_uint64_t tooBusyCounter_;
std::atomic_uint64_t notReadyCounter_;
std::atomic_uint64_t badSyntaxCounter_;
std::atomic_uint64_t unknownCommandCounter_;
std::atomic_uint64_t internalErrorCounter_;
std::reference_wrapper<const WorkQueue> workQueue_;
public:
@@ -55,6 +64,9 @@ public:
return Counters{wq};
}
void
rpcFailed(std::string const& method);
void
rpcErrored(std::string const& method);
@@ -64,6 +76,24 @@ public:
void
rpcForwarded(std::string const& method);
void
rpcFailedToForward(std::string const& method);
void
onTooBusy();
void
onNotReady();
void
onBadSyntax();
void
onUnknownCommand();
void
onInternalError();
boost::json::object
report() const;
};

View File

@@ -117,24 +117,31 @@ public:
auto toForward = ctx.params;
toForward["command"] = ctx.method;
auto const res = balancer_->forwardToRippled(toForward, ctx.clientIp, ctx.yield);
notifyForwarded(ctx.method);
if (!res)
if (auto const res = balancer_->forwardToRippled(toForward, ctx.clientIp, ctx.yield); not res)
{
notifyFailedToForward(ctx.method);
return Status{RippledError::rpcFAILED_TO_FORWARD};
return *res;
}
else
{
notifyForwarded(ctx.method);
return *res;
}
}
if (backend_->isTooBusy())
{
log_.error() << "Database is too busy. Rejecting request";
notifyTooBusy(); // TODO: should we add ctx.method if we have it?
return Status{RippledError::rpcTOO_BUSY};
}
auto const method = handlerTable_.getHandler(ctx.method);
if (!method)
{
notifyUnknownCommand();
return Status{RippledError::rpcUNKNOWN_COMMAND};
}
try
{
@@ -149,24 +156,23 @@ public:
if (v)
return v->as_object();
else
{
notifyErrored(ctx.method);
return Status{v.error()};
}
catch (InvalidParamsError const& err)
{
return Status{RippledError::rpcINVALID_PARAMS, err.what()};
}
catch (AccountNotFoundError const& err)
{
return Status{RippledError::rpcACT_NOT_FOUND, err.what()};
}
}
catch (Backend::DatabaseTimeout const& t)
{
log_.error() << "Database timeout";
notifyTooBusy();
return Status{RippledError::rpcTOO_BUSY};
}
catch (std::exception const& err)
catch (std::exception const& ex)
{
log_.error() << ctx.tag() << " caught exception: " << err.what();
log_.error() << ctx.tag() << "Caught exception: " << ex.what();
notifyInternalError();
return Status{RippledError::rpcINTERNAL};
}
}
@@ -180,7 +186,13 @@ public:
bool
post(Fn&& func, std::string const& ip)
{
return workQueue_.get().postCoro(std::forward<Fn>(func), dosGuard_.get().isWhiteListed(ip));
if (!workQueue_.get().postCoro(std::forward<Fn>(func), dosGuard_.get().isWhiteListed(ip)))
{
notifyTooBusy();
return false;
}
return true;
}
/**
@@ -197,7 +209,24 @@ public:
}
/**
* @brief Notify the system that specified method failed to execute
* @brief Notify the system that specified method failed to execute due to a recoverable user error
*
* Used for errors based on user input, not actual failures of the db or clio itself.
*
* @param method
*/
void
notifyFailed(std::string const& method)
{
if (validHandler(method))
counters_.get().rpcFailed(method);
}
/**
* @brief Notify the system that specified method failed due to some unrecoverable error
*
* Used for erors such as database timeout, internal errors, etc.
*
* @param method
*/
void
@@ -218,6 +247,64 @@ public:
counters_.get().rpcForwarded(method);
}
/**
* @brief Notify the system that specified method failed to be forwarded to rippled
* @param method
*/
void
notifyFailedToForward(std::string const& method)
{
if (validHandler(method))
counters_.get().rpcFailedToForward(method);
}
/**
* @brief Notify the system that the RPC system is too busy to handle an incoming request
*/
void
notifyTooBusy()
{
counters_.get().onTooBusy();
}
/**
* @brief Notify the system that the RPC system was not ready to handle an incoming request
*
* This happens when the backend is not yet have a ledger range
*/
void
notifyNotReady()
{
counters_.get().onNotReady();
}
/**
* @brief Notify the system that the incoming request did not specify the RPC method/command
*/
void
notifyBadSyntax()
{
counters_.get().onBadSyntax();
}
/**
* @brief Notify the system that the incoming request specified an unknown/unsupported method/command
*/
void
notifyUnknownCommand()
{
counters_.get().onUnknownCommand();
}
/**
* @brief Notify the system that the incoming request lead to an internal error (unrecoverable)
*/
void
notifyInternalError()
{
counters_.get().onInternalError();
}
private:
bool
shouldForwardToRippled(Web::Context const& ctx) const

View File

@@ -36,66 +36,6 @@ clio::Logger gLog{"RPC"};
namespace RPC {
std::optional<bool>
getBool(boost::json::object const& request, std::string const& field)
{
if (!request.contains(field))
return {};
else if (request.at(field).is_bool())
return request.at(field).as_bool();
else
throw InvalidParamsError("Invalid field " + field + ", not bool.");
}
bool
getBool(boost::json::object const& request, std::string const& field, bool dfault)
{
if (auto res = getBool(request, field))
return *res;
else
return dfault;
}
bool
getRequiredBool(boost::json::object const& request, std::string const& field)
{
if (auto res = getBool(request, field))
return *res;
else
throw InvalidParamsError("Missing field " + field);
}
std::optional<std::uint32_t>
getUInt(boost::json::object const& request, std::string const& field)
{
if (!request.contains(field))
return {};
else if (request.at(field).is_uint64())
return request.at(field).as_uint64();
else if (request.at(field).is_int64())
return request.at(field).as_int64();
else
throw InvalidParamsError("Invalid field " + field + ", not uint.");
}
std::uint32_t
getUInt(boost::json::object const& request, std::string const& field, std::uint32_t const dfault)
{
if (auto res = getUInt(request, field))
return *res;
else
return dfault;
}
std::uint32_t
getRequiredUInt(boost::json::object const& request, std::string const& field)
{
if (auto res = getUInt(request, field))
return *res;
else
throw InvalidParamsError("Missing field " + field);
}
std::optional<AccountCursor>
parseAccountCursor(std::optional<std::string> jsonCursor)
{
@@ -130,143 +70,6 @@ parseAccountCursor(std::optional<std::string> jsonCursor)
return AccountCursor({cursorIndex, startHint});
}
std::optional<std::string>
getString(boost::json::object const& request, std::string const& field)
{
if (!request.contains(field))
return {};
else if (request.at(field).is_string())
return request.at(field).as_string().c_str();
else
throw InvalidParamsError("Invalid field " + field + ", not string.");
}
std::string
getRequiredString(boost::json::object const& request, std::string const& field)
{
if (auto res = getString(request, field))
return *res;
else
throw InvalidParamsError("Missing field " + field);
}
std::string
getString(boost::json::object const& request, std::string const& field, std::string dfault)
{
if (auto res = getString(request, field))
return *res;
else
return dfault;
}
Status
getHexMarker(boost::json::object const& request, ripple::uint256& marker)
{
if (request.contains(JS(marker)))
{
if (!request.at(JS(marker)).is_string())
return Status{RippledError::rpcINVALID_PARAMS, "markerNotString"};
if (!marker.parseHex(request.at(JS(marker)).as_string().c_str()))
return Status{RippledError::rpcINVALID_PARAMS, "malformedMarker"};
}
return {};
}
Status
getAccount(
boost::json::object const& request,
ripple::AccountID& account,
boost::string_view const& field,
bool required)
{
if (!request.contains(field))
{
if (required)
return Status{RippledError::rpcINVALID_PARAMS, field.to_string() + "Missing"};
return {};
}
if (!request.at(field).is_string())
return Status{RippledError::rpcINVALID_PARAMS, field.to_string() + "NotString"};
if (auto a = accountFromStringStrict(request.at(field).as_string().c_str()); a)
{
account = a.value();
return {};
}
return Status{RippledError::rpcACT_MALFORMED, field.to_string() + "Malformed"};
}
Status
getOptionalAccount(
boost::json::object const& request,
std::optional<ripple::AccountID>& account,
boost::string_view const& field)
{
if (!request.contains(field))
{
account = {};
return {};
}
if (!request.at(field).is_string())
return Status{RippledError::rpcINVALID_PARAMS, field.to_string() + "NotString"};
if (auto a = accountFromStringStrict(request.at(field).as_string().c_str()); a)
{
account = a.value();
return {};
}
return Status{RippledError::rpcINVALID_PARAMS, field.to_string() + "Malformed"};
}
Status
getAccount(boost::json::object const& request, ripple::AccountID& accountId)
{
return getAccount(request, accountId, JS(account), true);
}
Status
getAccount(boost::json::object const& request, ripple::AccountID& destAccount, boost::string_view const& field)
{
return getAccount(request, destAccount, field, false);
}
Status
getTaker(boost::json::object const& request, ripple::AccountID& takerID)
{
if (request.contains(JS(taker)))
{
auto parsed = parseTaker(request.at(JS(taker)));
if (auto status = std::get_if<Status>(&parsed); status)
return *status;
else
takerID = std::get<ripple::AccountID>(parsed);
}
return {};
}
Status
getChannelId(boost::json::object const& request, ripple::uint256& channelId)
{
if (!request.contains(JS(channel_id)))
return Status{RippledError::rpcINVALID_PARAMS, "missingChannelID"};
if (!request.at(JS(channel_id)).is_string())
return Status{RippledError::rpcINVALID_PARAMS, "channelIDNotString"};
if (!channelId.parseHex(request.at(JS(channel_id)).as_string().c_str()))
return Status{RippledError::rpcCHANNEL_MALFORMED, "malformedChannelID"};
return {};
}
std::optional<ripple::STAmount>
getDeliveredAmount(
std::shared_ptr<ripple::STTx const> const& txn,
@@ -305,11 +108,6 @@ canHaveDeliveredAmount(
if (tt != ripple::ttPAYMENT && tt != ripple::ttCHECK_CASH && tt != ripple::ttACCOUNT_DELETE)
return false;
/*
if (tt == ttCHECK_CASH && !getFix1623Enabled())
return false;
*/
if (meta->getResultTER() != ripple::tesSUCCESS)
return false;

View File

@@ -216,54 +216,6 @@ parseBook(boost::json::object const& request);
std::variant<Status, ripple::AccountID>
parseTaker(boost::json::value const& request);
std::optional<std::uint32_t>
getUInt(boost::json::object const& request, std::string const& field);
std::uint32_t
getUInt(boost::json::object const& request, std::string const& field, std::uint32_t dfault);
std::uint32_t
getRequiredUInt(boost::json::object const& request, std::string const& field);
std::optional<bool>
getBool(boost::json::object const& request, std::string const& field);
bool
getBool(boost::json::object const& request, std::string const& field, bool dfault);
bool
getRequiredBool(boost::json::object const& request, std::string const& field);
std::optional<std::string>
getString(boost::json::object const& request, std::string const& field);
std::string
getRequiredString(boost::json::object const& request, std::string const& field);
std::string
getString(boost::json::object const& request, std::string const& field, std::string dfault);
Status
getHexMarker(boost::json::object const& request, ripple::uint256& marker);
Status
getAccount(boost::json::object const& request, ripple::AccountID& accountId);
Status
getAccount(boost::json::object const& request, ripple::AccountID& destAccount, boost::string_view const& field);
Status
getOptionalAccount(
boost::json::object const& request,
std::optional<ripple::AccountID>& account,
boost::string_view const& field);
Status
getTaker(boost::json::object const& request, ripple::AccountID& takerID);
Status
getChannelId(boost::json::object const& request, ripple::uint256& channelId);
bool
specifiesCurrentOrClosedLedger(boost::json::object const& request);

View File

@@ -27,10 +27,8 @@ namespace RPC {
LedgerRangeHandler::Result
LedgerRangeHandler::process() const
{
if (auto const maybeRange = sharedPtrBackend_->fetchLedgerRange(); maybeRange)
return Output{*maybeRange};
else
return Error{Status{RippledError::rpcNOT_READY, "rangeNotFound"}};
// note: we can't get here if range is not available so it's safe
return Output{sharedPtrBackend_->fetchLedgerRange().value()};
}
void

View File

@@ -115,12 +115,6 @@ public:
using namespace std::chrono;
auto const range = backend_->fetchLedgerRange();
// TODO: remove this check in https://github.com/XRPLF/clio/issues/592
// note: this should happen on framework level.
if (not range.has_value())
return Error{Status{RippledError::rpcNOT_READY, "emptyDatabase", "The server has no data in the database"}};
auto const lgrInfo = backend_->fetchLedgerBySequence(range->maxSequence, ctx.yield);
if (not lgrInfo.has_value())
return Error{Status{RippledError::rpcINTERNAL}};

View File

@@ -60,34 +60,39 @@ public:
void
operator()(std::string const& reqStr, std::shared_ptr<Server::ConnectionBase> const& connection)
{
auto req = boost::json::object{};
try
{
req = boost::json::parse(reqStr).as_object();
auto req = boost::json::parse(reqStr).as_object();
perfLog_.debug() << connection->tag() << "Adding to work queue";
if (not connection->upgraded and not req.contains("params"))
req["params"] = boost::json::array({boost::json::object{}});
if (!rpcEngine_->post(
[request = std::move(req), connection, this](boost::asio::yield_context yc) mutable {
handleRequest(yc, std::move(request), connection);
},
connection->clientIp))
{
rpcEngine_->notifyTooBusy();
connection->send(
boost::json::serialize(RPC::makeError(RPC::RippledError::rpcTOO_BUSY)),
boost::beast::http::status::ok);
}
}
catch (boost::exception const& _)
catch (boost::system::system_error const&)
{
// system_error thrown when json parsing failed
rpcEngine_->notifyBadSyntax();
connection->send(
boost::json::serialize(RPC::makeError(RPC::RippledError::rpcBAD_SYNTAX)),
boost::beast::http::status::ok);
return;
}
perfLog_.debug() << connection->tag() << "Adding to work queue";
// specially handle for http connections
if (!connection->upgraded)
catch (std::exception const& ex)
{
if (!req.contains("params"))
req["params"] = boost::json::array({boost::json::object{}});
}
if (!rpcEngine_->post(
[request = std::move(req), connection, this](boost::asio::yield_context yc) mutable {
handleRequest(yc, std::move(request), connection);
},
connection->clientIp))
{
connection->send(
boost::json::serialize(RPC::makeError(RPC::RippledError::rpcTOO_BUSY)), boost::beast::http::status::ok);
perfLog_.error() << connection->tag() << "Caught exception: " << ex.what();
rpcEngine_->notifyInternalError();
throw;
}
}
@@ -121,14 +126,11 @@ private:
if (!id.is_null())
e["id"] = id;
e["request"] = request;
if (connection->upgraded)
{
return e;
}
else
{
return boost::json::object{{"result", e}};
}
};
try
@@ -136,9 +138,12 @@ private:
auto const range = backend_->fetchLedgerRange();
// for the error happened before the handler, we don't attach the clio warning
if (!range)
{
rpcEngine_->notifyNotReady();
return connection->send(
boost::json::serialize(composeError(RPC::RippledError::rpcNOT_READY)),
boost::beast::http::status::ok);
}
auto context = connection->upgraded
? RPC::make_WsContext(
@@ -149,6 +154,8 @@ private:
{
perfLog_.warn() << connection->tag() << "Could not create RPC context";
log_.warn() << connection->tag() << "Could not create RPC context";
rpcEngine_->notifyBadSyntax();
return connection->send(
boost::json::serialize(composeError(RPC::RippledError::rpcBAD_SYNTAX)),
boost::beast::http::status::ok);
@@ -162,16 +169,16 @@ private:
boost::json::object response;
if (auto const status = std::get_if<RPC::Status>(&v))
{
rpcEngine_->notifyErrored(context->method);
// note: error statuses are counted/notified in buildResponse itself
response = std::move(composeError(*status));
auto const responseStr = boost::json::serialize(response);
perfLog_.debug() << context->tag() << "Encountered error: " << responseStr;
log_.debug() << context->tag() << "Encountered error: " << responseStr;
}
else
{
// This can still technically be an error. Clio counts forwarded
// requests as successful.
// This can still technically be an error. Clio counts forwarded requests as successful.
rpcEngine_->notifyComplete(context->method, us);
auto& result = std::get<boost::json::object>(v);
@@ -189,6 +196,7 @@ private:
{
response["result"] = result;
}
// for ws , there is additional field "status" in response
// otherwise , the "status" is in the "result" field
if (connection->upgraded)
@@ -215,10 +223,15 @@ private:
response["warnings"] = warnings;
connection->send(boost::json::serialize(response), boost::beast::http::status::ok);
}
catch (std::exception const& e)
catch (std::exception const& ex)
{
perfLog_.error() << connection->tag() << "Caught exception : " << e.what();
log_.error() << connection->tag() << "Caught exception : " << e.what();
// note: while we are catching this in buildResponse too, this is here to make sure
// that any other code that may throw is outside of buildResponse is also worked around.
perfLog_.error() << connection->tag() << "Caught exception: " << ex.what();
log_.error() << connection->tag() << "Caught exception: " << ex.what();
rpcEngine_->notifyInternalError();
return connection->send(
boost::json::serialize(composeError(RPC::RippledError::rpcINTERNAL)),
boost::beast::http::status::internal_server_error);

View File

@@ -38,7 +38,8 @@ namespace Server {
using tcp = boost::asio::ip::tcp;
/**
* This is the implementation class for http sessions
* @brief This is the implementation class for http sessions
*
* @tparam Derived The derived class
* @tparam Handler The handler class, will be called when a request is received.
*/
@@ -66,13 +67,11 @@ class HttpBase : public ConnectionBase
if (self_.dead())
return;
// The lifetime of the message has to extend
// for the duration of the async operation so
// we use a shared_ptr to manage it.
// The lifetime of the message has to extend for the duration of the async operation so we use a shared_ptr
// to manage it.
auto sp = std::make_shared<http::message<isRequest, Body, Fields>>(std::move(msg));
// Store a type-erased version of the shared
// pointer in the class to keep it alive.
// Store a type-erased version of the shared pointer in the class to keep it alive.
self_.res_ = sp;
// Write the response
@@ -199,6 +198,7 @@ public:
// connection limit
if (!dosGuard_.get().request(clientIp))
{
// TODO: this looks like it could be useful to count too in the future
return sender_(httpResponse(
http::status::service_unavailable,
"text/plain",
@@ -211,9 +211,8 @@ public:
{
(*handler_)(req_.body(), derived().shared_from_this());
}
catch (std::exception const& e)
catch (std::exception const&)
{
perfLog_.error() << tag() << "Caught exception : " << e.what();
return sender_(httpResponse(
http::status::internal_server_error,
"application/json",

View File

@@ -230,7 +230,7 @@ public:
e["id"] = request.as_object().at("id");
e["request"] = std::move(request);
}
catch (std::exception&)
catch (std::exception const&)
{
e["request"] = std::move(requestStr);
}
@@ -246,6 +246,7 @@ public:
// dosGuard served request++ and check ip address
if (!dosGuard_.get().request(clientIp))
{
// TODO: could be useful to count in counters in the future too
sendError(RPC::RippledError::rpcSLOW_DOWN, std::move(msg));
}
else
@@ -254,9 +255,8 @@ public:
{
(*handler_)(msg, shared_from_this());
}
catch (std::exception const& e)
catch (std::exception const&)
{
perfLog_.error() << tag() << "Caught exception : " << e.what();
sendError(RPC::RippledError::rpcINTERNAL, std::move(msg));
}
}