fix: Catch exception in ClusterCommunicationService (#2093)

Fixes #2016.
This commit is contained in:
Sergey Kuznetsov
2025-05-12 16:16:23 +01:00
committed by GitHub
parent dbfabd4102
commit aa910ba889
5 changed files with 47 additions and 12 deletions

View File

@@ -36,6 +36,7 @@
#include <chrono>
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
@@ -111,9 +112,12 @@ ClusterCommunicationService::selfData() const
return result;
}
std::vector<ClioNode>
std::expected<std::vector<ClioNode>, std::string>
ClusterCommunicationService::clusterData() const
{
if (not isHealthy_) {
return std::unexpected{"Service is not healthy"};
}
std::vector<ClioNode> result;
boost::asio::spawn(strand_, [this, &result](boost::asio::yield_context) {
result = otherNodesData_;
@@ -127,7 +131,13 @@ ClusterCommunicationService::doRead(boost::asio::yield_context yield)
{
otherNodesData_.clear();
auto const expectedResult = backend_->fetchClioNodesData(yield);
BackendInterface::ClioNodesDataFetchResult expectedResult;
try {
expectedResult = backend_->fetchClioNodesData(yield);
} catch (...) {
expectedResult = std::unexpected{"Failed to fecth Clio nodes data"};
}
if (!expectedResult.has_value()) {
LOG(log_.error()) << "Failed to fetch nodes data";
isHealthy_ = false;

View File

@@ -34,6 +34,7 @@
#include <chrono>
#include <memory>
#include <string>
#include <vector>
namespace cluster {
@@ -125,9 +126,9 @@ public:
/**
* @brief Get the data of all nodes in the cluster (including self).
*
* @return The data of all nodes in the cluster.
* @return The data of all nodes in the cluster or error if the service is not healthy.
*/
std::vector<ClioNode>
std::expected<std::vector<ClioNode>, std::string>
clusterData() const override;
private:

View File

@@ -21,6 +21,8 @@
#include "cluster/ClioNode.hpp"
#include <expected>
#include <string>
#include <vector>
namespace cluster {
@@ -43,9 +45,9 @@ public:
/**
* @brief Get the data of all nodes in the cluster (including self).
*
* @return The data of all nodes in the cluster.
* @return The data of all nodes in the cluster or error if the service is not healthy.
*/
[[nodiscard]] virtual std::vector<ClioNode>
[[nodiscard]] virtual std::expected<std::vector<ClioNode>, std::string>
clusterData() const = 0;
};

View File

@@ -569,13 +569,17 @@ public:
virtual std::optional<std::string>
fetchMigratorStatus(std::string const& migratorName, boost::asio::yield_context yield) const = 0;
/** @brief Return type for fetchClioNodesData() method */
using ClioNodesDataFetchResult =
std::expected<std::vector<std::pair<boost::uuids::uuid, std::string>>, std::string>;
/**
* @brief Fetches the data of all nodes in the cluster.
*
* @param yield The coroutine context
*@return The data of all nodes in the cluster.
*/
[[nodiscard]] virtual std::expected<std::vector<std::pair<boost::uuids::uuid, std::string>>, std::string>
[[nodiscard]] virtual ClioNodesDataFetchResult
fetchClioNodesData(boost::asio::yield_context yield) const = 0;
/**

View File

@@ -19,6 +19,7 @@
#include "cluster/ClioNode.hpp"
#include "cluster/ClusterCommunicationService.hpp"
#include "data/BackendInterface.hpp"
#include "util/MockBackendTestFixture.hpp"
#include "util/MockPrometheus.hpp"
#include "util/TimeUtils.hpp"
@@ -111,6 +112,20 @@ TEST_F(ClusterCommunicationServiceTest, Read_FetchFailed)
EXPECT_FALSE(isHealthyMetric);
}
TEST_F(ClusterCommunicationServiceTest, Read_FetchThrew)
{
EXPECT_TRUE(isHealthyMetric);
EXPECT_CALL(*backend_, writeNodeMessage).Times(2).WillOnce([](auto&&, auto&&) {}).WillOnce([this](auto&&, auto&&) {
notify();
});
EXPECT_CALL(*backend_, fetchClioNodesData).WillOnce(testing::Throw(data::DatabaseTimeout{}));
clusterCommunicationService.run();
wait();
EXPECT_FALSE(isHealthyMetric);
EXPECT_FALSE(clusterCommunicationService.clusterData().has_value());
}
TEST_F(ClusterCommunicationServiceTest, Read_GotInvalidJson)
{
EXPECT_TRUE(isHealthyMetric);
@@ -126,6 +141,7 @@ TEST_F(ClusterCommunicationServiceTest, Read_GotInvalidJson)
clusterCommunicationService.run();
wait();
EXPECT_FALSE(isHealthyMetric);
EXPECT_FALSE(clusterCommunicationService.clusterData().has_value());
}
TEST_F(ClusterCommunicationServiceTest, Read_GotInvalidNodeData)
@@ -141,6 +157,7 @@ TEST_F(ClusterCommunicationServiceTest, Read_GotInvalidNodeData)
clusterCommunicationService.run();
wait();
EXPECT_FALSE(isHealthyMetric);
EXPECT_FALSE(clusterCommunicationService.clusterData().has_value());
}
TEST_F(ClusterCommunicationServiceTest, Read_Success)
@@ -161,16 +178,17 @@ TEST_F(ClusterCommunicationServiceTest, Read_Success)
EXPECT_CALL(*backend_, writeNodeMessage).Times(2).WillOnce([](auto&&, auto&&) {}).WillOnce([&](auto&&, auto&&) {
auto const clusterData = clusterCommunicationService.clusterData();
ASSERT_EQ(clusterData.size(), otherNodesData.size() + 1);
ASSERT_TRUE(clusterData.has_value());
ASSERT_EQ(clusterData->size(), otherNodesData.size() + 1);
for (auto const& node : otherNodesData) {
auto const it =
std::ranges::find_if(clusterData, [&](ClioNode const& n) { return *(n.uuid) == *(node.uuid); });
EXPECT_NE(it, clusterData.cend()) << boost::uuids::to_string(*node.uuid);
std::ranges::find_if(*clusterData, [&](ClioNode const& n) { return *(n.uuid) == *(node.uuid); });
EXPECT_NE(it, clusterData->cend()) << boost::uuids::to_string(*node.uuid);
}
auto const selfUuid = clusterCommunicationService.selfUuid();
auto const it =
std::ranges::find_if(clusterData, [&selfUuid](ClioNode const& node) { return node.uuid == selfUuid; });
EXPECT_NE(it, clusterData.end());
std::ranges::find_if(*clusterData, [&selfUuid](ClioNode const& node) { return node.uuid == selfUuid; });
EXPECT_NE(it, clusterData->end());
notify();
});