Large cluster optimizations. (#348)

* Added sync log to streamer.
* Fixed ledger closing attempt while syncing.
* Added diagnostic contract.
* Reset to stage 0 on unreliable votes.
* Reduced peer msg age threshold.
* Added health tracking.
* Weakly-connected detection improvement.
* Increased version to 0.5.1.
* Improved client lib server version check.
* Added health logging support to text client.
* Added weakly connected status in status response.
* Increased max peers limits when serializing.
* Local docker cluster manual ip.
* Updated Vultr script VM region order.
* Sync status reporting improvement.
* Added milliseconds to logging.
This commit is contained in:
Ravin Perera
2021-09-17 11:53:49 +05:30
committed by GitHub
parent c686745c81
commit 6dc0776b56
32 changed files with 720 additions and 86 deletions

View File

@@ -2,6 +2,7 @@
#include "util/sequence_hash.hpp"
#include "ledger/ledger_common.hpp"
#include "conf.hpp"
#include "p2p/p2p.hpp"
namespace status
{
@@ -20,6 +21,11 @@ namespace status
// Locked exclusively while `peers` is replaced with an updated set.
std::shared_mutex peers_mutex;
std::set<conf::peer_ip_port> peers; // Known ip:port pairs for connection verified peers.
// Cached size of `peers`; refreshed whenever the replaced set has a different size.
std::atomic<size_t> peer_count = 0;
// Last value passed to set_weakly_connected(); reported in connectivity_health events.
std::atomic<bool> weakly_connected = false;
// Last value passed to set_available_mesh_capacity().
// NOTE(review): the initial -1 presumably means "not reported yet" - confirm with the callers.
std::atomic<int16_t> available_mesh_capacity = -1;
// Latest proposal stats; overwritten by report_proposal_batch() and enqueued by emit_proposal_health().
proposal_health phealth = {};
//----- Ledger status
@@ -33,7 +39,7 @@ namespace status
void ledger_created(const util::sequence_hash &ledger_id, const ledger::ledger_record &ledger)
{
// If currently not-in-sync, report it as in-sync when a ledger is created.
if (in_sync != 1)
if (in_sync.load() != 1)
sync_status_changed(true);
std::unique_lock lock(ledger_mutex);
@@ -44,8 +50,12 @@ namespace status
// Records the node's sync state in `in_sync` (1 = in sync, 0 = not in sync) and queues a
// sync_status_change_event carrying the new state.
// NOTE(review): this text looks like a rendered diff with its +/- markers stripped. Going by the
// hunk header above (8 old lines -> 12 new lines), the first two statements appear to be the
// removed pre-change body and the guarded block after them its replacement - confirm against
// the real file before relying on the statement order shown here.
void sync_status_changed(const bool new_in_sync)
{
in_sync = new_in_sync ? 1 : 0;
event_queue.try_enqueue(sync_status_change_event{new_in_sync});
// Store the value and enqueue the event only when it differs from the current state,
// so repeated reports of the same state do not enqueue duplicate events.
const int new_value = new_in_sync ? 1 : 0;
if (new_value != in_sync.load())
{
in_sync = new_value;
event_queue.try_enqueue(sync_status_change_event{new_in_sync});
}
}
const util::sequence_hash get_lcl_id()
@@ -56,7 +66,7 @@ namespace status
// Returns true when the `in_sync` flag currently holds 1.
// NOTE(review): the two return statements look like the old and new versions of the same line
// from a rendered diff (markers stripped); as written, only the first one is reachable - confirm
// against the real file.
const bool is_in_sync()
{
return in_sync == 1;
return in_sync.load() == 1;
}
const ledger::ledger_record get_last_ledger()
@@ -93,6 +103,14 @@ namespace status
{
std::unique_lock lock(peers_mutex);
peers = std::move(updated_peers);
if (peers.size() != peer_count)
{
peer_count = peers.size();
if (conf::cfg.health.connectivity_stats)
event_queue.try_enqueue(connectivity_health{peer_count.load(), weakly_connected.load()});
}
}
const std::set<conf::peer_ip_port> get_peers()
@@ -101,4 +119,90 @@ namespace status
return peers;
}
/**
 * Returns the current value of the atomic peer counter.
 * Reads only the atomic, so the peers mutex is not taken.
 */
const size_t get_peers_count()
{
    const size_t current_count = peer_count.load();
    return current_count;
}
/**
 * Updates the weakly-connected flag.
 * If the flag changes and connectivity stats are enabled in the config, a connectivity_health
 * event holding the current peer count and the flag is enqueued.
 * @param is_weakly_connected The new value for the flag.
 */
void set_weakly_connected(const bool is_weakly_connected)
{
    // Nothing to do when the flag already holds the requested value.
    if (weakly_connected.load() == is_weakly_connected)
        return;

    weakly_connected = is_weakly_connected;

    // The change is reported only when connectivity stats are switched on.
    if (!conf::cfg.health.connectivity_stats)
        return;

    event_queue.try_enqueue(connectivity_health{peer_count.load(), weakly_connected.load()});
}
/**
 * Returns the current value of the weakly-connected flag.
 */
const bool get_weakly_connected()
{
    const bool is_weak = weakly_connected.load();
    return is_weak;
}
/**
 * Stores the given value as the available mesh capacity.
 * @param new_capacity The capacity value to store.
 */
void set_available_mesh_capacity(const int16_t new_capacity)
{
    available_mesh_capacity.store(new_capacity);
}
/**
 * Returns the most recently stored available mesh capacity.
 */
const int16_t get_available_mesh_capacity()
{
    const int16_t capacity = available_mesh_capacity.load();
    return capacity;
}
//----- Node health
/**
 * Recomputes the proposal health stats (phealth) from a batch of proposals.
 * Does nothing when proposal stats are disabled in the config.
 *
 * For every proposal two latencies are derived:
 *  - comm latency: recv_timestamp - sent_timestamp (0 if sent is not earlier than recv).
 *  - read latency: now - recv_timestamp (0 if recv is not earlier than now).
 * The min, max and integer average of both are stored together with the batch size.
 * For an empty batch the stats are reset (min fields stay at UINT64_MAX, everything else 0).
 *
 * @param proposals The batch of proposals to summarise.
 */
void report_proposal_batch(const std::list<p2p::proposal> &proposals)
{
    if (!conf::cfg.health.proposal_stats)
        return;

    // Start from a clean slate so stats of a previous batch never leak into this one.
    phealth.comm_latency_min = UINT64_MAX;
    phealth.comm_latency_max = 0;
    phealth.comm_latency_avg = 0;
    phealth.read_latency_min = UINT64_MAX;
    phealth.read_latency_max = 0;
    phealth.read_latency_avg = 0;
    phealth.batch_size = proposals.size();

    // Avoid dividing by zero below.
    if (phealth.batch_size == 0)
        return;

    const uint64_t now = util::get_epoch_milliseconds();
    uint64_t total_comm_latency = 0;
    uint64_t total_read_latency = 0;

    for (const p2p::proposal &p : proposals)
    {
        const uint64_t comm_latency = (p.sent_timestamp < p.recv_timestamp) ? (p.recv_timestamp - p.sent_timestamp) : 0;

        // Clamp to 0 the same way as comm_latency. Without the guard, a recv_timestamp that is ahead of
        // 'now' would wrap the unsigned subtraction to a huge value and corrupt the max/avg stats.
        const uint64_t read_latency = (p.recv_timestamp < now) ? (now - p.recv_timestamp) : 0;

        total_comm_latency += comm_latency;
        total_read_latency += read_latency;

        if (comm_latency < phealth.comm_latency_min)
            phealth.comm_latency_min = comm_latency;
        if (comm_latency > phealth.comm_latency_max)
            phealth.comm_latency_max = comm_latency;

        if (read_latency < phealth.read_latency_min)
            phealth.read_latency_min = read_latency;
        if (read_latency > phealth.read_latency_max)
            phealth.read_latency_max = read_latency;
    }

    phealth.comm_latency_avg = total_comm_latency / phealth.batch_size;
    phealth.read_latency_avg = total_read_latency / phealth.batch_size;
}
/**
 * Enqueues the current proposal health stats (phealth) onto the event queue.
 * Nothing is enqueued when proposal stats are disabled in the config.
 */
void emit_proposal_health()
{
    if (conf::cfg.health.proposal_stats)
        event_queue.try_enqueue(phealth);
}
} // namespace status