mirror of
https://github.com/EvernodeXRPL/hpcore.git
synced 2026-04-29 15:37:59 +00:00
Large cluster optimizations. (#348)
* Added sync log to streamer. * Fixed ledger closing attempt while syncing. * Added diagnostic contract. * Reset to stage 0 on unreliable votes. * Reduced peer msg age threshold. * Added health tracking. * Weakly-connected detection improvement. * Increased version 0.5.1. * Improved client lib server version check. * Added health logging support to text client. * Added weakly connected status in status response. * Increased max peers limits when serializing. * Local docker cluster manual ip. * Updated vultr script vm region order. * Sync status reporting improvement. * Added milliseconds to logging.
This commit is contained in:
@@ -22,7 +22,7 @@
|
||||
TextDecoder = util.TextDecoder;
|
||||
}
|
||||
|
||||
const supportedHpVersion = "0.5.0";
|
||||
const supportedHpVersion = "0.5.";
|
||||
const serverChallengeSize = 16;
|
||||
const outputValidationPassThreshold = 0.8;
|
||||
const connectionCheckIntervalMs = 1000;
|
||||
@@ -52,14 +52,16 @@
|
||||
contractReadResponse: "contract_read_response",
|
||||
connectionChange: "connection_change",
|
||||
unlChange: "unl_change",
|
||||
ledgerEvent: "ledger_event"
|
||||
ledgerEvent: "ledger_event",
|
||||
healthEvent: "health_event"
|
||||
}
|
||||
Object.freeze(events);
|
||||
|
||||
/*--- Included in public interface. ---*/
|
||||
const notificationChannels = {
|
||||
unlChange: "unl_change",
|
||||
ledgerEvent: "ledger_event"
|
||||
ledgerEvent: "ledger_event",
|
||||
healthEvent: "health_event"
|
||||
}
|
||||
Object.freeze(notificationChannels);
|
||||
|
||||
@@ -181,6 +183,7 @@
|
||||
// Subscribe for unl changes if we have to maintain the trusted server key checks.
|
||||
subscriptions[notificationChannels.unlChange] = trustedKeysLookup ? true : false;
|
||||
subscriptions[notificationChannels.ledgerEvent] = false;
|
||||
subscriptions[notificationChannels.healthEvent] = false;
|
||||
|
||||
let status = 0; //0:none, 1:connected, 2:closed
|
||||
|
||||
@@ -548,8 +551,8 @@
|
||||
|
||||
if (connectionStatus == 0 && m.type == "user_challenge" && m.hp_version && m.contract_id) {
|
||||
|
||||
if (m.hp_version != supportedHpVersion) {
|
||||
liblog(1, `Incompatible Hot Pocket server version. Expected:${supportedHpVersion} Got:${m.hp_version}`);
|
||||
if (!m.hp_version.startsWith(supportedHpVersion)) {
|
||||
liblog(1, `Incompatible Hot Pocket server version. Expected:${supportedHpVersion}* Got:${m.hp_version}`);
|
||||
return false;
|
||||
}
|
||||
else if (!m.contract_id) {
|
||||
@@ -665,6 +668,7 @@
|
||||
contractExecutionEnabled: m.contract_execution_enabled,
|
||||
readRequestsEnabled: m.read_requests_enabled,
|
||||
isFullHistoryNode: m.is_full_history_node,
|
||||
weaklyConnected: m.weakly_connected,
|
||||
currentUnl: m.current_unl.map(u => msgHelper.deserializeValue(u)),
|
||||
peers: m.peers
|
||||
});
|
||||
@@ -691,6 +695,10 @@
|
||||
ev.inSync = m.in_sync;
|
||||
emitter.emit(events.ledgerEvent, ev);
|
||||
}
|
||||
else if (m.type == "health_event") {
|
||||
const ev = msgHelper.deserializeHealthEvent(m);
|
||||
emitter.emit(events.healthEvent, ev);
|
||||
}
|
||||
else if (m.type == "ledger_query_result") {
|
||||
const resolver = ledgerQueryResolvers[m.reply_for];
|
||||
if (resolver) {
|
||||
@@ -1142,6 +1150,24 @@
|
||||
outputHash: this.deserializeValue(l.output_hash)
|
||||
}
|
||||
}
|
||||
|
||||
this.deserializeHealthEvent = (m) => {
|
||||
if (m.event === "proposal") {
|
||||
return {
|
||||
event: m.event,
|
||||
commLatency: m.comm_latency,
|
||||
readLatency: m.read_latency,
|
||||
batchSize: m.batch_size
|
||||
}
|
||||
}
|
||||
else if (m.event === "connectivity") {
|
||||
return {
|
||||
event: m.event,
|
||||
peerCount: m.peer_count,
|
||||
weaklyConnected: m.weakly_connected
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function hexToUint8Array(hexString) {
|
||||
|
||||
@@ -54,12 +54,16 @@ async function main() {
|
||||
|
||||
// This will get fired when contract sends outputs.
|
||||
hpc.on(HotPocket.events.contractOutput, (r) => {
|
||||
r.outputs.forEach(o => console.log(`Output (ledger:${r.ledgerSeqNo})>> ${o}`));
|
||||
r.outputs.forEach(o => {
|
||||
const outputLog = o.length <= 512 ? o : `[Big output (${o.length / 1024} KB)]`;
|
||||
console.log(`Output (ledger:${r.ledgerSeqNo})>> ${outputLog}`);
|
||||
});
|
||||
})
|
||||
|
||||
// This will get fired when contract sends a read response.
|
||||
hpc.on(HotPocket.events.contractReadResponse, (response) => {
|
||||
console.log("Read response>> " + response);
|
||||
hpc.on(HotPocket.events.contractReadResponse, (o) => {
|
||||
const outputLog = o.length <= 512 ? o : `[Big output (${o.length / 1024} KB)]`;
|
||||
console.log("Read response>> " + outputLog);
|
||||
})
|
||||
|
||||
// This will get fired when the unl public key list changes.
|
||||
@@ -73,6 +77,11 @@ async function main() {
|
||||
console.log(ev);
|
||||
})
|
||||
|
||||
// This will get fired when any health event occurs (proposal stats, connectivity changes...).
|
||||
hpc.on(HotPocket.events.healthEvent, (ev) => {
|
||||
console.log(ev);
|
||||
})
|
||||
|
||||
// Establish HotPocket connection.
|
||||
if (!await hpc.connect()) {
|
||||
console.log('Connection failed.');
|
||||
@@ -83,6 +92,7 @@ async function main() {
|
||||
// After connecting, we can subscribe to events from the HotPocket node.
|
||||
// await hpc.subscribe(HotPocket.notificationChannels.unlChange);
|
||||
// await hpc.subscribe(HotPocket.notificationChannels.ledgerEvent);
|
||||
// await hpc.subscribe(HotPocket.notificationChannels.healthEvent);
|
||||
|
||||
// start listening for stdin
|
||||
const rl = readline.createInterface({
|
||||
@@ -110,7 +120,25 @@ async function main() {
|
||||
hpc.getLedgerBySeqNo(parseInt(inp.substr(7)), true, true)
|
||||
.then(result => console.log(result));
|
||||
}
|
||||
else if (inp.startsWith("health ")) {
|
||||
if (inp.endsWith("on"))
|
||||
hpc.subscribe(HotPocket.notificationChannels.healthEvent);
|
||||
else if (inp.endsWith("off"))
|
||||
hpc.unsubscribe(HotPocket.notificationChannels.healthEvent);
|
||||
}
|
||||
else if (inp === "stat") {
|
||||
hpc.getStatus().then(stat => console.log(stat));
|
||||
}
|
||||
else {
|
||||
|
||||
if (inp.startsWith("upload ")) {
|
||||
const size = parseInt(inp.split(" ")[1]);
|
||||
if (!isNaN(size)) {
|
||||
inp = "A".repeat(size * 1024 * 1024);
|
||||
console.log("Uploading " + size + " MB payload...");
|
||||
}
|
||||
}
|
||||
|
||||
hpc.submitContractInput(inp).then(input => {
|
||||
// console.log(input.hash);
|
||||
input.submissionStatus.then(s => {
|
||||
|
||||
138
examples/nodejs_contract/diagnostic_contract.js
Normal file
138
examples/nodejs_contract/diagnostic_contract.js
Normal file
@@ -0,0 +1,138 @@
|
||||
const HotPocket = require("./hp-contract-lib");
|
||||
const fs = require('fs').promises;
|
||||
var seedrandom = require('seedrandom');
|
||||
|
||||
const filename = "file.dat";
|
||||
const autofilePrefix = "autofile";
|
||||
const autofileSize = 1 * 1024 * 1024;
|
||||
|
||||
const diagnosticContract = async (ctx) => {
|
||||
|
||||
// Collection of per-user promises to wait for. Each promise completes when inputs for that user is processed.
|
||||
const userHandlers = [];
|
||||
|
||||
for (const user of ctx.users.list()) {
|
||||
|
||||
// For each user we add a promise to list of promises.
|
||||
userHandlers.push(new Promise(async (resolve) => {
|
||||
|
||||
// The contract need to ensure that all outputs for a particular user is emitted
|
||||
// in deterministic order. Hence, we are processing all inputs for each user sequentially.
|
||||
for (const input of user.inputs) {
|
||||
|
||||
const buf = await ctx.users.read(input);
|
||||
const parts = buf.toString().split(" ");
|
||||
const mode = parts[0];
|
||||
const data = parts[1];
|
||||
let output = null;
|
||||
|
||||
if (mode === "status") {
|
||||
output = "Hot Pocket diagnostic contract is running.";
|
||||
}
|
||||
else if (mode === "file") {
|
||||
const param = parseInt(data);
|
||||
const stat = await fs.stat(filename).catch(() => { });
|
||||
|
||||
if (isNaN(param)) {
|
||||
if (!stat)
|
||||
output = "File does not exist.";
|
||||
else
|
||||
output = "Current size: " + stat.size / (1024 * 1024) + " MB";
|
||||
}
|
||||
else {
|
||||
if (param == 0) {
|
||||
if (stat) {
|
||||
await fs.unlink(filename);
|
||||
output = "Deleted file.";
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!stat)
|
||||
await fs.writeFile(filename, "Initial");
|
||||
|
||||
await fs.truncate(filename, param * 1024 * 1024);
|
||||
output = "Updated file size to " + param + " MB";
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mode === "files") {
|
||||
const param = parseInt(data);
|
||||
const autofiles = await (await fs.readdir(".")).filter(f => f.startsWith(autofilePrefix));
|
||||
|
||||
if (isNaN(param)) {
|
||||
output = autofiles.length + " autofiles found.";
|
||||
}
|
||||
else {
|
||||
if (param == 0) {
|
||||
for (file of autofiles) {
|
||||
await fs.unlink(file);
|
||||
}
|
||||
output = autofiles.length + " autofiles deleted.";
|
||||
}
|
||||
else {
|
||||
const content = "A".repeat(autofileSize);
|
||||
for (let i = (autofiles.length + 1); i <= (autofiles.length + param); i++) {
|
||||
await fs.writeFile(autofilePrefix + i, content);
|
||||
}
|
||||
output = param + " new autofiles created. Total: " + (autofiles.length + param);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mode === "download") {
|
||||
const param = parseFloat(data);
|
||||
if (!isNaN(param)) {
|
||||
output = "A".repeat(param * 1024 * 1024);
|
||||
}
|
||||
}
|
||||
else if (mode === "roundtime") {
|
||||
const param = parseInt(data);
|
||||
if (!isNaN(param)) {
|
||||
if (param >= 100) {
|
||||
const config = await ctx.getConfig();
|
||||
config.roundtime = param;
|
||||
await ctx.updateConfig(config)
|
||||
output = "Updated Roundtime to " + config.roundtime;
|
||||
}
|
||||
}
|
||||
else {
|
||||
const config = await ctx.getConfig();
|
||||
output = "Roundtime: " + config.roundtime;
|
||||
}
|
||||
}
|
||||
else {
|
||||
output = "Received unrecognized input of length " + buf.length;
|
||||
}
|
||||
|
||||
if (output)
|
||||
await user.send(output);
|
||||
}
|
||||
|
||||
// The promise gets completed when all inputs for this user are processed.
|
||||
resolve();
|
||||
}));
|
||||
}
|
||||
|
||||
// Wait until all user promises are complete.
|
||||
await Promise.all(userHandlers);
|
||||
|
||||
// Modify random file bytes (if file exists)
|
||||
{
|
||||
const stat = await fs.stat(filename).catch(() => { });
|
||||
if (stat) {
|
||||
const rng = seedrandom(ctx.lcl_hash);
|
||||
const fh = await fs.open(filename, 'r+');
|
||||
|
||||
for (let i = 0; i < 3; i++) {
|
||||
const pos = rng() * (stat.size - 50);
|
||||
const buf = ctx.lcl_hash.substr(i * 10, 10);
|
||||
await fh.write(buf, pos);
|
||||
}
|
||||
|
||||
await fh.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const hpc = new HotPocket.Contract();
|
||||
hpc.init(diagnosticContract);
|
||||
5
examples/nodejs_contract/package-lock.json
generated
5
examples/nodejs_contract/package-lock.json
generated
@@ -34,6 +34,11 @@
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
|
||||
"integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="
|
||||
},
|
||||
"seedrandom": {
|
||||
"version": "3.0.5",
|
||||
"resolved": "https://registry.npmjs.org/seedrandom/-/seedrandom-3.0.5.tgz",
|
||||
"integrity": "sha512-8OwmbklUNzwezjGInmZ+2clQmExQPvomqjL7LFqOYqtmuxRgQYqOD3mHaU+MvZn5FLUeVxVfQjwLZW/n/JFuqg=="
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
{
|
||||
"scripts": {
|
||||
"build-echo": "ncc build echo_contract.js -o dist/echo-contract",
|
||||
"build-file": "ncc build file_contract.js -o dist/file-contract"
|
||||
"build-file": "ncc build file_contract.js -o dist/file-contract",
|
||||
"build-diag": "ncc build diagnostic_contract.js -o dist/diagnostic-contract"
|
||||
},
|
||||
"dependencies": {
|
||||
"bson": "4.0.4"
|
||||
"bson": "4.0.4",
|
||||
"seedrandom": "3.0.5"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -256,6 +256,12 @@ namespace conf
|
||||
size_t max_file_count = 0; // Max no. of log files to keep.
|
||||
};
|
||||
|
||||
struct health_config
|
||||
{
|
||||
bool proposal_stats = false;
|
||||
bool connectivity_stats = false;
|
||||
};
|
||||
|
||||
// Holds all the config values.
|
||||
struct hp_config
|
||||
{
|
||||
@@ -265,6 +271,7 @@ namespace conf
|
||||
user_config user;
|
||||
hpfs_config hpfs;
|
||||
log_config log;
|
||||
health_config health; // For debugging only. Not included in the config file.
|
||||
};
|
||||
|
||||
// Global contract context struct exposed to the application.
|
||||
|
||||
@@ -92,6 +92,9 @@ namespace consensus
|
||||
break;
|
||||
}
|
||||
|
||||
if (ctx.stage == 0)
|
||||
status::emit_proposal_health();
|
||||
|
||||
if (consensus() == -1)
|
||||
{
|
||||
LOG_ERROR << "Consensus thread exited due to an error.";
|
||||
@@ -119,7 +122,7 @@ namespace consensus
|
||||
revise_candidate_proposals(ctx.vote_status == VOTES_SYNCED);
|
||||
|
||||
// Attempt to close the ledger after scanning last round stage 3 proposals.
|
||||
if (ctx.stage == 0)
|
||||
if (ctx.stage == 0 && ctx.vote_status == VOTES_SYNCED)
|
||||
attempt_ledger_close();
|
||||
|
||||
// Get current lcl, state, patch, primary shard and raw shard info.
|
||||
@@ -156,7 +159,7 @@ namespace consensus
|
||||
const size_t unl_count = unl::count();
|
||||
vote_counter votes;
|
||||
|
||||
// Check whether we are in sync with other nodes using proposals.
|
||||
// Check whether we are in sync with other nodes using the proposals we received.
|
||||
{
|
||||
int new_sync_status = check_sync_status(unl_count, votes, lcl_id);
|
||||
|
||||
@@ -174,7 +177,7 @@ namespace consensus
|
||||
}
|
||||
|
||||
// Update the node's status if we went from in-sync to not-in-sync. We will report back as being in-sync only when ledger is created.
|
||||
if (ctx.vote_status == VOTES_SYNCED && new_sync_status != VOTES_SYNCED)
|
||||
if (new_sync_status == VOTES_DESYNC)
|
||||
status::sync_status_changed(false);
|
||||
|
||||
// This marks entering into a new sync cycle.
|
||||
@@ -195,9 +198,10 @@ namespace consensus
|
||||
|
||||
if (ctx.vote_status == VOTES_UNRELIABLE)
|
||||
{
|
||||
ctx.stage = 0;
|
||||
ctx.unreliable_votes_attempts++;
|
||||
|
||||
// If we get too many consecative unreliable vote rounds, then we perform time config sniffing just in case the unreliable votes
|
||||
// If we get too many consecutive unreliable vote rounds, then we perform time config sniffing just in case the unreliable votes
|
||||
// are caused because our roundtime config information is different from other nodes.
|
||||
if (ctx.unreliable_votes_attempts >= MAX_UNRELIABLE_VOTES_ATTEMPTS)
|
||||
{
|
||||
@@ -208,21 +212,21 @@ namespace consensus
|
||||
else
|
||||
{
|
||||
ctx.unreliable_votes_attempts = 0;
|
||||
}
|
||||
|
||||
if (ctx.vote_status == VOTES_SYNCED)
|
||||
{
|
||||
// If we are in sync, vote and broadcast the winning votes to next stage.
|
||||
const p2p::proposal p = create_stage123_proposal(votes, unl_count, state_hash, patch_hash, last_primary_shard_id, last_raw_shard_id);
|
||||
broadcast_proposal(p);
|
||||
|
||||
// This marks the moment we finish a sync cycle. We are in stage 1 and we detect that our votes are in sync.
|
||||
if (ctx.stage == 1 && ctx.sync_ongoing)
|
||||
if (ctx.vote_status == VOTES_SYNCED)
|
||||
{
|
||||
// Clear any sync recovery pending state if we enter stage 1 while being in sync.
|
||||
ctx.sync_ongoing = false;
|
||||
status::sync_status_changed(true);
|
||||
LOG_DEBUG << "Sync recovery completed.";
|
||||
// If we are in sync, vote and broadcast the winning votes to next stage.
|
||||
const p2p::proposal p = create_stage123_proposal(votes, unl_count, state_hash, patch_hash, last_primary_shard_id, last_raw_shard_id);
|
||||
broadcast_proposal(p);
|
||||
|
||||
// This marks the moment we finish a sync cycle. We are in stage 1 and we just detected that our votes are in sync.
|
||||
if (ctx.stage == 1 && ctx.sync_ongoing)
|
||||
{
|
||||
// Clear any sync recovery pending state if we enter stage 1 while being in sync.
|
||||
ctx.sync_ongoing = false;
|
||||
status::sync_status_changed(true);
|
||||
LOG_DEBUG << "Sync recovery completed.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -447,7 +451,7 @@ namespace consensus
|
||||
/**
|
||||
* Moves proposals collected from the network into candidate proposals and
|
||||
* cleans up any outdated proposals from the candidate set.
|
||||
* @param in_sync Whether the node is currently on sync or not. We relax the pruning criteria if we are not in sync.
|
||||
* @param in_sync Whether the node is currently in sync or not. We relax the pruning criteria if we are not in sync.
|
||||
*/
|
||||
void revise_candidate_proposals(const bool in_sync)
|
||||
{
|
||||
@@ -459,6 +463,8 @@ namespace consensus
|
||||
collected_proposals.splice(collected_proposals.end(), p2p::ctx.collected_msgs.proposals);
|
||||
}
|
||||
|
||||
status::report_proposal_batch(collected_proposals);
|
||||
|
||||
// Prune incoming proposals if they are older than existing proposal from same node.
|
||||
{
|
||||
auto itr = collected_proposals.begin();
|
||||
|
||||
@@ -43,15 +43,13 @@ namespace hplog
|
||||
plog::util::localtime_s(&t, &record.getTime().time); // local time
|
||||
|
||||
plog::util::nostringstream ss;
|
||||
ss << t.tm_year + 1900 << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_mon + 1 << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_mday << PLOG_NSTR(" ");
|
||||
ss << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_hour << PLOG_NSTR(":")
|
||||
<< std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_min << PLOG_NSTR(":")
|
||||
<< std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_sec << PLOG_NSTR(" ");
|
||||
// Uncomment for millseconds.
|
||||
// << std::setfill(PLOG_NSTR('0')) << std::setw(3) << record.getTime().millitm << PLOG_NSTR(" ");
|
||||
|
||||
ss << PLOG_NSTR("[") << severity_to_string(record.getSeverity()) << PLOG_NSTR("][hpc] ");
|
||||
ss << record.getMessage() << PLOG_NSTR("\n");
|
||||
ss << t.tm_year + 1900 << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_mon + 1 << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_mday
|
||||
<< PLOG_NSTR(" ") << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_hour
|
||||
<< PLOG_NSTR(":") << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_min
|
||||
<< PLOG_NSTR(":") << std::setfill(PLOG_NSTR('0')) << std::setw(2) << t.tm_sec
|
||||
<< PLOG_NSTR(".") << std::setfill(PLOG_NSTR('0')) << std::setw(3) << record.getTime().millitm // Uncomment for millseconds.
|
||||
<< PLOG_NSTR(" ") << PLOG_NSTR("[") << severity_to_string(record.getSeverity())
|
||||
<< PLOG_NSTR("][hpc] ") << record.getMessage() << PLOG_NSTR("\n");
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ namespace msg::usrmsg::bson
|
||||
* "contract_execution_enabled": true | false,
|
||||
* "read_requests_enabled": true | false,
|
||||
* "is_full_history_node": true | false,
|
||||
* "weakly_connected": true | false,
|
||||
* "current_unl": [ <ed prefixed pubkey>, ... ],
|
||||
* "peers": [ "ip:port", ... ]
|
||||
* }
|
||||
@@ -33,6 +34,7 @@ namespace msg::usrmsg::bson
|
||||
const util::sequence_hash lcl_id = status::get_lcl_id();
|
||||
const std::set<std::string> unl = status::get_unl();
|
||||
const bool in_sync = status::is_in_sync();
|
||||
const bool weakly_connected = status::get_weakly_connected();
|
||||
|
||||
jsoncons::bson::bson_bytes_encoder encoder(msg);
|
||||
encoder.begin_object();
|
||||
@@ -54,6 +56,8 @@ namespace msg::usrmsg::bson
|
||||
encoder.bool_value(conf::cfg.user.concurrent_read_requests != 0);
|
||||
encoder.key(msg::usrmsg::FLD_IS_FULL_HISTORY_NODE);
|
||||
encoder.bool_value(conf::cfg.node.history == conf::HISTORY::FULL);
|
||||
encoder.key(msg::usrmsg::FLD_WEAKLY_CONNECTED);
|
||||
encoder.bool_value(weakly_connected);
|
||||
|
||||
encoder.key(msg::usrmsg::FLD_CURRENT_UNL);
|
||||
encoder.begin_array();
|
||||
@@ -315,6 +319,70 @@ namespace msg::usrmsg::bson
|
||||
encoder.flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs health stat message.
|
||||
* @param msg Buffer to construct the generated bson message into.
|
||||
* Message format:
|
||||
* {
|
||||
* "type": "health_event",
|
||||
* "proposals": {
|
||||
* "comm_latency": {min:0, max:0, avg:0},
|
||||
* "read_latency": {min:0, max:0, avg:0}
|
||||
* "batch_size": 0
|
||||
* },
|
||||
* "peer_count": 0,
|
||||
* "weakly_connected": true | false
|
||||
* }
|
||||
* @param ev Current health information.
|
||||
*/
|
||||
void create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev)
|
||||
{
|
||||
jsoncons::bson::bson_bytes_encoder encoder(msg);
|
||||
encoder.begin_object();
|
||||
encoder.key(msg::usrmsg::FLD_TYPE);
|
||||
encoder.string_value(msg::usrmsg::MSGTYPE_HEALTH_EVENT);
|
||||
encoder.key(msg::usrmsg::FLD_EVENT);
|
||||
|
||||
if (ev.index() == 0)
|
||||
{
|
||||
const status::proposal_health &phealth = std::get<status::proposal_health>(ev);
|
||||
|
||||
encoder.string_value(msg::usrmsg::HEALTH_EVENT_PROPOSAL);
|
||||
encoder.key(msg::usrmsg::FLD_COMM_LATENCY);
|
||||
encoder.begin_object();
|
||||
encoder.key(msg::usrmsg::FLD_MIN);
|
||||
encoder.uint64_value(phealth.comm_latency_min);
|
||||
encoder.key(msg::usrmsg::FLD_MAX);
|
||||
encoder.uint64_value(phealth.comm_latency_max);
|
||||
encoder.key(msg::usrmsg::FLD_AVG);
|
||||
encoder.uint64_value(phealth.comm_latency_avg);
|
||||
encoder.end_object();
|
||||
encoder.key(msg::usrmsg::FLD_READ_LATENCY);
|
||||
encoder.begin_object();
|
||||
encoder.key(msg::usrmsg::FLD_MIN);
|
||||
encoder.uint64_value(phealth.read_latency_min);
|
||||
encoder.key(msg::usrmsg::FLD_MAX);
|
||||
encoder.uint64_value(phealth.read_latency_max);
|
||||
encoder.key(msg::usrmsg::FLD_AVG);
|
||||
encoder.uint64_value(phealth.read_latency_avg);
|
||||
encoder.end_object();
|
||||
encoder.key(msg::usrmsg::FLD_BATCH_SIZE);
|
||||
encoder.uint64_value(phealth.batch_size);
|
||||
}
|
||||
else if (ev.index() == 1)
|
||||
{
|
||||
const status::connectivity_health &conn = std::get<status::connectivity_health>(ev);
|
||||
encoder.string_value(msg::usrmsg::HEALTH_EVENT_CONNECTIVITY);
|
||||
encoder.key(msg::usrmsg::FLD_PEER_COUNT);
|
||||
encoder.uint64_value(conn.peer_count);
|
||||
encoder.key(msg::usrmsg::FLD_WEAKLY_CONNECTED);
|
||||
encoder.bool_value(conn.is_weakly_connected);
|
||||
}
|
||||
|
||||
encoder.end_object();
|
||||
encoder.flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a ledger query response.
|
||||
* @param msg Buffer to construct the generated bson message string into.
|
||||
@@ -533,6 +601,11 @@ namespace msg::usrmsg::bson
|
||||
{
|
||||
channel = usr::NOTIFICATION_CHANNEL::UNL_CHANGE;
|
||||
}
|
||||
else if (d[msg::usrmsg::FLD_CHANNEL] == msg::usrmsg::MSGTYPE_HEALTH_EVENT &&
|
||||
(conf::cfg.health.proposal_stats || conf::cfg.health.connectivity_stats))
|
||||
{
|
||||
channel = usr::NOTIFICATION_CHANNEL::HEALTH_STAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG << "User subscription request invalid channel.";
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "../../util/merkle_hash_tree.hpp"
|
||||
#include "../../ledger/ledger_query.hpp"
|
||||
#include "../../usr/user_common.hpp"
|
||||
#include "../../status.hpp"
|
||||
|
||||
namespace msg::usrmsg::bson
|
||||
{
|
||||
@@ -28,6 +29,8 @@ namespace msg::usrmsg::bson
|
||||
|
||||
void create_sync_status_notification(std::vector<uint8_t> &msg, const bool in_sync);
|
||||
|
||||
void create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev);
|
||||
|
||||
void create_ledger_query_response(std::vector<uint8_t> &msg, std::string_view reply_for,
|
||||
const ledger::query::query_result &result);
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ namespace msg::fbuf::p2pmsg
|
||||
if (session && session->challenge_status == comm::CHALLENGE_STATUS::CHALLENGE_VERIFIED && message.size() <= MAX_SIZE_FOR_TIME_CHECK)
|
||||
{
|
||||
const uint64_t time_now = util::get_epoch_milliseconds();
|
||||
if (p2p_msg->created_on() < (time_now - (conf::cfg.contract.roundtime * 4)))
|
||||
if (p2p_msg->created_on() < (time_now - (conf::cfg.contract.roundtime * 3)))
|
||||
{
|
||||
LOG_DEBUG << "Peer message is too old. type:" << p2p_msg->content_type() << " from:" << (session ? session->display_name() : "");
|
||||
return p2p::peer_message_info{NULL, P2PMsgContent_NONE, 0};
|
||||
|
||||
@@ -143,6 +143,7 @@ namespace msg::usrmsg::json
|
||||
* "contract_execution_enabled": true | false,
|
||||
* "read_requests_enabled": true | false,
|
||||
* "is_full_history_node": true | false,
|
||||
* "weakly_connected": true | false,
|
||||
* "current_unl": [ "<ed prefixed pubkey hex>"", ... ],
|
||||
* "peers": [ "ip:port", ... ]
|
||||
* }
|
||||
@@ -152,6 +153,7 @@ namespace msg::usrmsg::json
|
||||
const util::sequence_hash lcl_id = status::get_lcl_id();
|
||||
const std::set<std::string> unl = status::get_unl();
|
||||
const bool in_sync = status::is_in_sync();
|
||||
const bool weakly_connected = status::get_weakly_connected();
|
||||
|
||||
msg.reserve(1024);
|
||||
msg += "{\"";
|
||||
@@ -191,6 +193,11 @@ namespace msg::usrmsg::json
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += conf::cfg.node.history == conf::HISTORY::FULL ? STR_TRUE : STR_FALSE;
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_WEAKLY_CONNECTED;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += weakly_connected ? STR_TRUE : STR_FALSE;
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
|
||||
msg += msg::usrmsg::FLD_CURRENT_UNL;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += OPEN_SQR_BRACKET;
|
||||
@@ -546,6 +553,97 @@ namespace msg::usrmsg::json
|
||||
msg += "}";
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs health stat message.
|
||||
* @param msg Buffer to construct the generated json message string into.
|
||||
* Message format:
|
||||
* {
|
||||
* "type": "health_event",
|
||||
* "event": "proposal" | "connectivity",
|
||||
*
|
||||
* // proposal
|
||||
* "comm_latency": {min:0, max:0, avg:0},
|
||||
* "read_latency": {min:0, max:0, avg:0}
|
||||
* "batch_size": 0
|
||||
*
|
||||
* // connectivity
|
||||
* "peer_count": 0,
|
||||
* "weakly_connected": true | false
|
||||
* }
|
||||
* @param ev Current health information.
|
||||
*/
|
||||
void create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev)
|
||||
{
|
||||
msg.reserve(128);
|
||||
msg += "{\"";
|
||||
msg += msg::usrmsg::FLD_TYPE;
|
||||
msg += SEP_COLON;
|
||||
msg += msg::usrmsg::MSGTYPE_HEALTH_EVENT;
|
||||
msg += SEP_COMMA;
|
||||
msg += msg::usrmsg::FLD_EVENT;
|
||||
msg += SEP_COLON;
|
||||
|
||||
if (ev.index() == 0)
|
||||
{
|
||||
const status::proposal_health &phealth = std::get<status::proposal_health>(ev);
|
||||
|
||||
msg += msg::usrmsg::HEALTH_EVENT_PROPOSAL;
|
||||
msg += SEP_COMMA;
|
||||
msg += msg::usrmsg::FLD_COMM_LATENCY;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += "{\"";
|
||||
msg += msg::usrmsg::FLD_MIN;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.comm_latency_min);
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_MAX;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.comm_latency_max);
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_AVG;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.comm_latency_avg);
|
||||
msg += "}";
|
||||
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_READ_LATENCY;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += "{\"";
|
||||
msg += msg::usrmsg::FLD_MIN;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.read_latency_min);
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_MAX;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.read_latency_max);
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_AVG;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.read_latency_avg);
|
||||
msg += "}";
|
||||
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_BATCH_SIZE;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(phealth.batch_size);
|
||||
}
|
||||
else if (ev.index() == 1)
|
||||
{
|
||||
const status::connectivity_health &conn = std::get<status::connectivity_health>(ev);
|
||||
msg += msg::usrmsg::HEALTH_EVENT_CONNECTIVITY;
|
||||
msg += SEP_COMMA;
|
||||
msg += msg::usrmsg::FLD_PEER_COUNT;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += std::to_string(conn.peer_count);
|
||||
msg += SEP_COMMA_NOQUOTE;
|
||||
msg += msg::usrmsg::FLD_WEAKLY_CONNECTED;
|
||||
msg += SEP_COLON_NOQUOTE;
|
||||
msg += conn.is_weakly_connected ? STR_TRUE : STR_FALSE;
|
||||
}
|
||||
|
||||
msg += "}";
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a ledger query response.
|
||||
* @param msg Buffer to construct the generated json message string into.
|
||||
@@ -892,6 +990,11 @@ namespace msg::usrmsg::json
|
||||
{
|
||||
channel = usr::NOTIFICATION_CHANNEL::UNL_CHANGE;
|
||||
}
|
||||
else if (d[msg::usrmsg::FLD_CHANNEL] == msg::usrmsg::MSGTYPE_HEALTH_EVENT &&
|
||||
(conf::cfg.health.proposal_stats || conf::cfg.health.connectivity_stats))
|
||||
{
|
||||
channel = usr::NOTIFICATION_CHANNEL::HEALTH_STAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG << "User subscription request invalid channel.";
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "../../util/merkle_hash_tree.hpp"
|
||||
#include "../../ledger/ledger_query.hpp"
|
||||
#include "../../usr/user_common.hpp"
|
||||
#include "../../status.hpp"
|
||||
|
||||
namespace msg::usrmsg::json
|
||||
{
|
||||
@@ -32,6 +33,8 @@ namespace msg::usrmsg::json
|
||||
|
||||
void create_sync_status_notification(std::vector<uint8_t> &msg, const bool in_sync);
|
||||
|
||||
void create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev);
|
||||
|
||||
void create_ledger_query_response(std::vector<uint8_t> &msg, std::string_view reply_for,
|
||||
const ledger::query::query_result &result);
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace msg::usrmsg
|
||||
constexpr size_t CHALLENGE_LEN = 16;
|
||||
|
||||
// Max no. of known peers to return in get status.
|
||||
constexpr const size_t MAX_KNOWN_PEERS_INFO = 16;
|
||||
constexpr const size_t MAX_KNOWN_PEERS_INFO = 100;
|
||||
|
||||
// Message field names
|
||||
constexpr const char *FLD_HP_VERSION = "hp_version";
|
||||
@@ -65,6 +65,14 @@ namespace msg::usrmsg
|
||||
constexpr const char *FLD_LEDGER = "ledger";
|
||||
constexpr const char *FLD_CHANNEL = "channel";
|
||||
constexpr const char *FLD_ENABLED = "enabled";
|
||||
constexpr const char *FLD_COMM_LATENCY = "comm_latency";
|
||||
constexpr const char *FLD_READ_LATENCY = "read_latency";
|
||||
constexpr const char *FLD_BATCH_SIZE = "batch_size";
|
||||
constexpr const char *FLD_MIN = "min";
|
||||
constexpr const char *FLD_MAX = "max";
|
||||
constexpr const char *FLD_AVG = "avg";
|
||||
constexpr const char *FLD_PEER_COUNT = "peer_count";
|
||||
constexpr const char *FLD_WEAKLY_CONNECTED = "weakly_connected";
|
||||
|
||||
// Message types
|
||||
constexpr const char *MSGTYPE_USER_CHALLENGE = "user_challenge";
|
||||
@@ -81,6 +89,7 @@ namespace msg::usrmsg
|
||||
constexpr const char *MSGTYPE_LCL_RESPONSE = "lcl_response";
|
||||
constexpr const char *MSGTYPE_UNL_CHANGE = "unl_change";
|
||||
constexpr const char *MSGTYPE_LEDGER_EVENT = "ledger_event";
|
||||
constexpr const char *MSGTYPE_HEALTH_EVENT = "health_event";
|
||||
constexpr const char *MSGTYPE_LEDGER_QUERY = "ledger_query";
|
||||
constexpr const char *MSGTYPE_LEDGER_QUERY_RESULT = "ledger_query_result";
|
||||
constexpr const char *MSGTYPE_SUBSCRIPTION = "subscription";
|
||||
@@ -103,7 +112,8 @@ namespace msg::usrmsg
|
||||
constexpr const char *STR_FALSE = "false";
|
||||
constexpr const char *LEDGER_EVENT_LEDGER_CREATED = "ledger_created";
|
||||
constexpr const char *LEDGER_EVENT_SYNC_STATUS = "sync_status";
|
||||
|
||||
constexpr const char *HEALTH_EVENT_PROPOSAL = "proposal";
|
||||
constexpr const char *HEALTH_EVENT_CONNECTIVITY = "connectivity";
|
||||
|
||||
} // namespace msg::usrmsg
|
||||
|
||||
|
||||
@@ -80,6 +80,14 @@ namespace msg::usrmsg
|
||||
busrmsg::create_sync_status_notification(msg, in_sync);
|
||||
}
|
||||
|
||||
void usrmsg_parser::create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev) const
|
||||
{
|
||||
if (protocol == util::PROTOCOL::JSON)
|
||||
jusrmsg::create_health_notification(msg, ev);
|
||||
else
|
||||
busrmsg::create_health_notification(msg, ev);
|
||||
}
|
||||
|
||||
void usrmsg_parser::create_ledger_query_response(std::vector<uint8_t> &msg, std::string_view reply_for,
|
||||
const ledger::query::query_result &result) const
|
||||
{
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "../util/merkle_hash_tree.hpp"
|
||||
#include "../ledger/ledger_query.hpp"
|
||||
#include "../usr/user_common.hpp"
|
||||
#include "../status.hpp"
|
||||
|
||||
namespace msg::usrmsg
|
||||
{
|
||||
@@ -37,6 +38,8 @@ namespace msg::usrmsg
|
||||
|
||||
void create_sync_status_notification(std::vector<uint8_t> &msg, const bool in_sync) const;
|
||||
|
||||
void create_health_notification(std::vector<uint8_t> &msg, const status::health_event &ev) const;
|
||||
|
||||
void create_ledger_query_response(std::vector<uint8_t> &msg, std::string_view reply_for,
|
||||
const ledger::query::query_result &result) const;
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
namespace p2pmsg = msg::fbuf::p2pmsg;
|
||||
|
||||
// Maximum no. of peers that will be persisted back to config upon exit.
|
||||
constexpr size_t MAX_PERSISTED_KNOWN_PEERS = 50;
|
||||
constexpr size_t MAX_PERSISTED_KNOWN_PEERS = 100;
|
||||
|
||||
namespace p2p
|
||||
{
|
||||
@@ -409,8 +409,16 @@ namespace p2p
|
||||
*/
|
||||
void send_known_peer_list(peer_comm_session *session)
|
||||
{
|
||||
const std::vector<peer_properties> &peers = ctx.server->req_known_remotes;
|
||||
|
||||
// Add self to known peer announcement (indicated as blank host address).
|
||||
// peers.push_back(peer_properties{
|
||||
// conf::peer_ip_port{"", conf::cfg.mesh.port},
|
||||
// status::get_available_mesh_capacity(),
|
||||
// util::get_epoch_milliseconds()});
|
||||
|
||||
flatbuffers::FlatBufferBuilder fbuf;
|
||||
p2pmsg::create_msg_from_peer_list_response(fbuf, ctx.server->req_known_remotes, session->known_ipport);
|
||||
p2pmsg::create_msg_from_peer_list_response(fbuf, peers, session->known_ipport);
|
||||
session->send(msg::fbuf::builder_to_string_view(fbuf));
|
||||
}
|
||||
|
||||
@@ -432,7 +440,7 @@ namespace p2p
|
||||
itr->available_capacity = available_capacity;
|
||||
itr->timestamp = timestamp;
|
||||
|
||||
// Sorting the known remote list according to the weight value after updating the peer properties.
|
||||
// Sorting the known remote list according to the weight value after updating the peer properties.
|
||||
sort_known_remotes();
|
||||
}
|
||||
}
|
||||
@@ -452,13 +460,21 @@ namespace p2p
|
||||
/**
|
||||
* Merging the response peer list with the own known peer list.
|
||||
* @param peers Incoming peer list.
|
||||
* @param from The session that sent us the peer list.
|
||||
*/
|
||||
void merge_peer_list(const std::vector<peer_properties> &peers)
|
||||
void merge_peer_list(const std::vector<peer_properties> &peers, const p2p::peer_comm_session &from)
|
||||
{
|
||||
std::scoped_lock<std::mutex> lock(ctx.server->req_known_remotes_mutex);
|
||||
|
||||
for (const peer_properties &peer : peers)
|
||||
{
|
||||
// If the peer address is indicated as empty, that is the entry for the peer who sent us this.
|
||||
// We then fill that up with the host address we see for that peer.
|
||||
// if (peer.ip_port.host_address.empty())
|
||||
// {
|
||||
// peer.ip_port.host_address = from.host_address;
|
||||
// }
|
||||
|
||||
// If the peer is self, we won't add to the known peer list.
|
||||
if (self::ip_port.has_value() && self::ip_port == peer.ip_port)
|
||||
{
|
||||
@@ -519,7 +535,7 @@ namespace p2p
|
||||
* Calculate and retunrns the available capacity.
|
||||
* @returns -1 if available capacity is unlimited otherwise available value.
|
||||
*/
|
||||
int16_t get_available_capacity()
|
||||
int16_t calculate_available_capacity()
|
||||
{
|
||||
// If both max_connections and max_known_connections are configured calculate the capacity.
|
||||
if (conf::cfg.mesh.max_connections != 0 && conf::cfg.mesh.max_known_connections != 0)
|
||||
|
||||
@@ -237,11 +237,11 @@ namespace p2p
|
||||
|
||||
void update_known_peer_available_capacity(const conf::peer_ip_port &ip_port, const int16_t available_capacity, const uint64_t ×tamp);
|
||||
|
||||
void merge_peer_list(const std::vector<peer_properties> &peers);
|
||||
void merge_peer_list(const std::vector<peer_properties> &peers, const p2p::peer_comm_session &from);
|
||||
|
||||
void sort_known_remotes();
|
||||
|
||||
int16_t get_available_capacity();
|
||||
int16_t calculate_available_capacity();
|
||||
|
||||
void update_unl_connections();
|
||||
|
||||
|
||||
@@ -11,8 +11,6 @@
|
||||
namespace p2p
|
||||
{
|
||||
constexpr float WEAKLY_CONNECTED_THRESHOLD = 0.7;
|
||||
// Globally exposed weakly connected status variable.
|
||||
bool is_weakly_connected = false;
|
||||
|
||||
peer_comm_server::peer_comm_server(const uint16_t port, const uint64_t (&metric_thresholds)[5], const uint64_t max_msg_size,
|
||||
const uint64_t max_in_connections, const uint64_t max_in_connections_per_host,
|
||||
@@ -45,24 +43,31 @@ namespace p2p
|
||||
|
||||
uint16_t peer_managing_counter = 0;
|
||||
uint16_t known_connections_counter = 0;
|
||||
uint16_t available_capacity_counter = 0;
|
||||
|
||||
while (!is_shutting_down)
|
||||
{
|
||||
peer_managing_counter++;
|
||||
known_connections_counter++;
|
||||
available_capacity_counter++;
|
||||
|
||||
if (known_connections_counter % 20 == 0)
|
||||
if (known_connections_counter % 40 == 0)
|
||||
{
|
||||
maintain_known_connections();
|
||||
known_connections_counter = 0;
|
||||
}
|
||||
|
||||
// Send available peer capacity if peer max connections is configured.
|
||||
if (conf::cfg.mesh.max_connections != 0)
|
||||
p2p::send_available_capacity_announcement(p2p::get_available_capacity());
|
||||
if (available_capacity_counter % 300 == 0)
|
||||
{
|
||||
status::set_available_mesh_capacity(p2p::calculate_available_capacity());
|
||||
|
||||
// Send available peer capacity if peer max connections is configured.
|
||||
if (conf::cfg.mesh.max_connections != 0)
|
||||
p2p::send_available_capacity_announcement(status::get_available_mesh_capacity());
|
||||
}
|
||||
|
||||
// Start peer list request loop if dynamic peer discovery is enabled.
|
||||
if (conf::cfg.mesh.peer_discovery.enabled && known_remote_count > 0)
|
||||
if (conf::cfg.mesh.peer_discovery.enabled)
|
||||
{
|
||||
// If max known peer connection cap is reached then periodically request peer list from random known peer.
|
||||
// Otherwise frequently request peer list from a random known peer.
|
||||
@@ -193,23 +198,32 @@ namespace p2p
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Check whether the node is weakly connected or strongly connected in every 60 seconds.
|
||||
* Check whether the node is weakly connected or strongly connected.
|
||||
*/
|
||||
void peer_comm_server::detect_if_weakly_connected()
|
||||
{
|
||||
if (connected_status_check_counter == 600)
|
||||
// If the node is already weakly connected, check every 2 seconds whether we are now strongly connected.
|
||||
// Otherwise check every 60 seconds. This makes it harder to become weakly connected and easier to get out of it.
|
||||
// This can help with unnessary flooding of forwarded messages across the network.
|
||||
bool weakly_connected = status::get_weakly_connected();
|
||||
if (connected_status_check_counter == (weakly_connected ? 20 : 600))
|
||||
{
|
||||
// Get the count of peers which are unl nodes.
|
||||
// One is added to session list size only if we are a unl node, to reflect the self connection.
|
||||
// One is added to peer count only if we are a unl node, to reflect the self connection.
|
||||
const int connected_peer_count = std::count_if(sessions.begin(), sessions.end(), [](const p2p::peer_comm_session &session)
|
||||
{ return session.is_unl; }) +
|
||||
(conf::cfg.node.is_unl ? 1 : 0);
|
||||
const bool current_state = connected_peer_count < (unl::count() * WEAKLY_CONNECTED_THRESHOLD);
|
||||
if (is_weakly_connected != current_state)
|
||||
if (weakly_connected != current_state)
|
||||
{
|
||||
is_weakly_connected = !is_weakly_connected;
|
||||
send_peer_requirement_announcement(is_weakly_connected);
|
||||
LOG_DEBUG << "Sent weakly connected announcement.";
|
||||
weakly_connected = !weakly_connected;
|
||||
send_peer_requirement_announcement(weakly_connected);
|
||||
status::set_weakly_connected(weakly_connected);
|
||||
|
||||
if (weakly_connected)
|
||||
LOG_WARNING << "Became weakly connected.";
|
||||
else
|
||||
LOG_INFO << "No longer weakly connected.";
|
||||
}
|
||||
connected_status_check_counter = 0;
|
||||
}
|
||||
|
||||
@@ -6,9 +6,6 @@
|
||||
|
||||
namespace p2p
|
||||
{
|
||||
// Globally exposed weakly connected status variable.
|
||||
extern bool is_weakly_connected;
|
||||
|
||||
struct peer_properties;
|
||||
|
||||
class peer_comm_server : public comm::comm_server<peer_comm_session>
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "p2p.hpp"
|
||||
#include "../unl.hpp"
|
||||
#include "../sc/hpfs_log_sync.hpp"
|
||||
#include "../status.hpp"
|
||||
|
||||
namespace p2pmsg = msg::fbuf::p2pmsg;
|
||||
|
||||
@@ -32,7 +33,7 @@ namespace p2p
|
||||
int handle_peer_connect(p2p::peer_comm_session &session)
|
||||
{
|
||||
// Skip new inbound connection if max inbound connection cap is reached.
|
||||
if (session.is_inbound && get_available_capacity() == 0)
|
||||
if (session.is_inbound && calculate_available_capacity() == 0)
|
||||
{
|
||||
LOG_DEBUG << "Max peer connection cap reached. Rejecting new peer connection [" << session.display_name() << "]";
|
||||
return -1;
|
||||
@@ -148,7 +149,7 @@ namespace p2p
|
||||
|
||||
if (mi.type == p2pmsg::P2PMsgContent_PeerListResponseMsg)
|
||||
{
|
||||
p2p::merge_peer_list(p2pmsg::create_peer_list_response_from_msg(mi));
|
||||
p2p::merge_peer_list(p2pmsg::create_peer_list_response_from_msg(mi), session);
|
||||
}
|
||||
else if (mi.type == p2pmsg::P2PMsgContent_PeerListRequestMsg)
|
||||
{
|
||||
@@ -334,9 +335,7 @@ namespace p2p
|
||||
void handle_peer_on_verified(p2p::peer_comm_session &session)
|
||||
{
|
||||
// Sending newly verified node the requirement of consensus msg fowarding if this node is weakly connected.
|
||||
if (p2p::is_weakly_connected)
|
||||
{
|
||||
p2p::send_peer_requirement_announcement(is_weakly_connected, &session);
|
||||
}
|
||||
if (status::get_weakly_connected())
|
||||
p2p::send_peer_requirement_announcement(true, &session);
|
||||
}
|
||||
} // namespace p2p
|
||||
112
src/status.cpp
112
src/status.cpp
@@ -2,6 +2,7 @@
|
||||
#include "util/sequence_hash.hpp"
|
||||
#include "ledger/ledger_common.hpp"
|
||||
#include "conf.hpp"
|
||||
#include "p2p/p2p.hpp"
|
||||
|
||||
namespace status
|
||||
{
|
||||
@@ -20,6 +21,11 @@ namespace status
|
||||
|
||||
std::shared_mutex peers_mutex;
|
||||
std::set<conf::peer_ip_port> peers; // Known ip:port pairs for connection verified peers.
|
||||
std::atomic<size_t> peer_count = 0;
|
||||
std::atomic<bool> weakly_connected = false;
|
||||
std::atomic<int16_t> available_mesh_capacity = -1;
|
||||
|
||||
proposal_health phealth = {};
|
||||
|
||||
//----- Ledger status
|
||||
|
||||
@@ -33,7 +39,7 @@ namespace status
|
||||
void ledger_created(const util::sequence_hash &ledger_id, const ledger::ledger_record &ledger)
|
||||
{
|
||||
// If currently not-in-sync, report it as in-sync when a ledger is created.
|
||||
if (in_sync != 1)
|
||||
if (in_sync.load() != 1)
|
||||
sync_status_changed(true);
|
||||
|
||||
std::unique_lock lock(ledger_mutex);
|
||||
@@ -44,8 +50,12 @@ namespace status
|
||||
|
||||
void sync_status_changed(const bool new_in_sync)
|
||||
{
|
||||
in_sync = new_in_sync ? 1 : 0;
|
||||
event_queue.try_enqueue(sync_status_change_event{new_in_sync});
|
||||
const int new_value = new_in_sync ? 1 : 0;
|
||||
if (new_value != in_sync.load())
|
||||
{
|
||||
in_sync = new_value;
|
||||
event_queue.try_enqueue(sync_status_change_event{new_in_sync});
|
||||
}
|
||||
}
|
||||
|
||||
const util::sequence_hash get_lcl_id()
|
||||
@@ -56,7 +66,7 @@ namespace status
|
||||
|
||||
const bool is_in_sync()
|
||||
{
|
||||
return in_sync == 1;
|
||||
return in_sync.load() == 1;
|
||||
}
|
||||
|
||||
const ledger::ledger_record get_last_ledger()
|
||||
@@ -93,6 +103,14 @@ namespace status
|
||||
{
|
||||
std::unique_lock lock(peers_mutex);
|
||||
peers = std::move(updated_peers);
|
||||
|
||||
if (peers.size() != peer_count)
|
||||
{
|
||||
peer_count = peers.size();
|
||||
|
||||
if (conf::cfg.health.connectivity_stats)
|
||||
event_queue.try_enqueue(connectivity_health{peer_count.load(), weakly_connected.load()});
|
||||
}
|
||||
}
|
||||
|
||||
const std::set<conf::peer_ip_port> get_peers()
|
||||
@@ -101,4 +119,90 @@ namespace status
|
||||
return peers;
|
||||
}
|
||||
|
||||
const size_t get_peers_count()
|
||||
{
|
||||
return peer_count.load();
|
||||
}
|
||||
|
||||
void set_weakly_connected(const bool is_weakly_connected)
|
||||
{
|
||||
if (weakly_connected.load() != is_weakly_connected)
|
||||
{
|
||||
weakly_connected = is_weakly_connected;
|
||||
|
||||
if (conf::cfg.health.connectivity_stats)
|
||||
event_queue.try_enqueue(connectivity_health{peer_count.load(), weakly_connected.load()});
|
||||
}
|
||||
}
|
||||
|
||||
const bool get_weakly_connected()
|
||||
{
|
||||
return weakly_connected.load();
|
||||
}
|
||||
|
||||
void set_available_mesh_capacity(const int16_t new_capacity)
|
||||
{
|
||||
available_mesh_capacity = new_capacity;
|
||||
}
|
||||
|
||||
const int16_t get_available_mesh_capacity()
|
||||
{
|
||||
return available_mesh_capacity.load();
|
||||
}
|
||||
|
||||
//----- Node health
|
||||
|
||||
void report_proposal_batch(const std::list<p2p::proposal> &proposals)
|
||||
{
|
||||
if (!conf::cfg.health.proposal_stats)
|
||||
return;
|
||||
|
||||
phealth.comm_latency_min = UINT64_MAX;
|
||||
phealth.comm_latency_max = 0;
|
||||
phealth.comm_latency_avg = 0;
|
||||
phealth.read_latency_min = UINT64_MAX;
|
||||
phealth.read_latency_max = 0;
|
||||
phealth.read_latency_avg = 0;
|
||||
phealth.batch_size = proposals.size();
|
||||
|
||||
if (phealth.batch_size == 0)
|
||||
return;
|
||||
|
||||
const uint64_t now = util::get_epoch_milliseconds();
|
||||
uint64_t total_comm_latency = 0;
|
||||
uint64_t total_read_latency = 0;
|
||||
|
||||
for (const p2p::proposal &p : proposals)
|
||||
{
|
||||
const uint64_t comm_latency = (p.sent_timestamp < p.recv_timestamp) ? (p.recv_timestamp - p.sent_timestamp) : 0;
|
||||
const uint64_t read_latency = now - p.recv_timestamp;
|
||||
|
||||
total_comm_latency += comm_latency;
|
||||
total_read_latency += read_latency;
|
||||
|
||||
if (comm_latency < phealth.comm_latency_min)
|
||||
phealth.comm_latency_min = comm_latency;
|
||||
|
||||
if (comm_latency > phealth.comm_latency_max)
|
||||
phealth.comm_latency_max = comm_latency;
|
||||
|
||||
if (read_latency < phealth.read_latency_min)
|
||||
phealth.read_latency_min = read_latency;
|
||||
|
||||
if (read_latency > phealth.read_latency_max)
|
||||
phealth.read_latency_max = read_latency;
|
||||
}
|
||||
|
||||
phealth.comm_latency_avg = total_comm_latency / phealth.batch_size;
|
||||
phealth.read_latency_avg = total_read_latency / phealth.batch_size;
|
||||
}
|
||||
|
||||
void emit_proposal_health()
|
||||
{
|
||||
if (!conf::cfg.health.proposal_stats)
|
||||
return;
|
||||
|
||||
event_queue.try_enqueue(phealth);
|
||||
}
|
||||
|
||||
} // namespace status
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "util/sequence_hash.hpp"
|
||||
#include "ledger/ledger_common.hpp"
|
||||
#include "conf.hpp"
|
||||
#include "p2p/p2p.hpp"
|
||||
|
||||
namespace status
|
||||
{
|
||||
@@ -23,8 +24,27 @@ namespace status
|
||||
bool in_sync = false;
|
||||
};
|
||||
|
||||
struct proposal_health
|
||||
{
|
||||
uint64_t comm_latency_min = 0;
|
||||
uint64_t comm_latency_max = 0;
|
||||
uint64_t comm_latency_avg = 0;
|
||||
uint64_t read_latency_min = 0;
|
||||
uint64_t read_latency_max = 0;
|
||||
uint64_t read_latency_avg = 0;
|
||||
uint64_t batch_size = 0;
|
||||
};
|
||||
|
||||
struct connectivity_health
|
||||
{
|
||||
size_t peer_count = 0;
|
||||
bool is_weakly_connected = false;
|
||||
};
|
||||
|
||||
typedef std::variant<proposal_health, connectivity_health> health_event;
|
||||
|
||||
// Represents any kind of change that has happened in the node.
|
||||
typedef std::variant<unl_change_event, ledger_created_event, sync_status_change_event> change_event;
|
||||
typedef std::variant<unl_change_event, ledger_created_event, sync_status_change_event, health_event> change_event;
|
||||
|
||||
extern moodycamel::ConcurrentQueue<change_event> event_queue;
|
||||
|
||||
@@ -41,6 +61,14 @@ namespace status
|
||||
|
||||
void set_peers(const std::set<conf::peer_ip_port> &updated_peers);
|
||||
const std::set<conf::peer_ip_port> get_peers();
|
||||
const size_t get_peers_count();
|
||||
void set_weakly_connected(const bool is_weakly_connected);
|
||||
const bool get_weakly_connected();
|
||||
void set_available_mesh_capacity(const int16_t new_capacity);
|
||||
const int16_t get_available_mesh_capacity();
|
||||
|
||||
void report_proposal_batch(const std::list<p2p::proposal> &proposals);
|
||||
void emit_proposal_health();
|
||||
|
||||
} // namespace status
|
||||
|
||||
|
||||
@@ -7,7 +7,8 @@ namespace usr
|
||||
enum NOTIFICATION_CHANNEL
|
||||
{
|
||||
UNL_CHANGE = 0,
|
||||
LEDGER_EVENT = 1
|
||||
LEDGER_EVENT = 1,
|
||||
HEALTH_STAT = 2
|
||||
};
|
||||
|
||||
} // namespace usr
|
||||
|
||||
@@ -609,6 +609,24 @@ namespace usr
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ev.index() == 3) // Health events. Broadcast for subscribed users.
|
||||
{
|
||||
std::scoped_lock<std::mutex> lock(ctx.users_mutex);
|
||||
for (auto &[sid, user] : ctx.users)
|
||||
{
|
||||
if (user.subscriptions[NOTIFICATION_CHANNEL::HEALTH_STAT])
|
||||
{
|
||||
std::vector<uint8_t> &msg = protocol_msgs[user.protocol];
|
||||
if (msg.empty()) // Construct the message with relevant protocol if not done so already.
|
||||
{
|
||||
msg::usrmsg::usrmsg_parser parser(user.protocol);
|
||||
const status::health_event &health_ev = std::get<status::health_event>(ev);
|
||||
parser.create_health_notification(msg, health_ev);
|
||||
}
|
||||
user.session.send(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ namespace usr
|
||||
size_t collected_input_size = 0;
|
||||
|
||||
// User's notification subscription toggles.
|
||||
bool subscriptions[2];
|
||||
bool subscriptions[3];
|
||||
|
||||
// Holds the websocket session of this user.
|
||||
// We don't need to own the session object since the lifetime of user and session are coupled.
|
||||
@@ -55,6 +55,7 @@ namespace usr
|
||||
// Default subscriptions.
|
||||
subscriptions[NOTIFICATION_CHANNEL::UNL_CHANGE] = false;
|
||||
subscriptions[NOTIFICATION_CHANNEL::LEDGER_EVENT] = false;
|
||||
subscriptions[NOTIFICATION_CHANNEL::HEALTH_STAT] = false;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
namespace version
|
||||
{
|
||||
// Hot Pocket version. Written to new configs and p2p/user messages.
|
||||
constexpr const char *HP_VERSION = "0.5.0";
|
||||
constexpr const char *HP_VERSION = "0.5.1";
|
||||
|
||||
// Minimum compatible config version (this will be used to validate configs).
|
||||
constexpr const char *MIN_CONFIG_VERSION = "0.5.0";
|
||||
|
||||
@@ -17,6 +17,7 @@ ncount=$1
|
||||
loglevel=$2
|
||||
roundtime=$3
|
||||
hpcore=$(realpath ../..)
|
||||
iprange="172.1.1"
|
||||
|
||||
# Contract can be set with 'export CONTRACT=<name>'. Defaults to nodejs echo contract.
|
||||
if [ "$CONTRACT" = "cecho" ]; then # C echo contract
|
||||
@@ -37,6 +38,16 @@ elif [ "$CONTRACT" = "nodefile" ]; then # nodejs file contract (uses BSON protoc
|
||||
binary="/usr/bin/node"
|
||||
binargs="index.js"
|
||||
|
||||
elif [ "$CONTRACT" = "diag" ]; then # Diagnostic contract
|
||||
echo "Using diagnostic contract."
|
||||
pushd $hpcore/examples/nodejs_contract/ > /dev/null 2>&1
|
||||
npm install
|
||||
npm run build-diag
|
||||
popd > /dev/null 2>&1
|
||||
copyfiles="$hpcore/examples/nodejs_contract/dist/diagnostic-contract/index.js"
|
||||
binary="/usr/bin/node"
|
||||
binargs="index.js"
|
||||
|
||||
else # nodejs echo contract (default)
|
||||
echo "Using nodejs echo contract."
|
||||
pushd $hpcore/examples/nodejs_contract/ > /dev/null 2>&1
|
||||
@@ -85,7 +96,7 @@ do
|
||||
|
||||
# During hosting we use docker virtual dns instead of IP address.
|
||||
# So each node is reachable via 'node<id>' name.
|
||||
peers[i]="node${n}:${peerport}"
|
||||
peers[i]="$iprange.${n}:${peerport}"
|
||||
|
||||
# Update config.
|
||||
node_json=$(node -p "JSON.stringify({...require('./tmp.json').node, \
|
||||
@@ -214,7 +225,8 @@ popd > /dev/null 2>&1
|
||||
|
||||
# Create docker virtual network named 'hpnet'
|
||||
# All nodes will communicate with each other via this network.
|
||||
docker network create --driver bridge hpnet > /dev/null 2>&1
|
||||
docker network rm hpnet > /dev/null 2>&1
|
||||
docker network create --driver=bridge --subnet=$iprange.0/24 --gateway=$iprange.254 hpnet > /dev/null 2>&1
|
||||
|
||||
echo "Cluster generated at ${clusterloc}"
|
||||
echo "Use \"./cluster-start.sh <nodeid>\" to run each node."
|
||||
|
||||
@@ -21,7 +21,7 @@ let peerport=22860+$n
|
||||
# Mount the node<id> contract directory into hpcore docker container and run.
|
||||
# We specify --network=hpnet so all nodes will communicate via 'hpnet' docker virtual network.
|
||||
# We specify --name for each node so it will be the virtual dns name for each node.
|
||||
docker run --rm -t -i --network=hpnet --name=node${n} \
|
||||
docker run --rm -t -i --network=hpnet --ip=172.1.1.${n} --name=node${n} \
|
||||
-p ${pubport}:${pubport} \
|
||||
-p ${peerport}:${peerport} \
|
||||
--device /dev/fuse --cap-add SYS_ADMIN --security-opt apparmor:unconfined \
|
||||
|
||||
3
test/vm-cluster/.gitignore
vendored
3
test/vm-cluster/.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
cfg
|
||||
config.json
|
||||
hpfiles
|
||||
node_modules
|
||||
node_modules
|
||||
*.log
|
||||
@@ -1,6 +1,6 @@
|
||||
const HotPocket = require('../../examples/js_client/lib/hp-client-lib');
|
||||
const azure = require('azure-storage');
|
||||
const fs = require('fs');
|
||||
const fs = require('fs').promises;
|
||||
const https = require('https');
|
||||
const fetch = require('node-fetch');
|
||||
|
||||
@@ -10,6 +10,8 @@ const metricsTrackInterval = process.env.METRICSTRACK || 10000;
|
||||
const backoffDelayMax = process.env.BACKOFFMAX || 60000;
|
||||
const eventsBatchSize = process.env.EVENTBATCH || 20;
|
||||
const stateBatchSize = process.env.STATEBATCH || 20;
|
||||
const synclog = process.env.SYNCLOG || "off";
|
||||
const healthlog = process.env.HEALTHLOG || "off";
|
||||
|
||||
let keys = null;
|
||||
let vultrApiKey = null;
|
||||
@@ -27,7 +29,7 @@ async function main() {
|
||||
console.log('My public key is: ' + pkhex);
|
||||
|
||||
// Load cluster config.
|
||||
const config = JSON.parse(fs.readFileSync("config.json"));
|
||||
const config = JSON.parse(await fs.readFile("config.json"));
|
||||
vultrApiKey = config.vultr.api_key;
|
||||
|
||||
// Create Azure table service.
|
||||
@@ -200,13 +202,37 @@ async function establishClientConnection(node) {
|
||||
reportEvent(node, ev);
|
||||
});
|
||||
|
||||
// This will get fired when any diagnostic health event occurs.
|
||||
if (healthlog === "on") {
|
||||
hpc.on(HotPocket.events.healthEvent, async (ev) => {
|
||||
|
||||
const now = new Date().toUTCString();
|
||||
if (ev.event === "proposal") {
|
||||
delete ev.event;
|
||||
const str = JSON.stringify(ev);
|
||||
await fs.appendFile("prop_health.log", `${now}, Node${node.idx}, ${node.uri}, ${node.status}, ${str}\n`);
|
||||
}
|
||||
else if (ev.event === "connectivity") {
|
||||
delete ev.event;
|
||||
const str = JSON.stringify(ev);
|
||||
await fs.appendFile("conn_health.log", `${now}, Node${node.idx}, ${node.uri}, ${node.status}, ${str}\n`);
|
||||
}
|
||||
});
|
||||
|
||||
await hpc.subscribe(HotPocket.notificationChannels.healthEvent);
|
||||
}
|
||||
|
||||
// Establish HotPocket connection.
|
||||
if (!await hpc.connect()) {
|
||||
onConnectionFail(node);
|
||||
}
|
||||
else {
|
||||
|
||||
const stat = await hpc.getStatus();
|
||||
const lastLedger = await hpc.getLedgerBySeqNo(stat.ledgerSeqNo);
|
||||
|
||||
node.failureCount = 0;
|
||||
reportEvent(node, { event: "online" });
|
||||
reportEvent(node, { event: "online", ledger: lastLedger });
|
||||
await hpc.subscribe(HotPocket.notificationChannels.ledgerEvent);
|
||||
}
|
||||
}
|
||||
@@ -248,9 +274,13 @@ async function reportEvent(node, ev) {
|
||||
}
|
||||
else if (ev.event == 'sync_status') {
|
||||
node.status = ev.inSync ? 'in_sync' : 'desync';
|
||||
|
||||
if (synclog == "on")
|
||||
await fs.appendFile("sync_ops.log", `${new Date(ts).toUTCString()}, Node${node.idx}, ${node.uri}, ${node.status}, at ${node.lastLedger.seqNo}\n`);
|
||||
}
|
||||
else if (ev.event == 'online') {
|
||||
node.status = 'online';
|
||||
node.lastLedger = ev.ledger;
|
||||
}
|
||||
else if (ev.event == 'offline') {
|
||||
node.status = 'offline';
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
planid="vc2-1c-1gb" # $5/month
|
||||
osid=387 # Ubuntu 20.04
|
||||
# Order of Vultr regions to distribute servers across the globe.
|
||||
regions=("nrt" "syd" "fra" "yto" "icn" "cdg" "atl" "sgp" "lhr" "ord" "ams" "nrt" "dfw" "syd" "fra" "lax" "icn" "syd" "cdg" "mia" "sgp" "syd" "lhr" "ewr" "nrt" "syd" "fra" "sea" "icn" "syd" "cdg" "sjc")
|
||||
regions=("syd" "yto" "ams" "atl" "cdg" "dfw" "ewr" "fra" "icn" "lax" "lhr" "mia" "nrt" "ord" "sea" "sgp" "sjc")
|
||||
|
||||
# jq command is used for json manipulation.
|
||||
if ! command -v jq &> /dev/null
|
||||
|
||||
Reference in New Issue
Block a user