Large cluster optimizations. (#348)

* Added sync log to streamer.
* Fixed ledger closing attempt while syncing.
* Added diagnostic contract.
* Reset to stage 0 on unreliable votes.
* Reduced peer msg age threshold.
* Added health tracking.
* Weakly-connected detection improvement.
* Increased version to 0.5.1.
* Improved client lib server version check.
* Added health logging support to text client.
* Added weakly connected status in status response.
* Increased max peers limits when serializing.
* Local docker cluster now uses manually assigned IPs.
* Updated vultr script vm region order.
* Sync status reporting improvement.
* Added milliseconds to logging.
This commit is contained in:
Ravin Perera
2021-09-17 11:53:49 +05:30
committed by GitHub
parent c686745c81
commit 6dc0776b56
32 changed files with 720 additions and 86 deletions

View File

@@ -17,6 +17,7 @@ ncount=$1
loglevel=$2
roundtime=$3
hpcore=$(realpath ../..)
iprange="172.1.1"
# Contract can be set with 'export CONTRACT=<name>'. Defaults to nodejs echo contract.
if [ "$CONTRACT" = "cecho" ]; then # C echo contract
@@ -37,6 +38,16 @@ elif [ "$CONTRACT" = "nodefile" ]; then # nodejs file contract (uses BSON protoc
binary="/usr/bin/node"
binargs="index.js"
elif [ "$CONTRACT" = "diag" ]; then # Diagnostic contract
echo "Using diagnostic contract."
pushd $hpcore/examples/nodejs_contract/ > /dev/null 2>&1
npm install
npm run build-diag
popd > /dev/null 2>&1
copyfiles="$hpcore/examples/nodejs_contract/dist/diagnostic-contract/index.js"
binary="/usr/bin/node"
binargs="index.js"
else # nodejs echo contract (default)
echo "Using nodejs echo contract."
pushd $hpcore/examples/nodejs_contract/ > /dev/null 2>&1
@@ -85,7 +96,7 @@ do
# During hosting each node is addressed by its fixed IP on the docker network
# ('$iprange.<id>') rather than by its docker virtual dns name ('node<id>').
peers[i]="node${n}:${peerport}"
peers[i]="$iprange.${n}:${peerport}"
# Update config.
node_json=$(node -p "JSON.stringify({...require('./tmp.json').node, \
@@ -214,7 +225,8 @@ popd > /dev/null 2>&1
# Create docker virtual network named 'hpnet'
# All nodes will communicate with each other via this network.
docker network create --driver bridge hpnet > /dev/null 2>&1
docker network rm hpnet > /dev/null 2>&1
docker network create --driver=bridge --subnet=$iprange.0/24 --gateway=$iprange.254 hpnet > /dev/null 2>&1
echo "Cluster generated at ${clusterloc}"
echo "Use \"./cluster-start.sh <nodeid>\" to run each node."

View File

@@ -21,7 +21,7 @@ let peerport=22860+$n
# Mount the node<id> contract directory into hpcore docker container and run.
# We specify --network=hpnet so all nodes will communicate via 'hpnet' docker virtual network.
# We specify --name for each node so it will be the virtual dns name for each node.
docker run --rm -t -i --network=hpnet --name=node${n} \
docker run --rm -t -i --network=hpnet --ip=172.1.1.${n} --name=node${n} \
-p ${pubport}:${pubport} \
-p ${peerport}:${peerport} \
--device /dev/fuse --cap-add SYS_ADMIN --security-opt apparmor:unconfined \

View File

@@ -1,4 +1,5 @@
cfg
config.json
hpfiles
node_modules
node_modules
*.log

View File

@@ -1,6 +1,6 @@
const HotPocket = require('../../examples/js_client/lib/hp-client-lib');
const azure = require('azure-storage');
const fs = require('fs');
const fs = require('fs').promises;
const https = require('https');
const fetch = require('node-fetch');
@@ -10,6 +10,8 @@ const metricsTrackInterval = process.env.METRICSTRACK || 10000;
const backoffDelayMax = process.env.BACKOFFMAX || 60000;
const eventsBatchSize = process.env.EVENTBATCH || 20;
const stateBatchSize = process.env.STATEBATCH || 20;
const synclog = process.env.SYNCLOG || "off";
const healthlog = process.env.HEALTHLOG || "off";
let keys = null;
let vultrApiKey = null;
@@ -27,7 +29,7 @@ async function main() {
console.log('My public key is: ' + pkhex);
// Load cluster config.
const config = JSON.parse(fs.readFileSync("config.json"));
const config = JSON.parse(await fs.readFile("config.json"));
vultrApiKey = config.vultr.api_key;
// Create Azure table service.
@@ -200,13 +202,37 @@ async function establishClientConnection(node) {
reportEvent(node, ev);
});
// This will get fired when any diagnostic health event occurs.
if (healthlog === "on") {
hpc.on(HotPocket.events.healthEvent, async (ev) => {
const now = new Date().toUTCString();
if (ev.event === "proposal") {
delete ev.event;
const str = JSON.stringify(ev);
await fs.appendFile("prop_health.log", `${now}, Node${node.idx}, ${node.uri}, ${node.status}, ${str}\n`);
}
else if (ev.event === "connectivity") {
delete ev.event;
const str = JSON.stringify(ev);
await fs.appendFile("conn_health.log", `${now}, Node${node.idx}, ${node.uri}, ${node.status}, ${str}\n`);
}
});
await hpc.subscribe(HotPocket.notificationChannels.healthEvent);
}
// Establish HotPocket connection.
if (!await hpc.connect()) {
onConnectionFail(node);
}
else {
const stat = await hpc.getStatus();
const lastLedger = await hpc.getLedgerBySeqNo(stat.ledgerSeqNo);
node.failureCount = 0;
reportEvent(node, { event: "online" });
reportEvent(node, { event: "online", ledger: lastLedger });
await hpc.subscribe(HotPocket.notificationChannels.ledgerEvent);
}
}
@@ -248,9 +274,13 @@ async function reportEvent(node, ev) {
}
else if (ev.event == 'sync_status') {
node.status = ev.inSync ? 'in_sync' : 'desync';
if (synclog == "on")
await fs.appendFile("sync_ops.log", `${new Date(ts).toUTCString()}, Node${node.idx}, ${node.uri}, ${node.status}, at ${node.lastLedger.seqNo}\n`);
}
else if (ev.event == 'online') {
node.status = 'online';
node.lastLedger = ev.ledger;
}
else if (ev.event == 'offline') {
node.status = 'offline';

View File

@@ -11,7 +11,7 @@
planid="vc2-1c-1gb" # $5/month
osid=387 # Ubuntu 20.04
# Order of Vultr regions to distribute servers across the globe.
regions=("nrt" "syd" "fra" "yto" "icn" "cdg" "atl" "sgp" "lhr" "ord" "ams" "nrt" "dfw" "syd" "fra" "lax" "icn" "syd" "cdg" "mia" "sgp" "syd" "lhr" "ewr" "nrt" "syd" "fra" "sea" "icn" "syd" "cdg" "sjc")
regions=("syd" "yto" "ams" "atl" "cdg" "dfw" "ewr" "fra" "icn" "lax" "lhr" "mia" "nrt" "ord" "sea" "sgp" "sjc")
# jq command is used for json manipulation.
if ! command -v jq &> /dev/null