Merge branch 'pratik/otel-phase8-log-correlation' into pratik/otel-phase9-metric-gap-fill

Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
This commit is contained in:
Pratik Mankawde
2026-06-01 14:56:09 +01:00
1650 changed files with 81102 additions and 65833 deletions

48
docker/check-sanitizers.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Sanity-check that the sanitizer runtimes shipped with g++/clang++ work
# end-to-end against the system loader: compile each example with both
# compilers, run it, and confirm the expected diagnostic is emitted.
#
# Usage: check-sanitizers.sh <cpp_files_dir>
#   <cpp_files_dir> must contain asan.cpp, tsan.cpp and ubsan.cpp.
# Exit status: 0 when every compiler/sanitizer pair builds and prints its
# expected diagnostic; non-zero on an unsupported architecture, a failed
# build, or a missing diagnostic.
set -eo pipefail

cpp_files_dir="${1:?usage: $0 <cpp_files_dir>}"

# Loader path written into each test binary via -Wl,--dynamic-linker.
case "$(uname -m)" in
    x86_64) loader=/lib64/ld-linux-x86-64.so.2 ;;
    aarch64) loader=/lib/ld-linux-aarch64.so.1 ;;
    *)
        echo "Unsupported arch: $(uname -m)" >&2
        exit 1
        ;;
esac

# Compiler flag that enables each sanitizer.
declare -A sanitize=(
    [asan]="-fsanitize=address"
    [tsan]="-fsanitize=thread"
    [ubsan]="-fsanitize=undefined"
)

# Literal text each sanitizer must print for its example program.
declare -A expect=(
    [asan]="heap-use-after-free"
    [tsan]="data race"
    [ubsan]="signed integer overflow"
)

for compiler in g++ clang++; do
    for name in asan tsan ubsan; do
        bin="/tmp/${name}-${compiler}"

        echo "=== Build ${name} with ${compiler} ==="
        "$compiler" -std=c++20 -O1 -g "${sanitize[$name]}" \
            "-Wl,--dynamic-linker=${loader}" \
            "${cpp_files_dir}/${name}.cpp" -o "$bin"

        echo "=== Run ${name}-${compiler} ==="
        # The example programs are expected to misbehave, so a non-zero exit
        # from the binary is not an error here; only the diagnostic matters.
        output=$("$bin" 2>&1) || true
        echo "$output"

        # Feed grep from a here-string instead of `echo | grep -q`: with
        # pipefail, grep -q exiting on the first match can kill the writer
        # with SIGPIPE on a large report and turn a match into a failure.
        # -F matches the expected text literally rather than as a regex.
        if ! grep -qF -- "${expect[$name]}" <<<"$output"; then
            echo "expected '${expect[$name]}' from $bin" >&2
            exit 1
        fi

        rm -f "$bin"
    done
done

28
docker/cpp_files/asan.cpp Normal file
View File

@@ -0,0 +1,28 @@
#include <atomic>
#include <cstddef>
#include <iostream>
// Loads array[index] through a volatile pointer and returns the value.
// The function is marked noinline (GCC/Clang attribute or MSVC __declspec)
// and the load is bracketed by compiler-level signal fences.
// NOTE(review): presumably all three measures exist so the optimizer cannot
// elide or move the load, keeping the invalid access visible to the
// sanitizer at -O1 — confirm with the author.
#if defined(__clang__) || defined(__GNUC__)
__attribute__((noinline))
#elif defined(_MSC_VER)
__declspec(noinline)
#endif
int
read_after_free(volatile int* array, std::size_t index)
{
std::atomic_signal_fence(std::memory_order_seq_cst);
// The actual memory read; main() passes a pointer that was already freed.
int value = array[index];
std::atomic_signal_fence(std::memory_order_seq_cst);
return value;
}
// ASan example: allocates a five-element int array, frees it, then reads
// element 2 of the freed storage. The use-after-free is deliberate;
// check-sanitizers.sh greps the program output for "heap-use-after-free".
int
main()
{
int* array = new int[5]{10, 20, 30, 40, 50};
delete[] array;
// Intentional bug: `array` is dangling at this point.
std::cout << "Value at index 2: " << read_after_free(array, 2) << std::endl;
return 0;
}

26
docker/cpp_files/tsan.cpp Normal file
View File

@@ -0,0 +1,26 @@
#include <iostream>
#include <thread>
static int kCounter = 0;
// Increments the file-scope counter kCounter 100'000 times.
// kCounter is a plain (non-atomic) int and no lock is taken here, so calling
// this from more than one thread at once is an unsynchronized write.
void
increment()
{
for (int i = 0; i < 100'000; ++i)
{
++kCounter;
}
}
// TSan example: runs increment() on two threads concurrently, joins both,
// and prints the resulting counter. The race on kCounter is deliberate;
// check-sanitizers.sh greps the program output for "data race".
int
main()
{
std::thread t1(increment);
std::thread t2(increment);
t1.join();
t2.join();
std::cout << "Final counter value: " << kCounter << std::endl;
return 0;
}

View File

@@ -0,0 +1,13 @@
#include <iostream>
#include <limits>
// UBSan example: adds 1 to the largest int value, which overflows a signed
// int. The overflow is deliberate; check-sanitizers.sh greps the program
// output for "signed integer overflow".
int
main()
{
int maxInt = std::numeric_limits<int>::max();
// NOTE(review): `one` is volatile, presumably so the addition cannot be
// constant-folded away at compile time — confirm with the author.
int volatile one = 1;
std::cout << "Current max: " << maxInt << std::endl;
// Intentional bug: signed overflow.
int overflowed = maxInt + one;
std::cout << "Overflowed result: " << overflowed << std::endl;
return 0;
}

95
docker/nix.Dockerfile Normal file
View File

@@ -0,0 +1,95 @@
# Base image of the final stage. Declared ahead of the first FROM so that the
# later `FROM ${BASE_IMAGE}` line can reference it; defaults to nixos/nix.
ARG BASE_IMAGE=nixos/nix:latest
# Nix builder
# NOTE(review): the builder stage is pinned to nixos/nix:latest and does not
# follow BASE_IMAGE — presumably intentional, since only this stage needs the
# nix tooling; confirm.
FROM nixos/nix:latest AS builder-source
# Enable the `nix` CLI and flakes, which the `nix build` step below relies on.
RUN mkdir -p ~/.config/nix && \
echo "experimental-features = nix-command flakes" >> ~/.config/nix/nix.conf
# Copy our source and setup our working dir.
COPY nix/ci-env.nix /tmp/build/nix/ci-env.nix
COPY nix/packages.nix /tmp/build/nix/packages.nix
COPY nix/utils.nix /tmp/build/nix/utils.nix
COPY flake.nix /tmp/build/
COPY flake.lock /tmp/build/
WORKDIR /tmp/build
FROM builder-source AS builder
# Build our Nix CI environment (all build tools in a single store path)
# The build output is the `result` symlink in /tmp/build, consumed below and
# by the final stage.
RUN nix \
--option filter-syscalls false \
build
# Copy the Nix store closure into a directory. The Nix store closure is the
# entire set of Nix store values that we need for our build.
RUN mkdir /tmp/nix-store-closure && \
cp -R $(nix-store -qR result/) /tmp/nix-store-closure
# Final image
# Built on BASE_IMAGE (nixos/nix by default); the toolchain itself comes from
# the Nix store closure copied out of the builder stage further down.
FROM ${BASE_IMAGE}
# bash is not located at /bin/bash in nixos/nix, so we need to create a symlink to it.
# The symlink is only created when /nix already exists in the base image.
RUN if [ -d /nix ]; then \
ln -s /root/.nix-profile/bin/bash /bin/bash; \
fi
# Use Bash as the default shell for RUN commands, using the options
# `set -o errexit -o pipefail`, and as the entrypoint.
SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
ENTRYPOINT ["/bin/bash"]
# Copy /nix/store and the env symlink tree
COPY --from=builder /tmp/nix-store-closure /nix/store
COPY --from=builder /tmp/build/result /nix/ci-env
# Put the CI environment's tools ahead of anything the base image provides.
ENV PATH="/nix/ci-env/bin:$PATH"
# Externally-built dynamically-linked ELF binaries hard-code the loader path
# (e.g. /lib64/ld-linux-x86-64.so.2) in their PT_INTERP header. Copy the
# loader from the Nix store to that path when the base image doesn't already
# provide one (i.e. on nixos/nix).
RUN <<EOF
case "$(uname -m)" in
x86_64) target=/lib64/ld-linux-x86-64.so.2 ;;
aarch64) target=/lib/ld-linux-aarch64.so.1 ;;
*) echo "Unsupported arch: $(uname -m)" >&2; exit 1 ;;
esac
if [ ! -e "$target" ]; then
# Use the loader from the same glibc that gcc links libc against, so
# ld-linux and libc/libpthread share GLIBC_PRIVATE symbols at runtime.
src="$(dirname "$(gcc -print-file-name=libc.so.6)")/$(basename "$target")"
[ -e "$src" ] || { echo "ld-linux not found at $src" >&2; exit 1; }
mkdir -p "$(dirname "$target")"
cp "$src" "$target"
fi
EOF
# Smoke-test that every expected tool resolves on PATH and runs. The shell
# configured above runs with -e, so the first failing command fails the build.
RUN <<EOF
ccache --version
clang --version
clang++ --version
clang-format --version
cmake --version
conan --version
g++ --version
gcc --version
gcovr --version
git --version
make --version
mold --version
ninja --version
perl --version
pkg-config --version
pre-commit --version
python3 --version
run-clang-tidy --help
vim --version
EOF
# Sanity-check that the sanitizer runtimes shipped with g++/clang++ work
# end-to-end against the system loader.
# The check runs only on Ubuntu-based images and is skipped elsewhere. It is
# written as an `if` rather than `grep && check || true` so that a failing
# check fails the image build instead of being silently swallowed.
COPY docker/cpp_files/ /tmp/cpp_files/
COPY docker/check-sanitizers.sh /tmp/check-sanitizers.sh
RUN if grep -qi ubuntu /etc/os-release 2>/dev/null; then \
        /tmp/check-sanitizers.sh /tmp/cpp_files; \
    fi

View File

@@ -50,7 +50,7 @@ Wait for services to be ready:
curl -sf http://localhost:13133/ && echo "collector ready"
# Tempo readiness
curl -sf http://localhost:3200/ready > /dev/null && echo "tempo ready"
curl -sf http://localhost:3200/ready >/dev/null && echo "tempo ready"
```
### Step 2: Start xrpld in standalone mode
@@ -66,16 +66,16 @@ Wait a few seconds for the node to initialize.
```bash
# server_info
curl -s http://localhost:5005 \
-d '{"method":"server_info"}' | jq .result.info.server_state
-d '{"method":"server_info"}' | jq .result.info.server_state
# server_state
curl -s http://localhost:5005 \
-d '{"method":"server_state"}' | jq .result.state.server_state
-d '{"method":"server_state"}' | jq .result.state.server_state
# ledger
curl -s http://localhost:5005 \
-d '{"method":"ledger","params":[{"ledger_index":"current"}]}' \
| jq .result.ledger_current_index
-d '{"method":"ledger","params":[{"ledger_index":"current"}]}' |
jq .result.ledger_current_index
```
### Step 4: Submit a transaction
@@ -123,21 +123,21 @@ curl -s "$TEMPO/api/v2/search/tag/resource.service.name/values" | jq '.tagValues
# Check RPC spans
curl -s "$TEMPO/api/search" \
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.http_request"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.http_request"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
curl -s "$TEMPO/api/search" \
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.process"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.process"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
curl -s "$TEMPO/api/search" \
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.command.server_info"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
--data-urlencode 'q={resource.service.name="xrpld" && name="rpc.command.server_info"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
# Check transaction spans
curl -s "$TEMPO/api/search" \
--data-urlencode 'q={resource.service.name="xrpld" && name="tx.process"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
--data-urlencode 'q={resource.service.name="xrpld" && name="tx.process"}' \
--data-urlencode 'limit=5' | jq '.traces | length'
```
Or open Grafana Explore with Tempo datasource: http://localhost:3000
@@ -221,8 +221,8 @@ Generate 6 key pairs:
```bash
for i in $(seq 1 6); do
curl -s http://localhost:5005 \
-d '{"method":"validation_create"}' | jq '.result'
curl -s http://localhost:5005 \
-d '{"method":"validation_create"}' | jq '.result'
done
```
@@ -318,8 +318,8 @@ trace_ledger=1
```bash
for i in $(seq 1 6); do
.build/xrpld --conf /tmp/xrpld-integration/node$i/xrpld.cfg --start &
echo $! > /tmp/xrpld-integration/node$i/xrpld.pid
.build/xrpld --conf /tmp/xrpld-integration/node$i/xrpld.cfg --start &
echo $! >/tmp/xrpld-integration/node$i/xrpld.pid
done
```
@@ -329,14 +329,14 @@ Poll each node until `server_state` = `"proposing"`:
```bash
for port in 5005 5006 5007 5008 5009 5010; do
while true; do
state=$(curl -s http://localhost:$port \
-d '{"method":"server_info"}' \
| jq -r '.result.info.server_state')
echo "Port $port: $state"
[ "$state" = "proposing" ] && break
sleep 5
done
while true; do
state=$(curl -s http://localhost:$port \
-d '{"method":"server_info"}' |
jq -r '.result.info.server_state')
echo "Port $port: $state"
[ "$state" = "proposing" ] && break
sleep 5
done
done
```
@@ -412,18 +412,18 @@ curl -s "$TEMPO/api/v2/search/tag/resource.service.name/values" | jq '.tagValues
# Query traces by operation
for op in "rpc.http_request" "rpc.ws_upgrade" "rpc.ws_message" "rpc.process" \
"rpc.command.server_info" "rpc.command.server_state" "rpc.command.ledger" \
"tx.process" "tx.receive" "tx.apply" \
"consensus.proposal.send" "consensus.ledger_close" \
"consensus.accept" "consensus.accept.apply" \
"consensus.validation.send" \
"ledger.build" "ledger.validate" "ledger.store" \
"peer.proposal.receive" "peer.validation.receive"; do
count=$(curl -s "$TEMPO/api/search" \
--data-urlencode "q={resource.service.name=\"xrpld\" && name=\"$op\"}" \
--data-urlencode "limit=5" \
| jq '.traces | length')
printf "%-35s %s traces\n" "$op" "$count"
"rpc.command.server_info" "rpc.command.server_state" "rpc.command.ledger" \
"tx.process" "tx.receive" "tx.apply" \
"consensus.proposal.send" "consensus.ledger_close" \
"consensus.accept" "consensus.accept.apply" \
"consensus.validation.send" \
"ledger.build" "ledger.validate" "ledger.store" \
"peer.proposal.receive" "peer.validation.receive"; do
count=$(curl -s "$TEMPO/api/search" \
--data-urlencode "q={resource.service.name=\"xrpld\" && name=\"$op\"}" \
--data-urlencode "limit=5" |
jq '.traces | length')
printf "%-35s %s traces\n" "$op" "$count"
done
```
@@ -435,16 +435,16 @@ Base URL: `http://localhost:9090`
PROM="http://localhost:9090"
# Span call counts (from spanmetrics connector)
curl -s "$PROM/api/v1/query?query=traces_span_metrics_calls_total" \
| jq '.data.result[] | {span: .metric.span_name, count: .value[1]}'
curl -s "$PROM/api/v1/query?query=traces_span_metrics_calls_total" |
jq '.data.result[] | {span: .metric.span_name, count: .value[1]}'
# Latency histogram
curl -s "$PROM/api/v1/query?query=traces_span_metrics_duration_milliseconds_count" \
| jq '.data.result[] | {span: .metric.span_name, count: .value[1]}'
curl -s "$PROM/api/v1/query?query=traces_span_metrics_duration_milliseconds_count" |
jq '.data.result[] | {span: .metric.span_name, count: .value[1]}'
# RPC calls by command
curl -s "$PROM/api/v1/query?query=traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}" \
| jq '.data.result[] | {command: .metric["xrpl.rpc.command"], count: .value[1]}'
curl -s "$PROM/api/v1/query?query=traces_span_metrics_calls_total{span_name=~\"rpc.command.*\"}" |
jq '.data.result[] | {command: .metric["xrpl.rpc.command"], count: .value[1]}'
```
### Grafana
@@ -512,8 +512,8 @@ exports parsed entries to Loki. Verify Loki has received entries:
```bash
# Query Loki for any xrpld logs
curl -sG "http://localhost:3100/loki/api/v1/query" \
--data-urlencode 'query={job="xrpld"}' \
--data-urlencode 'limit=5' | jq '.data.result | length'
--data-urlencode 'query={job="xrpld"}' \
--data-urlencode 'limit=5' | jq '.data.result | length'
```
Expected: > 0 results.
@@ -567,7 +567,7 @@ Expected: > 0 results.
1. Check that all peer ports (51235-51240) are not in use:
```bash
for p in 51235 51236 51237 51238 51239 51240; do
ss -tlnp | grep ":$p " && echo "port $p in use"
ss -tlnp | grep ":$p " && echo "port $p in use"
done
```
2. Verify `[ips_fixed]` lists all 6 peer ports
@@ -580,8 +580,8 @@ Expected: > 0 results.
1. Verify genesis account exists:
```bash
curl -s http://localhost:5005 \
-d '{"method":"account_info","params":[{"account":"rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"}]}' \
| jq .result.account_data.Balance
-d '{"method":"account_info","params":[{"account":"rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"}]}' |
jq .result.account_data.Balance
```
2. Check submit response for error codes
3. In standalone mode, remember to call `ledger_accept` after submitting

View File

@@ -116,20 +116,9 @@ datasources:
operator: "="
scope: span
type: dynamic
# Phase 2: Node health filters (Task 2.8) — resource attributes
- id: node-amendment-blocked
tag: xrpl.node.amendment_blocked
operator: "="
scope: resource
type: static
- id: node-server-state
tag: xrpl.node.server_state
operator: "="
scope: resource
type: dynamic
# Phase 3: Transaction tracing filters
- id: tx-hash
tag: xrpl.tx.hash
tag: tx_hash
operator: "="
scope: span
type: static

View File

@@ -34,7 +34,7 @@ RPC_PORT_BASE=5005
CONSENSUS_TIMEOUT=120
GENESIS_ACCOUNT="rHb9CJAWyB4rj91VRWn96DkukG4bwdtyTh"
GENESIS_SEED="snoPBrXtMeMyMHUVTgbuqAfg1SUTb"
DEST_ACCOUNT="" # Generated dynamically via wallet_propose
DEST_ACCOUNT="" # Generated dynamically via wallet_propose
TEMPO="http://localhost:3200"
PROM="http://localhost:9090"
@@ -45,18 +45,27 @@ FAIL=0
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
log() { printf "\033[1;34m[INFO]\033[0m %s\n" "$*"; }
ok() { printf "\033[1;32m[PASS]\033[0m %s\n" "$*"; PASS=$((PASS + 1)); }
fail() { printf "\033[1;31m[FAIL]\033[0m %s\n" "$*"; FAIL=$((FAIL + 1)); }
die() { printf "\033[1;31m[ERROR]\033[0m %s\n" "$*" >&2; exit 1; }
log() { printf "\033[1;34m[INFO]\033[0m %s\n" "$*"; }
ok() {
printf "\033[1;32m[PASS]\033[0m %s\n" "$*"
PASS=$((PASS + 1))
}
fail() {
printf "\033[1;31m[FAIL]\033[0m %s\n" "$*"
FAIL=$((FAIL + 1))
}
die() {
printf "\033[1;31m[ERROR]\033[0m %s\n" "$*" >&2
exit 1
}
check_span() {
local op="$1"
local count
count=$(curl -sf "$TEMPO/api/search" \
--data-urlencode "q={resource.service.name=\"rippled\" && name=\"$op\"}" \
--data-urlencode "limit=5" \
| jq '.traces | length' 2>/dev/null || echo 0)
--data-urlencode "limit=5" |
jq '.traces | length' 2>/dev/null || echo 0)
if [ "$count" -gt 0 ]; then
ok "$op ($count traces)"
else
@@ -104,8 +113,8 @@ check_log_correlation() {
if [ -n "$sample_trace_id" ]; then
local trace_found
# Tempo /api/traces/{id} returns OTLP shape: {"batches":[...]}
trace_found=$(curl -sf "$TEMPO/api/traces/$sample_trace_id" \
| jq '.batches | length' 2>/dev/null) || trace_found=0
trace_found=$(curl -sf "$TEMPO/api/traces/$sample_trace_id" |
jq '.batches | length' 2>/dev/null) || trace_found=0
if [ "$trace_found" -gt 0 ]; then
ok "Log-Tempo cross-check: trace_id=$sample_trace_id found in Tempo"
else
@@ -216,7 +225,7 @@ mkdir -p "$TEMP_DATA"
# Create a minimal temp config for key generation
TEMP_CFG="$TEMP_DATA/xrpld.cfg"
cat > "$TEMP_CFG" <<EOCFG
cat >"$TEMP_CFG" <<EOCFG
[server]
port_rpc_temp
@@ -241,7 +250,7 @@ $TEMP_DATA/debug.log
0
EOCFG
"$XRPLD" --conf "$TEMP_CFG" -a --start > "$TEMP_DATA/stdout.log" 2>&1 &
"$XRPLD" --conf "$TEMP_CFG" -a --start >"$TEMP_DATA/stdout.log" 2>&1 &
TEMP_PID=$!
log "Temporary xrpld started (PID $TEMP_PID), waiting for RPC..."
@@ -290,7 +299,7 @@ VALIDATORS_FILE="$WORKDIR/validators.txt"
for i in $(seq 0 $((NUM_NODES - 1))); do
echo "${PUBKEYS[$i]}"
done
} > "$VALIDATORS_FILE"
} >"$VALIDATORS_FILE"
# Create per-node configs
for i in $(seq 1 "$NUM_NODES"); do
@@ -310,7 +319,7 @@ for i in $(seq 1 "$NUM_NODES"); do
fi
done
cat > "$NODE_DIR/xrpld.cfg" <<EOCFG
cat >"$NODE_DIR/xrpld.cfg" <<EOCFG
[server]
port_rpc
port_peer
@@ -386,8 +395,8 @@ log "Starting $NUM_NODES xrpld nodes..."
for i in $(seq 1 "$NUM_NODES"); do
NODE_DIR="$WORKDIR/node$i"
"$XRPLD" --conf "$NODE_DIR/xrpld.cfg" --start > "$NODE_DIR/stdout.log" 2>&1 &
echo $! > "$NODE_DIR/xrpld.pid"
"$XRPLD" --conf "$NODE_DIR/xrpld.cfg" --start >"$NODE_DIR/stdout.log" 2>&1 &
echo $! >"$NODE_DIR/xrpld.pid"
log " Node $i started (PID $(cat "$NODE_DIR/xrpld.pid"))"
done
@@ -403,7 +412,7 @@ start_time=$(date +%s)
nodes_ready=0
while [ "$nodes_ready" -lt "$NUM_NODES" ]; do
elapsed=$(( $(date +%s) - start_time ))
elapsed=$(($(date +%s) - start_time))
if [ "$elapsed" -ge "$CONSENSUS_TIMEOUT" ]; then
fail "Consensus timeout after ${CONSENSUS_TIMEOUT}s ($nodes_ready/$NUM_NODES nodes ready)"
log "Continuing with partial consensus..."
@@ -414,8 +423,8 @@ while [ "$nodes_ready" -lt "$NUM_NODES" ]; do
for i in $(seq 1 "$NUM_NODES"); do
RPC_PORT=$((RPC_PORT_BASE + i - 1))
state=$(curl -sf "http://localhost:$RPC_PORT" \
-d '{"method":"server_info"}' 2>/dev/null \
| jq -r '.result.info.server_state' 2>/dev/null || echo "unreachable")
-d '{"method":"server_info"}' 2>/dev/null |
jq -r '.result.info.server_state' 2>/dev/null || echo "unreachable")
if [ "$state" = "proposing" ]; then
nodes_ready=$((nodes_ready + 1))
fi
@@ -439,8 +448,8 @@ fi
log "Waiting for first validated ledger..."
for attempt in $(seq 1 60); do
val_seq=$(curl -sf "http://localhost:$RPC_PORT_BASE" \
-d '{"method":"server_info"}' 2>/dev/null \
| jq -r '.result.info.validated_ledger.seq // 0' 2>/dev/null || echo 0)
-d '{"method":"server_info"}' 2>/dev/null |
jq -r '.result.info.validated_ledger.seq // 0' 2>/dev/null || echo 0)
if [ "$val_seq" -gt 2 ] 2>/dev/null; then
ok "First validated ledger: seq $val_seq"
break
@@ -457,11 +466,11 @@ done
log "Exercising RPC spans..."
curl -sf "http://localhost:$RPC_PORT_BASE" \
-d '{"method":"server_info"}' > /dev/null
-d '{"method":"server_info"}' >/dev/null
curl -sf "http://localhost:$RPC_PORT_BASE" \
-d '{"method":"server_state"}' > /dev/null
-d '{"method":"server_state"}' >/dev/null
curl -sf "http://localhost:$RPC_PORT_BASE" \
-d '{"method":"ledger","params":[{"ledger_index":"current"}]}' > /dev/null
-d '{"method":"ledger","params":[{"ledger_index":"current"}]}' >/dev/null
log "RPC commands sent. Waiting 5s for batch export..."
sleep 5
@@ -478,7 +487,7 @@ wallet_result=$(curl -sf "http://localhost:$RPC_PORT_BASE" \
DEST_ACCOUNT=$(echo "$wallet_result" | jq -r '.result.account_id' 2>/dev/null)
if [ -z "$DEST_ACCOUNT" ] || [ "$DEST_ACCOUNT" = "null" ]; then
fail "Could not generate destination wallet"
DEST_ACCOUNT="rrrrrrrrrrrrrrrrrrrrrhoLvTp" # ACCOUNT_ZERO fallback
DEST_ACCOUNT="rrrrrrrrrrrrrrrrrrrrrhoLvTp" # ACCOUNT_ZERO fallback
fi
log " Destination: $DEST_ACCOUNT"
@@ -511,8 +520,8 @@ sleep 15
log "Verifying spans in Tempo..."
# Check service registration
services=$(curl -sf "$TEMPO/api/v2/search/tag/resource.service.name/values" \
| jq -r '.tagValues[].value' 2>/dev/null || echo "")
services=$(curl -sf "$TEMPO/api/v2/search/tag/resource.service.name/values" |
jq -r '.tagValues[].value' 2>/dev/null || echo "")
if echo "$services" | grep -q "rippled"; then
ok "Service 'rippled' registered in Tempo"
else
@@ -566,16 +575,16 @@ log "--- Phase 5: Spanmetrics ---"
log "Waiting 20s for Prometheus scrape cycle..."
sleep 20
calls_count=$(curl -sf "$PROM/api/v1/query?query=traces_span_metrics_calls_total" \
| jq '.data.result | length' 2>/dev/null || echo 0)
calls_count=$(curl -sf "$PROM/api/v1/query?query=traces_span_metrics_calls_total" |
jq '.data.result | length' 2>/dev/null || echo 0)
if [ "$calls_count" -gt 0 ]; then
ok "Prometheus: traces_span_metrics_calls_total ($calls_count series)"
else
fail "Prometheus: traces_span_metrics_calls_total (0 series)"
fi
duration_count=$(curl -sf "$PROM/api/v1/query?query=traces_span_metrics_duration_milliseconds_count" \
| jq '.data.result | length' 2>/dev/null || echo 0)
duration_count=$(curl -sf "$PROM/api/v1/query?query=traces_span_metrics_duration_milliseconds_count" |
jq '.data.result | length' 2>/dev/null || echo 0)
if [ "$duration_count" -gt 0 ]; then
ok "Prometheus: duration histogram ($duration_count series)"
else
@@ -583,7 +592,7 @@ else
fi
# Check Grafana
if curl -sf http://localhost:3000/api/health > /dev/null 2>&1; then
if curl -sf http://localhost:3000/api/health >/dev/null 2>&1; then
ok "Grafana: healthy at localhost:3000"
else
fail "Grafana: not reachable at localhost:3000"
@@ -600,8 +609,8 @@ sleep 20
check_otel_metric() {
local metric_name="$1"
local result
result=$(curl -sf "$PROM/api/v1/query?query=$metric_name" \
| jq '.data.result | length' 2>/dev/null || echo 0)
result=$(curl -sf "$PROM/api/v1/query?query=$metric_name" |
jq '.data.result | length' 2>/dev/null || echo 0)
if [ "$result" -gt 0 ]; then
ok "OTel: $metric_name ($result series)"
else
@@ -648,8 +657,8 @@ sleep 15
check_otel_metric() {
local metric_name="$1"
local result
result=$(curl -sf "$PROM/api/v1/query?query=$metric_name" \
| jq '.data.result | length' 2>/dev/null || echo 0)
result=$(curl -sf "$PROM/api/v1/query?query=$metric_name" |
jq '.data.result | length' 2>/dev/null || echo 0)
if [ "$result" -gt 0 ]; then
ok "OTel: $metric_name ($result series)"
else