mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
feat: add OTel-driven regression gate for Phase 10 telemetry validation
Captures per-span / per-RPC / per-job timings from Prometheus after the workload run and diffs them against a committed baseline. Regression requires breaching both a percentage and an absolute bound, tolerating small-value noise. When the baseline is a placeholder, the comparator emits the captured JSON in the exact schema for one-time paste into baselines/baseline-timings.json, and the CI Step Summary surfaces that block for the reviewer. Scope: gate only — automated baseline persistence, benchmark.sh PromQL migration, and the historical trend dashboard remain follow-ups.
This commit is contained in:
52
.github/workflows/telemetry-validation.yml
vendored
52
.github/workflows/telemetry-validation.yml
vendored
@@ -230,6 +230,58 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
# Publishes captured OTel timings + regression report to the Step Summary.
# When the committed baseline is a placeholder (or is missing/unparseable),
# emits a fenced JSON block that can be copy-pasted directly into
# baselines/baseline-timings.json.
# When the baseline is populated, summarises the regressions so the
# PR author sees the failure reason without downloading artifacts.
# This step only reports; a missing baseline or an incomplete regression
# report degrades the summary rather than aborting the step.
- name: Print regression summary
  if: always()
  run: |
    TIMINGS="/tmp/xrpld-validation/reports/timings.json"
    REGRESSION="/tmp/xrpld-validation/reports/regression-report.json"
    BASELINE="docker/telemetry/workload/baselines/baseline-timings.json"

    # Nothing was captured: say so and stop without failing the job.
    if [ ! -f "$TIMINGS" ]; then
      echo "## Regression Gate: no timings captured" >> "$GITHUB_STEP_SUMMARY"
      exit 0
    fi

    # A missing or unparseable baseline is handled like a placeholder so the
    # paste-ready block is still published. The `|| ...` fallback keeps a jq
    # failure from aborting the step when bash runs with errexit (-e).
    IS_PLACEHOLDER="true"
    if [ -f "$BASELINE" ]; then
      IS_PLACEHOLDER=$(jq -r '.placeholder == true or (.metrics | length == 0)' "$BASELINE" 2>/dev/null) \
        || IS_PLACEHOLDER="true"
    fi

    {
      echo "## OTel Timings Regression Gate"
      echo ""
    } >> "$GITHUB_STEP_SUMMARY"

    if [ "$IS_PLACEHOLDER" = "true" ]; then
      {
        echo "### Paste into \`baselines/baseline-timings.json\`"
        echo ""
        echo "The committed baseline is a placeholder. Open a PR replacing" \
          "its contents with the JSON block below to activate the" \
          "regression gate."
        echo ""
        echo '```json'
        cat "$TIMINGS"
        # Keep the closing fence on its own line even when the timings file
        # does not end with a newline ($(...) strips a trailing newline, so a
        # non-empty result means the last byte is not a newline).
        if [ -n "$(tail -c 1 "$TIMINGS")" ]; then
          echo ""
        fi
        echo '```'
      } >> "$GITHUB_STEP_SUMMARY"
    elif [ -f "$REGRESSION" ]; then
      # Refuse to print a table of zeros for a report that is not a JSON object.
      if ! jq -e 'type == "object"' "$REGRESSION" > /dev/null 2>&1; then
        echo "Regression report could not be parsed: \`$REGRESSION\`" >> "$GITHUB_STEP_SUMMARY"
        exit 0
      fi

      # `// 0` maps absent/null summary fields to 0 so the numeric test below
      # never receives the string "null".
      REGR_COUNT=$(jq -r '.summary.regressions // 0' "$REGRESSION" 2>/dev/null) || REGR_COUNT=0
      IMPR_COUNT=$(jq -r '.summary.improvements // 0' "$REGRESSION" 2>/dev/null) || IMPR_COUNT=0
      TOTAL=$(jq -r '.summary.total // 0' "$REGRESSION" 2>/dev/null) || TOTAL=0
      {
        echo "| Stat | Count |"
        echo "|------|-------|"
        echo "| Metrics compared | $TOTAL |"
        echo "| Regressions | $REGR_COUNT |"
        echo "| Improvements | $IMPR_COUNT |"
        echo ""
      } >> "$GITHUB_STEP_SUMMARY"

      if [ "$REGR_COUNT" -gt 0 ]; then
        {
          echo "### Regressions"
          echo ""
          echo "| Metric | Baseline | Current | Δ | % | Unit |"
          echo "|--------|---------:|--------:|--:|--:|------|"
          # `.metrics[]?` yields no rows (instead of erroring) when the
          # report has no metrics array.
          jq -r '.metrics[]? | select(.regressed) | "| \(.key) | \(.baseline) | \(.current) | \(.delta) | \(.pct_change)% | \(.unit) |"' \
            "$REGRESSION"
        } >> "$GITHUB_STEP_SUMMARY"
      fi
    fi
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user