Fix CI: split telemetry workflow into build + validate jobs

The telemetry validation pipeline was building all Conan dependencies
from source instead of fetching pre-built binaries. Root cause: the
workflow ran natively on ubuntu-latest, where the system compiler
configuration (gcc-13 on Ubuntu 24.04) produced Conan package IDs
that did not match those of the pre-built packages in the XRPLF
Conan remote, so Conan fell back to building from source.

Fix by splitting the workflow into two jobs:
1. build-xrpld: runs on a self-hosted runner inside the same
   debian-bookworm-gcc-13 container the main CI uses, so that
   Conan package IDs match the pre-built packages and ccache can
   hit the remote cache. The resulting xrpld binary is uploaded
   as a workflow artifact.
2. validate-telemetry: runs on ubuntu-latest (which has Docker),
   downloads the xrpld artifact, launches the telemetry stack,
   and validates it end-to-end.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-03-12 20:49:21 +00:00
parent 7586701173
commit 4266625a2f

View File

@@ -11,15 +11,14 @@
# services, runs a multi-node cluster) — it validates the full telemetry
# stack end-to-end rather than individual unit tests.
#
# The build steps mirror the main CI pipeline (reusable-build-test-config.yml):
# - setup-conan action → build-deps action → cmake configure → cmake build
# This ensures dependency resolution, toolchain generation, and compiler
# flags are identical to what the PR workflow uses.
#
# NOTE: Unlike the main CI, this workflow runs natively on ubuntu-latest
# (not in a container), so we cannot use XRPLF/actions/prepare-runner
# which assumes /root paths. Instead we replicate the relevant setup
# steps inline.
# Architecture: two jobs to leverage cached dependencies:
# 1. build-xrpld — runs on a self-hosted runner inside the same container
# image the main CI uses (debian-bookworm-gcc-13). This ensures Conan
# packages are fetched from the XRPLF remote instead of built from
# source, and ccache hits the remote cache.
# 2. validate-telemetry — runs on ubuntu-latest (which has Docker) to
# launch the telemetry stack (OTel collector, Prometheus, Tempo, etc.)
# and validate the full pipeline end-to-end.
name: Telemetry Validation
@@ -63,37 +62,38 @@ concurrency:
group: telemetry-validation-${{ github.ref }}
cancel-in-progress: true
defaults:
run:
shell: bash
env:
BUILD_DIR: build
# ccache configuration — mirrors reusable-build-test-config.yml
CCACHE_NAMESPACE: telemetry-validation
CCACHE_REMOTE_ONLY: true
CCACHE_REMOTE_STORAGE: http://cache.dev.ripplex.io:8080|layout=bazel
CCACHE_SLOPPINESS: include_file_ctime,include_file_mtime
jobs:
validate-telemetry:
name: Telemetry Stack Validation
runs-on: ubuntu-latest
timeout-minutes: 90
# ── Job 1: Build xrpld in the same container the main CI uses ──────
# This ensures Conan binary packages are fetched from the XRPLF remote
# (matching package IDs) and ccache hits the remote compilation cache.
build-xrpld:
name: Build xrpld
runs-on: [self-hosted, Linux, X64, heavy]
container: ghcr.io/xrplf/ci/debian-bookworm:gcc-13-sha-ab4d1f0
timeout-minutes: 60
env:
CCACHE_NAMESPACE: telemetry-validation
CCACHE_REMOTE_ONLY: true
CCACHE_REMOTE_STORAGE: http://cache.dev.ripplex.io:8080|layout=bazel
CCACHE_SLOPPINESS: include_file_ctime,include_file_mtime
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y curl jq bc ninja-build ccache
- name: Install Conan and Python dependencies
run: |
pip3 install conan
pip3 install -r docker/telemetry/workload/requirements.txt
- name: Prepare runner
uses: XRPLF/actions/prepare-runner@2cbf481018d930656e9276fcc20dc0e3a0be5b6d
with:
enable_ccache: ${{ github.repository_owner == 'XRPLF' }}
- name: Print build environment
uses: ./.github/actions/print-env
continue-on-error: true
- name: Get number of processors
uses: XRPLF/actions/get-nproc@cf0433aa74563aead044a1e395610c96d65a37cf
@@ -101,26 +101,15 @@ jobs:
with:
subtract: 2
# ── Build steps (mirrors main CI: setup-conan → build-deps → cmake) ──
- name: Setup Conan
uses: ./.github/actions/setup-conan
- name: Log into Conan remote
if: ${{ github.repository == 'XRPLF/rippled' }}
env:
CONAN_USER: ${{ secrets.CONAN_REMOTE_USERNAME }}
CONAN_PASS: ${{ secrets.CONAN_REMOTE_PASSWORD }}
run: conan remote login xrplf "$CONAN_USER" --password "$CONAN_PASS"
- name: Enable optimized binary compatibility
run: echo "core.graph:compatibility_mode=optimized" >> "$(conan config home)/global.conf"
- name: Build dependencies
uses: ./.github/actions/build-deps
with:
build_nproc: ${{ steps.nproc.outputs.nproc }}
build_type: Release
log_verbosity: verbose
- name: Configure CMake
working-directory: ${{ env.BUILD_DIR }}
@@ -129,8 +118,6 @@ jobs:
-G Ninja \
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
..
- name: Build xrpld
@@ -145,12 +132,40 @@ jobs:
--target xrpld
- name: Show ccache statistics
if: ${{ github.repository_owner == 'XRPLF' }}
run: ccache --show-stats -vv
# ── Telemetry validation steps ──
- name: Upload xrpld binary
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: xrpld-telemetry
path: ${{ env.BUILD_DIR }}/xrpld
retention-days: 1
if-no-files-found: error
- name: Make scripts executable
run: chmod +x docker/telemetry/workload/*.sh
# ── Job 2: Run telemetry validation on ubuntu-latest (has Docker) ──
validate-telemetry:
name: Telemetry Stack Validation
needs: build-xrpld
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install Python dependencies
run: pip3 install -r docker/telemetry/workload/requirements.txt
- name: Download xrpld binary
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
with:
name: xrpld-telemetry
path: ${{ env.BUILD_DIR }}
- name: Make binaries and scripts executable
run: |
chmod +x ${{ env.BUILD_DIR }}/xrpld
chmod +x docker/telemetry/workload/*.sh
- name: Run full telemetry validation
id: validation