Remove Docker healthchecks from workload compose

The container images (otel-collector-contrib, jaeger, etc.) don't
reliably include curl or wget, so the healthcheck commands that invoke
them fail and the containers never report healthy. This caused
`docker compose up -d` to block on the collector's service_healthy
condition, preventing Prometheus and Grafana from starting.

The orchestrator script (run-full-validation.sh) already polls each
service endpoint from the host with its own curl-based readiness
loops, so Docker-level healthchecks are redundant.

Also remove the obsolete `version: "3.8"` key, which Docker Compose v2
ignores.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-03-12 22:08:47 +00:00
parent 49ea2b950f
commit 67e061fb3b

View File

@@ -20,8 +20,10 @@
# - xrpld binary built with -DXRPL_ENABLE_TELEMETRY=ON
# - Validator keys generated via generate-validator-keys.sh
# - Node configs generated by run-full-validation.sh
version: "3.8"
#
# Note: No Docker healthchecks are defined here. The orchestrator script
# (run-full-validation.sh) polls each service endpoint directly from the
# host, which avoids issues with missing curl/wget in container images.
services:
# ---------------------------------------------------------------------------
@@ -46,11 +48,6 @@ services:
- tempo
networks:
- workload-net
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:13133/"]
interval: 5s
timeout: 3s
retries: 10
jaeger:
image: jaegertracing/all-in-one:latest
@@ -61,11 +58,6 @@ services:
- "14250:14250" # gRPC
networks:
- workload-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:16686/"]
interval: 5s
timeout: 3s
retries: 10
tempo:
image: grafana/tempo:2.7.2
@@ -85,15 +77,9 @@ services:
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
depends_on:
otel-collector:
condition: service_healthy
- otel-collector
networks:
- workload-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9090/-/healthy"]
interval: 5s
timeout: 3s
retries: 10
loki:
image: grafana/loki:3.4.2
@@ -102,11 +88,6 @@ services:
command: ["-config.file=/etc/loki/local-config.yaml"]
networks:
- workload-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3100/ready"]
interval: 5s
timeout: 3s
retries: 10
grafana:
image: grafana/grafana:latest
@@ -125,11 +106,6 @@ services:
- loki
networks:
- workload-net
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"]
interval: 5s
timeout: 3s
retries: 10
volumes:
tempo-data: