docs(telemetry): add thread-safety comments to stop() and sdkProvider_.reset()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-04-29 13:00:39 +01:00
parent d4e91b462e
commit 694abe2004
2 changed files with 11 additions and 0 deletions

View File

@@ -315,6 +315,13 @@ public:
// Force flush with timeout to avoid blocking indefinitely
// when the OTLP endpoint is unreachable.
sdkProvider_->ForceFlush(std::chrono::milliseconds(5000));
// TODO: sdkProvider_ is not thread-safe. This reset() races with
// getTracer() if any thread is still calling startSpan().
// Currently safe because the shutdown sequence in ApplicationImp::run()
// stops serverHandler_, overlay_, and jobQueue_ before calling
// telemetry_->stop(), so no callers should remain. If the
// shutdown order ever changes, add a std::atomic<bool> stopped_
// flag checked in getTracer() to make this robust.
sdkProvider_.reset();
trace_api::Provider::SetTracerProvider(
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(

View File

@@ -1661,6 +1661,10 @@ ApplicationImp::run()
ledgerCleaner_->stop();
m_nodeStore->stop();
perfLog_->stop();
// Telemetry must stop last among trace-producing components.
// serverHandler_, overlay_, and jobQueue_ are already stopped above,
// so no threads should be calling startSpan() at this point.
// See TODO in TelemetryImpl::stop() re: thread-safety of sdkProvider_.
telemetry_->stop();
JLOG(m_journal.info()) << "Done.";