mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-12 05:06:48 +00:00
ci: Rule D — strip TraceQL scope prefix, recognize native-metric labels (L6)
Phase 9 surfaced two Rule D gaps (false positives, not data errors):
- TraceQL `span.<attr>` / `resource.<attr>` references: the bare attribute is
in L1, but the scope-prefixed form was flagged. Now strip the
span./resource./event./link/instrumentation_scope. prefix before the L1
lookup.
- Native OTel metric labels (e.g. `job_type`, `reason`) emitted by
MetricsRegistry are valid dashboard labels but are not span attributes. Add
an L6 source: parse `Add(.., {{"label", ...}})` instrument calls and accept
those label keys alongside L1 and builtins.
Verified against phase-9's real dashboards: 6 prior false positives -> 0.
79 tests (7 new for span-prefix stripping and metric-label extraction).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
18
.github/scripts/otel-naming/README.md
vendored
18
.github/scripts/otel-naming/README.md
vendored
@@ -33,15 +33,15 @@ hardcoded allowlist:
|
||||
|
||||
### Rules (each fails the build, when its inputs are present)
|
||||
|
||||
| Rule | Check |
|
||||
| ---- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| A | No stray dotted span-attribute key (only the derived resource keys may be dotted). |
|
||||
| G | Attribute keys are `lower_snake_case` (`^[a-z][a-z0-9_]*$` per dot-segment) — no camelCase, UPPERCASE, or spaces. |
|
||||
| F | No string literals as attribute keys or span-name arguments in `setAttribute`/`addEvent`/`span`/`childSpan`. Attribute _values_ are exempt (runtime data); `*SpanNames.h` definitions and test files are exempt. |
|
||||
| B | Every collector `spanmetrics.dimensions` name exists in the L1 key set. |
|
||||
| C | Every Tempo span-filter tag exists in the L1 key set. |
|
||||
| D | Every dashboard PromQL label (non-builtin) exists in the L1 key set. |
|
||||
| E | No dotted `xrpl.<domain>.<field>` attribute key in the runbook (only the L1 resource attrs `xrpl.network.*` may be dotted). Span names, filenames, OTel-standard keys, and metric labels are not flagged. |
|
||||
| Rule | Check |
|
||||
| ---- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| A | No stray dotted span-attribute key (only the derived resource keys may be dotted). |
|
||||
| G | Attribute keys are `lower_snake_case` (`^[a-z][a-z0-9_]*$` per dot-segment) — no camelCase, UPPERCASE, or spaces. |
|
||||
| F | No string literals as attribute keys or span-name arguments in `setAttribute`/`addEvent`/`span`/`childSpan`. Attribute _values_ are exempt (runtime data); `*SpanNames.h` definitions and test files are exempt. |
|
||||
| B | Every collector `spanmetrics.dimensions` name exists in the L1 key set. |
|
||||
| C | Every Tempo span-filter tag exists in the L1 key set. |
|
||||
| D | Every dashboard label resolves to an L1 span attribute, a native-metric label (L6, emitted by MetricsRegistry), or a Prometheus/Grafana builtin. TraceQL scope prefixes (`span.`/`resource.`/…) are stripped before the L1 lookup. |
|
||||
| E | No dotted `xrpl.<domain>.<field>` attribute key in the runbook (only the L1 resource attrs `xrpl.network.*` may be dotted). Span names, filenames, OTel-standard keys, and metric labels are not flagged. |
|
||||
|
||||
Rule F runs **unconditionally** (it is a purely syntactic check on the
|
||||
call-sites and needs no `*SpanNames.h`), so a code path that calls
|
||||
|
||||
57
.github/scripts/otel-naming/check_otel_naming.py
vendored
57
.github/scripts/otel-naming/check_otel_naming.py
vendored
@@ -36,6 +36,8 @@ Layers
|
||||
L3 tempo : docker/telemetry/tempo.yaml (span filter tags)
|
||||
L4 dashboards: docker/telemetry/grafana/dashboards/*.json (PromQL labels)
|
||||
L5 runbook : docs/telemetry-runbook.md (attr tables)
|
||||
L6 metrics : MetricsRegistry.cpp instrument labels (native-metric
|
||||
label keys, a valid dashboard-label source besides L1)
|
||||
|
||||
Rules (each FAILS the build, when its inputs are present)
|
||||
---------------------------------------------------------
|
||||
@@ -50,7 +52,9 @@ Rules (each FAILS the build, when its inputs are present)
|
||||
test files are exempt (they pass arbitrary literals to exercise the API).
|
||||
B Every collector spanmetrics dimension exists in the L1 key set.
|
||||
C Every tempo span-filter tag exists in the L1 key set.
|
||||
D Every dashboard PromQL label (non-builtin) exists in the L1 key set.
|
||||
D Every dashboard label resolves to an L1 span attribute, an L6
|
||||
native-metric label, or a builtin. TraceQL `span.`/`resource.` scope
|
||||
prefixes are stripped before the L1 lookup.
|
||||
E No dotted `xrpl.<domain>.<field>` attribute key in the runbook (only the
|
||||
L1 resource attrs xrpl.network.* may be dotted). Span names, filenames,
|
||||
OTel-standard keys, and metric labels are not flagged.
|
||||
@@ -131,6 +135,13 @@ STRING_LITERAL = re.compile(r'"((?:[^"\\]|\\.)*)"')
|
||||
# A C++ line comment (`//` ... end of line) and a block comment (`/* ... */`).
|
||||
LINE_COMMENT = re.compile(r"//[^\n]*")
|
||||
BLOCK_COMMENT = re.compile(r"/\*.*?\*/", re.DOTALL)
|
||||
# A TraceQL scope prefix on a label (`span.`, `resource.`, `event.`, etc.).
|
||||
# Dashboards reference span attributes in TraceQL as `span.<attr>`; the bare
|
||||
# attribute is what must exist in L1, so strip the scope before validating.
|
||||
TRACEQL_SCOPE = re.compile(r"^(?:span|resource|event|link|instrumentation_scope)\.")
|
||||
# An OTel metric label key as emitted in C++: `Add(.., {{"label", ...}})` /
|
||||
# `{{"label", value}}` instrument calls in MetricsRegistry.
|
||||
METRIC_LABEL = re.compile(r'\{\{\s*"([a-z_][a-z0-9_]*)"\s*,')
|
||||
|
||||
|
||||
def strip_comments(text: str) -> str:
|
||||
@@ -410,9 +421,14 @@ def main() -> None:
|
||||
run_rule_f(root, report, header_symbols)
|
||||
|
||||
# --- Cross-layer rules B/C/D/E (each presence-gated) -------------------
|
||||
# L6 native-metric labels: span attributes are not the only valid dashboard
|
||||
# labels — the MetricsRegistry emits OTel metrics whose label keys are an
|
||||
# additional source of truth. Derive them dynamically (same principle as L1)
|
||||
# so dashboards may reference them without tripping Rule D.
|
||||
metric_labels = metric_label_names(root)
|
||||
run_rule_b_collector(root, l1_keys, report)
|
||||
run_rule_c_tempo(root, l1_keys, report)
|
||||
run_rule_d_dashboards(root, l1_keys, report)
|
||||
run_rule_d_dashboards(root, l1_keys, metric_labels, report)
|
||||
run_rule_e_runbook(root, l1_keys, report)
|
||||
|
||||
report.render_and_exit()
|
||||
@@ -689,7 +705,25 @@ def run_rule_c_tempo(root: Path, l1_keys: Set[str], report: Report) -> None:
|
||||
report.ok(f"C: {len(span_tags)} tempo tag(s) all in L1")
|
||||
|
||||
|
||||
def run_rule_d_dashboards(root: Path, l1_keys: Set[str], report: Report) -> None:
|
||||
def metric_label_names(root: Path) -> Set[str]:
|
||||
"""L6: OTel native-metric label keys emitted by the telemetry code, e.g.
|
||||
`counter->Add(1, {{"job_type", value}})` in MetricsRegistry.cpp. These are
|
||||
a valid source of dashboard labels distinct from span attributes (L1)."""
|
||||
labels: Set[str] = set()
|
||||
for base in ("src", "include"):
|
||||
for p in (root / base).rglob("*.cpp"):
|
||||
if not p.is_file():
|
||||
continue
|
||||
text = read_source(p)
|
||||
if "MetricsRegistry" not in p.name and "metric" not in text.lower():
|
||||
continue
|
||||
labels |= set(METRIC_LABEL.findall(text))
|
||||
return labels
|
||||
|
||||
|
||||
def run_rule_d_dashboards(
|
||||
root: Path, l1_keys: Set[str], metric_labels: Set[str], report: Report
|
||||
) -> None:
|
||||
dash_dir = root / "docker" / "telemetry" / "grafana" / "dashboards"
|
||||
files = sorted(dash_dir.glob("*.json")) if dash_dir.is_dir() else []
|
||||
if not files:
|
||||
@@ -710,6 +744,9 @@ def run_rule_d_dashboards(root: Path, l1_keys: Set[str], report: Report) -> None
|
||||
"job",
|
||||
"instance",
|
||||
}
|
||||
# A dashboard label is valid if it is a span attribute (L1), a native-metric
|
||||
# label (L6), or a Prometheus/Grafana builtin.
|
||||
valid = l1_keys | metric_labels | builtins
|
||||
found = False
|
||||
for f in files:
|
||||
try:
|
||||
@@ -720,17 +757,23 @@ def run_rule_d_dashboards(root: Path, l1_keys: Set[str], report: Report) -> None
|
||||
labels: Set[str] = set()
|
||||
for m in re.finditer(r"by\s*\(([^)]*)\)", text):
|
||||
labels |= {x.strip() for x in m.group(1).split(",") if x.strip()}
|
||||
for m in re.finditer(r"\b([a-z_][a-z0-9_]*)\s*[=!]~?\s*\"", text):
|
||||
for m in re.finditer(r"\b([a-z_][a-z0-9_.]*)\s*[=!]~?\s*\"", text):
|
||||
labels.add(m.group(1))
|
||||
for lbl in sorted(labels):
|
||||
if lbl in builtins or lbl in l1_keys:
|
||||
# Strip a TraceQL scope prefix (span./resource./...) — the bare
|
||||
# attribute is what must resolve against L1.
|
||||
bare = TRACEQL_SCOPE.sub("", lbl)
|
||||
if bare in valid:
|
||||
continue
|
||||
found = True
|
||||
report.violation(
|
||||
"D", str(f.relative_to(root)), lbl, "must exist in L1 or builtin"
|
||||
"D",
|
||||
str(f.relative_to(root)),
|
||||
lbl,
|
||||
"must exist in L1, a metric label, or be a builtin",
|
||||
)
|
||||
if not found:
|
||||
report.ok(f"D: dashboard PromQL labels all in L1 ({len(files)} file(s))")
|
||||
report.ok(f"D: dashboard PromQL labels all resolve ({len(files)} file(s))")
|
||||
|
||||
|
||||
def run_rule_e_runbook(root: Path, l1_keys: Set[str], report: Report) -> None:
|
||||
|
||||
@@ -477,7 +477,7 @@ class RuleBCollector(unittest.TestCase):
|
||||
|
||||
|
||||
class RuleDDashboards(unittest.TestCase):
|
||||
def _run(self, json_text, l1):
|
||||
def _run(self, json_text, l1, metric_labels=frozenset()):
|
||||
d = Path(tempfile.mkdtemp())
|
||||
try:
|
||||
_write(
|
||||
@@ -485,7 +485,7 @@ class RuleDDashboards(unittest.TestCase):
|
||||
json_text,
|
||||
)
|
||||
report = chk.Report()
|
||||
chk.run_rule_d_dashboards(d, set(l1), report)
|
||||
chk.run_rule_d_dashboards(d, set(l1), set(metric_labels), report)
|
||||
return sorted(v[2] for v in report.violations)
|
||||
finally:
|
||||
shutil.rmtree(d)
|
||||
@@ -513,6 +513,72 @@ class RuleDDashboards(unittest.TestCase):
|
||||
def test_l1_label_passes(self):
|
||||
self.assertEqual(self._run('"q": "{command=\\"x\\"}"', {"command"}), [])
|
||||
|
||||
def test_traceql_span_prefix_stripped(self):
|
||||
# `span.establish_count` must validate against the bare L1 key.
|
||||
self.assertEqual(
|
||||
self._run(
|
||||
'"expr": "count_over_time(x) by (span.establish_count)"',
|
||||
{"establish_count"},
|
||||
),
|
||||
[],
|
||||
)
|
||||
|
||||
def test_traceql_resource_prefix_stripped(self):
|
||||
self.assertEqual(self._run('"q": "{resource.service_name=\\"x\\"}"', set()), [])
|
||||
|
||||
def test_native_metric_label_passes(self):
|
||||
# `job_type` / `reason` are emitted by MetricsRegistry, not span attrs.
|
||||
self.assertEqual(
|
||||
self._run(
|
||||
'"expr": "sum by (job_type, reason) (x)"',
|
||||
{"command"},
|
||||
metric_labels={"job_type", "reason"},
|
||||
),
|
||||
[],
|
||||
)
|
||||
|
||||
def test_unknown_label_still_flagged_with_metric_labels(self):
|
||||
# A label that is neither L1, metric label, nor builtin still fails.
|
||||
self.assertEqual(
|
||||
self._run(
|
||||
'"expr": "sum by (bogus) (x)"',
|
||||
{"command"},
|
||||
metric_labels={"job_type"},
|
||||
),
|
||||
["bogus"],
|
||||
)
|
||||
|
||||
def test_span_prefixed_unknown_still_flagged(self):
|
||||
# `span.not_a_key` whose bare form is unknown is still a violation.
|
||||
self.assertEqual(
|
||||
self._run('"expr": "x by (span.not_a_key)"', {"command"}),
|
||||
["span.not_a_key"],
|
||||
)
|
||||
|
||||
|
||||
class MetricLabelExtraction(unittest.TestCase):
|
||||
"""L6: native-metric label keys parsed from C++ instrument calls."""
|
||||
|
||||
def test_extracts_add_label(self):
|
||||
d = Path(tempfile.mkdtemp())
|
||||
try:
|
||||
_write(
|
||||
d / "src" / "xrpld" / "telemetry" / "MetricsRegistry.cpp",
|
||||
'counter->Add(1, {{"job_type", std::string(jobType)}});\n'
|
||||
'c2->Add(1, {{"reason", std::string(r)}});\n',
|
||||
)
|
||||
self.assertEqual(chk.metric_label_names(d), {"job_type", "reason"})
|
||||
finally:
|
||||
shutil.rmtree(d)
|
||||
|
||||
def test_no_metrics_file_empty(self):
|
||||
d = Path(tempfile.mkdtemp())
|
||||
try:
|
||||
(d / "src").mkdir()
|
||||
self.assertEqual(chk.metric_label_names(d), set())
|
||||
finally:
|
||||
shutil.rmtree(d)
|
||||
|
||||
|
||||
class ReportExitContract(unittest.TestCase):
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user