From 051a2197030a4e5ff149fa16879cd2c3535e2284 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Fri, 27 Feb 2026 17:57:15 +0000 Subject: [PATCH] =?UTF-8?q?Phase=202:=20Complete=20RPC=20tracing=20?= =?UTF-8?q?=E2=80=94=20interface,=20macros,=20attributes,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .clang-tidy | 309 +++++++++--------- .../scripts/levelization/results/ordering.txt | 2 + .../workflows/reusable-build-test-config.yml | 17 +- .../workflows/reusable-clang-tidy-files.yml | 4 +- .github/workflows/reusable-clang-tidy.yml | 14 +- .github/workflows/upload-conan-deps.yml | 2 +- CONTRIBUTING.md | 23 -- OpenTelemetryPlan/Phase2_taskList.md | 187 +++++++++++ OpenTelemetryPlan/Phase3_taskList.md | 238 ++++++++++++++ OpenTelemetryPlan/Phase4_taskList.md | 221 +++++++++++++ OpenTelemetryPlan/Phase5_taskList.md | 241 ++++++++++++++ cspell.config.yaml | 3 + include/xrpl/basics/MallocTrim.h | 73 ----- include/xrpl/nodestore/Backend.h | 6 +- include/xrpl/protocol/detail/features.macro | 5 +- include/xrpl/telemetry/Telemetry.h | 4 + src/libxrpl/basics/MallocTrim.cpp | 157 --------- src/libxrpl/nodestore/DatabaseNodeImp.cpp | 12 +- src/libxrpl/nodestore/DatabaseRotatingImp.cpp | 2 +- .../nodestore/backend/MemoryFactory.cpp | 7 +- src/libxrpl/nodestore/backend/NuDBFactory.cpp | 12 +- src/libxrpl/nodestore/backend/NullFactory.cpp | 4 +- .../nodestore/backend/RocksDBFactory.cpp | 15 +- src/libxrpl/telemetry/NullTelemetry.cpp | 6 + src/libxrpl/telemetry/Telemetry.cpp | 12 + src/libxrpl/telemetry/TelemetryConfig.cpp | 4 +- src/test/app/Vault_test.cpp | 8 +- src/test/nodestore/TestBase.h | 4 +- src/test/nodestore/Timing_test.cpp | 17 +- src/tests/libxrpl/CMakeLists.txt | 8 + src/tests/libxrpl/basics/MallocTrim.cpp | 209 ------------ .../libxrpl/telemetry/TelemetryConfig.cpp | 111 +++++++ src/tests/libxrpl/telemetry/TracingMacros.cpp | 141 ++++++++ src/tests/libxrpl/telemetry/main.cpp | 8 + src/xrpld/rpc/detail/RPCHandler.cpp | 4 + src/xrpld/telemetry/TracingInstrumentation.h | 26 ++ 36 files changed, 1424 insertions(+), 692 deletions(-) create mode 100644 OpenTelemetryPlan/Phase2_taskList.md create mode 100644 OpenTelemetryPlan/Phase3_taskList.md create mode 100644 OpenTelemetryPlan/Phase4_taskList.md create mode 100644 OpenTelemetryPlan/Phase5_taskList.md delete mode 100644 include/xrpl/basics/MallocTrim.h delete mode 100644 src/libxrpl/basics/MallocTrim.cpp delete mode 100644 src/tests/libxrpl/basics/MallocTrim.cpp create mode 100644 src/tests/libxrpl/telemetry/TelemetryConfig.cpp create mode 100644 src/tests/libxrpl/telemetry/TracingMacros.cpp create mode 100644 src/tests/libxrpl/telemetry/main.cpp diff --git a/.clang-tidy b/.clang-tidy index 5f4187b008..f7009c4666 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,143 +1,105 @@ --- Checks: "-*, - bugprone-argument-comment, - bugprone-assert-side-effect, - bugprone-bad-signal-to-kill-thread, - bugprone-bool-pointer-implicit-conversion, - bugprone-casting-through-void, - bugprone-chained-comparison, - bugprone-compare-pointer-to-member-virtual-function, - bugprone-copy-constructor-init, - bugprone-dangling-handle, - bugprone-dynamic-static-initializers, - bugprone-fold-init-type, - bugprone-forward-declaration-namespace, - bugprone-inaccurate-erase, - bugprone-incorrect-enable-if, - bugprone-incorrect-roundings, - bugprone-infinite-loop, - bugprone-integer-division, - bugprone-lambda-function-name, - bugprone-macro-parentheses, - bugprone-macro-repeated-side-effects, - bugprone-misplaced-operator-in-strlen-in-alloc, - bugprone-misplaced-pointer-arithmetic-in-alloc, - bugprone-misplaced-widening-cast, - bugprone-multi-level-implicit-pointer-conversion, - bugprone-multiple-new-in-one-expression, - bugprone-multiple-statement-macro, - bugprone-no-escape, - bugprone-non-zero-enum-to-bool-conversion, - bugprone-parent-virtual-call, - bugprone-posix-return, - bugprone-redundant-branch-condition, - bugprone-shared-ptr-array-mismatch, - bugprone-signal-handler, - bugprone-signed-char-misuse, - bugprone-sizeof-container, - bugprone-spuriously-wake-up-functions, - bugprone-standalone-empty, - bugprone-string-constructor, - bugprone-string-integer-assignment, - bugprone-string-literal-with-embedded-nul, - bugprone-stringview-nullptr, - bugprone-suspicious-enum-usage, - bugprone-suspicious-include, - bugprone-suspicious-memory-comparison, - bugprone-suspicious-memset-usage, - bugprone-suspicious-realloc-usage, - bugprone-suspicious-semicolon, - bugprone-suspicious-string-compare, - bugprone-swapped-arguments, - bugprone-terminating-continue, - bugprone-throw-keyword-missing, - bugprone-undefined-memory-manipulation, - bugprone-undelegated-constructor, - bugprone-unhandled-exception-at-new, - bugprone-unique-ptr-array-mismatch, - bugprone-unsafe-functions, - bugprone-virtual-near-miss, - cppcoreguidelines-no-suspend-with-lock, - cppcoreguidelines-virtual-class-destructor, - hicpp-ignored-remove-result, - misc-definitions-in-headers, - misc-header-include-cycle, - misc-misplaced-const, - misc-static-assert, - misc-throw-by-value-catch-by-reference, - misc-unused-alias-decls, - misc-unused-using-decls, - readability-duplicate-include, - readability-enum-initial-value, - readability-misleading-indentation, - readability-non-const-parameter, - readability-redundant-declaration, - readability-reference-to-constructed-temporary, - modernize-deprecated-headers, - modernize-make-shared, - modernize-make-unique, - performance-implicit-conversion-in-loop, - performance-move-constructor-init, - performance-trivially-destructible + bugprone-argument-comment " -# --- -# checks that have some issues that need to be resolved: -# -# bugprone-empty-catch, +# bugprone-assert-side-effect, +# bugprone-bad-signal-to-kill-thread, +# bugprone-bool-pointer-implicit-conversion, +# bugprone-casting-through-void, +# bugprone-chained-comparison, +# bugprone-compare-pointer-to-member-virtual-function, +# bugprone-copy-constructor-init, # bugprone-crtp-constructor-accessibility, +# bugprone-dangling-handle, +# bugprone-dynamic-static-initializers, +# bugprone-empty-catch, +# bugprone-fold-init-type, +# bugprone-forward-declaration-namespace, +# bugprone-inaccurate-erase, # bugprone-inc-dec-in-conditions, -# bugprone-reserved-identifier, +# bugprone-incorrect-enable-if, +# bugprone-incorrect-roundings, +# bugprone-infinite-loop, +# bugprone-integer-division, +# bugprone-lambda-function-name, +# bugprone-macro-parentheses, +# bugprone-macro-repeated-side-effects, +# bugprone-misplaced-operator-in-strlen-in-alloc, +# bugprone-misplaced-pointer-arithmetic-in-alloc, +# bugprone-misplaced-widening-cast, # bugprone-move-forwarding-reference, -# bugprone-unused-local-non-trivial-variable, -# bugprone-return-const-ref-from-parameter, -# bugprone-switch-missing-default-case, -# bugprone-sizeof-expression, -# bugprone-suspicious-stringview-data-usage, -# bugprone-suspicious-missing-comma, -# bugprone-pointer-arithmetic-on-polymorphic-object, +# bugprone-multi-level-implicit-pointer-conversion, +# bugprone-multiple-new-in-one-expression, +# bugprone-multiple-statement-macro, +# bugprone-no-escape, +# bugprone-non-zero-enum-to-bool-conversion, # bugprone-optional-value-conversion, +# bugprone-parent-virtual-call, +# bugprone-pointer-arithmetic-on-polymorphic-object, +# bugprone-posix-return, +# bugprone-redundant-branch-condition, +# bugprone-reserved-identifier, +# bugprone-return-const-ref-from-parameter, +# bugprone-shared-ptr-array-mismatch, +# bugprone-signal-handler, +# bugprone-signed-char-misuse, +# bugprone-sizeof-container, +# bugprone-sizeof-expression, +# bugprone-spuriously-wake-up-functions, +# bugprone-standalone-empty, +# bugprone-string-constructor, +# bugprone-string-integer-assignment, +# bugprone-string-literal-with-embedded-nul, +# bugprone-stringview-nullptr, +# bugprone-suspicious-enum-usage, +# bugprone-suspicious-include, +# bugprone-suspicious-memory-comparison, +# bugprone-suspicious-memset-usage, +# bugprone-suspicious-missing-comma, +# bugprone-suspicious-realloc-usage, +# bugprone-suspicious-semicolon, +# bugprone-suspicious-string-compare, +# bugprone-suspicious-stringview-data-usage, +# bugprone-swapped-arguments, +# bugprone-switch-missing-default-case, +# bugprone-terminating-continue, +# bugprone-throw-keyword-missing, # bugprone-too-small-loop-variable, +# bugprone-undefined-memory-manipulation, +# bugprone-undelegated-constructor, +# bugprone-unhandled-exception-at-new, +# bugprone-unhandled-self-assignment, +# bugprone-unique-ptr-array-mismatch, +# bugprone-unsafe-functions, +# bugprone-unused-local-non-trivial-variable, +# bugprone-unused-raii, # bugprone-unused-return-value, # bugprone-use-after-move, -# bugprone-unhandled-self-assignment, -# bugprone-unused-raii, -# -# cppcoreguidelines-misleading-capture-default-by-value, +# bugprone-virtual-near-miss, # cppcoreguidelines-init-variables, +# cppcoreguidelines-misleading-capture-default-by-value, +# cppcoreguidelines-no-suspend-with-lock, # cppcoreguidelines-pro-type-member-init, # cppcoreguidelines-pro-type-static-cast-downcast, -# cppcoreguidelines-use-default-member-init, # cppcoreguidelines-rvalue-reference-param-not-moved, -# +# cppcoreguidelines-use-default-member-init, +# cppcoreguidelines-virtual-class-destructor, +# hicpp-ignored-remove-result, # llvm-namespace-comment, # misc-const-correctness, +# misc-definitions-in-headers, +# misc-header-include-cycle, # misc-include-cleaner, +# misc-misplaced-const, # misc-redundant-expression, -# -# readability-avoid-nested-conditional-operator, -# readability-avoid-return-with-void-value, -# readability-braces-around-statements, -# readability-container-contains, -# readability-container-size-empty, -# readability-convert-member-functions-to-static, -# readability-const-return-type, -# readability-else-after-return, -# readability-implicit-bool-conversion, -# readability-inconsistent-declaration-parameter-name, -# readability-identifier-naming, -# readability-make-member-function-const, -# readability-math-missing-parentheses, -# readability-redundant-inline-specifier, -# readability-redundant-member-init, -# readability-redundant-casting, -# readability-redundant-string-init, -# readability-simplify-boolean-expr, -# readability-static-definition-in-anonymous-namespace, -# readability-suspicious-call-argument, -# readability-use-std-min-max, -# readability-static-accessed-through-instance, -# +# misc-static-assert, +# misc-throw-by-value-catch-by-reference, +# misc-unused-alias-decls, +# misc-unused-using-decls, # modernize-concat-nested-namespaces, +# modernize-deprecated-headers, +# modernize-make-shared, +# modernize-make-unique, # modernize-pass-by-value, # modernize-type-traits, # modernize-use-designated-initializers, @@ -149,50 +111,79 @@ Checks: "-*, # modernize-use-starts-ends-with, # modernize-use-std-numbers, # modernize-use-using, -# # performance-faster-string-find, # performance-for-range-copy, +# performance-implicit-conversion-in-loop, # performance-inefficient-vector-operation, # performance-move-const-arg, +# performance-move-constructor-init, # performance-no-automatic-move, -# --- +# performance-trivially-destructible, +# readability-avoid-nested-conditional-operator, +# readability-avoid-return-with-void-value, +# readability-braces-around-statements, +# readability-const-return-type, +# readability-container-contains, +# readability-container-size-empty, +# readability-convert-member-functions-to-static, +# readability-duplicate-include, +# readability-else-after-return, +# readability-enum-initial-value, +# readability-implicit-bool-conversion, +# readability-inconsistent-declaration-parameter-name, +# readability-identifier-naming, +# readability-make-member-function-const, +# readability-math-missing-parentheses, +# readability-misleading-indentation, +# readability-non-const-parameter, +# readability-redundant-casting, +# readability-redundant-declaration, +# readability-redundant-inline-specifier, +# readability-redundant-member-init, +# readability-redundant-string-init, +# readability-reference-to-constructed-temporary, +# readability-simplify-boolean-expr, +# readability-static-accessed-through-instance, +# readability-static-definition-in-anonymous-namespace, +# readability-suspicious-call-argument, +# readability-use-std-min-max # -CheckOptions: - # readability-braces-around-statements.ShortStatementLines: 2 - # readability-identifier-naming.MacroDefinitionCase: UPPER_CASE - # readability-identifier-naming.ClassCase: CamelCase - # readability-identifier-naming.StructCase: CamelCase - # readability-identifier-naming.UnionCase: CamelCase - # readability-identifier-naming.EnumCase: CamelCase - # readability-identifier-naming.EnumConstantCase: CamelCase - # readability-identifier-naming.ScopedEnumConstantCase: CamelCase - # readability-identifier-naming.GlobalConstantCase: UPPER_CASE - # readability-identifier-naming.GlobalConstantPrefix: "k" - # readability-identifier-naming.GlobalVariableCase: CamelCase - # readability-identifier-naming.GlobalVariablePrefix: "g" - # readability-identifier-naming.ConstexprFunctionCase: camelBack - # readability-identifier-naming.ConstexprMethodCase: camelBack - # readability-identifier-naming.ClassMethodCase: camelBack - # readability-identifier-naming.ClassMemberCase: camelBack - # readability-identifier-naming.ClassConstantCase: UPPER_CASE - # readability-identifier-naming.ClassConstantPrefix: "k" - # readability-identifier-naming.StaticConstantCase: UPPER_CASE - # readability-identifier-naming.StaticConstantPrefix: "k" - # readability-identifier-naming.StaticVariableCase: UPPER_CASE - # readability-identifier-naming.StaticVariablePrefix: "k" - # readability-identifier-naming.ConstexprVariableCase: UPPER_CASE - # readability-identifier-naming.ConstexprVariablePrefix: "k" - # readability-identifier-naming.LocalConstantCase: camelBack - # readability-identifier-naming.LocalVariableCase: camelBack - # readability-identifier-naming.TemplateParameterCase: CamelCase - # readability-identifier-naming.ParameterCase: camelBack - # readability-identifier-naming.FunctionCase: camelBack - # readability-identifier-naming.MemberCase: camelBack - # readability-identifier-naming.PrivateMemberSuffix: _ - # readability-identifier-naming.ProtectedMemberSuffix: _ - # readability-identifier-naming.PublicMemberSuffix: "" - # readability-identifier-naming.FunctionIgnoredRegexp: ".*tag_invoke.*" - bugprone-unsafe-functions.ReportMoreUnsafeFunctions: true +# CheckOptions: +# readability-braces-around-statements.ShortStatementLines: 2 +# readability-identifier-naming.MacroDefinitionCase: UPPER_CASE +# readability-identifier-naming.ClassCase: CamelCase +# readability-identifier-naming.StructCase: CamelCase +# readability-identifier-naming.UnionCase: CamelCase +# readability-identifier-naming.EnumCase: CamelCase +# readability-identifier-naming.EnumConstantCase: CamelCase +# readability-identifier-naming.ScopedEnumConstantCase: CamelCase +# readability-identifier-naming.GlobalConstantCase: UPPER_CASE +# readability-identifier-naming.GlobalConstantPrefix: "k" +# readability-identifier-naming.GlobalVariableCase: CamelCase +# readability-identifier-naming.GlobalVariablePrefix: "g" +# readability-identifier-naming.ConstexprFunctionCase: camelBack +# readability-identifier-naming.ConstexprMethodCase: camelBack +# readability-identifier-naming.ClassMethodCase: camelBack +# readability-identifier-naming.ClassMemberCase: camelBack +# readability-identifier-naming.ClassConstantCase: UPPER_CASE +# readability-identifier-naming.ClassConstantPrefix: "k" +# readability-identifier-naming.StaticConstantCase: UPPER_CASE +# readability-identifier-naming.StaticConstantPrefix: "k" +# readability-identifier-naming.StaticVariableCase: UPPER_CASE +# readability-identifier-naming.StaticVariablePrefix: "k" +# readability-identifier-naming.ConstexprVariableCase: UPPER_CASE +# readability-identifier-naming.ConstexprVariablePrefix: "k" +# readability-identifier-naming.LocalConstantCase: camelBack +# readability-identifier-naming.LocalVariableCase: camelBack +# readability-identifier-naming.TemplateParameterCase: CamelCase +# readability-identifier-naming.ParameterCase: camelBack +# readability-identifier-naming.FunctionCase: camelBack +# readability-identifier-naming.MemberCase: camelBack +# readability-identifier-naming.PrivateMemberSuffix: _ +# readability-identifier-naming.ProtectedMemberSuffix: _ +# readability-identifier-naming.PublicMemberSuffix: "" +# readability-identifier-naming.FunctionIgnoredRegexp: ".*tag_invoke.*" +# bugprone-unsafe-functions.ReportMoreUnsafeFunctions: true # bugprone-unused-return-value.CheckedReturnTypes: ::std::error_code;::std::error_condition;::std::errc # misc-include-cleaner.IgnoreHeaders: '.*/(detail|impl)/.*;.*(expected|unexpected).*;.*ranges_lower_bound\.h;time.h;stdlib.h;__chrono/.*;fmt/chrono.h;boost/uuid/uuid_hash.hpp' # diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt index 56ea8d2223..ad9c3c43c7 100644 --- a/.github/scripts/levelization/results/ordering.txt +++ b/.github/scripts/levelization/results/ordering.txt @@ -174,8 +174,10 @@ test.toplevel > test.csf test.toplevel > xrpl.json test.unit_test > xrpl.basics tests.libxrpl > xrpl.basics +tests.libxrpl > xrpld.telemetry tests.libxrpl > xrpl.json tests.libxrpl > xrpl.net +tests.libxrpl > xrpl.telemetry xrpl.conditions > xrpl.basics xrpl.conditions > xrpl.protocol xrpl.core > xrpl.basics diff --git a/.github/workflows/reusable-build-test-config.yml b/.github/workflows/reusable-build-test-config.yml index dabcc737f8..4f52b68b84 100644 --- a/.github/workflows/reusable-build-test-config.yml +++ b/.github/workflows/reusable-build-test-config.yml @@ -101,7 +101,7 @@ jobs: steps: - name: Cleanup workspace (macOS and Windows) if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }} - uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4 + uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -229,21 +229,8 @@ jobs: env: BUILD_NPROC: ${{ steps.nproc.outputs.nproc }} run: | - set -o pipefail - ./xrpld --unittest --unittest-jobs "${BUILD_NPROC}" 2>&1 | tee unittest.log + ./xrpld --unittest --unittest-jobs "${BUILD_NPROC}" - - name: Show test failure summary - if: ${{ failure() && !inputs.build_only }} - working-directory: ${{ runner.os == 'Windows' && format('{0}/{1}', env.BUILD_DIR, inputs.build_type) || env.BUILD_DIR }} - run: | - if [ ! -f unittest.log ]; then - echo "unittest.log not found; embedded tests may not have run." - exit 0 - fi - - if ! grep -E "failed" unittest.log; then - echo "Log present but no failure lines found in unittest.log." - fi - name: Debug failure (Linux) if: ${{ failure() && runner.os == 'Linux' && !inputs.build_only }} run: | diff --git a/.github/workflows/reusable-clang-tidy-files.yml b/.github/workflows/reusable-clang-tidy-files.yml index d36dea747c..432da1d15c 100644 --- a/.github/workflows/reusable-clang-tidy-files.yml +++ b/.github/workflows/reusable-clang-tidy-files.yml @@ -78,9 +78,9 @@ jobs: id: run_clang_tidy continue-on-error: true env: - TARGETS: ${{ inputs.files != '' && inputs.files || 'src tests' }} + FILES: ${{ inputs.files }} run: | - run-clang-tidy -j ${{ steps.nproc.outputs.nproc }} -p "${BUILD_DIR}" ${TARGETS} 2>&1 | tee clang-tidy-output.txt + run-clang-tidy -j ${{ steps.nproc.outputs.nproc }} -p "$BUILD_DIR" $FILES 2>&1 | tee clang-tidy-output.txt - name: Upload clang-tidy output if: steps.run_clang_tidy.outcome != 'success' diff --git a/.github/workflows/reusable-clang-tidy.yml b/.github/workflows/reusable-clang-tidy.yml index 7050d3509f..7c300ee26e 100644 --- a/.github/workflows/reusable-clang-tidy.yml +++ b/.github/workflows/reusable-clang-tidy.yml @@ -22,8 +22,7 @@ jobs: if: ${{ inputs.check_only_changed }} runs-on: ubuntu-latest outputs: - clang_tidy_config_changed: ${{ steps.changed_clang_tidy.outputs.any_changed }} - any_cpp_changed: ${{ steps.changed_files.outputs.any_changed }} + any_changed: ${{ steps.changed_files.outputs.any_changed }} all_changed_files: ${{ steps.changed_files.outputs.all_changed_files }} steps: - name: Checkout repository @@ -39,17 +38,10 @@ jobs: **/*.ipp separator: " " - - name: Get changed clang-tidy configuration - id: changed_clang_tidy - uses: tj-actions/changed-files@7dee1b0c1557f278e5c7dc244927139d78c0e22a # v47.0.4 - with: - files: | - .clang-tidy - run-clang-tidy: needs: [determine-files] - if: ${{ always() && !cancelled() && (!inputs.check_only_changed || needs.determine-files.outputs.any_cpp_changed == 'true' || needs.determine-files.outputs.clang_tidy_config_changed == 'true') }} + if: ${{ always() && !cancelled() && (!inputs.check_only_changed || needs.determine-files.outputs.any_changed == 'true') }} uses: ./.github/workflows/reusable-clang-tidy-files.yml with: - files: ${{ (needs.determine-files.outputs.clang_tidy_config_changed == 'true' && '') || (inputs.check_only_changed && needs.determine-files.outputs.all_changed_files || '') }} + files: ${{ inputs.check_only_changed && needs.determine-files.outputs.all_changed_files || '' }} create_issue_on_failure: ${{ inputs.create_issue_on_failure }} diff --git a/.github/workflows/upload-conan-deps.yml b/.github/workflows/upload-conan-deps.yml index df8aa43a18..b260c4c4f3 100644 --- a/.github/workflows/upload-conan-deps.yml +++ b/.github/workflows/upload-conan-deps.yml @@ -64,7 +64,7 @@ jobs: steps: - name: Cleanup workspace (macOS and Windows) if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }} - uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4 + uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4bb1db8689..a928065ef2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -251,29 +251,6 @@ pip3 install pre-commit pre-commit install ``` -## Clang-tidy - -All code must pass `clang-tidy` checks according to the settings in [`.clang-tidy`](./.clang-tidy). - -There is a Continuous Integration job that runs clang-tidy on pull requests. The CI will check: - -- All changed C++ files (`.cpp`, `.h`, `.ipp`) when only code files are modified -- **All files in the repository** when the `.clang-tidy` configuration file is changed - -This ensures that configuration changes don't introduce new warnings across the codebase. - -### Running clang-tidy locally - -Before running clang-tidy, you must build the project to generate required files (particularly protobuf headers). Refer to [`BUILD.md`](./BUILD.md) for build instructions. - -Then run clang-tidy on your local changes: - -``` -run-clang-tidy -p build src tests -``` - -This will check all source files in the `src` and `tests` directories using the compile commands from your `build` directory. - ## Contracts and instrumentation We are using [Antithesis](https://antithesis.com/) for continuous fuzzing, diff --git a/OpenTelemetryPlan/Phase2_taskList.md b/OpenTelemetryPlan/Phase2_taskList.md new file mode 100644 index 0000000000..542fd55a1c --- /dev/null +++ b/OpenTelemetryPlan/Phase2_taskList.md @@ -0,0 +1,187 @@ +# Phase 2: RPC Tracing Completion Task List + +> **Goal**: Complete full RPC tracing coverage with W3C Trace Context propagation, unit tests, and performance validation. Build on the POC foundation to achieve production-quality RPC observability. +> +> **Scope**: W3C header extraction, TraceContext propagation utilities, unit tests for core telemetry, integration tests for RPC tracing, and performance benchmarks. +> +> **Branch**: `pratik/otel-phase2-rpc-tracing` (from `pratik/OpenTelemetry_and_DistributedTracing_planning`) + +### Related Plan Documents + +| Document | Relevance | +| ------------------------------------------------------------ | ------------------------------------------------------------- | +| [04-code-samples.md](./04-code-samples.md) | TraceContextPropagator (§4.4.2), RPC instrumentation (§4.5.3) | +| [02-design-decisions.md](./02-design-decisions.md) | W3C Trace Context (§2.5), span attributes (§2.4.2) | +| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 2 tasks (§6.3), definition of done (§6.11.2) | + +--- + +## Task 2.1: Implement W3C Trace Context HTTP Header Extraction + +**Objective**: Extract `traceparent` and `tracestate` headers from incoming HTTP RPC requests so external callers can propagate their trace context into rippled. + +**What to do**: + +- Create `include/xrpl/telemetry/TraceContextPropagator.h`: + - `extractFromHeaders(headerGetter)` - extract W3C traceparent/tracestate from HTTP headers + - `injectToHeaders(ctx, headerSetter)` - inject trace context into response headers + - Use OTel's `TextMapPropagator` with `W3CTraceContextPropagator` for standards compliance + - Only compiled when `XRPL_ENABLE_TELEMETRY` is defined + +- Create `src/libxrpl/telemetry/TraceContextPropagator.cpp`: + - Implement a simple `TextMapCarrier` adapter for HTTP headers + - Use `opentelemetry::context::propagation::GlobalTextMapPropagator` for extraction/injection + - Register the W3C propagator in `TelemetryImpl::start()` + +- Modify `src/xrpld/rpc/detail/ServerHandler.cpp`: + - In the HTTP request handler, extract parent context from headers before creating span + - Pass extracted context to `startSpan()` as parent + - Inject trace context into response headers + +**Key new files**: + +- `include/xrpl/telemetry/TraceContextPropagator.h` +- `src/libxrpl/telemetry/TraceContextPropagator.cpp` + +**Key modified files**: + +- `src/xrpld/rpc/detail/ServerHandler.cpp` +- `src/libxrpl/telemetry/Telemetry.cpp` (register W3C propagator) + +**Reference**: + +- [04-code-samples.md §4.4.2](./04-code-samples.md) — TraceContextPropagator with extractFromHeaders/injectToHeaders +- [02-design-decisions.md §2.5](./02-design-decisions.md) — W3C Trace Context propagation design + +--- + +## Task 2.2: Add XRPL_TRACE_PEER Macro + +**Objective**: Add the missing peer-tracing macro for future Phase 3 use and ensure macro completeness. + +**What to do**: + +- Edit `src/xrpld/telemetry/TracingInstrumentation.h`: + - Add `XRPL_TRACE_PEER(_tel_obj_, _span_name_)` macro that checks `shouldTracePeer()` + - Add `XRPL_TRACE_LEDGER(_tel_obj_, _span_name_)` macro (for future ledger tracing) + - Ensure disabled variants expand to `((void)0)` + +**Key modified file**: + +- `src/xrpld/telemetry/TracingInstrumentation.h` + +--- + +## Task 2.3: Add shouldTraceLedger() to Telemetry Interface + +**Objective**: The `Setup` struct has a `traceLedger` field but there's no corresponding virtual method. Add it for interface completeness. + +**What to do**: + +- Edit `include/xrpl/telemetry/Telemetry.h`: + - Add `virtual bool shouldTraceLedger() const = 0;` + +- Update all implementations: + - `src/libxrpl/telemetry/Telemetry.cpp` (TelemetryImpl, NullTelemetryOtel) + - `src/libxrpl/telemetry/NullTelemetry.cpp` (NullTelemetry) + +**Key modified files**: + +- `include/xrpl/telemetry/Telemetry.h` +- `src/libxrpl/telemetry/Telemetry.cpp` +- `src/libxrpl/telemetry/NullTelemetry.cpp` + +--- + +## Task 2.4: Unit Tests for Core Telemetry Infrastructure + +**Objective**: Add unit tests for the core telemetry abstractions to validate correctness and catch regressions. + +**What to do**: + +- Create `src/test/telemetry/Telemetry_test.cpp`: + - Test NullTelemetry: verify all methods return expected no-op values + - Test Setup defaults: verify all Setup fields have correct defaults + - Test setup_Telemetry config parser: verify parsing of [telemetry] section + - Test enabled/disabled factory paths + - Test shouldTrace\* methods respect config flags + +- Create `src/test/telemetry/SpanGuard_test.cpp`: + - Test SpanGuard RAII lifecycle (span ends on destruction) + - Test move constructor works correctly + - Test setAttribute, setOk, setStatus, addEvent, recordException + - Test context() returns valid context + +- Add test files to CMake build + +**Key new files**: + +- `src/test/telemetry/Telemetry_test.cpp` +- `src/test/telemetry/SpanGuard_test.cpp` + +**Reference**: + +- [06-implementation-phases.md §6.11.1](./06-implementation-phases.md) — Phase 1 exit criteria (unit tests passing) + +--- + +## Task 2.5: Enhance RPC Span Attributes + +**Objective**: Add additional attributes to RPC spans per the semantic conventions defined in the plan. + +**What to do**: + +- Edit `src/xrpld/rpc/detail/ServerHandler.cpp`: + - Add `http.method` attribute for HTTP requests + - Add `http.status_code` attribute for responses + - Add `net.peer.ip` attribute for client IP (if available) + +- Edit `src/xrpld/rpc/detail/RPCHandler.cpp`: + - Add `xrpl.rpc.duration_ms` attribute on completion + - Add error message attribute on failure: `xrpl.rpc.error_message` + +**Key modified files**: + +- `src/xrpld/rpc/detail/ServerHandler.cpp` +- `src/xrpld/rpc/detail/RPCHandler.cpp` + +**Reference**: + +- [02-design-decisions.md §2.4.2](./02-design-decisions.md) — RPC attribute schema + +--- + +## Task 2.6: Build Verification and Performance Baseline + +**Objective**: Verify the build succeeds with and without telemetry, and establish a performance baseline. + +**What to do**: + +1. Build with `telemetry=ON` and verify no compilation errors +2. Build with `telemetry=OFF` and verify no regressions +3. Run existing unit tests to verify no breakage +4. Document any build issues in lessons.md + +**Verification Checklist**: + +- [ ] `conan install . --build=missing -o telemetry=True` succeeds +- [ ] `cmake --preset default -Dtelemetry=ON` configures correctly +- [ ] Build succeeds with telemetry ON +- [ ] Build succeeds with telemetry OFF +- [ ] Existing tests pass with telemetry ON +- [ ] Existing tests pass with telemetry OFF + +--- + +## Summary + +| Task | Description | New Files | Modified Files | Depends On | +| ---- | ------------------------------------------- | --------- | -------------- | ---------- | +| 2.1 | W3C Trace Context header extraction | 2 | 2 | POC | +| 2.2 | Add XRPL_TRACE_PEER/LEDGER macros | 0 | 1 | POC | +| 2.3 | Add shouldTraceLedger() interface method | 0 | 3 | POC | +| 2.4 | Unit tests for core telemetry | 2 | 1 | POC | +| 2.5 | Enhanced RPC span attributes | 0 | 2 | POC | +| 2.6 | Build verification and performance baseline | 0 | 0 | 2.1-2.5 | + +**Parallel work**: Tasks 2.1, 2.2, 2.3 can run in parallel. Task 2.4 depends on 2.3. Task 2.5 can run in parallel with 2.4. Task 2.6 depends on all others. diff --git a/OpenTelemetryPlan/Phase3_taskList.md b/OpenTelemetryPlan/Phase3_taskList.md new file mode 100644 index 0000000000..18af7fff26 --- /dev/null +++ b/OpenTelemetryPlan/Phase3_taskList.md @@ -0,0 +1,238 @@ +# Phase 3: Transaction Tracing Task List + +> **Goal**: Trace the full transaction lifecycle from RPC submission through peer relay, including cross-node context propagation via Protocol Buffer extensions. This is the WALK phase that demonstrates true distributed tracing. +> +> **Scope**: Protocol Buffer `TraceContext` message, context serialization, PeerImp transaction instrumentation, NetworkOPs processing instrumentation, HashRouter visibility, and multi-node relay context propagation. +> +> **Branch**: `pratik/otel-phase3-tx-tracing` (from `pratik/otel-phase2-rpc-tracing`) + +### Related Plan Documents + +| Document | Relevance | +| ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------ | +| [04-code-samples.md](./04-code-samples.md) | TraceContext protobuf (§4.4.1), PeerImp instrumentation (§4.5.1), context serialization (§4.4.2) | +| [01-architecture-analysis.md](./01-architecture-analysis.md) | Transaction flow (§1.3), key trace points (§1.6) | +| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 3 tasks (§6.4), definition of done (§6.11.3) | +| [02-design-decisions.md](./02-design-decisions.md) | Context propagation design (§2.5), attribute schema (§2.4.3) | + +--- + +## Task 3.1: Define TraceContext Protocol Buffer Message + +**Objective**: Add trace context fields to the P2P protocol messages so trace IDs can propagate across nodes. + +**What to do**: + +- Edit `include/xrpl/proto/xrpl.proto` (or `src/ripple/proto/ripple.proto`, wherever the proto is): + - Add `TraceContext` message definition: + ```protobuf + message TraceContext { + bytes trace_id = 1; // 16-byte trace identifier + bytes span_id = 2; // 8-byte span identifier + uint32 trace_flags = 3; // bit 0 = sampled + string trace_state = 4; // W3C tracestate value + } + ``` + - Add `optional TraceContext trace_context = 1001;` to: + - `TMTransaction` + - `TMProposeSet` (for Phase 4 use) + - `TMValidation` (for Phase 4 use) + - Use high field numbers (1001+) to avoid conflicts with existing fields + +- Regenerate protobuf C++ code + +**Key modified files**: + +- `include/xrpl/proto/xrpl.proto` (or equivalent) + +**Reference**: + +- [04-code-samples.md §4.4.1](./04-code-samples.md) — TraceContext message definition +- [02-design-decisions.md §2.5.2](./02-design-decisions.md) — Protocol buffer context propagation design + +--- + +## Task 3.2: Implement Protobuf Context Serialization + +**Objective**: Create utilities to serialize/deserialize OTel trace context to/from protobuf `TraceContext` messages. + +**What to do**: + +- Create `include/xrpl/telemetry/TraceContextPropagator.h` (extend from Phase 2 if exists, or add protobuf methods): + - Add protobuf-specific methods: + - `static Context extractFromProtobuf(protocol::TraceContext const& proto)` — reconstruct OTel context from protobuf fields + - `static void injectToProtobuf(Context const& ctx, protocol::TraceContext& proto)` — serialize current span context into protobuf fields + - Both methods guard behind `#ifdef XRPL_ENABLE_TELEMETRY` + +- Create/extend `src/libxrpl/telemetry/TraceContextPropagator.cpp`: + - Implement extraction: read trace_id (16 bytes), span_id (8 bytes), trace_flags from protobuf, construct `SpanContext`, wrap in `Context` + - Implement injection: get current span from context, serialize its TraceId, SpanId, and TraceFlags into protobuf fields + +**Key new/modified files**: + +- `include/xrpl/telemetry/TraceContextPropagator.h` +- `src/libxrpl/telemetry/TraceContextPropagator.cpp` + +**Reference**: + +- [04-code-samples.md §4.4.2](./04-code-samples.md) — Full extract/inject implementation + +--- + +## Task 3.3: Instrument PeerImp Transaction Handling + +**Objective**: Add trace spans to the peer-level transaction receive and relay path. + +**What to do**: + +- Edit `src/xrpld/overlay/detail/PeerImp.cpp`: + - In `onMessage(TMTransaction)` / `handleTransaction()`: + - Extract parent trace context from incoming `TMTransaction::trace_context` field (if present) + - Create `tx.receive` span as child of extracted context (or new root if none) + - Set attributes: `xrpl.tx.hash`, `xrpl.peer.id`, `xrpl.tx.status` + - On HashRouter suppression (duplicate): set `xrpl.tx.suppressed=true`, add `tx.duplicate` event + - Wrap validation call with child span `tx.validate` + - Wrap relay with `tx.relay` span + - When relaying to peers: + - Inject current trace context into outgoing `TMTransaction::trace_context` + - Set `xrpl.tx.relay_count` attribute + +- Include `TracingInstrumentation.h` and use `XRPL_TRACE_TX` macro + +**Key modified files**: + +- `src/xrpld/overlay/detail/PeerImp.cpp` + +**Reference**: + +- [04-code-samples.md §4.5.1](./04-code-samples.md) — Full PeerImp instrumentation example +- [01-architecture-analysis.md §1.3](./01-architecture-analysis.md) — Transaction flow diagram +- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — tx.receive trace point + +--- + +## Task 3.4: Instrument NetworkOPs Transaction Processing + +**Objective**: Trace the transaction processing pipeline in NetworkOPs, covering both sync and async paths. + +**What to do**: + +- Edit `src/xrpld/app/misc/NetworkOPs.cpp`: + - In `processTransaction()`: + - Create `tx.process` span + - Set attributes: `xrpl.tx.hash`, `xrpl.tx.type`, `xrpl.tx.local` (whether from RPC or peer) + - Record whether sync or async path is taken + + - In `doTransactionAsync()`: + - Capture parent context before queuing + - Create `tx.queue` span with queue depth attribute + - Add event when transaction is dequeued for processing + + - In `doTransactionSync()`: + - Create `tx.process_sync` span + - Record result (applied, queued, rejected) + +**Key modified files**: + +- `src/xrpld/app/misc/NetworkOPs.cpp` + +**Reference**: + +- [01-architecture-analysis.md §1.6](./01-architecture-analysis.md) — tx.validate and tx.process trace points +- [02-design-decisions.md §2.4.3](./02-design-decisions.md) — Transaction attribute schema + +--- + +## Task 3.5: Instrument HashRouter for Dedup Visibility + +**Objective**: Make transaction deduplication visible in traces by recording HashRouter decisions as span attributes/events. + +**What to do**: + +- Edit `src/xrpld/overlay/detail/PeerImp.cpp` (in handleTransaction): + - After calling `HashRouter::shouldProcess()` or `addSuppressionPeer()`: + - Record `xrpl.tx.suppressed` attribute (true/false) + - Record `xrpl.tx.flags` showing current HashRouter state (SAVED, TRUSTED, etc.) + - Add `tx.first_seen` or `tx.duplicate` event + +- This is NOT a modification to HashRouter itself — just recording its decisions as span attributes in the existing PeerImp instrumentation from Task 3.3. + +**Key modified files**: + +- `src/xrpld/overlay/detail/PeerImp.cpp` (same changes as 3.3, logically grouped) + +--- + +## Task 3.6: Context Propagation in Transaction Relay + +**Objective**: Ensure trace context flows correctly when transactions are relayed between peers, creating linked spans across nodes. + +**What to do**: + +- Verify the relay path injects trace context: + - When `PeerImp` relays a transaction, the `TMTransaction` message should carry `trace_context` + - When a remote peer receives it, the context is extracted and used as parent + +- Test context propagation: + - Manually verify with 2+ node setup that trace IDs match across nodes + - Confirm parent-child span relationships are correct in Jaeger + +- Handle edge cases: + - Missing trace context (older peers): create new root span + - Corrupted trace context: log warning, create new root span + - Sampled-out traces: respect trace flags + +**Key modified files**: + +- `src/xrpld/overlay/detail/PeerImp.cpp` +- `src/xrpld/overlay/detail/OverlayImpl.cpp` (if relay method needs context param) + +**Reference**: + +- [02-design-decisions.md §2.5](./02-design-decisions.md) — Context propagation design +- [04-code-samples.md §4.5.1](./04-code-samples.md) — Relay context injection pattern + +--- + +## Task 3.7: Build Verification and Testing + +**Objective**: Verify all Phase 3 changes compile and work correctly. + +**What to do**: + +1. Build with `telemetry=ON` — verify no compilation errors +2. Build with `telemetry=OFF` — verify no regressions +3. Run existing unit tests +4. Verify protobuf regeneration produces correct C++ code +5. Document any issues encountered + +**Verification Checklist**: + +- [ ] Protobuf changes generate valid C++ +- [ ] Build succeeds with telemetry ON +- [ ] Build succeeds with telemetry OFF +- [ ] Existing tests pass +- [ ] No undefined symbols from new telemetry calls + +--- + +## Summary + +| Task | Description | New Files | Modified Files | Depends On | +| ---- | ----------------------------------- | --------- | -------------- | ---------- | +| 3.1 | TraceContext protobuf message | 0 | 1 | Phase 2 | +| 3.2 | Protobuf context serialization | 1-2 | 0 | 3.1 | +| 3.3 | PeerImp transaction instrumentation | 0 | 1 | 3.2 | +| 3.4 | NetworkOPs transaction processing | 0 | 1 | Phase 2 | +| 3.5 | HashRouter dedup visibility | 0 | 1 | 3.3 | +| 3.6 | Relay context propagation | 0 | 1-2 | 3.3, 3.5 | +| 3.7 | Build verification and testing | 0 | 0 | 3.1-3.6 | + +**Parallel work**: Tasks 3.1 and 3.4 can start in parallel. Task 3.2 depends on 3.1. Tasks 3.3 and 3.5 depend on 3.2. Task 3.6 depends on 3.3 and 3.5. + +**Exit Criteria** (from [06-implementation-phases.md §6.11.3](./06-implementation-phases.md)): + +- [ ] Transaction traces span across nodes +- [ ] Trace context in Protocol Buffer messages +- [ ] HashRouter deduplication visible in traces +- [ ] <5% overhead on transaction throughput diff --git a/OpenTelemetryPlan/Phase4_taskList.md b/OpenTelemetryPlan/Phase4_taskList.md new file mode 100644 index 0000000000..a5ef457efd --- /dev/null +++ b/OpenTelemetryPlan/Phase4_taskList.md @@ -0,0 +1,221 @@ +# Phase 4: Consensus Tracing Task List + +> **Goal**: Full observability into consensus rounds — track round lifecycle, phase transitions, proposal handling, and validation. This is the RUN phase that completes the distributed tracing story. +> +> **Scope**: RCLConsensus instrumentation for round starts, phase transitions (open/establish/accept), proposal send/receive, validation handling, and correlation with transaction traces from Phase 3. +> +> **Branch**: `pratik/otel-phase4-consensus-tracing` (from `pratik/otel-phase3-tx-tracing`) + +### Related Plan Documents + +| Document | Relevance | +| ------------------------------------------------------------ | ----------------------------------------------------------- | +| [04-code-samples.md](./04-code-samples.md) | Consensus instrumentation (§4.5.2), consensus span patterns | +| [01-architecture-analysis.md](./01-architecture-analysis.md) | Consensus round flow (§1.4), key trace points (§1.6) | +| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 4 tasks (§6.5), definition of done (§6.11.4) | +| [02-design-decisions.md](./02-design-decisions.md) | Consensus attribute schema (§2.4.4) | + +--- + +## Task 4.1: Instrument Consensus Round Start + +**Objective**: Create a root span for each consensus round that captures the round's key parameters. + +**What to do**: + +- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`: + - In `RCLConsensus::startRound()` (or the Adaptor's startRound): + - Create `consensus.round` span using `XRPL_TRACE_CONSENSUS` macro + - Set attributes: + - `xrpl.consensus.ledger.prev` — previous ledger hash + - `xrpl.consensus.ledger.seq` — target ledger sequence + - `xrpl.consensus.proposers` — number of trusted proposers + - `xrpl.consensus.mode` — "proposing" or "observing" + - Store the span context for use by child spans in phase transitions + +- Add a member to hold current round trace context: + - `opentelemetry::context::Context currentRoundContext_` (guarded by `#ifdef`) + - Updated at round start, used by phase transition spans + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` +- `src/xrpld/app/consensus/RCLConsensus.h` (add context member) + +**Reference**: + +- [04-code-samples.md §4.5.2](./04-code-samples.md) — startRound instrumentation example +- [01-architecture-analysis.md §1.4](./01-architecture-analysis.md) — Consensus round flow + +--- + +## Task 4.2: Instrument Phase Transitions + +**Objective**: Create child spans for each consensus phase (open, establish, accept) to show timing breakdown. + +**What to do**: + +- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`: + - Identify where phase transitions occur (the `Consensus` template drives this) + - For each phase entry: + - Create span as child of `currentRoundContext_`: `consensus.phase.open`, `consensus.phase.establish`, `consensus.phase.accept` + - Set `xrpl.consensus.phase` attribute + - Add `phase.enter` event at start, `phase.exit` event at end + - Record phase duration in milliseconds + + - In the `onClose` adaptor method: + - Create `consensus.ledger_close` span + - Set attributes: close_time, mode, transaction count in initial position + + - Note: The Consensus template class in `include/xrpl/consensus/Consensus.h` drives phase transitions — check if instrumentation goes there or in the Adaptor + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` +- Possibly `include/xrpl/consensus/Consensus.h` (for template-level phase tracking) + +**Reference**: + +- [04-code-samples.md §4.5.2](./04-code-samples.md) — phaseTransition instrumentation + +--- + +## Task 4.3: Instrument Proposal Handling + +**Objective**: Trace proposal send and receive to show validator coordination. + +**What to do**: + +- Edit `src/xrpld/app/consensus/RCLConsensus.cpp`: + - In `Adaptor::propose()`: + - Create `consensus.proposal.send` span + - Set attributes: `xrpl.consensus.round` (proposal sequence), proposal hash + - Inject trace context into outgoing `TMProposeSet::trace_context` (from Phase 3 protobuf) + + - In `Adaptor::peerProposal()` (or wherever peer proposals are received): + - Extract trace context from incoming `TMProposeSet::trace_context` + - Create `consensus.proposal.receive` span as child of extracted context + - Set attributes: `xrpl.consensus.proposer` (node ID), `xrpl.consensus.round` + + - In `Adaptor::share(RCLCxPeerPos)`: + - Create `consensus.proposal.relay` span for relaying peer proposals + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` + +**Reference**: + +- [04-code-samples.md §4.5.2](./04-code-samples.md) — peerProposal instrumentation +- [02-design-decisions.md §2.4.4](./02-design-decisions.md) — Consensus attribute schema + +--- + +## Task 4.4: Instrument Validation Handling + +**Objective**: Trace validation send and receive to show ledger validation flow. + +**What to do**: + +- Edit `src/xrpld/app/consensus/RCLConsensus.cpp` (or the validation handler): + - When sending our validation: + - Create `consensus.validation.send` span + - Set attributes: validated ledger hash, sequence, signing time + + - When receiving a peer validation: + - Extract trace context from `TMValidation::trace_context` (if present) + - Create `consensus.validation.receive` span + - Set attributes: `xrpl.consensus.validator` (node ID), ledger hash + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` +- `src/xrpld/app/misc/NetworkOPs.cpp` (if validation handling is here) + +--- + +## Task 4.5: Add Consensus-Specific Attributes + +**Objective**: Enrich consensus spans with detailed attributes for debugging and analysis. + +**What to do**: + +- Review all consensus spans and ensure they include: + - `xrpl.consensus.ledger.seq` — target ledger sequence number + - `xrpl.consensus.round` — consensus round number + - `xrpl.consensus.mode` — proposing/observing/wrongLedger + - `xrpl.consensus.phase` — current phase name + - `xrpl.consensus.phase_duration_ms` — time spent in phase + - `xrpl.consensus.proposers` — number of trusted proposers + - `xrpl.consensus.tx_count` — transactions in proposed set + - `xrpl.consensus.disputes` — number of disputed transactions + - `xrpl.consensus.converge_percent` — convergence percentage + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` + +--- + +## Task 4.6: Correlate Transaction and Consensus Traces + +**Objective**: Link transaction traces from Phase 3 with consensus traces so you can follow a transaction from submission through consensus into the ledger. + +**What to do**: + +- In `onClose()` or `onAccept()`: + - When building the consensus position, link the round span to individual transaction spans using span links (if OTel SDK supports it) or events + - At minimum, record the transaction hashes included in the consensus set as span events: `tx.included` with `xrpl.tx.hash` attribute + +- In `processTransactionSet()` (NetworkOPs): + - If the consensus round span context is available, create child spans for each transaction applied to the ledger + +**Key modified files**: + +- `src/xrpld/app/consensus/RCLConsensus.cpp` +- `src/xrpld/app/misc/NetworkOPs.cpp` + +--- + +## Task 4.7: Build Verification and Testing + +**Objective**: Verify all Phase 4 changes compile and don't affect consensus timing. + +**What to do**: + +1. Build with `telemetry=ON` — verify no compilation errors +2. Build with `telemetry=OFF` — verify no regressions (critical for consensus code) +3. Run existing consensus-related unit tests +4. Verify that all macros expand to no-ops when disabled +5. Check that no consensus-critical code paths are affected by instrumentation overhead + +**Verification Checklist**: + +- [ ] Build succeeds with telemetry ON +- [ ] Build succeeds with telemetry OFF +- [ ] Existing consensus tests pass +- [ ] No new includes in consensus headers when telemetry is OFF +- [ ] Phase timing instrumentation doesn't use blocking operations + +--- + +## Summary + +| Task | Description | New Files | Modified Files | Depends On | +| ---- | ------------------------------------- | --------- | -------------- | ------------- | +| 4.1 | Consensus round start instrumentation | 0 | 2 | Phase 3 | +| 4.2 | Phase transition instrumentation | 0 | 1-2 | 4.1 | +| 4.3 | Proposal handling instrumentation | 0 | 1 | 4.1 | +| 4.4 | Validation handling instrumentation | 0 | 1-2 | 4.1 | +| 4.5 | Consensus-specific attributes | 0 | 1 | 4.2, 4.3, 4.4 | +| 4.6 | Transaction-consensus correlation | 0 | 2 | 4.2, Phase 3 | +| 4.7 | Build verification and testing | 0 | 0 | 4.1-4.6 | + +**Parallel work**: Tasks 4.2, 4.3, and 4.4 can run in parallel after 4.1 is complete. Task 4.5 depends on all three. Task 4.6 depends on 4.2 and Phase 3. + +**Exit Criteria** (from [06-implementation-phases.md §6.11.4](./06-implementation-phases.md)): + +- [ ] Complete consensus round traces +- [ ] Phase transitions visible +- [ ] Proposals and validations traced +- [ ] No impact on consensus timing diff --git a/OpenTelemetryPlan/Phase5_taskList.md b/OpenTelemetryPlan/Phase5_taskList.md new file mode 100644 index 0000000000..1447cf2dd1 --- /dev/null +++ b/OpenTelemetryPlan/Phase5_taskList.md @@ -0,0 +1,241 @@ +# Phase 5: Documentation & Deployment Task List + +> **Goal**: Production readiness — Grafana dashboards, spanmetrics pipeline, operator runbook, alert definitions, and final integration testing. This phase ensures the telemetry system is useful and maintainable in production. +> +> **Scope**: Grafana dashboard definitions, OTel Collector spanmetrics connector, Prometheus integration, alert rules, operator documentation, and production-ready Docker Compose stack. +> +> **Branch**: `pratik/otel-phase5-docs-deployment` (from `pratik/otel-phase4-consensus-tracing`) + +### Related Plan Documents + +| Document | Relevance | +| ---------------------------------------------------------------- | -------------------------------------------------------------------------- | +| [07-observability-backends.md](./07-observability-backends.md) | Jaeger setup (§7.1), Grafana dashboards (§7.6), alerts (§7.6.3) | +| [05-configuration-reference.md](./05-configuration-reference.md) | Collector config (§5.5), production config (§5.5.2), Docker Compose (§5.6) | +| [06-implementation-phases.md](./06-implementation-phases.md) | Phase 5 tasks (§6.6), definition of done (§6.11.5) | + +--- + +## Task 5.1: Add Spanmetrics Connector to OTel Collector + +**Objective**: Derive RED metrics (Rate, Errors, Duration) from trace spans automatically, enabling Grafana time-series dashboards. + +**What to do**: + +- Edit `docker/telemetry/otel-collector-config.yaml`: + - Add `spanmetrics` connector: + ```yaml + connectors: + spanmetrics: + histogram: + explicit: + buckets: [1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 5s] + dimensions: + - name: xrpl.rpc.command + - name: xrpl.rpc.status + - name: xrpl.consensus.phase + - name: xrpl.tx.type + ``` + - Add `prometheus` exporter: + ```yaml + exporters: + prometheus: + endpoint: 0.0.0.0:8889 + ``` + - Wire the pipeline: + ```yaml + service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [debug, otlp/jaeger, spanmetrics] + metrics: + receivers: [spanmetrics] + exporters: [prometheus] + ``` + +- Edit `docker/telemetry/docker-compose.yml`: + - Expose port `8889` on the collector for Prometheus scraping + - Add Prometheus service + - Add Prometheus as Grafana datasource + +**Key modified files**: + +- `docker/telemetry/otel-collector-config.yaml` +- `docker/telemetry/docker-compose.yml` + +**Key new files**: + +- `docker/telemetry/prometheus.yml` (Prometheus scrape config) +- `docker/telemetry/grafana/provisioning/datasources/prometheus.yaml` + +**Reference**: + +- [POC_taskList.md §Next Steps](./POC_taskList.md) — Metrics pipeline for Grafana dashboards + +--- + +## Task 5.2: Create Grafana Dashboards + +**Objective**: Provide pre-built Grafana dashboards for RPC performance, transaction lifecycle, and consensus health. + +**What to do**: + +- Create `docker/telemetry/grafana/provisioning/dashboards/dashboards.yaml` (provisioning config) +- Create dashboard JSON files: + 1. **RPC Performance Dashboard** (`rpc-performance.json`): + - RPC request latency (p50/p95/p99) by command — histogram panel + - RPC throughput (requests/sec) by command — time series + - RPC error rate by command — bar gauge + - Top slowest RPC commands — table + + 2. **Transaction Overview Dashboard** (`transaction-overview.json`): + - Transaction processing rate — time series + - Transaction latency distribution — histogram + - Suppression rate (duplicates) — stat panel + - Transaction processing path (sync vs async) — pie chart + + 3. **Consensus Health Dashboard** (`consensus-health.json`): + - Consensus round duration — time series + - Phase duration breakdown (open/establish/accept) — stacked bar + - Proposals sent/received per round — stat panel + - Consensus mode distribution (proposing/observing) — pie chart + +- Store dashboards in `docker/telemetry/grafana/dashboards/` + +**Key new files**: + +- `docker/telemetry/grafana/provisioning/dashboards/dashboards.yaml` +- `docker/telemetry/grafana/dashboards/rpc-performance.json` +- `docker/telemetry/grafana/dashboards/transaction-overview.json` +- `docker/telemetry/grafana/dashboards/consensus-health.json` + +**Reference**: + +- [07-observability-backends.md §7.6](./07-observability-backends.md) — Grafana dashboard specifications +- [01-architecture-analysis.md §1.8.3](./01-architecture-analysis.md) — Dashboard panel examples + +--- + +## Task 5.3: Define Alert Rules + +**Objective**: Create alert definitions for key telemetry anomalies. + +**What to do**: + +- Create `docker/telemetry/grafana/provisioning/alerting/alerts.yaml`: + - **RPC Latency Alert**: p99 latency > 1s for any command over 5 minutes + - **RPC Error Rate Alert**: Error rate > 5% for any command over 5 minutes + - **Consensus Duration Alert**: Round duration > 10s (warn), > 30s (critical) + - **Transaction Processing Alert**: Processing rate drops below threshold + - **Telemetry Pipeline Health**: No spans received for > 2 minutes + +**Key new files**: + +- `docker/telemetry/grafana/provisioning/alerting/alerts.yaml` + +**Reference**: + +- [07-observability-backends.md §7.6.3](./07-observability-backends.md) — Alert rule definitions + +--- + +## Task 5.4: Production Collector Configuration + +**Objective**: Create a production-ready OTel Collector configuration with tail-based sampling and resource limits. + +**What to do**: + +- Create `docker/telemetry/otel-collector-config-production.yaml`: + - Tail-based sampling policy: + - Always sample errors and slow traces + - 10% base sampling rate for normal traces + - Always sample first trace for each unique RPC command + - Resource limits: + - Memory limiter processor (80% of available memory) + - Queued retry for export failures + - TLS configuration for production endpoints + - Health check endpoint + +**Key new files**: + +- `docker/telemetry/otel-collector-config-production.yaml` + +**Reference**: + +- [05-configuration-reference.md §5.5.2](./05-configuration-reference.md) — Production collector config + +--- + +## Task 5.5: Operator Runbook + +**Objective**: Create operator documentation for managing the telemetry system in production. + +**What to do**: + +- Create `docs/telemetry-runbook.md`: + - **Setup**: How to enable telemetry in rippled + - **Configuration**: All config options with descriptions + - **Collector Deployment**: Docker Compose vs. Kubernetes vs. bare metal + - **Troubleshooting**: Common issues and resolutions + - No traces appearing + - High memory usage from telemetry + - Collector connection failures + - Sampling configuration tuning + - **Performance Tuning**: Batch size, queue size, sampling ratio guidelines + - **Upgrading**: How to upgrade OTel SDK and Collector versions + +**Key new files**: + +- `docs/telemetry-runbook.md` + +--- + +## Task 5.6: Final Integration Testing + +**Objective**: Validate the complete telemetry stack end-to-end. + +**What to do**: + +1. Start full Docker stack (Collector, Jaeger, Grafana, Prometheus) +2. Build rippled with `telemetry=ON` +3. Run in standalone mode with telemetry enabled +4. Generate RPC traffic and verify traces in Jaeger +5. Verify dashboards populate in Grafana +6. Verify alerts trigger correctly +7. Test telemetry OFF path (no regressions) +8. Run full test suite + +**Verification Checklist**: + +- [ ] Docker stack starts without errors +- [ ] Traces appear in Jaeger with correct hierarchy +- [ ] Grafana dashboards show metrics derived from spans +- [ ] Prometheus scrapes spanmetrics successfully +- [ ] Alerts can be triggered by simulated conditions +- [ ] Build succeeds with telemetry ON and OFF +- [ ] Full test suite passes + +--- + +## Summary + +| Task | Description | New Files | Modified Files | Depends On | +| ---- | ---------------------------------- | --------- | -------------- | ---------- | +| 5.1 | Spanmetrics connector + Prometheus | 2 | 2 | Phase 4 | +| 5.2 | Grafana dashboards | 4 | 0 | 5.1 | +| 5.3 | Alert definitions | 1 | 0 | 5.1 | +| 5.4 | Production collector config | 1 | 0 | Phase 4 | +| 5.5 | Operator runbook | 1 | 0 | Phase 4 | +| 5.6 | Final integration testing | 0 | 0 | 5.1-5.5 | + +**Parallel work**: Tasks 5.1, 5.4, and 5.5 can run in parallel. Tasks 5.2 and 5.3 depend on 5.1. Task 5.6 depends on all others. + +**Exit Criteria** (from [06-implementation-phases.md §6.11.5](./06-implementation-phases.md)): + +- [ ] Dashboards deployed and showing data +- [ ] Alerts configured and tested +- [ ] Operator documentation complete +- [ ] Production collector config ready +- [ ] Full test suite passes diff --git a/cspell.config.yaml b/cspell.config.yaml index c6f69bfc29..f12a4dcc1b 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -87,6 +87,8 @@ words: - daria - dcmake - dearmor + - dedup + - Dedup - deleteme - demultiplexer - deserializaton @@ -193,6 +195,7 @@ words: - permissioned - pointee - populator + - pratik - preauth - preauthorization - preauthorize diff --git a/include/xrpl/basics/MallocTrim.h b/include/xrpl/basics/MallocTrim.h deleted file mode 100644 index 2d0cf989ba..0000000000 --- a/include/xrpl/basics/MallocTrim.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include - -#include -#include -#include - -namespace xrpl { - -// cSpell:ignore ptmalloc - -// ----------------------------------------------------------------------------- -// Allocator interaction note: -// - This facility invokes glibc's malloc_trim(0) on Linux/glibc to request that -// ptmalloc return free heap pages to the OS. -// - If an alternative allocator (e.g. jemalloc or tcmalloc) is linked or -// preloaded (LD_PRELOAD), calling glibc's malloc_trim typically has no effect -// on the *active* heap. The call is harmless but may not reclaim memory -// because those allocators manage their own arenas. -// - Only glibc sbrk/arena space is eligible for trimming; large mmap-backed -// allocations are usually returned to the OS on free regardless of trimming. -// - Call at known reclamation points (e.g., after cache sweeps / online delete) -// and consider rate limiting to avoid churn. -// ----------------------------------------------------------------------------- - -struct MallocTrimReport -{ - bool supported{false}; - int trimResult{-1}; - std::int64_t rssBeforeKB{-1}; - std::int64_t rssAfterKB{-1}; - std::chrono::microseconds durationUs{-1}; - std::int64_t minfltDelta{-1}; - std::int64_t majfltDelta{-1}; - - [[nodiscard]] std::int64_t - deltaKB() const noexcept - { - if (rssBeforeKB < 0 || rssAfterKB < 0) - return 0; - return rssAfterKB - rssBeforeKB; - } -}; - -/** - * @brief Attempt to return freed memory to the operating system. - * - * On Linux with glibc malloc, this issues ::malloc_trim(0), which may release - * free space from ptmalloc arenas back to the kernel. On other platforms, or if - * a different allocator is in use, this function is a no-op and the report will - * indicate that trimming is unsupported or had no effect. - * - * @param tag Identifier for logging/debugging purposes. - * @param journal Journal for diagnostic logging. - * @return Report containing before/after metrics and the trim result. - * - * @note If an alternative allocator (jemalloc/tcmalloc) is linked or preloaded, - * calling glibc's malloc_trim may have no effect on the active heap. The - * call is harmless but typically does not reclaim memory under those - * allocators. - * - * @note Only memory served from glibc's sbrk/arena heaps is eligible for trim. - * Large allocations satisfied via mmap are usually returned on free - * independently of trimming. - * - * @note Intended for use after operations that free significant memory (e.g., - * cache sweeps, ledger cleanup, online delete). Consider rate limiting. - */ -MallocTrimReport -mallocTrim(std::string_view tag, beast::Journal journal); - -} // namespace xrpl diff --git a/include/xrpl/nodestore/Backend.h b/include/xrpl/nodestore/Backend.h index 36fd36ec00..7c3ea57bb8 100644 --- a/include/xrpl/nodestore/Backend.h +++ b/include/xrpl/nodestore/Backend.h @@ -77,16 +77,16 @@ public: If the object is not found or an error is encountered, the result will indicate the condition. @note This will be called concurrently. - @param hash The hash of the object. + @param key A pointer to the key data. @param pObject [out] The created object if successful. @return The result of the operation. */ virtual Status - fetch(uint256 const& hash, std::shared_ptr* pObject) = 0; + fetch(void const* key, std::shared_ptr* pObject) = 0; /** Fetch a batch synchronously. */ virtual std::pair>, Status> - fetchBatch(std::vector const& hashes) = 0; + fetchBatch(std::vector const& hashes) = 0; /** Store a single object. Depending on the implementation this may happen immediately diff --git a/include/xrpl/protocol/detail/features.macro b/include/xrpl/protocol/detail/features.macro index a40d524c70..961bc6e44c 100644 --- a/include/xrpl/protocol/detail/features.macro +++ b/include/xrpl/protocol/detail/features.macro @@ -15,10 +15,9 @@ // Add new amendments to the top of this list. // Keep it sorted in reverse chronological order. - XRPL_FIX (PermissionedDomainInvariant, Supported::yes, VoteBehavior::DefaultNo) XRPL_FIX (ExpiredNFTokenOfferRemoval, Supported::yes, VoteBehavior::DefaultNo) -XRPL_FIX (BatchInnerSigs, Supported::no, VoteBehavior::DefaultNo) +XRPL_FIX (BatchInnerSigs, Supported::yes, VoteBehavior::DefaultNo) XRPL_FEATURE(LendingProtocol, Supported::yes, VoteBehavior::DefaultNo) XRPL_FEATURE(PermissionDelegationV1_1, Supported::no, VoteBehavior::DefaultNo) XRPL_FIX (DirectoryLimit, Supported::yes, VoteBehavior::DefaultNo) @@ -32,7 +31,7 @@ XRPL_FEATURE(TokenEscrow, Supported::yes, VoteBehavior::DefaultNo XRPL_FIX (EnforceNFTokenTrustlineV2, Supported::yes, VoteBehavior::DefaultNo) XRPL_FIX (AMMv1_3, Supported::yes, VoteBehavior::DefaultNo) XRPL_FEATURE(PermissionedDEX, Supported::yes, VoteBehavior::DefaultNo) -XRPL_FEATURE(Batch, Supported::no, VoteBehavior::DefaultNo) +XRPL_FEATURE(Batch, Supported::yes, VoteBehavior::DefaultNo) XRPL_FEATURE(SingleAssetVault, Supported::yes, VoteBehavior::DefaultNo) XRPL_FIX (PayChanCancelAfter, Supported::yes, VoteBehavior::DefaultNo) // Check flags in Credential transactions diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index b237f4a621..281ab632a4 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -140,6 +140,10 @@ public: virtual bool shouldTracePeer() const = 0; + /** @return true if ledger close/accept should be traced. */ + virtual bool + shouldTraceLedger() const = 0; + #ifdef XRPL_ENABLE_TELEMETRY /** Get or create a named tracer instance. diff --git a/src/libxrpl/basics/MallocTrim.cpp b/src/libxrpl/basics/MallocTrim.cpp deleted file mode 100644 index 1b0932b39d..0000000000 --- a/src/libxrpl/basics/MallocTrim.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include - -#include - -#include -#include -#include -#include -#include - -#if defined(__GLIBC__) && BOOST_OS_LINUX -#include - -#include -#include - -// Require RUSAGE_THREAD for thread-scoped page fault tracking -#ifndef RUSAGE_THREAD -#error "MallocTrim rusage instrumentation requires RUSAGE_THREAD on Linux/glibc" -#endif - -namespace { - -bool -getRusageThread(struct rusage& ru) -{ - return ::getrusage(RUSAGE_THREAD, &ru) == 0; // LCOV_EXCL_LINE -} - -} // namespace -#endif - -namespace xrpl { - -namespace detail { - -// cSpell:ignore statm - -#if defined(__GLIBC__) && BOOST_OS_LINUX - -inline int -mallocTrimWithPad(std::size_t padBytes) -{ - return ::malloc_trim(padBytes); -} - -long -parseStatmRSSkB(std::string const& statm) -{ - // /proc/self/statm format: size resident shared text lib data dt - // We want the second field (resident) which is in pages - std::istringstream iss(statm); - long size, resident; - if (!(iss >> size >> resident)) - return -1; - - // Convert pages to KB - long const pageSize = ::sysconf(_SC_PAGESIZE); - if (pageSize <= 0) - return -1; - - return (resident * pageSize) / 1024; -} - -#endif // __GLIBC__ && BOOST_OS_LINUX - -} // namespace detail - -MallocTrimReport -mallocTrim(std::string_view tag, beast::Journal journal) -{ - // LCOV_EXCL_START - - MallocTrimReport report; - -#if !(defined(__GLIBC__) && BOOST_OS_LINUX) - JLOG(journal.debug()) << "malloc_trim not supported on this platform (tag=" << tag << ")"; -#else - // Keep glibc malloc_trim padding at 0 (default): 12h Mainnet tests across 0/256KB/1MB/16MB - // showed no clear, consistent benefit from custom padding—0 provided the best overall balance - // of RSS reduction and trim-latency stability without adding a tuning surface. - constexpr std::size_t TRIM_PAD = 0; - - report.supported = true; - - if (journal.debug()) - { - auto readFile = [](std::string const& path) -> std::string { - std::ifstream ifs(path, std::ios::in | std::ios::binary); - if (!ifs.is_open()) - return {}; - - // /proc files are often not seekable; read as a stream. - std::ostringstream oss; - oss << ifs.rdbuf(); - return oss.str(); - }; - - std::string const tagStr{tag}; - std::string const statmPath = "/proc/self/statm"; - - auto const statmBefore = readFile(statmPath); - long const rssBeforeKB = detail::parseStatmRSSkB(statmBefore); - - struct rusage ru0{}; - bool const have_ru0 = getRusageThread(ru0); - - auto const t0 = std::chrono::steady_clock::now(); - - report.trimResult = detail::mallocTrimWithPad(TRIM_PAD); - - auto const t1 = std::chrono::steady_clock::now(); - - struct rusage ru1{}; - bool const have_ru1 = getRusageThread(ru1); - - auto const statmAfter = readFile(statmPath); - long const rssAfterKB = detail::parseStatmRSSkB(statmAfter); - - // Populate report fields - report.rssBeforeKB = rssBeforeKB; - report.rssAfterKB = rssAfterKB; - report.durationUs = std::chrono::duration_cast(t1 - t0); - - if (have_ru0 && have_ru1) - { - report.minfltDelta = ru1.ru_minflt - ru0.ru_minflt; - report.majfltDelta = ru1.ru_majflt - ru0.ru_majflt; - } - - std::int64_t const deltaKB = (rssBeforeKB < 0 || rssAfterKB < 0) - ? 0 - : (static_cast(rssAfterKB) - static_cast(rssBeforeKB)); - - JLOG(journal.debug()) << "malloc_trim tag=" << tagStr << " result=" << report.trimResult - << " pad=" << TRIM_PAD << " bytes" - << " rss_before=" << rssBeforeKB << "kB" - << " rss_after=" << rssAfterKB << "kB" - << " delta=" << deltaKB << "kB" - << " duration_us=" << report.durationUs.count() - << " minflt_delta=" << report.minfltDelta - << " majflt_delta=" << report.majfltDelta; - } - else - { - report.trimResult = detail::mallocTrimWithPad(TRIM_PAD); - } - -#endif - - return report; - - // LCOV_EXCL_STOP -} - -} // namespace xrpl diff --git a/src/libxrpl/nodestore/DatabaseNodeImp.cpp b/src/libxrpl/nodestore/DatabaseNodeImp.cpp index d1452dba86..5596cb4853 100644 --- a/src/libxrpl/nodestore/DatabaseNodeImp.cpp +++ b/src/libxrpl/nodestore/DatabaseNodeImp.cpp @@ -33,7 +33,7 @@ DatabaseNodeImp::fetchNodeObject( try { - status = backend_->fetch(hash, &nodeObject); + status = backend_->fetch(hash.data(), &nodeObject); } catch (std::exception const& e) { @@ -68,10 +68,18 @@ DatabaseNodeImp::fetchBatch(std::vector const& hashes) using namespace std::chrono; auto const before = steady_clock::now(); + std::vector batch{}; + batch.reserve(hashes.size()); + for (size_t i = 0; i < hashes.size(); ++i) + { + auto const& hash = hashes[i]; + batch.push_back(&hash); + } + // Get the node objects that match the hashes from the backend. To protect // against the backends returning fewer or more results than expected, the // container is resized to the number of hashes. - auto results = backend_->fetchBatch(hashes).first; + auto results = backend_->fetchBatch(batch).first; XRPL_ASSERT( results.size() == hashes.size() || results.empty(), "number of output objects either matches number of input hashes or is empty"); diff --git a/src/libxrpl/nodestore/DatabaseRotatingImp.cpp b/src/libxrpl/nodestore/DatabaseRotatingImp.cpp index e058fa76ac..26d8c30931 100644 --- a/src/libxrpl/nodestore/DatabaseRotatingImp.cpp +++ b/src/libxrpl/nodestore/DatabaseRotatingImp.cpp @@ -105,7 +105,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; try { - status = backend->fetch(hash, &nodeObject); + status = backend->fetch(hash.data(), &nodeObject); } catch (std::exception const& e) { diff --git a/src/libxrpl/nodestore/backend/MemoryFactory.cpp b/src/libxrpl/nodestore/backend/MemoryFactory.cpp index b11d90610a..8ac23a0bb6 100644 --- a/src/libxrpl/nodestore/backend/MemoryFactory.cpp +++ b/src/libxrpl/nodestore/backend/MemoryFactory.cpp @@ -116,9 +116,10 @@ public: //-------------------------------------------------------------------------- Status - fetch(uint256 const& hash, std::shared_ptr* pObject) override + fetch(void const* key, std::shared_ptr* pObject) override { XRPL_ASSERT(db_, "xrpl::NodeStore::MemoryBackend::fetch : non-null database"); + uint256 const hash(uint256::fromVoid(key)); std::lock_guard _(db_->mutex); @@ -133,14 +134,14 @@ public: } std::pair>, Status> - fetchBatch(std::vector const& hashes) override + fetchBatch(std::vector const& hashes) override { std::vector> results; results.reserve(hashes.size()); for (auto const& h : hashes) { std::shared_ptr nObj; - Status status = fetch(h, &nObj); + Status status = fetch(h->begin(), &nObj); if (status != ok) results.push_back({}); else diff --git a/src/libxrpl/nodestore/backend/NuDBFactory.cpp b/src/libxrpl/nodestore/backend/NuDBFactory.cpp index 4d7e7be668..e8efa464af 100644 --- a/src/libxrpl/nodestore/backend/NuDBFactory.cpp +++ b/src/libxrpl/nodestore/backend/NuDBFactory.cpp @@ -179,17 +179,17 @@ public: } Status - fetch(uint256 const& hash, std::shared_ptr* pno) override + fetch(void const* key, std::shared_ptr* pno) override { Status status; pno->reset(); nudb::error_code ec; db_.fetch( - hash.data(), - [&hash, pno, &status](void const* data, std::size_t size) { + key, + [key, pno, &status](void const* data, std::size_t size) { nudb::detail::buffer bf; auto const result = nodeobject_decompress(data, size, bf); - DecodedBlob decoded(hash.data(), result.first, result.second); + DecodedBlob decoded(key, result.first, result.second); if (!decoded.wasOk()) { status = dataCorrupt; @@ -207,14 +207,14 @@ public: } std::pair>, Status> - fetchBatch(std::vector const& hashes) override + fetchBatch(std::vector const& hashes) override { std::vector> results; results.reserve(hashes.size()); for (auto const& h : hashes) { std::shared_ptr nObj; - Status status = fetch(h, &nObj); + Status status = fetch(h->begin(), &nObj); if (status != ok) results.push_back({}); else diff --git a/src/libxrpl/nodestore/backend/NullFactory.cpp b/src/libxrpl/nodestore/backend/NullFactory.cpp index ab5b7d0117..4ecca46a9a 100644 --- a/src/libxrpl/nodestore/backend/NullFactory.cpp +++ b/src/libxrpl/nodestore/backend/NullFactory.cpp @@ -36,13 +36,13 @@ public: } Status - fetch(uint256 const&, std::shared_ptr*) override + fetch(void const*, std::shared_ptr*) override { return notFound; } std::pair>, Status> - fetchBatch(std::vector const& hashes) override + fetchBatch(std::vector const& hashes) override { return {}; } diff --git a/src/libxrpl/nodestore/backend/RocksDBFactory.cpp b/src/libxrpl/nodestore/backend/RocksDBFactory.cpp index 01bc74f5ed..c84c5f6982 100644 --- a/src/libxrpl/nodestore/backend/RocksDBFactory.cpp +++ b/src/libxrpl/nodestore/backend/RocksDBFactory.cpp @@ -244,7 +244,7 @@ public: //-------------------------------------------------------------------------- Status - fetch(uint256 const& hash, std::shared_ptr* pObject) override + fetch(void const* key, std::shared_ptr* pObject) override { XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::fetch : non-null database"); pObject->reset(); @@ -252,7 +252,7 @@ public: Status status(ok); rocksdb::ReadOptions const options; - rocksdb::Slice const slice(std::bit_cast(hash.data()), m_keyBytes); + rocksdb::Slice const slice(static_cast(key), m_keyBytes); std::string string; @@ -260,7 +260,7 @@ public: if (getStatus.ok()) { - DecodedBlob decoded(hash.data(), string.data(), string.size()); + DecodedBlob decoded(key, string.data(), string.size()); if (decoded.wasOk()) { @@ -295,14 +295,14 @@ public: } std::pair>, Status> - fetchBatch(std::vector const& hashes) override + fetchBatch(std::vector const& hashes) override { std::vector> results; results.reserve(hashes.size()); for (auto const& h : hashes) { std::shared_ptr nObj; - Status status = fetch(h, &nObj); + Status status = fetch(h->begin(), &nObj); if (status != ok) results.push_back({}); else @@ -332,8 +332,9 @@ public: EncodedBlob encoded(e); wb.Put( - rocksdb::Slice(std::bit_cast(encoded.getKey()), m_keyBytes), - rocksdb::Slice(std::bit_cast(encoded.getData()), encoded.getSize())); + rocksdb::Slice(reinterpret_cast(encoded.getKey()), m_keyBytes), + rocksdb::Slice( + reinterpret_cast(encoded.getData()), encoded.getSize())); } rocksdb::WriteOptions const options; diff --git a/src/libxrpl/telemetry/NullTelemetry.cpp b/src/libxrpl/telemetry/NullTelemetry.cpp index faa81590cb..64c8f5e491 100644 --- a/src/libxrpl/telemetry/NullTelemetry.cpp +++ b/src/libxrpl/telemetry/NullTelemetry.cpp @@ -76,6 +76,12 @@ public: return false; } + bool + shouldTraceLedger() const override + { + return false; + } + #ifdef XRPL_ENABLE_TELEMETRY opentelemetry::nostd::shared_ptr getTracer(std::string_view) override diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index f2d6fa39ed..6ead430517 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -93,6 +93,12 @@ public: return false; } + bool + shouldTraceLedger() const override + { + return false; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view) override { @@ -232,6 +238,12 @@ public: return setup_.tracePeer; } + bool + shouldTraceLedger() const override + { + return setup_.traceLedger; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view name) override { diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp index c5b25023e4..2cc74d1a4e 100644 --- a/src/libxrpl/telemetry/TelemetryConfig.cpp +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -9,6 +9,8 @@ #include +#include + namespace xrpl { namespace telemetry { @@ -32,7 +34,7 @@ setup_Telemetry( setup.useTls = section.value_or("use_tls", 0) != 0; setup.tlsCertPath = section.value_or("tls_ca_cert", ""); - setup.samplingRatio = section.value_or("sampling_ratio", 1.0); + setup.samplingRatio = std::clamp(section.value_or("sampling_ratio", 1.0), 0.0, 1.0); setup.batchSize = section.value_or("batch_size", 512u); setup.batchDelay = diff --git a/src/test/app/Vault_test.cpp b/src/test/app/Vault_test.cpp index 7ae9faf18f..93ac94d7ce 100644 --- a/src/test/app/Vault_test.cpp +++ b/src/test/app/Vault_test.cpp @@ -5340,20 +5340,20 @@ class Vault_test : public beast::unit_test::suite env.close(); // 2. Mantissa larger than uint64 max - env.set_parse_failure_expected(true); try { tx[sfAssetsMaximum] = "18446744073709551617e5"; // uint64 max + 1 env(tx, THISLINE); - BEAST_EXPECTS(false, "Expected parse_error for mantissa larger than uint64 max"); + BEAST_EXPECT(false); } catch (parse_error const& e) { using namespace std::string_literals; BEAST_EXPECT( - e.what() == "invalidParamsField 'tx_json.AssetsMaximum' has invalid data."s); + e.what() == + "invalidParamsField 'tx_json.AssetsMaximum' has invalid " + "data."s); } - env.set_parse_failure_expected(false); } } diff --git a/src/test/nodestore/TestBase.h b/src/test/nodestore/TestBase.h index cb2a8e3bd5..4a4d21002e 100644 --- a/src/test/nodestore/TestBase.h +++ b/src/test/nodestore/TestBase.h @@ -138,7 +138,7 @@ public: { std::shared_ptr object; - Status const status = backend.fetch(batch[i]->getHash(), &object); + Status const status = backend.fetch(batch[i]->getHash().cbegin(), &object); BEAST_EXPECT(status == ok); @@ -158,7 +158,7 @@ public: { std::shared_ptr object; - Status const status = backend.fetch(batch[i]->getHash(), &object); + Status const status = backend.fetch(batch[i]->getHash().cbegin(), &object); BEAST_EXPECT(status == notFound); } diff --git a/src/test/nodestore/Timing_test.cpp b/src/test/nodestore/Timing_test.cpp index b537e3abb7..dae131e5e7 100644 --- a/src/test/nodestore/Timing_test.cpp +++ b/src/test/nodestore/Timing_test.cpp @@ -314,7 +314,7 @@ public: std::shared_ptr obj; std::shared_ptr result; obj = seq1_.obj(dist_(gen_)); - backend_.fetch(obj->getHash(), &result); + backend_.fetch(obj->getHash().data(), &result); suite_.expect(result && isSame(result, obj)); } catch (std::exception const& e) @@ -377,9 +377,9 @@ public: { try { - auto const hash = seq2_.key(i); + auto const key = seq2_.key(i); std::shared_ptr result; - backend_.fetch(hash, &result); + backend_.fetch(key.data(), &result); suite_.expect(!result); } catch (std::exception const& e) @@ -449,9 +449,9 @@ public: { if (rand_(gen_) < missingNodePercent) { - auto const hash = seq2_.key(dist_(gen_)); + auto const key = seq2_.key(dist_(gen_)); std::shared_ptr result; - backend_.fetch(hash, &result); + backend_.fetch(key.data(), &result); suite_.expect(!result); } else @@ -459,7 +459,7 @@ public: std::shared_ptr obj; std::shared_ptr result; obj = seq1_.obj(dist_(gen_)); - backend_.fetch(obj->getHash(), &result); + backend_.fetch(obj->getHash().data(), &result); suite_.expect(result && isSame(result, obj)); } } @@ -540,7 +540,8 @@ public: std::shared_ptr result; auto const j = older_(gen_); obj = seq1_.obj(j); - backend_.fetch(obj->getHash(), &result); + std::shared_ptr result1; + backend_.fetch(obj->getHash().data(), &result); suite_.expect(result != nullptr); suite_.expect(isSame(result, obj)); } @@ -558,7 +559,7 @@ public: std::shared_ptr result; auto const j = recent_(gen_); obj = seq1_.obj(j); - backend_.fetch(obj->getHash(), &result); + backend_.fetch(obj->getHash().data(), &result); suite_.expect(!result || isSame(result, obj)); break; } diff --git a/src/tests/libxrpl/CMakeLists.txt b/src/tests/libxrpl/CMakeLists.txt index cfa056b0aa..b02da4c90a 100644 --- a/src/tests/libxrpl/CMakeLists.txt +++ b/src/tests/libxrpl/CMakeLists.txt @@ -35,3 +35,11 @@ if (NOT WIN32) target_link_libraries(xrpl.test.net PRIVATE xrpl.imports.test) add_dependencies(xrpl.tests xrpl.test.net) endif () + +xrpl_add_test(telemetry) +target_link_libraries(xrpl.test.telemetry PRIVATE xrpl.imports.test) +target_include_directories(xrpl.test.telemetry PRIVATE ${CMAKE_SOURCE_DIR}/src) +if (telemetry) + target_link_libraries(xrpl.test.telemetry PRIVATE opentelemetry-cpp::opentelemetry-cpp) +endif () +add_dependencies(xrpl.tests xrpl.test.telemetry) diff --git a/src/tests/libxrpl/basics/MallocTrim.cpp b/src/tests/libxrpl/basics/MallocTrim.cpp deleted file mode 100644 index f01bd91bbf..0000000000 --- a/src/tests/libxrpl/basics/MallocTrim.cpp +++ /dev/null @@ -1,209 +0,0 @@ -#include - -#include - -#include - -using namespace xrpl; - -// cSpell:ignore statm - -#if defined(__GLIBC__) && BOOST_OS_LINUX -namespace xrpl::detail { -long -parseStatmRSSkB(std::string const& statm); -} // namespace xrpl::detail -#endif - -TEST(MallocTrimReport, structure) -{ - // Test default construction - MallocTrimReport report; - EXPECT_EQ(report.supported, false); - EXPECT_EQ(report.trimResult, -1); - EXPECT_EQ(report.rssBeforeKB, -1); - EXPECT_EQ(report.rssAfterKB, -1); - EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1}); - EXPECT_EQ(report.minfltDelta, -1); - EXPECT_EQ(report.majfltDelta, -1); - EXPECT_EQ(report.deltaKB(), 0); - - // Test deltaKB calculation - memory freed - report.rssBeforeKB = 1000; - report.rssAfterKB = 800; - EXPECT_EQ(report.deltaKB(), -200); - - // Test deltaKB calculation - memory increased - report.rssBeforeKB = 500; - report.rssAfterKB = 600; - EXPECT_EQ(report.deltaKB(), 100); - - // Test deltaKB calculation - no change - report.rssBeforeKB = 1234; - report.rssAfterKB = 1234; - EXPECT_EQ(report.deltaKB(), 0); -} - -#if defined(__GLIBC__) && BOOST_OS_LINUX -TEST(parseStatmRSSkB, standard_format) -{ - using xrpl::detail::parseStatmRSSkB; - - // Test standard format: size resident shared text lib data dt - // Assuming 4KB page size: resident=1000 pages = 4000 KB - { - std::string statm = "25365 1000 2377 0 0 5623 0"; - long result = parseStatmRSSkB(statm); - // Note: actual result depends on system page size - // On most systems it's 4KB, so 1000 pages = 4000 KB - EXPECT_GT(result, 0); - } - - // Test with newline - { - std::string statm = "12345 2000 1234 0 0 3456 0\n"; - long result = parseStatmRSSkB(statm); - EXPECT_GT(result, 0); - } - - // Test with tabs - { - std::string statm = "12345\t2000\t1234\t0\t0\t3456\t0"; - long result = parseStatmRSSkB(statm); - EXPECT_GT(result, 0); - } - - // Test zero resident pages - { - std::string statm = "25365 0 2377 0 0 5623 0"; - long result = parseStatmRSSkB(statm); - EXPECT_EQ(result, 0); - } - - // Test with extra whitespace - { - std::string statm = " 25365 1000 2377 "; - long result = parseStatmRSSkB(statm); - EXPECT_GT(result, 0); - } - - // Test empty string - { - std::string statm = ""; - long result = parseStatmRSSkB(statm); - EXPECT_EQ(result, -1); - } - - // Test malformed data (only one field) - { - std::string statm = "25365"; - long result = parseStatmRSSkB(statm); - EXPECT_EQ(result, -1); - } - - // Test malformed data (non-numeric) - { - std::string statm = "abc def ghi"; - long result = parseStatmRSSkB(statm); - EXPECT_EQ(result, -1); - } - - // Test malformed data (second field non-numeric) - { - std::string statm = "25365 abc 2377"; - long result = parseStatmRSSkB(statm); - EXPECT_EQ(result, -1); - } -} -#endif - -TEST(mallocTrim, without_debug_logging) -{ - beast::Journal journal{beast::Journal::getNullSink()}; - - MallocTrimReport report = mallocTrim("without_debug", journal); - -#if defined(__GLIBC__) && BOOST_OS_LINUX - EXPECT_EQ(report.supported, true); - EXPECT_GE(report.trimResult, 0); - EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1}); - EXPECT_EQ(report.minfltDelta, -1); - EXPECT_EQ(report.majfltDelta, -1); -#else - EXPECT_EQ(report.supported, false); - EXPECT_EQ(report.trimResult, -1); - EXPECT_EQ(report.rssBeforeKB, -1); - EXPECT_EQ(report.rssAfterKB, -1); - EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1}); - EXPECT_EQ(report.minfltDelta, -1); - EXPECT_EQ(report.majfltDelta, -1); -#endif -} - -TEST(mallocTrim, empty_tag) -{ - beast::Journal journal{beast::Journal::getNullSink()}; - MallocTrimReport report = mallocTrim("", journal); - -#if defined(__GLIBC__) && BOOST_OS_LINUX - EXPECT_EQ(report.supported, true); - EXPECT_GE(report.trimResult, 0); -#else - EXPECT_EQ(report.supported, false); -#endif -} - -TEST(mallocTrim, with_debug_logging) -{ - struct DebugSink : public beast::Journal::Sink - { - DebugSink() : Sink(beast::severities::kDebug, false) - { - } - void - write(beast::severities::Severity, std::string const&) override - { - } - void - writeAlways(beast::severities::Severity, std::string const&) override - { - } - }; - - DebugSink sink; - beast::Journal journal{sink}; - - MallocTrimReport report = mallocTrim("debug_test", journal); - -#if defined(__GLIBC__) && BOOST_OS_LINUX - EXPECT_EQ(report.supported, true); - EXPECT_GE(report.trimResult, 0); - EXPECT_GE(report.durationUs.count(), 0); - EXPECT_GE(report.minfltDelta, 0); - EXPECT_GE(report.majfltDelta, 0); -#else - EXPECT_EQ(report.supported, false); - EXPECT_EQ(report.trimResult, -1); - EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1}); - EXPECT_EQ(report.minfltDelta, -1); - EXPECT_EQ(report.majfltDelta, -1); -#endif -} - -TEST(mallocTrim, repeated_calls) -{ - beast::Journal journal{beast::Journal::getNullSink()}; - - // Call malloc_trim multiple times to ensure it's safe - for (int i = 0; i < 5; ++i) - { - MallocTrimReport report = mallocTrim("iteration_" + std::to_string(i), journal); - -#if defined(__GLIBC__) && BOOST_OS_LINUX - EXPECT_EQ(report.supported, true); - EXPECT_GE(report.trimResult, 0); -#else - EXPECT_EQ(report.supported, false); -#endif - } -} diff --git a/src/tests/libxrpl/telemetry/TelemetryConfig.cpp b/src/tests/libxrpl/telemetry/TelemetryConfig.cpp new file mode 100644 index 0000000000..de58a3827f --- /dev/null +++ b/src/tests/libxrpl/telemetry/TelemetryConfig.cpp @@ -0,0 +1,111 @@ +#include +#include + +#include + +#include + +using namespace xrpl; + +TEST(TelemetryConfig, setup_defaults) +{ + telemetry::Telemetry::Setup s; + EXPECT_FALSE(s.enabled); + EXPECT_EQ(s.serviceName, "rippled"); + EXPECT_TRUE(s.serviceVersion.empty()); + EXPECT_TRUE(s.serviceInstanceId.empty()); + EXPECT_EQ(s.exporterType, "otlp_http"); + EXPECT_EQ(s.exporterEndpoint, "http://localhost:4318/v1/traces"); + EXPECT_FALSE(s.useTls); + EXPECT_TRUE(s.tlsCertPath.empty()); + EXPECT_DOUBLE_EQ(s.samplingRatio, 1.0); + EXPECT_EQ(s.batchSize, 512u); + EXPECT_EQ(s.batchDelay, std::chrono::milliseconds{5000}); + EXPECT_EQ(s.maxQueueSize, 2048u); + EXPECT_EQ(s.networkId, 0u); + EXPECT_EQ(s.networkType, "mainnet"); + EXPECT_TRUE(s.traceTransactions); + EXPECT_TRUE(s.traceConsensus); + EXPECT_TRUE(s.traceRpc); + EXPECT_FALSE(s.tracePeer); + EXPECT_TRUE(s.traceLedger); +} + +TEST(TelemetryConfig, parse_empty_section) +{ + Section section; + auto setup = telemetry::setup_Telemetry(section, "nHUtest123", "2.0.0"); + + EXPECT_FALSE(setup.enabled); + EXPECT_EQ(setup.serviceName, "rippled"); + EXPECT_EQ(setup.serviceVersion, "2.0.0"); + EXPECT_EQ(setup.serviceInstanceId, "nHUtest123"); + EXPECT_EQ(setup.exporterType, "otlp_http"); + EXPECT_DOUBLE_EQ(setup.samplingRatio, 1.0); + EXPECT_TRUE(setup.traceRpc); + EXPECT_TRUE(setup.traceTransactions); + EXPECT_TRUE(setup.traceConsensus); + EXPECT_FALSE(setup.tracePeer); + EXPECT_TRUE(setup.traceLedger); +} + +TEST(TelemetryConfig, parse_full_section) +{ + Section section; + section.set("enabled", "1"); + section.set("service_name", "my-rippled"); + section.set("service_instance_id", "custom-id"); + section.set("exporter", "otlp_http"); + section.set("endpoint", "http://collector:4318/v1/traces"); + section.set("use_tls", "1"); + section.set("tls_ca_cert", "/etc/ssl/ca.pem"); + section.set("sampling_ratio", "0.5"); + section.set("batch_size", "256"); + section.set("batch_delay_ms", "3000"); + section.set("max_queue_size", "4096"); + section.set("trace_transactions", "0"); + section.set("trace_consensus", "0"); + section.set("trace_rpc", "1"); + section.set("trace_peer", "1"); + section.set("trace_ledger", "0"); + + auto setup = telemetry::setup_Telemetry(section, "nHUtest123", "2.0.0"); + + EXPECT_TRUE(setup.enabled); + EXPECT_EQ(setup.serviceName, "my-rippled"); + EXPECT_EQ(setup.serviceInstanceId, "custom-id"); + EXPECT_EQ(setup.exporterType, "otlp_http"); + EXPECT_EQ(setup.exporterEndpoint, "http://collector:4318/v1/traces"); + EXPECT_TRUE(setup.useTls); + EXPECT_EQ(setup.tlsCertPath, "/etc/ssl/ca.pem"); + EXPECT_DOUBLE_EQ(setup.samplingRatio, 0.5); + EXPECT_EQ(setup.batchSize, 256u); + EXPECT_EQ(setup.batchDelay, std::chrono::milliseconds{3000}); + EXPECT_EQ(setup.maxQueueSize, 4096u); + EXPECT_FALSE(setup.traceTransactions); + EXPECT_FALSE(setup.traceConsensus); + EXPECT_TRUE(setup.traceRpc); + EXPECT_TRUE(setup.tracePeer); + EXPECT_FALSE(setup.traceLedger); +} + +TEST(TelemetryConfig, null_telemetry_factory) +{ + telemetry::Telemetry::Setup setup; + setup.enabled = false; + + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + EXPECT_TRUE(tel != nullptr); + EXPECT_FALSE(tel->isEnabled()); + EXPECT_FALSE(tel->shouldTraceRpc()); + EXPECT_FALSE(tel->shouldTraceTransactions()); + EXPECT_FALSE(tel->shouldTraceConsensus()); + EXPECT_FALSE(tel->shouldTracePeer()); + EXPECT_FALSE(tel->shouldTraceLedger()); + + // start/stop should be no-ops without crashing + tel->start(); + tel->stop(); +} diff --git a/src/tests/libxrpl/telemetry/TracingMacros.cpp b/src/tests/libxrpl/telemetry/TracingMacros.cpp new file mode 100644 index 0000000000..a8c1bb5e86 --- /dev/null +++ b/src/tests/libxrpl/telemetry/TracingMacros.cpp @@ -0,0 +1,141 @@ +#include + +#include + +#include + +using namespace xrpl; + +TEST(TracingMacros, macros_with_null_telemetry) +{ + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + tel->start(); + + // Each macro should compile and execute without crashing. + { + XRPL_TRACE_RPC(*tel, "rpc.test.command"); + XRPL_TRACE_SET_ATTR("xrpl.rpc.command", "test"); + XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success"); + } + { + XRPL_TRACE_TX(*tel, "tx.test.process"); + XRPL_TRACE_SET_ATTR("xrpl.tx.hash", "abc123"); + } + { + XRPL_TRACE_CONSENSUS(*tel, "consensus.test"); + XRPL_TRACE_SET_ATTR("xrpl.consensus.mode", "proposing"); + } + { + XRPL_TRACE_PEER(*tel, "peer.test"); + } + { + XRPL_TRACE_LEDGER(*tel, "ledger.test"); + } + + tel->stop(); +} + +TEST(TracingMacros, separate_scopes) +{ + // Multiple macros in separate scopes should not collide on + // the _xrpl_guard_ variable name. + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + { + XRPL_TRACE_RPC(*tel, "rpc.outer"); + } + { + XRPL_TRACE_TX(*tel, "tx.inner"); + } + { + XRPL_TRACE_CONSENSUS(*tel, "consensus.other"); + } +} + +TEST(TracingMacros, conditional_guards) +{ + // NullTelemetry returns false for all shouldTrace* methods. + // XRPL_TRACE_SET_ATTR on an empty guard must be safe. + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + EXPECT_FALSE(tel->shouldTraceRpc()); + EXPECT_FALSE(tel->shouldTraceTransactions()); + EXPECT_FALSE(tel->shouldTraceConsensus()); + EXPECT_FALSE(tel->shouldTracePeer()); + EXPECT_FALSE(tel->shouldTraceLedger()); + + { + XRPL_TRACE_RPC(*tel, "should.not.create"); + XRPL_TRACE_SET_ATTR("key", "value"); + } +} + +#ifdef XRPL_ENABLE_TELEMETRY + +TEST(TracingMacros, span_guard_raii) +{ + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + auto span = tel->startSpan("test.guard"); + { + telemetry::SpanGuard guard(span); + guard.setAttribute("key", "value"); + guard.addEvent("test_event"); + guard.setOk(); + } +} + +TEST(TracingMacros, span_guard_move) +{ + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + auto span = tel->startSpan("test.move"); + std::optional opt; + opt.emplace(span); + EXPECT_TRUE(opt.has_value()); + opt.reset(); +} + +TEST(TracingMacros, span_guard_exception) +{ + telemetry::Telemetry::Setup setup; + setup.enabled = false; + beast::Journal::Sink& sink = beast::Journal::getNullSink(); + beast::Journal j(sink); + auto tel = telemetry::make_Telemetry(setup, j); + + auto span = tel->startSpan("test.exception"); + { + telemetry::SpanGuard guard(span); + try + { + throw std::runtime_error("test error"); + } + catch (std::exception const& e) + { + guard.recordException(e); + } + } +} + +#endif // XRPL_ENABLE_TELEMETRY diff --git a/src/tests/libxrpl/telemetry/main.cpp b/src/tests/libxrpl/telemetry/main.cpp new file mode 100644 index 0000000000..5142bbe08a --- /dev/null +++ b/src/tests/libxrpl/telemetry/main.cpp @@ -0,0 +1,8 @@ +#include + +int +main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/xrpld/rpc/detail/RPCHandler.cpp b/src/xrpld/rpc/detail/RPCHandler.cpp index 6538d83d37..0a1f55a605 100644 --- a/src/xrpld/rpc/detail/RPCHandler.cpp +++ b/src/xrpld/rpc/detail/RPCHandler.cpp @@ -175,10 +175,13 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object& auto ret = method(context, result); auto end = std::chrono::system_clock::now(); + [[maybe_unused]] auto const durationMs = + std::chrono::duration(end - start).count(); JLOG(context.j.debug()) << "RPC call " << name << " completed in " << ((end - start).count() / 1000000000.0) << "seconds"; perfLog.rpcFinish(name, curId); XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success"); + XRPL_TRACE_SET_ATTR("xrpl.rpc.duration_ms", durationMs); return ret; } catch (std::exception& e) @@ -187,6 +190,7 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object& JLOG(context.j.info()) << "Caught throw: " << e.what(); XRPL_TRACE_EXCEPTION(e); XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error"); + XRPL_TRACE_SET_ATTR("xrpl.rpc.error_message", e.what()); if (context.loadType == Resource::feeReferenceRPC) context.loadType = Resource::feeExceptionRPC; diff --git a/src/xrpld/telemetry/TracingInstrumentation.h b/src/xrpld/telemetry/TracingInstrumentation.h index d7d2ecf912..39177ea95e 100644 --- a/src/xrpld/telemetry/TracingInstrumentation.h +++ b/src/xrpld/telemetry/TracingInstrumentation.h @@ -81,6 +81,30 @@ namespace telemetry { _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ } +/** Conditionally start a span for peer message tracing. + The span is only created if shouldTracePeer() returns true. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. +*/ +#define XRPL_TRACE_PEER(_tel_obj_, _span_name_) \ + std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \ + if ((_tel_obj_).shouldTracePeer()) \ + { \ + _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ + } + +/** Conditionally start a span for ledger tracing. + The span is only created if shouldTraceLedger() returns true. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. +*/ +#define XRPL_TRACE_LEDGER(_tel_obj_, _span_name_) \ + std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \ + if ((_tel_obj_).shouldTraceLedger()) \ + { \ + _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ + } + /** Set a key-value attribute on the current span (if it exists). Must be used after one of the XRPL_TRACE_* span macros. */ @@ -109,6 +133,8 @@ namespace telemetry { #define XRPL_TRACE_RPC(_tel_obj_, _span_name_) ((void)0) #define XRPL_TRACE_TX(_tel_obj_, _span_name_) ((void)0) #define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) ((void)0) +#define XRPL_TRACE_PEER(_tel_obj_, _span_name_) ((void)0) +#define XRPL_TRACE_LEDGER(_tel_obj_, _span_name_) ((void)0) #define XRPL_TRACE_SET_ATTR(key, value) ((void)0) #define XRPL_TRACE_EXCEPTION(e) ((void)0)