From dff32cd9acd8f1ebf15df5a94230f8c0e315bb76 Mon Sep 17 00:00:00 2001 From: Dmitrii Kostyrev Date: Mon, 15 Dec 2025 23:02:12 +0000 Subject: [PATCH] Introduce load-balanced channel for OpenTelemetry exporters Add client-side load balancing to OTLP gRPC connections using ginepro. When NL_OTEL_ENDPOINT is set, the telemetry system creates a load-balanced channel shared across log, trace, and metric exporters. This enables better distribution of telemetry traffic across multiple OTLP collector instances and improves overall system resilience. - Add ginepro dependency for gRPC load balancing - Upgrade OpenTelemetry dependencies from 0.29 to 0.30 - Change init_tracing() to async to support channel initialization - Add NL_OTEL_ENDPOINT environment variable for configuration - Update all OTLP exporters to use shared load-balanced channel --- Cargo.lock | 351 +++++++++++++++++++++++-------- nativelink-scheduler/Cargo.toml | 4 +- nativelink-service/Cargo.toml | 4 +- nativelink-store/Cargo.toml | 2 +- nativelink-util/BUILD.bazel | 2 + nativelink-util/Cargo.toml | 16 +- nativelink-util/src/telemetry.rs | 63 +++++- nativelink-worker/Cargo.toml | 2 +- src/bin/nativelink.rs | 2 +- src/bin/redis_store_tester.rs | 2 +- 10 files changed, 340 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 75fdcd025..60a97b691 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -617,7 +617,7 @@ dependencies = [ "pin-project-lite", "serde_core", "sync_wrapper", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", ] @@ -1105,6 +1105,30 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1394,6 +1418,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -1712,10 +1748,21 @@ dependencies = [ ] [[package]] -name = "glob" -version = "0.3.3" +name = "ginepro" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +checksum = "9197cb67b35f86badd2e5a66c3a651d037a398247a394399d80700ef07ba662b" +dependencies = [ + "anyhow", + "async-trait", + "hickory-resolver", + "http 1.3.1", + "thiserror 2.0.17", + "tokio", + "tonic", + "tower", + "tracing", +] [[package]] name = "group" @@ -1812,6 +1859,52 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hickory-proto" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "once_cell", + "rand 0.9.2", + "ring", + "thiserror 2.0.17", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "moka", + "once_cell", + "parking_lot", + "rand 0.9.2", + "resolv-conf", + "smallvec", + "thiserror 2.0.17", + "tokio", + "tracing", +] + [[package]] name = "hkdf" version = "0.12.4" @@ -2176,6 +2269,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "ipconfig" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" +dependencies = [ + "socket2 0.5.10", + "widestring", + "windows-sys 0.48.0", + "winreg", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -2530,6 +2635,24 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e1d4c44418358edcac6e1d9ce59cea7fb38052429c7704033f1196f0c179e6a" +[[package]] +name = "moka" +version = "0.12.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8261cd88c312e0004c1d51baad2980c66528dfdb2bee62003e643a4d8f86b077" +dependencies = [ + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "equivalent", + "parking_lot", + "portable-atomic", + "rustc_version", + "smallvec", + "tagptr", + "uuid", +] + [[package]] name = "mongocrypt" version = "0.3.1" @@ -2637,8 +2760,8 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls", - "tonic 0.13.1", - "tower 0.5.2", + "tonic", + "tower", "tracing", ] @@ -2673,7 +2796,7 @@ dependencies = [ "serde", "serde_json5", "tokio", - "tonic 0.13.1", + "tonic", "url", "uuid", "walkdir", @@ -2716,7 +2839,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "tonic 0.13.1", + "tonic", "tonic-build", ] @@ -2762,7 +2885,7 @@ dependencies = [ "static_assertions", "tokio", "tokio-stream", - "tonic 0.13.1", + "tonic", "tracing", "tracing-test", "uuid", @@ -2801,8 +2924,8 @@ dependencies = [ "sha2", "tokio", "tokio-stream", - "tonic 0.13.1", - "tower 0.5.2", + "tonic", + "tower", "tracing", "tracing-test", "uuid", @@ -2867,7 +2990,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic 0.13.1", + "tonic", "tracing", "tracing-test", "url", @@ -2885,6 +3008,7 @@ dependencies = [ "blake3", "bytes", "futures", + "ginepro", "hex", "http-body-util", "humantime", @@ -2919,12 +3043,13 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tonic 0.13.1", - "tower 0.5.2", + "tonic", + "tower", "tracing", "tracing-opentelemetry", "tracing-subscriber", "tracing-test", + "url", "uuid", "walkdir", ] @@ -2961,7 +3086,7 @@ dependencies = [ "tempfile", "tokio", "tokio-stream", - "tonic 0.13.1", + "tonic", "tracing", "tracing-test", "uuid", @@ -3064,6 +3189,10 @@ name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +dependencies = [ + "critical-section", + "portable-atomic", +] [[package]] name = "once_cell_polyfill" @@ -3079,9 +3208,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "opentelemetry" -version = "0.29.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" dependencies = [ "futures-core", "futures-sink", @@ -3093,9 +3222,9 @@ dependencies = [ [[package]] name = "opentelemetry-appender-tracing" -version = "0.29.1" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e716f864eb23007bdd9dc4aec381e188a1cee28eecf22066772b5fd822b9727d" +checksum = "e68f63eca5fad47e570e00e893094fc17be959c80c79a7d6ec1abdd5ae6ffc16" dependencies = [ "opentelemetry", "tracing", @@ -3105,9 +3234,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" +checksum = "50f6639e842a97dbea8886e3439710ae463120091e2e064518ba8e716e6ac36d" dependencies = [ "async-trait", "bytes", @@ -3117,11 +3246,10 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" +checksum = "dbee664a43e07615731afc539ca60c6d9f1a9425e25ca09c57bc36c87c55852b" dependencies = [ - "futures-core", "http 1.3.1", "opentelemetry", "opentelemetry-proto", @@ -3129,37 +3257,36 @@ dependencies = [ "prost", "thiserror 2.0.17", "tokio", - "tonic 0.12.3", + "tonic", ] [[package]] name = "opentelemetry-proto" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" +checksum = "2e046fd7660710fe5a05e8748e70d9058dc15c94ba914e7c4faa7c728f0e8ddc" dependencies = [ "opentelemetry", "opentelemetry_sdk", "prost", - "tonic 0.12.3", + "tonic", ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b29a9f89f1a954936d5aa92f19b2feec3c8f3971d3e96206640db7f9706ae3" +checksum = "83d059a296a47436748557a353c5e6c5705b9470ef6c95cfc52c21a8814ddac2" [[package]] name = "opentelemetry_sdk" -version = "0.29.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" dependencies = [ "futures-channel", "futures-executor", "futures-util", - "glob", "opentelemetry", "percent-encoding", "rand 0.9.2", @@ -3381,6 +3508,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "potential_utf" version = "0.1.3" @@ -3793,7 +3926,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "url", @@ -3819,6 +3952,12 @@ dependencies = [ "tower-service", ] +[[package]] +name = "resolv-conf" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" + [[package]] name = "rfc6979" version = "0.4.0" @@ -4484,6 +4623,12 @@ dependencies = [ "syn", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "take_mut" version = "0.2.2" @@ -4695,33 +4840,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tonic" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper 1.7.0", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "tokio", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", - "zstd", -] - [[package]] name = "tonic" version = "0.13.1" @@ -4748,10 +4866,11 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-stream", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", + "zstd", ] [[package]] @@ -4768,26 +4887,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tower" version = "0.5.2" @@ -4820,7 +4919,7 @@ dependencies = [ "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", ] @@ -4882,9 +4981,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" dependencies = [ "js-sys", "once_cell", @@ -5271,6 +5370,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + [[package]] name = "winapi-util" version = "0.1.11" @@ -5348,6 +5453,15 @@ dependencies = [ "windows-targets 0.42.2", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -5399,6 +5513,21 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5438,6 +5567,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5456,6 +5591,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5474,6 +5615,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5504,6 +5651,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5522,6 +5675,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5540,6 +5699,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5558,6 +5723,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -5570,6 +5741,16 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git a/nativelink-scheduler/Cargo.toml b/nativelink-scheduler/Cargo.toml index 5f98f9fd8..079655b76 100644 --- a/nativelink-scheduler/Cargo.toml +++ b/nativelink-scheduler/Cargo.toml @@ -20,8 +20,8 @@ bytes = { version = "1.10.1", default-features = false } futures = { version = "0.3.31", default-features = false } lru = { version = "0.16.0", default-features = false } mock_instant = { version = "0.5.3", default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.30.0", default-features = false } +opentelemetry-semantic-conventions = { version = "0.30.0", default-features = false, features = [ "default", "semconv_experimental", ] } diff --git a/nativelink-service/Cargo.toml b/nativelink-service/Cargo.toml index 3f14715d1..300388228 100644 --- a/nativelink-service/Cargo.toml +++ b/nativelink-service/Cargo.toml @@ -20,8 +20,8 @@ bytes = { version = "1.10.1", default-features = false } futures = { version = "0.3.31", default-features = false } http-body-util = { version = "0.1.3", default-features = false } hyper = { version = "1.6.0", default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.30.0", default-features = false } +opentelemetry-semantic-conventions = { version = "0.30.0", default-features = false, features = [ "default", "semconv_experimental", ] } diff --git a/nativelink-store/Cargo.toml b/nativelink-store/Cargo.toml index 5a0a62928..455fc2d03 100644 --- a/nativelink-store/Cargo.toml +++ b/nativelink-store/Cargo.toml @@ -64,7 +64,7 @@ mongodb = { version = "3", features = [ "compat-3-0-0", "rustls-tls", ], default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } +opentelemetry = { version = "0.30.0", default-features = false } parking_lot = { version = "0.12.3", features = [ "arc_lock", "send_guard", diff --git a/nativelink-util/BUILD.bazel b/nativelink-util/BUILD.bazel index 89fe53937..fede077c2 100644 --- a/nativelink-util/BUILD.bazel +++ b/nativelink-util/BUILD.bazel @@ -55,6 +55,7 @@ rust_library( "@crates//:blake3", "@crates//:bytes", "@crates//:futures", + "@crates//:ginepro", "@crates//:hex", "@crates//:humantime", "@crates//:hyper-1.7.0", @@ -84,6 +85,7 @@ rust_library( "@crates//:tracing", "@crates//:tracing-opentelemetry", "@crates//:tracing-subscriber", + "@crates//:url", "@crates//:uuid", "@crates//:walkdir", ], diff --git a/nativelink-util/Cargo.toml b/nativelink-util/Cargo.toml index 38235efc5..5a2f1eaeb 100644 --- a/nativelink-util/Cargo.toml +++ b/nativelink-util/Cargo.toml @@ -27,21 +27,21 @@ hyper-util = { version = "0.1.11", default-features = false } libc = { version = "0.2.177", default-features = false } lru = { version = "0.16.0", default-features = false } mock_instant = { version = "0.5.3", default-features = false } -opentelemetry = { version = "0.29.0", default-features = false } -opentelemetry-appender-tracing = { version = "0.29.1", default-features = false } -opentelemetry-http = { version = "0.29.0", default-features = false } -opentelemetry-otlp = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.30.0", default-features = false } +opentelemetry-appender-tracing = { version = "0.30.0", default-features = false } +opentelemetry-http = { version = "0.30.0", default-features = false } +opentelemetry-otlp = { version = "0.30.0", default-features = false, features = [ "grpc-tonic", "logs", "metrics", "trace", "zstd-tonic", ] } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry-semantic-conventions = { version = "0.30.0", default-features = false, features = [ "default", "semconv_experimental", ] } -opentelemetry_sdk = { version = "0.29.0", default-features = false } +opentelemetry_sdk = { version = "0.30.0", default-features = false } parking_lot = { version = "0.12.3", features = [ "arc_lock", "send_guard", @@ -77,7 +77,7 @@ tonic = { version = "0.13.0", features = [ ], default-features = false } tower = { version = "0.5.2", default-features = false } tracing = { version = "0.1.41", default-features = false } -tracing-opentelemetry = { version = "0.30.0", default-features = false, features = [ +tracing-opentelemetry = { version = "0.31.0", default-features = false, features = [ "metrics", ] } tracing-subscriber = { version = "0.3.19", features = [ @@ -87,6 +87,8 @@ tracing-subscriber = { version = "0.3.19", features = [ ], default-features = false } tracing-test = { version = "0.2.5", default-features = false, features = [] } +ginepro = { version = "0.9.0", default-features = false } +url = { version = "2.5.7", default-features = false } uuid = { version = "1.16.0", default-features = false, features = [ "serde", "v4", diff --git a/nativelink-util/src/telemetry.rs b/nativelink-util/src/telemetry.rs index 344105d86..b7c968b7b 100644 --- a/nativelink-util/src/telemetry.rs +++ b/nativelink-util/src/telemetry.rs @@ -18,6 +18,7 @@ use std::sync::OnceLock; use base64::Engine; use base64::prelude::BASE64_STANDARD_NO_PAD; +use ginepro::LoadBalancedChannel; use hyper::http::Response; use nativelink_error::{Code, ResultExt, make_err}; use nativelink_proto::build::bazel::remote::execution::v2::RequestMetadata; @@ -26,7 +27,9 @@ use opentelemetry::trace::{TraceContextExt, Tracer, TracerProvider}; use opentelemetry::{KeyValue, global}; use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; use opentelemetry_http::HeaderExtractor; -use opentelemetry_otlp::{LogExporter, MetricExporter, Protocol, SpanExporter, WithExportConfig}; +use opentelemetry_otlp::{ + LogExporter, MetricExporter, Protocol, SpanExporter, WithExportConfig, WithTonicConfig, +}; use opentelemetry_sdk::Resource; use opentelemetry_sdk::logs::SdkLoggerProvider; use opentelemetry_sdk::metrics::SdkMeterProvider; @@ -103,7 +106,7 @@ fn tracing_stdout_layer() -> impl Layer { /// /// Returns `Err` if logging was already initialized or if the exporters can't /// be initialized. -pub fn init_tracing() -> Result<(), nativelink_error::Error> { +pub async fn init_tracing() -> Result<(), nativelink_error::Error> { static INITIALIZED: OnceLock<()> = OnceLock::new(); if INITIALIZED.get().is_some() { @@ -128,13 +131,18 @@ pub fn init_tracing() -> Result<(), nativelink_error::Error> { ]); global::set_text_map_propagator(propagator); + let maybe_channel = maybe_load_balanced_channel().await; + // Logs + let mut log_exporter_builder = LogExporter::builder().with_tonic(); + if let Some(channel) = maybe_channel.clone() { + log_exporter_builder = log_exporter_builder.with_channel(channel.into()); + } let otlp_log_layer = OpenTelemetryTracingBridge::new( &SdkLoggerProvider::builder() .with_resource(resource.clone()) .with_batch_exporter( - LogExporter::builder() - .with_tonic() + log_exporter_builder .with_protocol(Protocol::Grpc) .build() .map_err(|e| make_err!(Code::Internal, "{e}")) @@ -145,13 +153,16 @@ pub fn init_tracing() -> Result<(), nativelink_error::Error> { .with_filter(otlp_filter()); // Traces + let mut span_exporter_builder = SpanExporter::builder().with_tonic(); + if let Some(channel) = maybe_channel.clone() { + span_exporter_builder = span_exporter_builder.with_channel(channel.into()); + } let otlp_trace_layer = layer() .with_tracer( SdkTracerProvider::builder() .with_resource(resource.clone()) .with_batch_exporter( - SpanExporter::builder() - .with_tonic() + span_exporter_builder .with_protocol(Protocol::Grpc) .build() .map_err(|e| make_err!(Code::Internal, "{e}")) @@ -163,11 +174,14 @@ pub fn init_tracing() -> Result<(), nativelink_error::Error> { .with_filter(otlp_filter()); // Metrics + let mut metric_exporter_builder = MetricExporter::builder().with_tonic(); + if let Some(channel) = maybe_channel { + metric_exporter_builder = metric_exporter_builder.with_channel(channel.into()); + } let meter_provider = SdkMeterProvider::builder() .with_resource(resource) .with_periodic_exporter( - MetricExporter::builder() - .with_tonic() + metric_exporter_builder .with_protocol(Protocol::Grpc) .build() .map_err(|e| make_err!(Code::Internal, "{e}")) @@ -191,6 +205,38 @@ pub fn init_tracing() -> Result<(), nativelink_error::Error> { Ok(()) } +const NL_OTEL_ENDPOINT: &str = "NL_OTEL_ENDPOINT"; + +async fn maybe_load_balanced_channel() -> Option { + match env::var(NL_OTEL_ENDPOINT) { + Ok(endpoint) => { + let url = Url::parse(endpoint.as_str()) + .map_err(|e| { + make_err!(Code::Internal, "Unable to parse endpoint {endpoint}: {e:?}") + }) + .unwrap(); + + let host = url + .host() + .err_tip(|| format!("Unable to get host from endpoint {endpoint}")) + .unwrap(); + let port = url + .port() + .err_tip(|| format!("Unable to get port from endpoint {endpoint}")) + .unwrap(); + + Some( + LoadBalancedChannel::builder((host.to_string(), port)) + .channel() + .await + .map_err(|e| make_err!(Code::Internal, "Invalid hostname '{endpoint}': {e}")) + .unwrap(), + ) + } + Err(_) => None, + } +} + /// Custom metadata key field for Bazel metadata. const BAZEL_METADATA_KEY: &str = "bazel.metadata"; @@ -201,6 +247,7 @@ const BAZEL_REQUESTMETADATA_HEADER: &str = "build.bazel.remote.execution.v2.requ use opentelemetry::baggage::BaggageExt; use opentelemetry::context::FutureExt; +use url::Url; #[derive(Debug, Clone)] pub struct OtlpMiddleware { diff --git a/nativelink-worker/Cargo.toml b/nativelink-worker/Cargo.toml index 500ab104e..b23a5ccd1 100644 --- a/nativelink-worker/Cargo.toml +++ b/nativelink-worker/Cargo.toml @@ -22,7 +22,7 @@ bytes = { version = "1.10.1", default-features = false } filetime = { version = "0.2.25", default-features = false } formatx = { version = "0.2.3", default-features = false } futures = { version = "0.3.31", default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } +opentelemetry = { version = "0.30.0", default-features = false } parking_lot = { version = "0.12.3", default-features = false } prost = { version = "0.13.5", default-features = false } relative-path = { version = "2.0.0", default-features = false, features = [ diff --git a/src/bin/nativelink.rs b/src/bin/nativelink.rs index cfad2a0e4..c3c7cc4ef 100644 --- a/src/bin/nativelink.rs +++ b/src/bin/nativelink.rs @@ -719,7 +719,7 @@ fn main() -> Result<(), Box> { // The OTLP exporters need to run in a Tokio context // Do this first so all the other logging works #[expect(clippy::disallowed_methods, reason = "tracing init on main runtime")] - runtime.block_on(async { tokio::spawn(async { init_tracing() }).await? })?; + runtime.block_on(async { tokio::spawn(async { init_tracing().await }).await? })?; let mut cfg = get_config()?; diff --git a/src/bin/redis_store_tester.rs b/src/bin/redis_store_tester.rs index 6007cab7f..ee9073b18 100644 --- a/src/bin/redis_store_tester.rs +++ b/src/bin/redis_store_tester.rs @@ -305,7 +305,7 @@ fn main() -> Result<(), Box> { .unwrap() .block_on(async { // The OTLP exporters need to run in a Tokio context. - spawn!("init tracing", async { init_tracing() }) + spawn!("init tracing", async { init_tracing().await }) .await? .expect("Init tracing should work");